Source code for crate_anon.anonymise.tests.researcher_report_tests

"""
crate_anon/anonymise/tests/researcher_report_tests.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

Researcher report tests.

"""

import os.path
import random
from tempfile import TemporaryDirectory
from typing import List, TYPE_CHECKING
from unittest import mock

import factory
from pypdf import PdfReader
import pytest
from sqlalchemy import (
    Column,
    DateTime,
    ForeignKey,
    Integer,
    Text,
)
from sqlalchemy.orm import relationship

from crate_anon.anonymise.researcher_report import (
    mk_researcher_report_pdf,
    ResearcherReportConfig,
    TEMPLATE_DIR,
)
from crate_anon.testing import AnonTestBase
from crate_anon.testing.classes import DatabaseTestCase
from crate_anon.testing.factories import AnonTestBaseFactory, Fake
from crate_anon.testing.models import SexColType

if TYPE_CHECKING:
    from django.conf import LazySettings
    from factory.builder import Resolver


class AnonNote(AnonTestBase):
    """
    ORM model for the ``anon_note`` table, declared on ``AnonTestBase``.

    Each row stores one free-text note with its date/time and refers to a
    row of ``anon_patient`` through the ``patient_id`` foreign key.
    """

    __tablename__ = "anon_note"

    # Columns are declared in the order they should appear in the table.
    note_id = Column(
        Integer,
        primary_key=True,
        comment="Note ID",
    )
    patient_id = Column(
        Integer,
        ForeignKey("anon_patient.patient_id"),
        comment="Patient ID",
    )
    note = Column(
        Text,
        comment="Text of the note",
    )
    note_datetime = Column(
        DateTime,
        comment="Date/time of the note",
    )

    # ORM-level link to the patient row referenced by patient_id.
    patient = relationship("AnonPatient")
class AnonPatient(AnonTestBase):
    """
    ORM model for the ``anon_patient`` table, declared on ``AnonTestBase``.

    Holds one row per patient: an explicitly supplied ID, sex and age.
    """

    __tablename__ = "anon_patient"

    # autoincrement is disabled: the ID value is provided by whoever creates
    # the row (in this module, AnonPatientFactory) rather than by the database.
    patient_id = Column(
        Integer,
        primary_key=True,
        autoincrement=False,
        comment="Patient ID",
    )
    sex = Column(SexColType, comment="Sex (M, F, X)")
    age = Column(Integer, comment="Age")
class AnonPatientFactory(AnonTestBaseFactory):
    """
    Factory Boy factory that builds :class:`AnonPatient` rows.

    Pass ``notes=<count>`` when creating a patient to have that many
    :class:`AnonNote` rows created for it as well.
    """

    class Meta:
        model = AnonPatient

    # Patient ID is one more than the factory's sequence counter.
    patient_id = factory.Sequence(lambda counter: counter + 1)
    sex = factory.LazyFunction(Fake.en_gb.sex)
    age = factory.LazyFunction(Fake.en_gb.age)

    @factory.post_generation
    def notes(
        obj: "Resolver",
        create: bool,
        extracted: int,
        **kwargs,
    ) -> None:
        """
        Optionally create notes belonging to the newly generated patient.

        Args:
            obj: the object just produced by the factory; passed on as the
                ``patient`` of every note.
            create: nothing is done unless this is true.
            extracted: the value given as ``notes=...``; used as the number
                of notes to create. Nothing is done if it is falsy.
            **kwargs: forwarded unchanged to ``AnonNoteFactory.create_batch``.
        """
        if create and extracted:
            AnonNoteFactory.create_batch(size=extracted, patient=obj, **kwargs)
class AnonNoteFactory(AnonTestBaseFactory):
    """
    Factory Boy factory that builds :class:`AnonNote` rows.

    The ``words_per_note`` parameter (default 100) caps the number of words
    in the generated note text.
    """

    class Meta:
        model = AnonNote

    class Params:
        words_per_note = 100

    note_datetime = factory.LazyFunction(Fake.en_gb.incrementing_date)

    @factory.lazy_attribute
    def note(obj: "Resolver") -> str:
        """
        Generate the note text: at most ``words_per_note`` words, joined by
        single spaces.
        """
        word_limit = obj.words_per_note
        # Use en_US because you get Lorem ipsum with en_GB.
        # Requesting half as many sentences as words gives way more text
        # than we need; the surplus is trimmed off below.
        words = Fake.en_us.paragraph(nb_sentences=word_limit / 2).split()
        return " ".join(words[:word_limit])
@pytest.fixture
def django_test_settings(settings: "LazySettings") -> None:
    """
    Point the Django template engine at the researcher report templates.

    Replaces ``settings.TEMPLATES`` with a single Django-templates backend
    whose only search directory is ``TEMPLATE_DIR``.

    Args:
        settings: the Django settings object to modify (presumably supplied
            by the pytest-django ``settings`` fixture -- confirm).
    """
    report_template_backend = {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [TEMPLATE_DIR],
    }
    settings.TEMPLATES = [report_template_backend]
class ResearcherReportTests(DatabaseTestCase):
    """
    Tests that the researcher report PDF contains a page for each table,
    reporting the correct number of rows.
    """

    def setUp(self) -> None:
        """
        Fill the test database with patients and notes, and create a
        temporary directory for the generated report.
        """
        super().setUp()

        self.num_patients = 100
        self.notes_per_patient = 5

        seed = 1234
        # Seed both the global python RNG and Faker's RNG as we don't use Faker
        # for everything and Factory Boy's interface with Faker doesn't seem to
        # allow for sharing with the global RNG used by python (though Faker on
        # its own does). The value of the seed isn't particularly important
        # unless we're checking particular details but it's better to have one
        # for consistency of tests.
        random.seed(seed)
        factory.random.reseed_random(seed)

        AnonPatientFactory.create_batch(
            self.num_patients, notes=self.notes_per_patient
        )
        self.anon_dbsession.commit()

        self.tempdir = TemporaryDirectory()
        # Without this the directory (and the PDF written into it) would be
        # left on disk after every test. Cleanups run whether the test passes
        # or fails.
        self.addCleanup(self.tempdir.cleanup)

    @pytest.mark.usefixtures("django_test_settings")
    def test_report_has_pages_for_each_table(self) -> None:
        """
        Generate the report for ``anon_patient`` and ``anon_note`` and check
        that each table has a page whose row count matches what ``setUp``
        inserted.
        """

        def index_of_list_substring(items: List[str], substr: str) -> int:
            """
            Return the index of the first item containing ``substr``, or -1
            if no item contains it.
            """
            for i, item in enumerate(items):
                if substr in item:
                    return i

            return -1

        anon_config = mock.Mock()

        reportfilename = os.path.join(self.tempdir.name, "tmpreport.pdf")
        # The handle itself is never written to; opening the file just makes
        # sure it exists (empty) while the report is generated.
        with open(reportfilename, mode="w"):
            mock_db = mock.Mock(
                table_names=["anon_patient", "anon_note"],
                metadata=AnonTestBase.metadata,
            )

            # Replace __post_init__ with a mock so it does not run; the
            # database session and database object are injected by hand
            # below instead.
            with mock.patch.multiple(
                "crate_anon.anonymise.researcher_report.ResearcherReportConfig",  # noqa: E501
                __post_init__=mock.Mock(),
            ):
                report_config = ResearcherReportConfig(
                    output_filename=reportfilename,
                    anonconfig=anon_config,
                    use_dd=False,
                )
                report_config.db_session = self.anon_dbsession
                report_config.db = mock_db
                mk_researcher_report_pdf(report_config)

        with open(reportfilename, "rb") as f:
            reader = PdfReader(f)

            patient_found = False
            note_found = False

            for page in reader.pages:
                lines = page.extract_text().replace("\t", " ").splitlines()
                # Sometimes spaces come back as tabs; fix that.
                rows_index = index_of_list_substring(
                    lines,
                    "Number of rows in this table:",
                )
                # The label text here is from
                # crate_anon/anonymise/templates/researcher_report/table.html.
                if rows_index < 0:
                    # Not a table page.
                    continue

                # The count is on the line following the label; the table
                # name is the first line of the page.
                num_rows = int(lines[rows_index + 1])
                table_name = lines[0]

                if table_name == "anon_patient":
                    patient_found = True
                    self.assertEqual(num_rows, self.num_patients)

                elif table_name == "anon_note":
                    note_found = True
                    self.assertEqual(
                        num_rows, self.num_patients * self.notes_per_patient
                    )

            self.assertTrue(
                patient_found, "No report page found for anon_patient"
            )
            self.assertTrue(note_found, "No report page found for anon_note")