Source code for crate_anon.preprocess.tests.systmone_ddgen_tests

"""
crate_anon/preprocess/tests/systmone_ddgen_tests.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

Unit testing.

"""

# =============================================================================
# Imports
# =============================================================================

import csv
import os
from tempfile import NamedTemporaryFile
from typing import List, TYPE_CHECKING
from unittest import mock, TestCase

from crate_anon.anonymise.dd import DataDictionary
from crate_anon.anonymise.ddr import DataDictionaryRow
from crate_anon.preprocess.systmone_ddgen import (
    core_tablename,
    eq,
    eq_re,
    is_free_text,
    is_in_re,
    modify_dd_for_systmone,
    OMIT_AND_IGNORE_TABLES_REGEX,
    SystmOneContext,
    SystmOneSRESpecRow,
)

if TYPE_CHECKING:
    from crate_anon.anonymise.config import Config


# =============================================================================
# Unit tests
# =============================================================================


class SystmOneDDGenTests(TestCase):
    """
    Tests for the table-name matching and free-text detection helpers
    imported from ``crate_anon.preprocess.systmone_ddgen``.
    """

    def test_excluded_tables(self) -> None:
        """
        Test some regex functions for excluding tables.
        """
        context = SystmOneContext.CPFT_DW
        # "S1_ReferralsOpen" is the CPFT version of the table name.
        core_name = core_tablename(
            tablename="S1_ReferralsOpen",
            from_context=context,
            allow_unprefixed=True,
        )
        self.assertTrue(eq(core_name, "ReferralsOpen"))
        self.assertTrue(eq_re(core_name, "ReferralsOpen$"))

        omit_regexes = OMIT_AND_IGNORE_TABLES_REGEX[context]
        # Every one of these names is expected to match the omit list.
        should_be_omitted = (
            core_name,
            "Accommodation_20210329",
            "Accommodation_20210329_blah",
            "S1_Accommodation_20210329",
        )
        for tablename in should_be_omitted:
            self.assertTrue(is_in_re(tablename, omit_regexes))

    def test_freetext_columns(self) -> None:
        """
        Check which table/column pairs are reported as free text in the
        TPP SRE and CPFT contexts.
        """
        sre = SystmOneContext.TPP_SRE
        cpft = SystmOneContext.CPFT_DW

        # Free-text columns in all environments:
        for context in (sre, cpft):
            self.assertTrue(is_free_text("FreeText", "FreeText", context))

        # CPFT but not SRE environment:
        triage_table = "FreeText_CYPFRS_TelephoneTriage"
        triage_column = "RiskofAbsconding"
        self.assertTrue(is_free_text(triage_table, triage_column, cpft))
        self.assertFalse(is_free_text(triage_table, triage_column, sre))

        # Not even in CPFT:
        self.assertFalse(
            is_free_text("FreeText_Honos_Scoring_Answers", "FreeText", cpft)
        )
class SystmOneDDGenTestCase(TestCase):
    """
    Base class for tests that need a template SRE specification row and a
    SystmOne context.
    """

    def setUp(self) -> None:
        """
        Create a blank specification row dictionary and select the CPFT
        context.
        """
        super().setUp()

        self.context = SystmOneContext.CPFT_DW

        # Template row with every value blank/zero except DateDefining.
        # Key order matters: subclasses use the keys as CSV field names.
        self.src_spec_row_dict = {
            "TableName": "",
            "TableDescription": "",
            "ColumnName": "",
            "ColumnDescription": "",
            "ColumnDataType": "",
            "ColumnLength": 0,
            "DateDefining": "Yes",
            "ColumnOrdinal": 0,
            "LinkedTable": "",
            "LinkedColumn1": "",
            "LinkedColumn2": "",
        }
class SystmOneSRESpecRowTests(SystmOneDDGenTestCase):
    """
    Tests for the comment and description strings produced by
    ``SystmOneSRESpecRow``.
    """

    def _patient_id_spec_row(self) -> SystmOneSRESpecRow:
        """
        Fill the template dictionary with the SRPatient.IDPatient details
        used by both tests and wrap it in a ``SystmOneSRESpecRow``.
        """
        self.src_spec_row_dict.update(
            TableName="SRPatient",
            ColumnName="IDPatient",
            TableDescription="SRPatient description from spec",
            ColumnDescription="IDPatient description from spec",
        )
        return SystmOneSRESpecRow(self.src_spec_row_dict)

    def test_comment_has_table_and_column_descriptions(self) -> None:
        """
        The comment should combine the table and column descriptions.
        """
        spec_row = self._patient_id_spec_row()
        expected = (
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )

        self.assertEqual(spec_row.comment(self.context), expected)

    def test_description_has_translated_table_column_and_spec_descriptions(
        self,
    ) -> None:
        """
        The description should start with "S1_Patient.IDPatient", followed
        by the table and column descriptions.
        """
        spec_row = self._patient_id_spec_row()
        expected = (
            "S1_Patient.IDPatient // "
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )

        actual = spec_row.description(self.context)
        self.assertEqual(actual, expected)
class TestDataDictionary(DataDictionary):
    """
    Data dictionary whose rows are supplied directly by the caller, for use
    as a test fixture.
    """

    # This is a helper, not a test case. Its name starts with "Test" and it
    # defines __init__, so pytest would otherwise try to collect it and emit
    # a collection warning.
    __test__ = False

    def __init__(
        self, config: "Config", rows: List[DataDictionaryRow]
    ) -> None:
        """
        Args:
            config: passed through to ``DataDictionary.__init__``
            rows: rows to store as ``self.rows``
        """
        super().__init__(config)
        self.rows = rows
class ModifyDDForSystmOneTests(SystmOneDDGenTestCase):
    """
    Tests for ``modify_dd_for_systmone()`` when it is given an SRE
    specification CSV file and ``append_comments=True``.
    """

    @staticmethod
    def _make_dd_row(
        config: mock.Mock, src_field: str, comment: str
    ) -> DataDictionaryRow:
        """
        Build a data dictionary row for table "S1_Patient" in database
        "Source", with the given source field name and comment.
        """
        ddr = DataDictionaryRow(config)
        ddr.src_db = "Source"
        ddr.src_table = "S1_Patient"
        ddr.src_field = src_field
        ddr.comment = comment
        return ddr

    def _write_spec_csv(self) -> str:
        """
        Write a specification CSV describing SRPatient.IDPatient and
        SRPatient.NHSNumber, and return its filename.

        The file is created with ``delete=False`` so that it can be reopened
        by name once closed; it is removed again when the test finishes.
        """
        fieldnames = list(self.src_spec_row_dict)
        # newline="" is what the csv module asks for on files it writes to.
        with NamedTemporaryFile(delete=False, mode="w", newline="") as f:
            # Register cleanup straight away so the file never outlives the
            # test, even if writing fails part-way through.
            self.addCleanup(os.remove, f.name)
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for column_name in ("IDPatient", "NHSNumber"):
                spec_row = self.src_spec_row_dict.copy()
                spec_row.update(
                    TableName="SRPatient",
                    ColumnName=column_name,
                    TableDescription="SRPatient description from spec",
                    ColumnDescription=f"{column_name} description from spec",
                )
                writer.writerow(spec_row)

        return f.name

    def _assert_column_comments(self, dd: TestDataDictionary) -> None:
        """
        Check that rows 1 and 2 of the data dictionary hold the original
        column comments with the table and column descriptions appended.
        """
        self.assertEqual(
            dd.rows[1].comment,
            (
                "IDPatient comment // "
                "TABLE: SRPatient description from spec // "
                "COLUMN: IDPatient description from spec"
            ),
        )
        self.assertEqual(
            dd.rows[2].comment,
            (
                "NHSNumber comment // "
                "TABLE: SRPatient description from spec // "
                "COLUMN: NHSNumber description from spec"
            ),
        )

    def test_table_comments_from_spec_added_to_data_dictionary(self) -> None:
        """
        With no existing table comment row, one should be added that
        carries the table description from the specification.
        """
        mock_config = mock.Mock()
        dd = TestDataDictionary(
            mock_config,
            [
                self._make_dd_row(
                    mock_config, "IDPatient", "IDPatient comment"
                ),
                self._make_dd_row(
                    mock_config, "NHSNumber", "NHSNumber comment"
                ),
            ],
        )

        modify_dd_for_systmone(
            dd,
            SystmOneContext.CPFT_DW,
            sre_spec_csv_filename=self._write_spec_csv(),
            append_comments=True,
        )

        self.assertEqual(len(dd.rows), 3)
        # Comment row is sorted to the top
        self.assertEqual(dd.rows[0].comment, "SRPatient description from spec")
        self._assert_column_comments(dd)

    def test_ddr_existing_table_comment_appended_with_spec_description(
        self,
    ) -> None:
        """
        With an existing table comment row (blank source field), the table
        description from the specification should be appended to it rather
        than a new row being added.
        """
        mock_config = mock.Mock()
        dd = TestDataDictionary(
            mock_config,
            [
                self._make_dd_row(
                    mock_config, "IDPatient", "IDPatient comment"
                ),
                self._make_dd_row(
                    mock_config, "NHSNumber", "NHSNumber comment"
                ),
                # A blank source field marks the table-level comment row.
                self._make_dd_row(mock_config, "", "Existing table comment"),
            ],
        )

        modify_dd_for_systmone(
            dd,
            SystmOneContext.CPFT_DW,
            sre_spec_csv_filename=self._write_spec_csv(),
            append_comments=True,
        )

        self.assertEqual(len(dd.rows), 3)
        # Comment row is sorted to the top
        self.assertEqual(
            dd.rows[0].comment,
            "Existing table comment // SRPatient description from spec",
        )
        self._assert_column_comments(dd)