"""
crate_anon/preprocess/tests/systmone_ddgen_tests.py
===============================================================================
Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
This file is part of CRATE.
CRATE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CRATE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CRATE. If not, see <https://www.gnu.org/licenses/>.
===============================================================================
Unit testing.
"""
# =============================================================================
# Imports
# =============================================================================
import csv
import os
from tempfile import NamedTemporaryFile
from typing import List, TYPE_CHECKING
from unittest import mock, TestCase
from crate_anon.anonymise.dd import DataDictionary
from crate_anon.anonymise.ddr import DataDictionaryRow
from crate_anon.preprocess.systmone_ddgen import (
core_tablename,
eq,
eq_re,
is_free_text,
is_in_re,
modify_dd_for_systmone,
OMIT_AND_IGNORE_TABLES_REGEX,
SystmOneContext,
SystmOneSRESpecRow,
)
if TYPE_CHECKING:
from crate_anon.anonymise.config import Config
# =============================================================================
# Unit tests
# =============================================================================
class SystmOneDDGenTests(TestCase):
    """
    Checks on the table-name and free-text helper functions imported from
    ``crate_anon.preprocess.systmone_ddgen``.
    """

    def test_excluded_tables(self) -> None:
        """
        Check the regex helpers used when deciding which tables to exclude.
        """
        context = SystmOneContext.CPFT_DW

        # "S1_ReferralsOpen" is the CPFT version of the table name.
        core_name = core_tablename(
            tablename="S1_ReferralsOpen",
            from_context=context,
            allow_unprefixed=True,
        )
        self.assertTrue(eq(core_name, "ReferralsOpen"))
        self.assertTrue(eq_re(core_name, "ReferralsOpen$"))

        # Every one of these names is expected to match the omit/ignore
        # regexes for the CPFT context.
        omit_regexes = OMIT_AND_IGNORE_TABLES_REGEX[context]
        names_expected_to_match = (
            core_name,
            "Accommodation_20210329",
            "Accommodation_20210329_blah",
            "S1_Accommodation_20210329",
        )
        for tablename in names_expected_to_match:
            self.assertTrue(is_in_re(tablename, omit_regexes))

    def test_freetext_columns(self) -> None:
        """
        Check which table/column pairs are reported as free text in the
        TPP SRE and CPFT contexts.
        """
        sre_context = SystmOneContext.TPP_SRE
        cpft_context = SystmOneContext.CPFT_DW

        # Free-text columns in all environments:
        for each_context in (sre_context, cpft_context):
            found = is_free_text("FreeText", "FreeText", each_context)
            self.assertTrue(found)

        # CPFT but not SRE environment:
        triage_table = "FreeText_CYPFRS_TelephoneTriage"
        triage_column = "RiskofAbsconding"
        self.assertTrue(
            is_free_text(triage_table, triage_column, cpft_context)
        )
        self.assertFalse(
            is_free_text(triage_table, triage_column, sre_context)
        )

        # Not even in CPFT:
        honos_is_free_text = is_free_text(
            "FreeText_Honos_Scoring_Answers", "FreeText", cpft_context
        )
        self.assertFalse(honos_is_free_text)
class SystmOneDDGenTestCase(TestCase):
    """
    Base class providing a template specification row and a default
    SystmOne context for the test classes that derive from it.
    """

    def setUp(self) -> None:
        """
        Create a fresh template row and context before each test.
        """
        super().setUp()

        self.context = SystmOneContext.CPFT_DW

        # Template for one specification row. Tests copy/update this, and
        # its key order is used as the CSV column order by tests that write
        # the row to file, so the order here is deliberate.
        self.src_spec_row_dict = {
            "TableName": "",
            "TableDescription": "",
            "ColumnName": "",
            "ColumnDescription": "",
            "ColumnDataType": "",
            "ColumnLength": 0,
            "DateDefining": "Yes",
            "ColumnOrdinal": 0,
            "LinkedTable": "",
            "LinkedColumn1": "",
            "LinkedColumn2": "",
        }
class SystmOneSRESpecRowTests(SystmOneDDGenTestCase):
    """
    Tests of the comment and description text produced by
    ``SystmOneSRESpecRow``.
    """

    def _patient_id_spec_row(self) -> SystmOneSRESpecRow:
        """
        Fill the template row with the SRPatient.IDPatient details shared by
        the tests in this class and wrap it in a ``SystmOneSRESpecRow``.
        """
        self.src_spec_row_dict.update(
            {
                "TableName": "SRPatient",
                "ColumnName": "IDPatient",
                "TableDescription": "SRPatient description from spec",
                "ColumnDescription": "IDPatient description from spec",
            }
        )
        return SystmOneSRESpecRow(self.src_spec_row_dict)

    def test_comment_has_table_and_column_descriptions(self) -> None:
        """
        The comment combines the table and column descriptions.
        """
        spec_row = self._patient_id_spec_row()
        expected = (
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )
        self.assertEqual(spec_row.comment(self.context), expected)

    def test_description_has_translated_table_column_and_spec_descriptions(
        self,
    ) -> None:
        """
        The description starts with the context-specific table and column
        name, followed by the table and column descriptions.
        """
        spec_row = self._patient_id_spec_row()
        expected = (
            "S1_Patient.IDPatient // "
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )
        actual = spec_row.description(self.context)
        self.assertEqual(actual, expected)
class TestDataDictionary(DataDictionary):
    """
    Data dictionary for use in tests, whose rows are injected directly via
    the constructor.
    """

    # The class name starts with "Test", which matches pytest's default
    # class-collection pattern. This is a helper, not a test class, so opt
    # out of collection explicitly (avoids a collection warning caused by
    # the custom __init__).
    __test__ = False

    def __init__(
        self, config: "Config", rows: List[DataDictionaryRow]
    ) -> None:
        """
        Args:
            config: passed unchanged to the ``DataDictionary`` constructor.
            rows: the rows this data dictionary should contain.
        """
        super().__init__(config)
        self.rows = rows
class ModifyDDForSystmOneTests(SystmOneDDGenTestCase):
    """
    Tests of ``modify_dd_for_systmone()`` when it is asked to append comments
    from an SRE specification CSV file to a data dictionary.
    """

    # Columns of S1_Patient / SRPatient used throughout these tests, in the
    # order in which their rows are expected in the resulting dictionary.
    _COLUMN_NAMES = ("IDPatient", "NHSNumber")

    @staticmethod
    def _make_dd_row(
        config: "Config", src_field: str, comment: str
    ) -> DataDictionaryRow:
        """
        Build a data dictionary row for table ``S1_Patient`` in database
        ``Source``.

        Args:
            config: config object (a mock in these tests) for the row.
            src_field: source field name; the tests below pass an empty
                string for the row holding a pre-existing table comment.
            comment: the row's initial comment.
        """
        row = DataDictionaryRow(config)
        row.src_db = "Source"
        row.src_table = "S1_Patient"
        row.src_field = src_field
        row.comment = comment
        return row

    def _make_dd(self, with_table_comment: bool) -> "TestDataDictionary":
        """
        Build a data dictionary with one row per column in
        ``_COLUMN_NAMES``, optionally followed by a row (with an empty
        ``src_field``) whose comment is "Existing table comment".
        """
        mock_config = mock.Mock()
        rows = [
            self._make_dd_row(mock_config, name, f"{name} comment")
            for name in self._COLUMN_NAMES
        ]
        if with_table_comment:
            rows.append(
                self._make_dd_row(mock_config, "", "Existing table comment")
            )
        return TestDataDictionary(mock_config, rows)

    def _write_spec_csv(self) -> str:
        """
        Write a specification CSV with a header and one SRPatient row per
        column in ``_COLUMN_NAMES`` to a temporary file.

        Returns:
            the name of the (closed) temporary file, which is removed
            automatically when the test finishes.
        """
        # newline="" is what the csv module asks for when writing, so that
        # its own line terminators are not translated a second time.
        with NamedTemporaryFile(delete=False, mode="w", newline="") as f:
            # delete=False keeps the file on disk after it is closed, so
            # that it can be reopened by name. Register its removal
            # immediately so it is cleaned up even if writing fails.
            self.addCleanup(os.remove, f.name)

            writer = csv.DictWriter(
                f, fieldnames=list(self.src_spec_row_dict)
            )
            writer.writeheader()
            for name in self._COLUMN_NAMES:
                writer.writerow(
                    dict(
                        self.src_spec_row_dict,
                        TableName="SRPatient",
                        ColumnName=name,
                        TableDescription="SRPatient description from spec",
                        ColumnDescription=f"{name} description from spec",
                    )
                )
        return f.name

    def _assert_column_comments_appended(self, dd: DataDictionary) -> None:
        """
        Assert that rows 1 and 2 of ``dd`` are the column rows, each with
        the table and column descriptions from the spec appended to its
        original comment.
        """
        for index, name in enumerate(self._COLUMN_NAMES, start=1):
            self.assertEqual(
                dd.rows[index].comment,
                (
                    f"{name} comment // "
                    "TABLE: SRPatient description from spec // "
                    f"COLUMN: {name} description from spec"
                ),
            )

    def test_table_comments_from_spec_added_to_data_dictionary(self) -> None:
        """
        With no existing table comment row, a new row carrying the table
        description from the spec is expected at the top of the dictionary.
        """
        dd = self._make_dd(with_table_comment=False)

        modify_dd_for_systmone(
            dd,
            self.context,
            sre_spec_csv_filename=self._write_spec_csv(),
            append_comments=True,
        )

        self.assertEqual(len(dd.rows), 3)
        # Comment row is sorted to the top
        self.assertEqual(dd.rows[0].comment, "SRPatient description from spec")
        self._assert_column_comments_appended(dd)

    def test_ddr_existing_table_comment_appended_with_spec_description(
        self,
    ) -> None:
        """
        With an existing table comment row, the table description from the
        spec is expected to be appended to that row's comment (no extra row
        is added).
        """
        dd = self._make_dd(with_table_comment=True)

        modify_dd_for_systmone(
            dd,
            self.context,
            sre_spec_csv_filename=self._write_spec_csv(),
            append_comments=True,
        )

        self.assertEqual(len(dd.rows), 3)
        # Comment row is sorted to the top
        self.assertEqual(
            dd.rows[0].comment,
            "Existing table comment // SRPatient description from spec",
        )
        self._assert_column_comments_appended(dd)