# Source code for crate_anon.nlp_manager.tests.regex_test_helperfunc

"""
crate_anon/nlp_manager/tests/regex_test_helperfunc.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

**Functions for testing regular expressions.**

"""

import logging

from typing import List, Pattern, Tuple
from crate_anon.nlp_manager.regex_func import compile_regex
from crate_anon.nlp_manager.regex_parser import BaseNlpParser, ValidatorBase

log = logging.getLogger(__name__)


def f_score(precision: float, recall: float, beta: float = 1) -> float:
    """
    Calculates an F score (e.g. an F1 score for ``beta == 1``).
    See https://en.wikipedia.org/wiki/F1_score.

    Args:
        precision:
            precision of the test, P(really positive | test positive)
        recall:
            recall of the test, P(test positive | really positive)
        beta:
            controls the type of the F score (the relative emphasis on
            precision versus recall)

    Returns:
        the F score, or 0.0 if the denominator
        ``beta**2 * precision + recall`` is zero (for example, when both
        precision and recall are zero)
    """
    beta_sq = beta**2
    denominator = (beta_sq * precision) + recall
    if denominator == 0:
        # The formula would be 0/0 here. By the usual convention a test
        # with no precision and no recall scores zero, rather than raising
        # ZeroDivisionError.
        return 0.0
    return (1 + beta_sq) * precision * recall / denominator
def get_compiled_regex_results(
    compiled_regex: Pattern, text: str
) -> List[str]:
    """
    Finds all the hits for a regex when applied to text.

    Args:
        compiled_regex:
            a compiled regular expression
        text:
            text to parse

    Returns:
        a list of all the (entire) hits for this regex in ``text``, in the
        order that ``finditer`` yields them; an empty list if there are none
    """
    # group(0) is the whole match, regardless of any capture groups in the
    # pattern (unlike findall(), which returns the groups when present).
    return [match.group(0) for match in compiled_regex.finditer(text)]
def assert_text_regex(
    name: str,
    regex_text: str,
    test_expected_list: List[Tuple[str, List[str]]],
    verbose: bool = False,
) -> None:
    """
    Test a regex upon some text.

    Args:
        name:
            regex name (for display purposes only)
        regex_text:
            text that should be compiled (via ``compile_regex``) to give our
            regex
        test_expected_list:
            list of tuples ``teststring, expected_results``, where
            ``teststring`` is some text and ``expected_results`` is a list
            of expected hits for the regex within ``teststring``
        verbose:
            be verbose? (If so, the regex text is logged at debug level.)

    Returns:
        ``None``; success is reported via the log.

    Raises:
        AssertionError:
            if, for any test string, the hits found differ from the
            expected hits
    """
    log.info(f"Testing regex named {name}")
    compiled_regex = compile_regex(regex_text)
    if verbose:
        log.debug(f"... regex text:\n{regex_text}")
    for test_string, expected_values in test_expected_list:
        actual_values = get_compiled_regex_results(compiled_regex, test_string)
        # Raise explicitly rather than using an ``assert`` statement, so
        # that the check is still performed when Python runs with -O (which
        # strips assert statements).
        if actual_values != expected_values:
            raise AssertionError(
                f"Regex {name}: Expected {expected_values}, "
                f"got {actual_values}, when parsing {test_string!r}. "
                f"Regex text:\n{regex_text}"
            )
    log.info("... OK")
def run_tests_nlp_and_validator_classes(
    all_nlp_and_validators: List[Tuple[BaseNlpParser, ValidatorBase]]
) -> None:
    """
    Tests multiple pairs of NLP classes and their associated validators.

    Every NLP class is tested first, then every validator class. Each class
    is instantiated as ``cls(None, None)`` and its ``test(verbose=True)``
    method is called.

    Args:
        all_nlp_and_validators:
            list of tuples ``nlp_class, validator_class``; the elements are
            used as classes (they are called to create instances). An empty
            list is permitted and results in no tests being run.
    """
    # Materialize once so that we can make two passes (NLP classes first,
    # then validators) even if the caller passes a one-shot iterable.
    pairs = list(all_nlp_and_validators)
    # Iterate the pairs directly rather than unpacking zip(*pairs), which
    # raises ValueError for an empty list.
    for nlp_cls, _ in pairs:
        nlp_cls(None, None).test(verbose=True)
    for _, validator_cls in pairs:
        validator_cls(None, None).test(verbose=True)