# Source code for crate_anon.nlp_manager.tests.regex_units_tests

#!/usr/bin/env python

"""
crate_anon/nlp_manager/tests/regex_units_tests.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

Unit tests.

"""

import unittest

from crate_anon.nlp_manager.regex_units import (
    # ---------------------------------------------------------------------
    # Relationships
    # ---------------------------------------------------------------------
    out_of,
    per,
    # ---------------------------------------------------------------------
    # Distance
    # ---------------------------------------------------------------------
    CM,
    FEET,
    INCHES,
    M,
    MM,
    # ---------------------------------------------------------------------
    # Mass
    # ---------------------------------------------------------------------
    G,
    KG,
    LB,
    MCG,
    MG,
    STONES,
    # ---------------------------------------------------------------------
    # Volume
    # ---------------------------------------------------------------------
    CUBIC_MM,
    CUBIC_MM_OR_MICROLITRE,
    DL,
    FEMTOLITRE,
    L,
    MICROLITRE,
    ML,
    # ---------------------------------------------------------------------
    # Reciprocal volume
    # ---------------------------------------------------------------------
    PER_CUBIC_MM,
    # ---------------------------------------------------------------------
    # Time
    # ---------------------------------------------------------------------
    HOUR,
    # ---------------------------------------------------------------------
    # Proportion
    # ---------------------------------------------------------------------
    PERCENT,
    # -------------------------------------------------------------------------
    # Arbitrary count things
    # -------------------------------------------------------------------------
    CELLS,
    MICROUNITS,
    MILLIUNITS,
    SCORE,
    UNITS,
    # -------------------------------------------------------------------------
    # Moles
    # -------------------------------------------------------------------------
    MICROEQ,
    MICROMOLES,
    MILLIEQ,
    MILLIMOLES,
    MOLES,
    # -------------------------------------------------------------------------
    # Concentration (molarity)
    # -------------------------------------------------------------------------
    MILLIMOLAR,
    MILLIMOLES_PER_L,
    MICROEQ_PER_L,
    MICROMOLAR,
    MICROMOLES_PER_L,
    MILLIEQ_PER_L,
    # -------------------------------------------------------------------------
    # Concentration (mass)
    # -------------------------------------------------------------------------
    G_PER_DL,
    G_PER_L,
    MG_PER_DL,
    MG_PER_L,
    # -------------------------------------------------------------------------
    # Concentration (arbitrary count and dimensionless things)
    # -------------------------------------------------------------------------
    BILLION_PER_L,
    CELLS_PER_CUBIC_MM,
    CELLS_PER_CUBIC_MM_OR_MICROLITRE,
    L_PER_L,
    MICROUNITS_PER_ML,
    MILLIMOLES_PER_MOL,
    MILLIUNITS_PER_L,
    TRILLION_PER_L,
    UNITS_PER_L,
    # -------------------------------------------------------------------------
    # Speed
    # -------------------------------------------------------------------------
    MM_PER_H,
    # -------------------------------------------------------------------------
    # Pressure
    # -------------------------------------------------------------------------
    MM_HG,
    # -------------------------------------------------------------------------
    # Area and related
    # -------------------------------------------------------------------------
    SQ_M,
    KG_PER_SQ_M,
)
from crate_anon.nlp_manager.tests.regex_test_helperfunc import (
    assert_text_regex,
)


# =============================================================================
# Unit tests
# =============================================================================


class TestUnitRegexes(unittest.TestCase):
    """
    Unit tests for the "unit" regexes imported from
    ``crate_anon.nlp_manager.regex_units``.
    """

    @staticmethod
    def test_unit_regexes() -> None:
        """
        Test all "unit" regexes.

        Every regex under test is passed to ``assert_text_regex`` together
        with a descriptive name and a list of ``(text, expected)`` pairs,
        where ``expected`` is a list of strings (empty where no match is
        wanted). The exact comparison is performed by ``assert_text_regex``
        (presumably: the regex's matches within ``text`` must equal
        ``expected`` -- confirm against the helper).

        The checks are grouped in the same order as the import list:
        relationships, distance, mass, volume, reciprocal volume, time,
        proportion, arbitrary counts, moles, concentrations, speed,
        pressure, and area.
        """
        verbose = True

        # ---------------------------------------------------------------------
        # Relationships
        # ---------------------------------------------------------------------

        assert_text_regex(
            "out_of(5)",
            out_of(5),
            [
                ("4 out of 5", ["out of 5"]),
                ("4/5", ["/5"]),
                ("4 / 5", ["/ 5"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "per(n, d)",
            per("n", "d"),
            [
                ("blah n per d blah", ["n per d"]),
                ("blah n/d blah", ["n/d"]),
                ("n / d", ["n / d"]),
                ("n d -1", ["n d -1"]),
                # NOTE(review): identical to the previous case as shown
                # here; the original may have differed only in whitespace
                # -- confirm.
                ("n d -1", ["n d -1"]),
                ("n blah d", []),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "per(n, d, numerator_optional=True)",
            per("n", "d", numerator_optional=True),
            [
                ("blah n per d blah", ["n per d"]),
                ("blah n/d blah", ["n/d"]),
                ("n / d", ["n / d"]),
                ("n d -1", ["n d -1"]),
                # NOTE(review): identical to the previous case as shown
                # here; the original may have differed only in whitespace
                # -- confirm.
                ("n d -1", ["n d -1"]),
                ("n blah d", []),
                # With the numerator optional, the denominator part alone
                # is expected to match.
                ("/ d", ["/ d"]),
                (" / d", ["/ d"]),
                (" per d", ["per d"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Distance
        # ---------------------------------------------------------------------

        assert_text_regex(
            "CM",
            CM,
            [
                ("5 centimetres long", ["centimetres"]),
                ("5 centimeters long", ["centimeters"]),
                ("5cm long", ["cm"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "FEET",
            FEET,
            [
                ("5 feet long", ["feet"]),
                ("5 foot long", ["foot"]),
                ("5' long", ["'"]),  # ASCII apostrophe
                ("5’ long", ["’"]),  # right single quote (U+2019)
                ("5′ long", ["′"]),  # prime (U+2032)
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "INCHES",
            INCHES,
            [
                ("5 inches long", ["inches"]),
                ("5 in long", ["in"]),
                ('5" long', ['"']),  # ASCII double quote
                ("5” long", ["”"]),  # right double quote (U+201D)
                ("5″ long", ["″"]),  # double prime (U+2033)
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "M",
            M,
            [
                ("5 metres long", ["metres"]),
                ("5 meters long", ["meters"]),
                ("5m long", ["m"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MM",
            MM,
            [
                ("5 millimetres long", ["millimetres"]),
                ("5 millimeters long", ["millimeters"]),
                ("5mm long", ["mm"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Mass
        # ---------------------------------------------------------------------

        assert_text_regex(
            "G",
            G,
            [
                ("5 grams", ["grams"]),
                ("5 g", ["g"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "KG",
            KG,
            [
                ("5 kilograms", ["kilograms"]),
                ("5 kg", ["kg"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "LB",
            LB,
            [
                ("5 pounds", ["pounds"]),
                ("5 lb", ["lb"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MCG",
            MCG,
            [
                ("5 micrograms", ["micrograms"]),
                ("5 mcg", ["mcg"]),
                ("5 ug", ["ug"]),
                ("5 μg", ["μg"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MG",
            MG,
            [
                ("5 milligrams", ["milligrams"]),
                ("5 mg", ["mg"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "STONES",
            STONES,
            [
                ("5 stones", ["stones"]),
                ("5 stone", ["stone"]),
                ("5 st", ["st"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Volume
        # ---------------------------------------------------------------------

        assert_text_regex(
            "CUBIC_MM",
            CUBIC_MM,
            [
                ("mm3", ["mm3"]),
                ("blibble", []),
                ("5 mm^3", ["mm^3"]),
                ("5 cubic mm", ["cubic mm"]),
                ("5 cubic millimetres", ["cubic millimetres"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "CUBIC_MM_OR_MICROLITRE",
            CUBIC_MM_OR_MICROLITRE,
            [
                ("5 mm^3", ["mm^3"]),
                ("5 cubic mm", ["cubic mm"]),
                ("5 cubic millimetres", ["cubic millimetres"]),
                ("5 microlitre", ["microlitre"]),
                ("5 microL", ["microL"]),
                ("5 microliters", ["microliters"]),
                ("5 μL", ["μL"]),
                ("5 ul", ["ul"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "DL",
            DL,
            [
                ("5 decilitres", ["decilitres"]),
                ("5 deciliters", ["deciliters"]),
                ("5 dl", ["dl"]),
                ("5 dL", ["dL"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "FEMTOLITRE",
            FEMTOLITRE,
            [
                ("5 femtolitres", ["femtolitres"]),
                ("5 femtoliters", ["femtoliters"]),
                ("5 fl", ["fl"]),
                ("5 fL", ["fL"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "L",
            L,
            [
                ("5 litres", ["litres"]),
                ("5 liters", ["liters"]),
                ("5 l", ["l"]),
                ("5 L", ["L"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MICROLITRE",
            MICROLITRE,
            [
                ("5 microlitre", ["microlitre"]),
                ("5 microL", ["microL"]),
                ("5 microliters", ["microliters"]),
                ("5 μL", ["μL"]),
                ("5 ul", ["ul"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "ML",
            ML,
            [
                ("5 millilitres", ["millilitres"]),
                ("5 milliliters", ["milliliters"]),
                ("5 ml", ["ml"]),
                ("5 mL", ["mL"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Reciprocal volume
        # ---------------------------------------------------------------------

        assert_text_regex(
            "PER_CUBIC_MM",
            PER_CUBIC_MM,
            [
                ("per cubic mm", ["per cubic mm"]),
                ("5/mm^3", ["/mm^3"]),
                ("5 per cubic mm", ["per cubic mm"]),
                ("5 per cubic millimetres", ["per cubic millimetres"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Time
        # ---------------------------------------------------------------------

        assert_text_regex(
            "HOUR",
            HOUR,
            [
                ("5 hours", ["hours"]),
                ("5 hr", ["hr"]),
                ("5 h", ["h"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Proportion
        # ---------------------------------------------------------------------

        assert_text_regex(
            "PERCENT",
            PERCENT,
            [
                ("5 percent", ["percent"]),
                ("5 per cent", ["per cent"]),
                ("5 pct", ["pct"]),
                ("5%", ["%"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Arbitrary count things
        # ---------------------------------------------------------------------

        assert_text_regex(
            "CELLS",
            CELLS,
            [
                ("cells", ["cells"]),
                ("blibble", []),
                ("5 cells", ["cells"]),
                ("5 cell", ["cell"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MICROUNITS",
            MICROUNITS,
            [
                ("5 uU", ["uU"]),
                ("5 μU", ["μU"]),
                ("5 uIU", ["uIU"]),
                ("5 μIU", ["μIU"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MILLIUNITS",
            MILLIUNITS,
            [
                ("5 mU", ["mU"]),
                ("5 mIU", ["mIU"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "SCORE",
            SCORE,
            [
                ("I scored 5", ["scored"]),
                ("MMSE score 5", ["score"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "UNITS",
            UNITS,
            [
                ("5 U", ["U"]),
                ("5 IU", ["IU"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Moles
        # ---------------------------------------------------------------------

        assert_text_regex(
            "MICROEQ",
            MICROEQ,
            [
                ("5 μEq", ["μEq"]),
                ("5 uEq", ["uEq"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MICROMOLES",
            MICROMOLES,
            [
                ("5 micromoles", ["micromoles"]),
                ("5 micromol", ["micromol"]),
                ("5 umol", ["umol"]),
                ("5 μmol", ["μmol"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MILLIEQ",
            MILLIEQ,
            [
                ("5 mEq", ["mEq"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MILLIMOLES",
            MILLIMOLES,
            [
                ("5 millimoles", ["millimoles"]),
                ("5 millimol", ["millimol"]),
                ("5 mmol", ["mmol"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MOLES",
            MOLES,
            [
                ("5 moles", ["moles"]),
                ("5 mol", ["mol"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Concentration (molarity)
        # ---------------------------------------------------------------------

        assert_text_regex(
            "MILLIMOLAR",
            MILLIMOLAR,
            [
                ("5 mM", ["mM"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MILLIMOLES_PER_L",
            MILLIMOLES_PER_L,
            [
                ("5 mmol/L", ["mmol/L"]),
                ("5 millimoles per litre", ["millimoles per litre"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MICROEQ_PER_L",
            MICROEQ_PER_L,
            [
                ("5 μEq/L", ["μEq/L"]),
                ("5 microequivalents per litre", []),  # not supported
                ("5 microEq per litre", ["microEq per litre"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MICROMOLAR",
            MICROMOLAR,
            [
                ("5 micromolar", ["micromolar"]),
                ("5 μM", ["μM"]),
                ("5 uM", ["uM"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MICROMOLES_PER_L",
            MICROMOLES_PER_L,
            [
                ("5 micromol/L", ["micromol/L"]),
                ("5 micromoles/litre", ["micromoles/litre"]),
                ("5 umol/L", ["umol/L"]),
                ("5 μmol/L", ["μmol/L"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MILLIEQ_PER_L",
            MILLIEQ_PER_L,
            [
                ("5 mEq/L", ["mEq/L"]),
                ("5 milliequivalents per litre", []),  # not supported
                ("5 milliEq per litre", ["milliEq per litre"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Concentration (mass)
        # ---------------------------------------------------------------------

        assert_text_regex(
            "G_PER_DL",
            G_PER_DL,
            [
                ("5 g/dL", ["g/dL"]),
                ("5 grams per deciliter", ["grams per deciliter"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "G_PER_L",
            G_PER_L,
            [
                ("5 g/L", ["g/L"]),
                ("5 g L-1", ["g L-1"]),
                ("5 grams per liter", ["grams per liter"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MG_PER_DL",
            MG_PER_DL,
            [
                ("5 mg/dL", ["mg/dL"]),
                ("5 milligrams per deciliter", ["milligrams per deciliter"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MG_PER_L",
            MG_PER_L,
            [
                ("5 mg/L", ["mg/L"]),
                ("5 mg L-1", ["mg L-1"]),
                ("5 milligrams per liter", ["milligrams per liter"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Concentration (arbitrary count and dimensionless things)
        # ---------------------------------------------------------------------

        assert_text_regex(
            "BILLION_PER_L",
            BILLION_PER_L,
            [
                ("5 × 10^9/L", ["× 10^9/L"]),
                ("5 * 10e9/L", ["* 10e9/L"]),
                ("5 x 10e9 per litre", ["x 10e9 per litre"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "CELLS_PER_CUBIC_MM",
            CELLS_PER_CUBIC_MM,
            [
                ("cells/mm3", ["cells/mm3"]),
                ("blibble", []),
                ("9800 / mm3", ["/ mm3"]),
                ("9800 cell/mm3", ["cell/mm3"]),
                ("9800 cells/mm3", ["cells/mm3"]),
                ("9800 cells per cubic mm", ["cells per cubic mm"]),
                ("9800 per cubic mm", ["per cubic mm"]),
                ("9800 per cmm", ["per cmm"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "CELLS_PER_CUBIC_MM_OR_MICROLITRE",
            CELLS_PER_CUBIC_MM_OR_MICROLITRE,
            [
                ("9800 / mm3", ["/ mm3"]),
                ("9800 cell/mm3", ["cell/mm3"]),
                ("9800 cells/mm3", ["cells/mm3"]),
                ("9800 cells per cubic mm", ["cells per cubic mm"]),
                ("9800 per cubic mm", ["per cubic mm"]),
                ("9800 per cmm", ["per cmm"]),
                ("9800 per μL", ["per μL"]),
                ("9800 per microliter", ["per microliter"]),
                ("9800 / microlitre", ["/ microlitre"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "L_PER_L",
            L_PER_L,
            [
                ("5 L/L", ["L/L"]),
                ("5 l/l", ["l/l"]),
                ("5 litre per liter", ["litre per liter"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MICROUNITS_PER_ML",
            MICROUNITS_PER_ML,
            [
                ("5 microunits/mL", ["microunits/mL"]),
                ("5 microU/millilitre", ["microU/millilitre"]),
                ("5 uU/mL", ["uU/mL"]),
                ("5 μIU/ml", ["μIU/ml"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MILLIMOLES_PER_MOL",
            MILLIMOLES_PER_MOL,
            [
                ("5 mmol/mol", ["mmol/mol"]),
                ("5 millimoles per mole", ["millimoles per mole"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "MILLIUNITS_PER_L",
            MILLIUNITS_PER_L,
            [
                ("5 milliunits/L", ["milliunits/L"]),
                ("5 milliU/litre", ["milliU/litre"]),
                ("5 mIU/litre", ["mIU/litre"]),
                ("5 mU/L", ["mU/L"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "TRILLION_PER_L",
            TRILLION_PER_L,
            [
                ("5 × 10^12/L", ["× 10^12/L"]),
                ("5 * 10e12/L", ["* 10e12/L"]),
                ("5 x 10e12 per litre", ["x 10e12 per litre"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "UNITS_PER_L",
            UNITS_PER_L,
            [
                ("5 units/L", ["units/L"]),
                ("5 U/litre", ["U/litre"]),
                ("5 U/L", ["U/L"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Speed
        # ---------------------------------------------------------------------

        assert_text_regex(
            "MM_PER_H",
            MM_PER_H,
            [
                ("5 mm/h", ["mm/h"]),
                ("5 mm per h", ["mm per h"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Pressure
        # ---------------------------------------------------------------------

        assert_text_regex(
            "MM_HG",
            MM_HG,
            [
                ("5 mmHg", ["mmHg"]),
                ("5 mm Hg", ["mm Hg"]),
            ],
            verbose=verbose,
        )

        # ---------------------------------------------------------------------
        # Area and related
        # ---------------------------------------------------------------------

        assert_text_regex(
            "SQ_M",
            SQ_M,
            [
                ("5 square metres", ["square metres"]),
                ("5 sq m", ["sq m"]),
                ("5 m^2", ["m^2"]),
            ],
            verbose=verbose,
        )
        assert_text_regex(
            "KG_PER_SQ_M",
            KG_PER_SQ_M,
            [
                ("5 kg per square metre", ["kg per square metre"]),
                ("5 kg/sq m", ["kg/sq m"]),
                ("5 kg/m^2", ["kg/m^2"]),
                ("5 kg*m^-2", ["kg*m^-2"]),
            ],
            verbose=verbose,
        )