# Source code for crate_anon.nlp_manager.parse_cognitive

"""
crate_anon/nlp_manager/parse_cognitive.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

**Python regex-based NLP processors for cognitive tests.**

All inherit from
:class:`crate_anon.nlp_manager.regex_parser.NumeratorOutOfDenominatorParser`
and are constructed with these arguments:

nlpdef:
    a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
cfg_processor_name:
    the name of a CRATE NLP config file section (from which we may
    choose to get extra config information)
commit:
    force a COMMIT whenever we insert data? You should specify this
    in multiprocess mode, or you may get database deadlocks.

"""

import logging
from typing import List, Optional, Tuple

from crate_anon.common.regex_helpers import WORD_BOUNDARY
from crate_anon.nlp_manager.nlp_definition import NlpDefinition
from crate_anon.nlp_manager.regex_numbers import IGNORESIGN_INTEGER
from crate_anon.nlp_manager.regex_parser import (
    APOSTROPHE,
    NumeratorOutOfDenominatorParser,
    ValidatorBase,
)
from crate_anon.nlp_manager.regex_units import OUT_OF_SEPARATOR

log = logging.getLogger(__name__)


# =============================================================================
# Mini-mental state examination (MMSE)
# =============================================================================


class Mmse(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Mini-mental state examination (MMSE).

    The default denominator is 30, but it supports other values if given
    explicitly.
    """

    # Regex fragment recognising the name of the test: "MMSE", or
    # "mini mental"/"mini-mental"/"minimental", optionally followed by
    # "state" and then "exam"/"examination". It is written with free spacing,
    # so presumably the superclass compiles it in verbose mode (TODO confirm).
    MMSE = rf"""
        (?: {WORD_BOUNDARY}
            (?: MMSE | mini[-\s]*mental (?: \s+ state)?
                (?: \s+ exam(?:ination)? )? )
        {WORD_BOUNDARY} )
    """
    NAME = "MMSE"

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # All three arguments are passed straight through to the superclass;
        # this class only supplies the MMSE-specific name, regex and
        # denominator.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable_name=self.NAME,
            variable_regex_str=self.MMSE,
            expected_denominator=30,
            # Presumably why "MMSE-27" is expected to yield 27, not -27, in
            # test() below; the semantics live in the superclass.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each entry is (text, expected list of (numerator, denominator));
        # a denominator of None means none was stated in the text.
        self.test_numerator_denominator_parser(
            [
                ("MMSE", []),  # should fail; no values
                ("MMSE 30/30", [(30, 30)]),
                ("MMSE 25 / 30", [(25, 30)]),
                ("MMSE 25 / 29", [(25, 29)]),
                ("MMSE 25 / 31", [(25, 31)]),
                ("mini-mental state exam 30", [(30, None)]),
                ("minimental 25", [(25, None)]),
                ("MMSE 30", [(30, None)]),
                ("MMSE-27", [(27, None)]),
                ("MMSE score was 30", [(30, None)]),
                ("ACE 79", []),
            ],
            verbose=verbose,
        )
class MmseValidator(ValidatorBase):
    """
    Validator for Mmse (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings), reusing the
        # parser's own name and regex so the two cannot drift apart.
        return Mmse.NAME, [Mmse.MMSE]
# =============================================================================
# Addenbrooke's Cognitive Examination (ACE, ACE-R, ACE-III)
# =============================================================================
class Ace(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Addenbrooke's Cognitive Examination (ACE, ACE-R, ACE-III) total score.

    The default denominator is 100 but it supports other values if given
    explicitly.
    """

    NAME = "ACE"
    # Regex fragment recognising the name of the test: "ACE" or the long
    # "Addenbrooke's cognitive examination/evaluation" form, optionally
    # followed by a version suffix (R, III, 111, or 3).
    # The "#" comment inside the pattern relies on free-spacing (verbose)
    # regex mode and MUST stay on its own line: if the pattern is collapsed
    # onto one line, the comment swallows the rest of the expression.
    ACE = rf"""
        (?: {WORD_BOUNDARY}
            (?:
                ACE
                | (?:
                    Addenbrooke{APOSTROPHE}?s \s+ cognitive \s+
                    (?: (?:evaluation) | exam(?:ination)? )
                )
            )
            (?:
                \s* -? \s*
                (?: R | III | 111
                    # or: 3 when not followed by an "out of X" expression
                    | (?: 3 (?! \s* {OUT_OF_SEPARATOR} \s* {IGNORESIGN_INTEGER}))
                ) \b
            )?+
        {WORD_BOUNDARY} )
    """  # noqa
    # ... note the possessive "?+" above; see tests below.

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # All three arguments are passed straight through to the superclass;
        # this class only supplies the ACE-specific name, regex and
        # denominator.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable_name=self.NAME,
            variable_regex_str=self.ACE,
            expected_denominator=100,
            # Presumably why "ACE-82" is expected to yield 82, not -82, in
            # test() below; the semantics live in the superclass.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each entry is (text, expected list of (numerator, denominator));
        # a denominator of None means none was stated in the text.
        self.test_numerator_denominator_parser(
            [
                ("MMSE", []),
                ("MMSE 30/30", []),
                ("MMSE 25 / 30", []),
                ("mini-mental state exam 30", []),
                ("minimental 25", []),
                ("MMSE 30", []),
                ("ACE 79", [(79, None)]),
                ("ACE 79/100", [(79, 100)]),
                ("ACE 79/95", [(79, 95)]),
                ("ACE 79 / 100", [(79, 100)]),
                ("Addenbrooke's cognitive examination 79", [(79, None)]),
                ("Addenbrookes cognitive evaluation 79", [(79, None)]),
                ("ACE-R 79", [(79, None)]),
                ("ACE-R 79 out of 100", [(79, 100)]),
                ("ACE-III 79", [(79, None)]),
                ("ACE-III score was 79", [(79, None)]),
                ("ACE R 79", [(79, None)]),
                ("ACE III 79", [(79, None)]),
                ("ACE-82", [(82, None)]),
                (
                    "ACE 111 99",
                    [(99, None)],
                ),  # "ACE 111" (for III) from real data
                # Note the difficulties created by the "ACE-3" representation
                # of the task's name. We have to get these right:
                ("ACE-3 79", [(79, None)]),
                ("ACE 3 79", [(79, None)]),
                ("ACE 3 79/100", [(79, 100)]),
                ("ACE 3 3", [(3, None)]),
                ("ACE 3 3/100", [(3, 100)]),
                # ... but also a score of 3 (!) on the older ACE:
                ("ACE 3/100", [(3, 100)]),
                ("ACE 3 out of 100", [(3, 100)]),
                # - This next one is ambiguous. Reference to new task? To old
                #   score? Making the "3" optional as part of the task name
                #   means that this will be accepted by the regex as a score.
                # - We need a special exception to get "ACE 3" not to give a
                #   score.
                # - We do this with a "possessive" quantifier on the "3" (or
                #   similar) part of the ACE descriptor.
                # - http://www.rexegg.com/regex-quantifiers.html
                # - Possessive quantifiers are in regex, not re:
                #   https://pypi.python.org/pypi/regex
                #   https://docs.python.org/3.5/library/re.html
                # - Ah, no. That makes "ACE 3/100" fail.
                # - But if we combine a possessive "3" with saying "3 unless
                #   it's "3 out of...", then we win.
                ("ACE 3", []),
                ("ACE 3/MOCA", []),
                ("ACE 3 / MOCA", []),
            ],
            verbose=verbose,
        )
class AceValidator(ValidatorBase):
    """
    Validator for Ace (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings), reusing the
        # parser's own name and regex so the two cannot drift apart.
        return Ace.NAME, [Ace.ACE]

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each entry is (text, whether the validator is expected to match).
        # Unlike the Ace parser, the validator is expected to match on the
        # task name alone (e.g. "ACE 3"), with no score required.
        self.test_validator(
            [
                ("pass me my mace, my boy", False),
                ("he scored 10 on the ACE today", True),
                ("he scored 10 on the ACE 3 today", True),
                ("he scored 10 on the ACE3 today", True),
                ("ACE 3/100", True),
                ("ACE 3 3/100", True),
                ("ACE3 4", True),
                ("ACE 3", True),
                ("ACE3", True),
                ("ACE 3/MOCA", True),
                ("ACE 3 / MOCA", True),
            ],
            verbose=verbose,
        )
# =============================================================================
# Mini-Addenbrooke's Cognitive Examination (M-ACE)
# =============================================================================
class MiniAce(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Mini-Addenbrooke's Cognitive Examination (M-ACE).

    The default denominator is 30, but it supports other values if given
    explicitly.
    """

    # Regex fragment recognising the name of the test: "mini" or "M",
    # optionally hyphen/space separated, followed by "ACE" or the long
    # "Addenbrooke's cognitive examination/evaluation" form. It is written
    # with free spacing, so presumably the superclass compiles it in verbose
    # mode (TODO confirm).
    MACE = rf"""
        (?: {WORD_BOUNDARY}
            (?: mini | M ) \s* -? \s*
            (?:
                ACE
                | (?:
                    Addenbrooke{APOSTROPHE}?s \s+ cognitive \s+
                    (?: (?:evaluation) | exam(?:ination)? )
                )
            )
        {WORD_BOUNDARY} )
    """
    NAME = "MiniACE"

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # All three arguments are passed straight through to the superclass;
        # this class only supplies the M-ACE-specific name, regex and
        # denominator.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable_name=self.NAME,
            variable_regex_str=self.MACE,
            expected_denominator=30,  # mini-ACE is out of 30
            # Presumably why "MACE-29" is expected to yield 29, not -29, in
            # test() below; the semantics live in the superclass.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each entry is (text, expected list of (numerator, denominator));
        # a denominator of None means none was stated in the text.
        self.test_numerator_denominator_parser(
            [
                ("MMSE 30", []),
                ("ACE 79", []),
                ("ACE 79/100", []),
                ("Addenbrooke's cognitive examination 79", []),
                ("Addenbrookes cognitive evaluation 79", []),
                ("mini-Addenbrooke's cognitive examination 79", [(79, None)]),
                ("mini-Addenbrooke’s cognitive examination 79", [(79, None)]),
                ("mini-Addenbrookes cognitive evaluation 79", [(79, None)]),
                ("M-ACE 20", [(20, None)]),
                ("M-ACE score is 20", [(20, None)]),
                ("M-ACE 29/30", [(29, 30)]),
                ("M-ACE 29/29", [(29, 29)]),
                ("MACE 29", [(29, None)]),
                ("MACE-29", [(29, None)]),
                ("mini-ACE 29", [(29, None)]),
            ],
            verbose=verbose,
        )
class MiniAceValidator(ValidatorBase):
    """
    Validator for MiniAce (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings), reusing the
        # parser's own name and regex so the two cannot drift apart.
        return MiniAce.NAME, [MiniAce.MACE]
# =============================================================================
# Montreal Cognitive Assessment (MOCA)
# =============================================================================
class Moca(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Montreal Cognitive Assessment (MOCA).

    The default denominator is 30, but it supports other values if given
    explicitly.
    """

    # todo:: MOCA NLP parser: support also "scored X on the MOCA"?

    # Regex fragment recognising the name of the test: "MOCA" or
    # "Montreal cognitive assessment". It is written with free spacing, so
    # presumably the superclass compiles it in verbose mode (TODO confirm).
    MOCA = rf"""
        (?: {WORD_BOUNDARY}
            (?: MOCA | (?: Montreal \s+ cognitive \s+ assessment ) )
        {WORD_BOUNDARY} )
    """
    NAME = "MOCA"

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # All three arguments are passed straight through to the superclass;
        # this class only supplies the MOCA-specific name, regex and
        # denominator.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable_name=self.NAME,
            variable_regex_str=self.MOCA,
            expected_denominator=30,
            # Presumably why "MOCA-25" is expected to yield 25, not -25, in
            # test() below; the semantics live in the superclass.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each entry is (text, expected list of (numerator, denominator));
        # a denominator of None means none was stated in the text.
        self.test_numerator_denominator_parser(
            [
                ("MOCA 30", [(30, None)]),
                ("MOCA 30/30", [(30, 30)]),
                ("MOCA 25/30", [(25, 30)]),
                ("MOCA score was 25", [(25, None)]),
                ("MOCA 25/29", [(25, 29)]),
                ("MOCA-25", [(25, None)]),
                ("Montreal Cognitive Assessment 25/30", [(25, 30)]),
            ],
            # Forward the caller's verbosity, as the other parsers in this
            # module do; previously the argument was silently ignored.
            verbose=verbose,
        )
class MocaValidator(ValidatorBase):
    """
    Validator for Moca (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings), reusing the
        # parser's own name and regex so the two cannot drift apart.
        return Moca.NAME, [Moca.MOCA]
# =============================================================================
# All classes in this module
# =============================================================================

# (parser class, validator class) pairs for every cognitive test defined in
# this module.
ALL_COGNITIVE_NLP_AND_VALIDATORS = [
    (Ace, AceValidator),
    (MiniAce, MiniAceValidator),
    (Mmse, MmseValidator),
    (Moca, MocaValidator),
]