Source code for crate_anon.linkage.person

#!/usr/bin/env python

r"""
crate_anon/linkage/person.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

**Person representations for fuzzy matching.**

"""


# =============================================================================
# Imports
# =============================================================================

import json
import logging
import random
from typing import (
    Any,
    Dict,
    Generator,
    List,
    Optional,
    Union,
)

from cardinal_pythonlib.reprfunc import auto_repr

from crate_anon.linkage.comparison import bayes_compare, Comparison
from crate_anon.linkage.helpers import (
    getdictval,
    mutate_name,
    mutate_postcode,
)
from crate_anon.linkage.identifiers import (
    DateOfBirth,
    Forename,
    gen_best_comparisons,
    Gender,
    Identifier,
    PerfectID,
    Postcode,
    Surname,
    TemporalIDHolder,
)
from crate_anon.linkage.matchconfig import MatchConfig

log = logging.getLogger(__name__)


# =============================================================================
# Person
# =============================================================================


[docs]class Person:
    """
    A proper representation of a person that can do hashing and comparisons.
    The information may be incomplete or slightly wrong.
    Includes frequency information and requires a config.
    """

    # -------------------------------------------------------------------------
    # Class attributes
    # -------------------------------------------------------------------------

    class PersonKey:
        """
        Names of the per-person fields.

        Each value is used as a CSV column name, as a JSON dictionary key, and
        as the name of the matching ``Person.__init__`` keyword argument and
        instance attribute.

        The definition order matters: ``ALL_PERSON_KEYS`` is built by iterating
        over this class's attributes in order.
        """

        LOCAL_ID = "local_id"  # person ID within relevant DB (proband/sample)
        FORENAMES = "forenames"
        SURNAMES = "surnames"
        DOB = "dob"
        GENDER = "gender"
        POSTCODES = "postcodes"
        PERFECT_ID = "perfect_id"
        OTHER_INFO = "other_info"  # anything the user may want to attach

    # All public PersonKey values, in definition order.
    #
    # [getattr(PersonKey, x) for x in vars(PersonKey)...] does not work here as
    # PersonKey is not in scope within a list comprehension here; see
    # check_inner_class_attr_access.py and
    # https://stackoverflow.com/questions/13905741. But this works:
    ALL_PERSON_KEYS = []  # type: List[str]
    for tmp in vars(PersonKey):
        if not tmp.startswith("_"):  # skip dunder attributes, e.g. __module__
            ALL_PERSON_KEYS.append(getattr(PersonKey, tmp))
    del tmp  # don't leave the loop variable behind as a class attribute

    # For reading CSV: fields that may hold several semicolon-separated values.
    SEMICOLON_DELIMIT = [
        PersonKey.FORENAMES,
        PersonKey.SURNAMES,
        PersonKey.POSTCODES,
    ]
    # Fields whose values are in TemporalIdentifier format. NB this is the
    # same list object as SEMICOLON_DELIMIT (an alias, not a copy).
    TEMPORAL_IDENTIFIERS = SEMICOLON_DELIMIT
    # User-facing help text describing the plaintext CSV input format.
    PLAINTEXT_CSV_FORMAT_HELP = (
        f"(1) CSV format with header row. Columns: {ALL_PERSON_KEYS}. "
        f"(2) Semicolon-separated values are allowed within "
        f"{SEMICOLON_DELIMIT}. "
        f"(3) The fields {TEMPORAL_IDENTIFIERS} are in TemporalIdentifier "
        f"format. {Identifier.TEMPORAL_ID_FORMAT_HELP} "
        f"(4) {PersonKey.PERFECT_ID}, if specified, contains one or more "
        f"perfect person identifiers as key:value pairs, e.g. "
        f"'nhs:12345;ni:AB6789XY'. The keys will be forced to lower case; "
        f"values will be forced to upper case. "
        f"(5) {PersonKey.OTHER_INFO!r} is an arbitrary string for you to use "
        f"(e.g. for validation)."
    )
    # User-facing help text describing the hashed JSON Lines format.
    HASHED_JSONLINES_FORMAT_HELP = (
        "File created by CRATE in JSON Lines (.jsonl) format. (You could use "
        "the 'jq' tool to inspect these.)"
    )

    # -------------------------------------------------------------------------
    # Creation
    # -------------------------------------------------------------------------

[docs]    def __init__(
        self,
        cfg: MatchConfig,
        local_id: str = "",
        other_info: str = "",
        forenames: List[Union[None, str, TemporalIDHolder, Forename]] = None,
        surnames: List[Union[None, str, TemporalIDHolder, Surname]] = None,
        dob: Union[None, str, DateOfBirth] = "",
        gender: Union[None, str, Gender] = "",
        postcodes: List[Union[None, str, TemporalIDHolder, Postcode]] = None,
        perfect_id: Union[None, Dict[str, Any], PerfectID] = None,
    ) -> None:
        """
        Args:
            cfg:
                The config object.
            local_id:
                Identifier within this person's local database (e.g. proband ID
                or sample ID). Typically a research pseudonym, not itself
                identifying.
            other_info:
                String containing any other attributes the user may wish to
                remember (e.g. in JSON). Only used for validation research
                (e.g. ensuring linkage is not biased by ethnicity).

            forenames:
                The person's forenames (given names, first/middle names), as
                strings or Forename objects.
            surnames:
                The person's surname(s), as strings or Surname or
                TemporalIDHolder objects.
            dob:
                The date of birth, in ISO-8061 "YYYY-MM-DD" string format,
                or as a DateOfBirth object, or None, or ''.
            gender:
                The gender: 'M', 'F', 'X', or '', or None, or a Gender object.
            postcodes:
                Any UK postcodes for this person, with optional associated
                dates.
            perfect_id:
                Any named person-unique identifiers (e.g. UK NHS numbers, UK
                National Insurance numbers), for non-fuzzy matching. Dictionary
                keys will be forced to lower case, and dictionary values to
                upper case.
        """
        self._is_plaintext = None  # type: Optional[bool]

        def chk_plaintext(new_identifier: Identifier) -> None:
            """
            Ensure we don't mix plaintext and hashed data.
            """
            new_plaintext = new_identifier.is_plaintext
            if self._is_plaintext is None:
                self._is_plaintext = new_plaintext
            elif new_plaintext != self._is_plaintext:
                new = self.plain_or_hashed_txt(new_plaintext)
                old = self.plain_or_hashed_txt(self._is_plaintext)
                raise ValueError(
                    f"Trying to add {new} information to a Person containing "
                    f"only {old} information; new data was "
                    f"{new_identifier!r}; current is {self!r}"
                )

        assert isinstance(cfg, MatchConfig)
        self.cfg = cfg
        self.baseline_log_odds_same_person = (
            self.cfg.baseline_log_odds_same_person
        )  # for speed

        # local_id
        self.local_id = str(local_id) if local_id is not None else None
        if not self.local_id:
            raise ValueError(f"Bad local_id: {local_id!r}")

        # other_info
        self.other_info = other_info or ""
        if not isinstance(self.other_info, str):
            raise ValueError(f"Bad other_info: {self.other_info!r}")

        # gender
        gender = "" if gender is None else gender
        # DO NOT DO: gender = gender or ""
        # ... because bool(Gender(cfg, gender="")) == False.
        if isinstance(gender, Gender):
            self.gender = gender
        else:
            self.gender = Gender(cfg=cfg, gender=gender)
        chk_plaintext(self.gender)

        # forenames
        forenames = forenames or []
        if not isinstance(forenames, list):
            raise ValueError(f"Bad forenames: {forenames!r}")
        self.forenames = []  # type: List[Forename]
        for f in forenames:
            if not f:  # None or ""
                continue
            elif isinstance(f, str):
                f = Forename(cfg=cfg, name=f, gender=self.gender.gender_str)
            elif isinstance(f, TemporalIDHolder):
                f = Forename(
                    cfg=cfg,
                    name=f.identifier,
                    start_date=f.start_date,
                    end_date=f.end_date,
                )
            elif not isinstance(f, Forename):
                raise ValueError(f"Bad forename: {f!r}")
            if not bool(f):
                continue  # skip blank names not detected above
            chk_plaintext(f)
            self.forenames.append(f)

        # surnames
        surnames = surnames or []
        if not isinstance(surnames, list):
            raise ValueError(f"Bad surnames: {surnames!r}")
        self.surnames = []  # type: List[Surname]
        for s in surnames:
            if not s:
                continue
            elif isinstance(s, str):
                s = Surname(cfg=cfg, name=s, gender=self.gender.gender_str)
            elif isinstance(s, TemporalIDHolder):
                s = Surname(
                    cfg=cfg,
                    name=s.identifier,
                    start_date=s.start_date,
                    end_date=s.end_date,
                )
            elif not isinstance(s, Surname):
                raise ValueError(f"Bad surname: {s!r}")
            if not bool(s):
                continue  # skip blank names not detected above
            chk_plaintext(s)
            self.surnames.append(s)

        # dob (NB highly desirable for real work, but not mandatory, and we
        # also want to be able to create Person objects without a DOB for
        # testing)
        dob = "" if dob is None else dob
        if isinstance(dob, DateOfBirth):
            self.dob = dob
        else:
            self.dob = DateOfBirth(cfg=cfg, dob=dob or "")
        chk_plaintext(self.dob)

        # postcodes
        postcodes = postcodes or []
        if not isinstance(postcodes, list):
            raise ValueError(f"Bad postcodes: {postcodes!r}")
        self.postcodes = []  # type: List[Postcode]
        for p in postcodes:
            if not p:  # None or ""
                continue
            elif isinstance(p, str):
                p = Postcode(cfg=cfg, postcode=p)
            elif isinstance(p, TemporalIDHolder):
                p = Postcode(
                    cfg=cfg,
                    postcode=p.identifier,
                    start_date=p.start_date,
                    end_date=p.end_date,
                )
            elif not isinstance(p, Postcode):
                raise ValueError(f"Bad data structure for postcode: {p!r}")
            if not bool(p):
                continue  # skip blanks not detected above
            chk_plaintext(p)
            self.postcodes.append(p)

        # perfect_id
        if isinstance(perfect_id, PerfectID):
            self.perfect_id = perfect_id
        else:
            self.perfect_id = PerfectID(cfg=cfg, identifiers=perfect_id)
        chk_plaintext(self.perfect_id)

[docs]    @staticmethod
    def plain_or_hashed_txt(plaintext: bool) -> str:
        """
        Used for error messages.
        """
        return "plaintext" if plaintext else "hashed"

[docs]    @classmethod
    def from_plaintext_csv(
        cls, cfg: MatchConfig, rowdict: Dict[str, str]
    ) -> "Person":
        """
        Returns a :class:`Person` object from a CSV row.

        Args:
            cfg: a configuration object
            rowdict: a CSV row, read via :class:`csv.DictReader`.
        """
        kwargs = {}  # type: Dict[str, Any]
        for attr in cls.ALL_PERSON_KEYS:
            vstr = rowdict[attr]
            if attr in cls.SEMICOLON_DELIMIT:
                v = [x.strip() for x in vstr.split(";") if x]
                if attr == cls.PersonKey.PERFECT_ID:
                    v = PerfectID.from_plaintext_str(cfg, vstr)
                elif attr in cls.TEMPORAL_IDENTIFIERS:
                    v = [
                        TemporalIDHolder.from_plaintext_str(cfg, x) for x in v
                    ]
            else:
                # All TEMPORAL_IDENTIFIERS are in SEMICOLON_DELIMIT
                assert attr not in cls.TEMPORAL_IDENTIFIERS
                v = vstr
            kwargs[attr] = v
        return Person(cfg=cfg, **kwargs)

[docs]    @classmethod
    def from_json_dict(
        cls, cfg: MatchConfig, d: Dict[str, Any], hashed: bool = True
    ) -> "Person":
        """
        Restore a hashed or plaintext version from a dictionary (which has been
        read from JSONL).
        """

        def check_is_dict(d_: Any, name_: str) -> None:
            if not isinstance(d_, dict):
                raise ValueError(
                    f"{name_} contains something that is not a dict: {d_!r}"
                )

        pk = cls.PersonKey
        forenames = []  # type: List[Forename]
        for mnd in getdictval(d, pk.FORENAMES, list):
            check_is_dict(mnd, pk.FORENAMES)
            forenames.append(Forename.from_dict(cfg, mnd, hashed))
        surnames = []  # type: List[Surname]
        for sur in getdictval(d, pk.SURNAMES, list):
            check_is_dict(sur, pk.SURNAMES)
            surnames.append(Surname.from_dict(cfg, sur, hashed))
        postcodes = []  # type: List[Postcode]
        for pd in getdictval(d, pk.POSTCODES, list):
            check_is_dict(pd, pk.POSTCODES)
            postcodes.append(Postcode.from_dict(cfg, pd, hashed))
        return Person(
            cfg=cfg,
            local_id=getdictval(d, pk.LOCAL_ID, str),
            other_info=getdictval(d, pk.OTHER_INFO, str, mandatory=False),
            forenames=forenames,
            surnames=surnames,
            dob=DateOfBirth.from_dict(
                cfg, getdictval(d, pk.DOB, dict), hashed
            ),
            gender=Gender.from_dict(
                cfg, getdictval(d, pk.GENDER, dict), hashed
            ),
            postcodes=postcodes,
            perfect_id=PerfectID.from_dict(
                cfg, getdictval(d, pk.PERFECT_ID, dict), hashed
            ),
        )

[docs]    @classmethod
    def from_json_str(cls, cfg: MatchConfig, s: str) -> "Person":
        """
        Restore a hashed version from a string representing JSON.
        """
        d = json.loads(s)
        return cls.from_json_dict(cfg, d)

    # -------------------------------------------------------------------------
    # Equality, hashing -- local_id should be unique
    # -------------------------------------------------------------------------
    # Be careful:
    # - https://inventwithpython.com/blog/2019/02/01/hashable-objects-must-be-immutable/  # noqa
    # - https://docs.python.org/3/glossary.html [re "hashable"]
    # Here, we define equality based on local_id, which will not change. In
    # practice, nothing else will either.

    def __eq__(self, other: "Person") -> bool:
        return self.local_id == other.local_id

    def __hash__(self) -> int:
        return hash(self.local_id)

    # -------------------------------------------------------------------------
    # Representation: string
    # -------------------------------------------------------------------------

    def __repr__(self) -> str:
        """
        Debugging representation, as produced by ``auto_repr``.
        """
        description = auto_repr(self)
        return description

    def __str__(self) -> str:
        if self.is_hashed():
            return f"Person<HASHED, local_id={self.local_id!r}>"
        names = " ".join(
            [str(f) for f in self.forenames] + [str(s) for s in self.surnames]
        )
        postcodes = " - ".join(str(x) for x in self.postcodes)
        k = self.PersonKey
        details = ", ".join(
            [
                f"{k.LOCAL_ID}={self.local_id}",
                f"{k.PERFECT_ID}={self.perfect_id}",
                f"name={names}",
                f"{k.GENDER}={self.gender}",
                f"{k.DOB}={self.dob}",
                f"{k.POSTCODES}={postcodes}",
                f"{k.OTHER_INFO}={self.other_info!r}",
            ]
        )
        return f"Person<{details}>"

    # -------------------------------------------------------------------------
    # Representation: CSV
    # -------------------------------------------------------------------------

[docs]    @classmethod
    def plaintext_csv_columns(cls) -> List[str]:
        """
        CSV column names -- including user-specified "other" information.
        """
        return cls.ALL_PERSON_KEYS

[docs]    def plaintext_csv_dict(self) -> Dict[str, str]:
        """
        Returns a dictionary suitable for :class:`csv.DictWriter`.
        This is for writing identifiable content.
        """
        d = {}  # type: Dict[str, str]
        for k in self.ALL_PERSON_KEYS:
            a = getattr(self, k)
            if k in self.SEMICOLON_DELIMIT and k != self.PersonKey.PERFECT_ID:
                v = ";".join(str(x) for x in a)
            else:
                v = str(a)
            d[k] = v
        return d

    # -------------------------------------------------------------------------
    # Representation: JSON
    # -------------------------------------------------------------------------

[docs]    def as_dict(
        self,
        hashed: bool = True,
        include_frequencies: bool = True,
        include_other_info: bool = False,
    ) -> Dict[str, Any]:
        """
        For JSON.

        Args:
            hashed:
                Create a hashed/encrypted version?
            include_frequencies:
                Include frequency information. If you don't, this makes the
                resulting file suitable for use as a sample, but not as a
                proband file.
            include_other_info:
                include the (potentially identifying) ``other_info`` data?
                Usually ``False``; may be ``True`` for validation.
        """
        pk = self.PersonKey

        # This could be terser, but to be clear:
        if hashed:
            if self._is_plaintext:
                encrypt = True
                local_id = self.cfg.local_id_hash_fn(self.local_id)
            else:
                encrypt = False  # already encrypted; don't do it twice
                local_id = self.local_id
        else:
            if self._is_plaintext:
                encrypt = False
                local_id = self.local_id
            else:
                raise AssertionError(
                    "Can't create plaintext from hashed Person"
                )

        d = {
            pk.LOCAL_ID: local_id,
            pk.FORENAMES: [
                f.as_dict(encrypt, include_frequencies) for f in self.forenames
            ],
            pk.SURNAMES: [
                s.as_dict(encrypt, include_frequencies) for s in self.surnames
            ],
            pk.DOB: self.dob.as_dict(encrypt, include_frequencies),
            pk.GENDER: self.gender.as_dict(encrypt, include_frequencies),
            pk.POSTCODES: [
                p.as_dict(encrypt, include_frequencies) for p in self.postcodes
            ],
            pk.PERFECT_ID: self.perfect_id.as_dict(encrypt),
        }
        if include_other_info:
            d[pk.OTHER_INFO] = self.other_info
        return d

    # -------------------------------------------------------------------------
    # Copying
    # -------------------------------------------------------------------------

[docs]    def copy(self) -> "Person":
        """
        Returns a copy of this object.

        - :func:`copy.deepcopy` is incredibly slow, yet :func:`copy.copy` isn't
          enough when we want to mutate this object.
        - We did do it quasi-manually, copying attributes but using
          ``[copy.copy(x) for x in value]`` if the value was a list.
        - However, since we have functions to convert to/from a dict
          representation, we may as well use them.
        """
        hashed = self.is_hashed()
        return self.from_json_dict(
            self.cfg,
            self.as_dict(
                hashed=hashed,
                include_frequencies=True,
                include_other_info=True,
            ),
            hashed=hashed,
        )

    # -------------------------------------------------------------------------
    # Created hashed version
    # -------------------------------------------------------------------------

[docs]    def hashed(
        self,
        include_frequencies: bool = True,
        include_other_info: bool = False,
    ) -> "Person":
        """
        Returns a :class:`Person` object but with all the elements hashed (if
        they are not blank).

        Note that you do NOT need to do this just to write a hashed version to
        disk. This function is primarily for comparing an entire sample of
        hashed people to plaintext people, or vice versa; we hash the plaintext
        version first.

        Args:
            include_frequencies:
                Include frequency information. If you don't, this makes the
                resulting file suitable for use as a sample, but not as a
                proband file.
            include_other_info:
                include the (potentially identifying) ``other_info`` data?
                Usually ``False``; may be ``True`` for validation.
        """
        d = self.as_dict(
            hashed=True,
            include_frequencies=include_frequencies,
            include_other_info=include_other_info,
        )
        return self.from_json_dict(self.cfg, d)

    # -------------------------------------------------------------------------
    # Main comparison function
    # -------------------------------------------------------------------------

[docs]    def log_odds_same(self, candidate: "Person") -> float:
        """
        Returns the log odds that ``self`` (the proband) and ``candidate`` are
        the same person.

        Args:
            candidate: another :class:`Person` object

        Returns:
            float: the log odds they're the same person
        """
        # High speed function.
        return bayes_compare(
            log_odds=self.baseline_log_odds_same_person,
            comparisons=self._gen_comparisons(candidate),
        )

    # -------------------------------------------------------------------------
    # Comparison helper functions
    # -------------------------------------------------------------------------

    def _gen_comparisons(
        self, candidate: "Person"
    ) -> Generator[Optional[Comparison], None, None]:
        """
        Generates all relevant comparisons between ``self`` (the proband) and
        a candidate: surnames, forenames, DOB, gender, then postcodes.

        Args:
            candidate: another :class:`Person` object.

        **Note**

        In general, frequency information is associated with the proband,
        not the candidate, so use ``self.thing.comparison(candidate.thing)``.

        """
        # A perfect match would already have been tested for. The shortlisting
        # process may already have ensured a DOB partial match, or maybe not.
        # Regardless, there are no identifiers that will cause a complete
        # disqualification if they mismatch, so order here becomes unimportant
        # for speed.
        them = candidate

        # Surnames (unordered)
        yield from gen_best_comparisons(
            proband_identifiers=self.surnames,
            candidate_identifiers=them.surnames,
            ordered=False,
        )

        # Forenames (ordered)
        yield from gen_best_comparisons(
            proband_identifiers=self.forenames,
            candidate_identifiers=them.forenames,
            ordered=True,
            p_u=self.cfg.p_u_forename,
        )

        # DOB (see above)
        # There is no special treatment of 29 Feb (since this DOB is
        # approximately 4 times less common than other birthdays, in principle
        # it does merit special treatment, but we ignore that).
        dob_comparison = self.dob.comparison(them.dob)
        yield dob_comparison

        # Gender
        gender_comparison = self.gender.comparison(them.gender)
        yield gender_comparison

        # Postcodes (unordered)
        yield from gen_best_comparisons(
            proband_identifiers=self.postcodes,
            candidate_identifiers=them.postcodes,
            ordered=False,
        )

    # -------------------------------------------------------------------------
    # Info functions
    # -------------------------------------------------------------------------

[docs]    def is_plaintext(self) -> bool:
        """
        Is this a plaintext (identifiable) Person?
        """
        return self._is_plaintext

[docs]    def is_hashed(self) -> bool:
        """
        Is this a hashed (de-identified) Person?
        """
        return not self.is_plaintext()

[docs]    def n_forenames(self) -> int:
        """
        Number of forenames
        """
        return len(self.forenames)

[docs]    def has_dob(self) -> bool:
        """
        Do we have a DOB?
        """
        return bool(self.dob)

[docs]    def n_postcodes(self) -> int:
        """
        How many postcodes does this person have?
        """
        return len(self.postcodes)

    # -------------------------------------------------------------------------
    # Validation
    # -------------------------------------------------------------------------

[docs]    def ensure_valid_as_proband(self) -> None:
        """
        Ensures this person has sufficient information to act as a proband, or
        raises :exc:`ValueError`.

        We previously required a DOB unless debugging, but no longer.
        """
        for f in self.forenames:
            f.ensure_has_freq_info_if_id_present()
        for s in self.surnames:
            s.ensure_has_freq_info_if_id_present()
        self.dob.ensure_has_freq_info_if_id_present()
        self.gender.ensure_has_freq_info_if_id_present()
        for p in self.postcodes:
            p.ensure_has_freq_info_if_id_present()

[docs]    def ensure_valid_as_candidate(self) -> None:
        """
        Ensures this person has sufficient information to act as a candidate,
        or raises :exc:`AssertionError`.

        We previously required a DOB unless debugging, but no longer.
        """
        pass

    # -------------------------------------------------------------------------
    # Debugging functions to check this object
    # -------------------------------------------------------------------------

[docs]    def debug_gen_identifiers(self) -> Generator[Identifier, None, None]:
        """
        Yield all identifiers.
        """
        yield from self.forenames
        yield from self.surnames
        if self.dob:
            yield self.dob
        if self.gender:
            yield self.gender
        yield from self.postcodes

[docs]    def debug_comparison_report(
        self, candidate: "Person", verbose: bool = True
    ) -> str:
        """
        Compare a person with another, log every step of the way, and return
        the result as a string.
        """
        lines = []  # type: List[str]

        def report(msg_: str) -> None:
            lines.append(f"{msg_} -> log_odds = {log_odds}")

        if verbose:
            spacer = "  - "
            self_id = (
                "\n".join(
                    spacer + repr(i) for i in self.debug_gen_identifiers()
                )
                + "\n"
            )
            candidate_id = (
                "\n".join(
                    spacer + repr(i) for i in candidate.debug_gen_identifiers()
                )
                + "\n"
            )
        else:
            self_id = ""
            candidate_id = ""
        lines.append("VERBOSE COMPARISON:")
        lines.append(f"- self (proband) = {self}")
        lines.append(self_id)
        lines.append(f"- candidate      = {candidate}")
        lines.append(candidate_id)
        lines.append(f"- self dict      = {self.as_dict(hashed=False)}")
        lines.append(self_id)
        lines.append(f"- candidate dict = {candidate.as_dict(hashed=False)}")
        lines.append(candidate_id)

        log_odds = self.cfg.baseline_log_odds_same_person
        report("Baseline")
        for comp in self._gen_comparisons(candidate=candidate):
            if not comp:
                continue
            log_odds = comp.posterior_log_odds(log_odds)
            report(str(comp))

        return "\n".join(filter(None, lines))

[docs]    def debug_compare(self, candidate: "Person", verbose: bool = True) -> None:
        """
        Compare a person with another, and log every step of the way.
        """
        log.info(self.debug_comparison_report(candidate, verbose=verbose))

    # -------------------------------------------------------------------------
    # Debugging functions to mutate this object
    # -------------------------------------------------------------------------

[docs]    def debug_delete_something(self) -> None:
        """
        Randomly delete one of: a forename, or a postcode.
        """
        n_forenames = self.n_forenames()
        n_postcodes = self.n_postcodes()
        n_possibilities = n_forenames + n_postcodes
        if n_possibilities == 0:
            log.warning(f"Unable to delete info from {self}")
            return
        which = random.randint(0, n_possibilities - 1)

        if which < n_forenames:
            del self.forenames[which]
            return
        which -= n_forenames

        del self.postcodes[which]

[docs]    def debug_mutate_something(self) -> None:
        """
        Randomly mutate one of: a forename, or a postcode.
        """
        n_forenames = self.n_forenames()
        n_postcodes = self.n_postcodes()
        n_possibilities = n_forenames + n_postcodes
        if n_possibilities == 0:
            log.warning(f"Unable to mutate info from {self}")
            return
        which = random.randrange(n_possibilities)

        cfg = self.cfg

        if which < n_forenames:
            oldname = self.forenames[which]
            assert oldname.is_plaintext
            self.forenames[which] = Forename(
                cfg, name=mutate_name(oldname.name), gender=oldname.gender
            )
            return
        which -= n_forenames

        oldpostcode = self.postcodes[which]
        assert oldpostcode.is_plaintext
        self.postcodes[which] = Postcode(
            cfg, postcode=mutate_postcode(oldpostcode.postcode_unit, cfg)
        )