#!/usr/bin/env python
"""
crate_anon/anonymise/summarize_dd.py
===============================================================================
Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
This file is part of CRATE.
CRATE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CRATE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CRATE. If not, see <https://www.gnu.org/licenses/>.
===============================================================================
**Summarize an anonymisation data dictionary.**
"""
import argparse
from dataclasses import astuple, fields
import logging
import os
from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger
from rich_argparse import ArgumentDefaultsRichHelpFormatter
from crate_anon.anonymise.config import Config
from crate_anon.anonymise.constants import ANON_CONFIG_ENV_VAR
from crate_anon.anonymise.dd import DDTableSummary
from crate_anon.common.spreadsheet import write_spreadsheet
from crate_anon.version import CRATE_VERSION_PRETTY
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# =============================================================================
# Summarize a data dictionary
# =============================================================================
def summarize_dd(config: Config, output_filename: str) -> None:
    """
    Write a summary report of a data dictionary, one row per table.

    The data dictionary is loaded through the config object (without checking
    it against the source database). The report starts with a header row
    built from the field names of ``DDTableSummary``, followed by one row for
    each table summary the data dictionary provides. Everything is written as
    a single sheet named ``data_dictionary_report``.

    Args:
        config:
            Anonymisation config object.
        output_filename:
            File for output ('-' for stdout).
    """
    config.load_dd(check_against_source_db=False)
    data_dictionary = config.dd

    # Header: the dataclass field names, in declaration order.
    column_names = tuple(field.name for field in fields(DDTableSummary))

    # Body: each per-table summary flattened to a plain tuple.
    report_rows = [column_names]
    for table_summary in data_dictionary.get_summary_info_all_tables():
        report_rows.append(astuple(table_summary))

    write_spreadsheet(
        output_filename, {"data_dictionary_report": report_rows}
    )
# =============================================================================
# Main
# =============================================================================
def main() -> None:
    """
    Command-line entry point.

    Parses the command-line arguments, sets up root logging at DEBUG or INFO
    level, optionally overrides the config-file environment variable, and
    then writes the data dictionary summary to the requested output.
    """
    description = (
        f"Summarize a data dictionary for the anonymiser. The "
        f"resulting spreadsheet-style report has one row per source table. "
        f"({CRATE_VERSION_PRETTY})"
    )
    config_help = (
        f"Config file (overriding environment variable "
        f"{ANON_CONFIG_ENV_VAR})."
    )

    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument("--config", help=config_help)
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Be verbose"
    )
    parser.add_argument(
        "--output", default="-", help="File for output; use '-' for stdout."
    )
    args = parser.parse_args()

    # -------------------------------------------------------------------------
    # Verbosity, logging
    # -------------------------------------------------------------------------
    if args.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    main_only_quicksetup_rootlogger(level=loglevel)

    # -------------------------------------------------------------------------
    # Onwards
    # -------------------------------------------------------------------------
    # A config file given on the command line takes precedence: it replaces
    # whatever the environment variable previously held.
    if args.config:
        os.environ[ANON_CONFIG_ENV_VAR] = args.config

    # Deliberately imported late, after the environment variable may have
    # been overridden above (presumably the singleton reads it on import).
    from crate_anon.anonymise.config_singleton import config  # delayed import

    summarize_dd(config, args.output)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()