"""
crate_anon/nlp_manager/cloud_config.py
===============================================================================
Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
This file is part of CRATE.
CRATE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CRATE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CRATE. If not, see <https://www.gnu.org/licenses/>.
===============================================================================
Config object used for cloud NLP requests.
"""
import logging
import os
from typing import TYPE_CHECKING, Dict, Tuple
from crate_anon.nlp_manager.constants import (
CloudNlpConfigKeys,
NlpDefValues,
DEFAULT_CLOUD_LIMIT_BEFORE_COMMIT,
DEFAULT_CLOUD_MAX_CONTENT_LENGTH,
DEFAULT_CLOUD_MAX_RECORDS_PER_REQUEST,
DEFAULT_CLOUD_MAX_TRIES,
DEFAULT_CLOUD_RATE_LIMIT_HZ,
DEFAULT_CLOUD_WAIT_ON_CONN_ERR_S,
full_sectionname,
NlpConfigPrefixes,
)
if TYPE_CHECKING:
    # Needed only for type annotations; this branch is skipped at runtime.
    from crate_anon.nlp_manager.nlp_definition import NlpDefinition

# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# =============================================================================
# CloudConfig
# =============================================================================
class CloudConfig:
    """
    Common config object for cloud NLP.

    Holds the settings read from one cloud NLP section of the NLP
    definition's config file, plus the subset of the NLP definition's
    processors that are cloud processors.
    """

    def __init__(
        self, nlpdef: "NlpDefinition", name: str, req_data_dir: str
    ) -> None:
        """
        Reads the config from the NLP definition's config file.

        Args:
            nlpdef:
                a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
            name:
                name for the cloud NLP configuration (to which a standard
                prefix will be added to get the config section name)
            req_data_dir:
                directory in which to store temporary request files
        """
        from crate_anon.nlp_manager.cloud_parser import (
            Cloud,
        )  # delayed import

        self._nlpdef = nlpdef
        self.req_data_dir = req_data_dir

        # Config section name is the standard cloud prefix combined with
        # the caller-supplied name.
        cfg = nlpdef.get_config_section(
            full_sectionname(NlpConfigPrefixes.CLOUD, name)
        )

        # The URL is the only mandatory setting; everything else has a
        # default.
        self.url = cfg.opt_str(CloudNlpConfigKeys.CLOUD_URL, required=True)
        self.verify_ssl = cfg.opt_bool(CloudNlpConfigKeys.VERIFY_SSL, True)
        self.compress = cfg.opt_bool(CloudNlpConfigKeys.COMPRESS, True)
        self.username = cfg.opt_str(CloudNlpConfigKeys.USERNAME, default="")
        self.password = cfg.opt_str(CloudNlpConfigKeys.PASSWORD, default="")

        # Integer settings, defaulting to the DEFAULT_CLOUD_* constants.
        self.max_content_length = cfg.opt_int(
            CloudNlpConfigKeys.MAX_CONTENT_LENGTH,
            DEFAULT_CLOUD_MAX_CONTENT_LENGTH,
        )
        self.limit_before_commit = cfg.opt_int(
            CloudNlpConfigKeys.LIMIT_BEFORE_COMMIT,
            DEFAULT_CLOUD_LIMIT_BEFORE_COMMIT,
        )
        self.max_records_per_request = cfg.opt_int(
            CloudNlpConfigKeys.MAX_RECORDS_PER_REQUEST,
            DEFAULT_CLOUD_MAX_RECORDS_PER_REQUEST,
        )
        self.stop_at_failure = cfg.opt_bool(
            CloudNlpConfigKeys.STOP_AT_FAILURE, True
        )
        self.wait_on_conn_err = cfg.opt_int(
            CloudNlpConfigKeys.WAIT_ON_CONN_ERR,
            DEFAULT_CLOUD_WAIT_ON_CONN_ERR_S,
        )
        self.max_tries = cfg.opt_int(
            CloudNlpConfigKeys.MAX_TRIES, DEFAULT_CLOUD_MAX_TRIES
        )
        self.rate_limit_hz = cfg.opt_int(
            CloudNlpConfigKeys.RATE_LIMIT_HZ, DEFAULT_CLOUD_RATE_LIMIT_HZ
        )
        self.test_length_function_speed = cfg.opt_bool(
            CloudNlpConfigKeys.TEST_LENGTH_FUNCTION_SPEED, True
        )

        # Maps (procname, procversion) -> processor, for cloud processors
        # only; any other processor type is logged and skipped.
        self.remote_processors = {}  # type: Dict[Tuple[str, str], 'Cloud']
        for processor in self._nlpdef.processors:
            if not isinstance(processor, Cloud):
                # ... only add 'Cloud' processors
                log.warning(
                    f"Skipping NLP processor of non-cloud (e.g. local) "
                    f"type: {processor.friendly_name}"
                )
                continue
            self.remote_processors[
                (processor.procname, processor.procversion)
            ] = processor
            # NOTE: KEY IS A TUPLE!

        # We need the following in order to decide whether to ask to include
        # text in reply - if a processor is GATE we need to, as it does not
        # send back the content of the nlp snippet
        self.has_gate_processors = any(
            (x.format == NlpDefValues.FORMAT_GATE)
            for x in self.remote_processors.values()
        )

    @property
    def data_filename(self) -> str:
        """
        Returns the filename to be used for storing data.

        This is the absolute path of ``request_data_<nlpname>.csv`` inside
        ``req_data_dir``, where ``<nlpname>`` is the NLP definition's name.
        """
        nlpname = self._nlpdef.name
        return os.path.abspath(
            os.path.join(self.req_data_dir, f"request_data_{nlpname}.csv")
        )