r"""
crate_anon/nlp_webserver/models.py
===============================================================================
Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
This file is part of CRATE.
CRATE is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CRATE is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CRATE. If not, see <https://www.gnu.org/licenses/>.
===============================================================================
SQLAlchemy models for CRATE's implementation of an NLPRP server.
"""
import datetime
from typing import List, Optional
import uuid
from cardinal_pythonlib.datetimefunc import coerce_to_pendulum
from pendulum import DateTime as Pendulum
from sqlalchemy import Column, Text, VARCHAR, Boolean, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import deferred, relationship, scoped_session, sessionmaker
from sqlalchemy.sql.schema import ForeignKey
# noinspection PyPackageRequirements
from zope.sqlalchemy import register
# =============================================================================
# SQLAlchemy setup
# =============================================================================
# Older form, kept for reference (presumably superseded by the register()
# call below -- TODO confirm):
# Session = sessionmaker(extension=ZopeTransactionExtension())
Session = sessionmaker()  # session factory; no engine is bound at this point
register(Session)  # apply the zope.sqlalchemy hook to the session factory
dbsession = scoped_session(Session)  # scoped-session registry over Session
Base = declarative_base()  # declarative base class for the models below
# =============================================================================
# Constants
# =============================================================================
# Length of the string form of a UUID (32 hex digits plus 4 hyphens); see
# make_unique_id().
UUID64_LEN = 36

# These identifiers are UUID strings, so they share the UUID length.
MAX_DOC_ID_LEN = MAX_DOCPROC_ID_LEN = MAX_QUEUE_ID_LEN = UUID64_LEN

MAX_JOB_ID_LEN = 255  # specified by client
MAX_USERNAME_LEN = 255  # arbitrary
# e.g. Python fully-qualified name, underscore, version
MAX_PROCESSOR_ID_LEN = 255
# =============================================================================
# Helper functions
# =============================================================================
def make_unique_id() -> str:
    """
    Generates a random unique ID for labelling objects, via :func:`uuid.uuid4`.

    They look like '79cc4bac-6e8b-4ac6-bbd9-a65b5e1d1e29' (that is, lower-case
    hex with format 8-4-4-4-12, so 32 hex characters and an overall length of
    36 including the hyphens).

    A version-4 UUID fixes 6 of its 128 bits (version and variant), so the
    space of possible IDs is 2^122, approximately 5.3e36. See
    https://docs.python.org/3/library/uuid.html.

    Returns:
        the string form of a freshly generated version-4 UUID
    """
    return str(uuid.uuid4())
# =============================================================================
# Model classes
# =============================================================================
def _utcnow_naive() -> datetime.datetime:
    """
    Returns the current time in UTC as a timezone-naive ``datetime``.

    This yields the same kind of value as ``datetime.datetime.utcnow()``,
    which is deprecated from Python 3.12, without calling the deprecated
    function.
    """
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)


class Document(Base):
    """
    Represents a user-submitted document for processing. (A single document
    may be processed by multiple processors.)
    """

    __tablename__ = "documents"

    document_id = Column(
        "document_id",
        VARCHAR(MAX_DOC_ID_LEN),
        primary_key=True,
        comment="Primary key (UUID) for the document",
    )  # type: str
    # Deferred: the text is only loaded when the attribute is accessed.
    doctext = deferred(
        Column("doctext", Text, comment="Text contents of the document")
    )  # type: Optional[str]
    client_job_id = Column(
        "client_job_id",
        VARCHAR(MAX_JOB_ID_LEN),
        comment="Client job ID (supplied by the client)",
        index=True,
    )  # type: Optional[str]
    queue_id = Column(
        "queue_id",
        VARCHAR(MAX_QUEUE_ID_LEN),
        comment="The UUID of the client request, if in queued mode",
        index=True,
    )  # type: Optional[str]
    username = Column(
        "username",
        VARCHAR(MAX_USERNAME_LEN),
        comment="Username that submitted this document",
        nullable=False,
        index=True,
    )  # type: str
    # Deferred, like doctext.
    client_metadata = deferred(
        Column(
            "client_metadata", Text, comment="Metadata submitted by the client"
        )
    )  # type: Optional[str]
    include_text = Column(
        "include_text",
        Boolean,
        nullable=False,
        default=False,
        comment="Include the source text in the reply?",
    )  # type: bool
    datetime_submitted_utc = Column(
        "datetime_submitted_utc",
        DateTime,
        nullable=False,
        # Is the following OK, given that it's not exactly when it was
        # submitted?
        default=_utcnow_naive,
        comment="Date/time when the request was submitted (in UTC)",
    )  # type: datetime.datetime
    docprocrequests = relationship(
        "DocProcRequest",
        cascade="all, delete-orphan",
        passive_deletes=True,
        back_populates="document",
        lazy="select",
        # https://docs.sqlalchemy.org/en/13/orm/collections.html#using-passive-deletes  # noqa
    )  # type: List[DocProcRequest]

    @property
    def datetime_submitted_pendulum(self) -> Optional[Pendulum]:
        """
        Returns :attr:`datetime_submitted_utc` converted by
        :func:`coerce_to_pendulum`, called with ``assume_local=False``
        (presumably so that the naive stored value is not interpreted as
        local time -- confirm against ``cardinal_pythonlib``).
        """
        return coerce_to_pendulum(
            self.datetime_submitted_utc, assume_local=False
        )
class DocProcRequest(Base):
    """
    SQLAlchemy table recording processor requests for a given document (that
    is, document/processor pairs).
    """

    __tablename__ = "docprocrequests"

    docprocrequest_id = Column(
        "docprocrequest_id",
        VARCHAR(MAX_DOCPROC_ID_LEN),
        primary_key=True,
        comment="Primary key (UUID) for the document/processor pair; also "
        "used as the Celery task ID",
    )  # type: str
    document_id = Column(
        "document_id",
        VARCHAR(MAX_DOC_ID_LEN),
        ForeignKey("documents.document_id", ondelete="CASCADE"),
        # ... delete DocProcRequests when their Documents are deleted
        # ... https://stackoverflow.com/questions/5033547/sqlalchemy-cascade-delete  # noqa
        # ... https://docs.sqlalchemy.org/en/13/orm/collections.html#using-passive-deletes  # noqa
        nullable=False,
        comment="Document ID (FK to documents.document_id)",
    )  # type: str
    processor_id = Column(
        "processor_id",
        VARCHAR(MAX_PROCESSOR_ID_LEN),
        nullable=False,
        comment="Processor ID, in '<name>_<version>' format",
    )  # type: str
    done = Column(
        "done",
        Boolean,
        nullable=False,
        default=False,
        comment="Has the task associated with this request been completed?",
    )  # type: bool
    when_done_utc = Column(
        "when_done_utc",
        DateTime,
        default=None,
        comment="Date/time when the request was completed (in UTC)",
    )  # type: Optional[datetime.datetime]
    # Deferred: the results are only loaded when the attribute is accessed.
    results = deferred(
        Column("results", Text, comment="Results (as JSON)")
    )  # type: Optional[str]
    document = relationship(
        "Document", back_populates="docprocrequests", lazy="select"
    )  # type: Document

    @property
    def doctext(self) -> Optional[str]:
        """
        Returns the text of the document that this request refers to, read
        from the related ``Document`` object.
        """
        return self.document.doctext