r"""
crate_anon/nlp_webserver/views.py

===============================================================================

    Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
    Created by Rudolf Cardinal (rnc1001@cam.ac.uk).

    This file is part of CRATE.

    CRATE is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CRATE is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CRATE. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

Pyramid views making up the CRATE NLPRP web server.

"""

from contextlib import contextmanager
import datetime
import json
import logging
from typing import Any, Dict, Generator, List, Optional, Tuple

import redis
from cardinal_pythonlib.httpconst import HttpStatus
from cardinal_pythonlib.json.typing_helpers import (
    JsonArrayType,
    JsonObjectType,
    JsonValueType,
)
from cardinal_pythonlib.sqlalchemy.core_query import fetch_all_first_values
from celery.result import AsyncResult, ResultSet
from pyramid.view import view_config, view_defaults
from pyramid.request import Request
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.sql.expression import and_, ClauseElement, select
import transaction

from crate_anon.common.constants import JSON_SEPARATORS_COMPACT
from crate_anon.nlp_webserver.security import (
    check_password,
    get_auth_credentials,
    encrypt_password,
)

# from crate_anon.common.profiling import do_cprofile
from crate_anon.nlprp.api import (
    json_get_array,
    json_get_array_of_str,
    json_get_bool,
    json_get_str,
    json_get_toplevel_args,
    json_get_value,
    pendulum_to_nlprp_datetime,
)
from crate_anon.nlprp.constants import (
    NlprpCommands,
    NlprpKeys as NKeys,
    NlprpValues,
)
from crate_anon.nlprp.errors import (
    BAD_REQUEST,
    INTERNAL_SERVER_ERROR,
    key_missing_error,
    NlprpError,
    mkerror,
    NOT_FOUND,
    UNAUTHORIZED,
)
from crate_anon.nlprp.version import NLPRP_VERSION_STRING
from crate_anon.nlp_webserver.manage_users import get_users
from crate_anon.nlp_webserver.models import (
    dbsession,
    Document,
    DocProcRequest,
    make_unique_id,
)
from crate_anon.nlp_webserver.server_processor import ServerProcessor
from crate_anon.nlp_webserver.constants import (
    SERVER_NAME,
    SERVER_VERSION,
    NlpServerConfigKeys,
)
from crate_anon.nlp_webserver.tasks import (
    celery_app,
    process_nlp_text,
    process_nlp_text_immediate,
    TaskSession,
    start_task_session,
)
from crate_anon.nlp_webserver.settings import SETTINGS

log = logging.getLogger(__name__)


# =============================================================================
# Debugging settings
# =============================================================================

DEBUG_SHOW_REQUESTS = False


if DEBUG_SHOW_REQUESTS:
    log.warning("Debugging options enabled! Turn off for production.")


# =============================================================================
# Constants
# =============================================================================

COOKIE_SESSION_TOKEN = "session_token"

DEFAULT_REDIS_HOST = "localhost"
DEFAULT_REDIS_PORT = 6379  # https://redis.io/topics/quickstart
DEFAULT_REDIS_DB_NUMBER = 0  # https://redis.io/commands/select

REDIS_HOST = SETTINGS.get(NlpServerConfigKeys.REDIS_HOST, DEFAULT_REDIS_HOST)
REDIS_PORT = SETTINGS.get(NlpServerConfigKeys.REDIS_PORT, DEFAULT_REDIS_PORT)
REDIS_DB_NUMBER = SETTINGS.get(
    NlpServerConfigKeys.REDIS_DB_NUMBER, DEFAULT_REDIS_DB_NUMBER
)
REDIS_PASSWORD = SETTINGS.get(NlpServerConfigKeys.REDIS_PASSWORD, None)
# If the redis server doesn't require a password, it's fine to pass
# 'password=None' to StrictRedis.

REDIS_SESSIONS = redis.StrictRedis(
    host=REDIS_HOST,
    port=REDIS_PORT,
    db=REDIS_DB_NUMBER,
    password=REDIS_PASSWORD,
)

SESSION_TOKEN_EXPIRY_S = 300
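
# For orientation: session tokens are held in Redis, keyed by username, and
# expire after SESSION_TOKEN_EXPIRY_S seconds (300 s = 5 min). The lifecycle,
# in brief (an illustrative sketch; the real logic is in
# NlpWebViews._authenticate() and NlpWebViews.check_token() below):
#
#   token = make_unique_id()
#   REDIS_SESSIONS.set(username, token)
#   REDIS_SESSIONS.expire(username, SESSION_TOKEN_EXPIRY_S)
#   # ... the client then presents the token via the "session_token" cookie,
#   # and the server compares it with the stored copy.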


# =============================================================================
# SQLAlchemy context
# =============================================================================


@contextmanager
def sqla_transaction_commit() -> Generator[None, None, None]:
    """
    Context manager to wrap database writes: commits the transaction on
    success; on SQLAlchemy failure, rolls the session back and raises
    INTERNAL_SERVER_ERROR to the client.
    """
    try:
        yield
        transaction.commit()
    except SQLAlchemyError as e:
        log.critical(f"SQLAlchemy error: {e}")
        dbsession.rollback()
        raise INTERNAL_SERVER_ERROR
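
# Usage sketch for sqla_transaction_commit() (mirroring its use below, e.g.
# in fetch_from_queue() and delete_from_queue()):
#
#   with sqla_transaction_commit():
#       q_docs.delete(synchronize_session=False)
#   # On SQLAlchemyError, the session is rolled back and the client receives
#   # an internal server error instead.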


# =============================================================================
# NlprpProcessRequest
# =============================================================================


class NlprpProcessRequest:
    """
    Represents an NLPRP :ref:`process <nlprp_process>` command. Takes the
    request JSON, and offers efficient views on it.

    Uses the global
    :class:`crate_anon.nlp_webserver.server_processor.ServerProcessor` class
    to find processors.
    """

    def __init__(self, nlprp_request: JsonObjectType) -> None:
        """
        Args:
            nlprp_request: dictionary from the (entire) JSON NLPRP request

        Raises:
            :exc:`NlprpError` for malformed requests
        """
        self.nlprp_request = nlprp_request

        args = json_get_toplevel_args(nlprp_request)

        # The processors being requested. We fetch all of them now, so they
        # can be iterated through fast for each document.
        requested_processors = json_get_array(
            args, NKeys.PROCESSORS, required=True
        )
        self.processors = [
            ServerProcessor.get_processor_nlprp(d)
            for d in requested_processors
        ]

        # Queue?
        self.queue = json_get_bool(args, NKeys.QUEUE, default=False)

        # Client job ID
        self.client_job_id = json_get_str(
            args, NKeys.CLIENT_JOB_ID, default=""
        )

        # Include the source text in the reply?
        self.include_text = json_get_bool(args, NKeys.INCLUDE_TEXT)

        # Content: list of objects (each with text and metadata)
        self.content = json_get_array(args, NKeys.CONTENT, required=True)

    def processor_ids(self) -> List[str]:
        """
        Returns the IDs of all processors.
        """
        return [p.processor_id for p in self.processors]

    def processor_ids_jsonstr(self) -> str:
        """
        Returns the IDs of all processors, as a single string encoding a
        JSON array of IDs.
        """
        return json.dumps(
            self.processor_ids(), separators=JSON_SEPARATORS_COMPACT
        )

    def gen_text_metadataobj(
        self,
    ) -> Generator[Tuple[str, JsonValueType], None, None]:
        """
        Generates text and metadata pairs from the request, with the metadata
        in JSON object (Python dictionary) format.

        Yields:
            tuple: ``(text, metadata)``, as above
        """
        for document in self.content:
            text = json_get_str(document, NKeys.TEXT, required=True)
            metadata = json_get_value(
                document, NKeys.METADATA, default=None, required=False
            )
            yield text, metadata

    def gen_text_metadatastr(
        self,
    ) -> Generator[Tuple[str, str], None, None]:
        """
        Generates text and metadata pairs from the request, with the metadata
        in string (serialized JSON) format.

        Yields:
            tuple: ``(text, metadata)``, as above
        """
        try:
            for document in self.content:
                text = json_get_str(document, NKeys.TEXT, required=True)
                metadata = json_get_value(
                    document, NKeys.METADATA, default=None, required=False
                )
                metadata_str = json.dumps(
                    metadata, separators=JSON_SEPARATORS_COMPACT
                )
                yield text, metadata_str
        except KeyError:
            raise key_missing_error(key=NKeys.TEXT)
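
# For orientation, a minimal NLPRP "process" request, of the kind parsed by
# NlprpProcessRequest, might look like this. (A hand-written sketch: the
# field values are illustrative, not from a real request.)
#
#   {
#       "protocol": {"name": "nlprp", "version": "..."},
#       "command": "process",
#       "args": {
#           "processors": [{"name": "...", "version": "..."}],
#           "queue": false,
#           "client_job_id": "my_job",
#           "include_text": false,
#           "content": [
#               {"metadata": {"my_id": 1}, "text": "..."}
#           ]
#       }
#   }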


# =============================================================================
# NlpWebViews
# =============================================================================

@view_defaults(renderer="json")  # all views can now return JsonObjectType
class NlpWebViews:
    """
    Class to provide HTTP views (via Pyramid) for our NLPRP server.
    """

    # -------------------------------------------------------------------------
    # Constructor
    # -------------------------------------------------------------------------

    def __init__(self, request: Request) -> None:
        """
        Args:
            request: a :class:`pyramid.request.Request`
        """
        self.request = request
        # Assign this later, so we can return an error to the client if
        # there's a problem:
        self.body = None  # type: Optional[JsonObjectType]
        # Get the username and password:
        self.credentials = get_auth_credentials(self.request)
        # Assign these later, after authentication:
        self.username = None  # type: Optional[str]
        self.password = None  # type: Optional[str]
        # Start database sessions:
        dbsession()
        start_task_session()

    # -------------------------------------------------------------------------
    # Responses and errors
    # -------------------------------------------------------------------------

    def set_http_response_status(self, status: int) -> None:
        """
        Sets the HTTP status code for our response.

        Args:
            status: HTTP status code
        """
        self.request.response.status = status

    def create_response(
        self, status: int, extra_info: Optional[JsonObjectType] = None
    ) -> JsonObjectType:
        """
        Returns a JSON HTTP response with some standard information for a
        given HTTP status, plus any extra information to add to the response.
        Ensures that the HTTP status matches the NLPRP JSON status.
        """
        # Put the status in the HTTP header
        self.set_http_response_status(status)
        response_dict = {
            NKeys.STATUS: status,
            NKeys.PROTOCOL: {
                NKeys.NAME: NlprpValues.NLPRP_PROTOCOL_NAME,
                NKeys.VERSION: NLPRP_VERSION_STRING,
            },
            NKeys.SERVER_INFO: {
                NKeys.NAME: SERVER_NAME,
                NKeys.VERSION: SERVER_VERSION,
            },
        }
        if extra_info is not None:
            response_dict.update(extra_info)
        # Tidy up the per-request database sessions:
        dbsession.remove()
        TaskSession.remove()
        return response_dict

    def create_error_response(self, error: NlprpError) -> JsonObjectType:
        """
        Returns an HTTP response for a given error, including a description
        of that error.
        """
        # 'errors' is an array. Should this allow for multiple errors?
        error_info = {
            NKeys.ERRORS: [
                {
                    NKeys.CODE: error.code,
                    NKeys.MESSAGE: error.message,
                    NKeys.DESCRIPTION: error.description,
                }
            ]
        }
        return self.create_response(error.http_status, error_info)
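
    # A complete error reply therefore has this shape (a sketch assembled
    # from create_response() and create_error_response() above; the values
    # shown are illustrative):
    #
    #   {
    #       "status": 401,
    #       "protocol": {"name": "nlprp", "version": "..."},
    #       "server_info": {"name": "...", "version": "..."},
    #       "errors": [
    #           {"code": 401, "message": "...", "description": "..."}
    #       ]
    #   }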

    # -------------------------------------------------------------------------
    # Security
    # -------------------------------------------------------------------------

    def check_token(self) -> bool:
        """
        Checks whether the user has supplied the correct token for the
        current session associated with their username.
        """
        try:
            redis_token = REDIS_SESSIONS.get(self.username)
        except redis.exceptions.ConnectionError:
            log.critical(
                f"Could not connect to Redis (host={REDIS_HOST!r}, "
                f"port={REDIS_PORT!r}, password not shown)"
            )
            raise
        if redis_token:
            redis_token = redis_token.decode()
        token = self.request.cookies.get(COOKIE_SESSION_TOKEN)
        if token and token == redis_token:
            return True
        else:
            return False

    # -------------------------------------------------------------------------
    # Main view
    # -------------------------------------------------------------------------

    # @do_cprofile
    @view_config(route_name="index")
    def index(self) -> JsonObjectType:
        """
        The top-level "index" view. Passes all the work to
        :meth:`handle_nlprp_request`, except for error handling.
        """
        try:
            return self.handle_nlprp_request()
        except NlprpError as error:
            return self.create_error_response(error)

    def _authenticate(self) -> None:
        """
        Authenticates the user, or raises an :exc:`NlprpError`.
        """
        if self.credentials is None:
            raise mkerror(
                BAD_REQUEST,
                "Credentials were absent or not in the correct format",
            )
        # See if the user exists
        users = get_users()
        self.username = self.credentials.username
        try:
            hashed_pw = users[self.username]
        except KeyError:
            raise UNAUTHORIZED
        # Check if the password is correct
        pw = self.credentials.password
        if self.check_token():
            self.password = pw
        elif check_password(pw, hashed_pw):
            self.password = pw
            token = make_unique_id()
            self.request.response.set_cookie(
                name=COOKIE_SESSION_TOKEN, value=token
            )
            REDIS_SESSIONS.set(self.username, token)
            REDIS_SESSIONS.expire(self.username, SESSION_TOKEN_EXPIRY_S)
        else:
            raise UNAUTHORIZED

    def _set_body_json_from_request(self) -> None:
        """
        Gets JSON from the request if it is in that form; otherwise, raises
        an :exc:`NlprpError`.
        """
        try:
            body = self.request.json
            assert isinstance(body, dict)
        except (json.decoder.JSONDecodeError, AssertionError):
            raise mkerror(
                BAD_REQUEST,
                "Request body was absent or not in JSON object format",
            )
        self.body = body  # type: JsonObjectType

    def handle_nlprp_request(self) -> JsonObjectType:
        """
        The main function. Authenticates the user, checks that the request
        is not malformed, then calls the function relating to the command
        specified by the user.
        """
        self._authenticate()
        self._set_body_json_from_request()
        command = json_get_str(self.body, NKeys.COMMAND, required=True)
        log.debug(
            f"NLPRP request received from {self.request.remote_addr}: "
            f"username={self.username}, command={command}"
        )
        if DEBUG_SHOW_REQUESTS:
            log.debug(f"Request: {self.body!r}")
        return self.parse_command(command)

    def parse_command(self, command: str) -> JsonObjectType:
        """
        Parses the NLPRP command and dispatches to the relevant handler.
        """
        if command == NlprpCommands.LIST_PROCESSORS:
            return self.list_processors()
        elif command == NlprpCommands.PROCESS:
            process_request = NlprpProcessRequest(self.body)
            if process_request.queue:
                return self.put_in_queue(process_request)
            else:
                return self.process_now(process_request)
        elif command == NlprpCommands.SHOW_QUEUE:
            return self.show_queue()
        elif command == NlprpCommands.FETCH_FROM_QUEUE:
            return self.fetch_from_queue()
        elif command == NlprpCommands.DELETE_FROM_QUEUE:
            return self.delete_from_queue()
        else:
            # Report an unknown command explicitly, rather than falling
            # through and returning null.
            raise mkerror(BAD_REQUEST, f"Unknown command: {command!r}")

    # -------------------------------------------------------------------------
    # NLPRP command handlers
    # -------------------------------------------------------------------------

    def list_processors(self) -> JsonObjectType:
        """
        Returns an HTTP response listing the available NLP processors.
        """
        return self.create_response(
            status=HttpStatus.OK,
            extra_info={
                NKeys.PROCESSORS: [
                    proc.infodict
                    for proc in ServerProcessor.processors.values()
                ]
            },
        )

    def process_now(
        self, process_request: NlprpProcessRequest
    ) -> JsonObjectType:
        """
        Processes the text supplied by the user immediately, without putting
        it in the queue.

        Args:
            process_request: a :class:`NlprpProcessRequest`
        """
        results = []  # type: JsonArrayType
        for text, metadata in process_request.gen_text_metadataobj():
            processor_data = []  # type: JsonArrayType
            for processor in process_request.processors:
                # Send the text off for processing
                procresult = process_nlp_text_immediate(
                    text=text,
                    processor=processor,
                    username=self.username,
                    password=self.password,
                )
                if procresult[NKeys.NAME] is None:
                    procresult[NKeys.NAME] = processor.name
                    procresult[NKeys.TITLE] = processor.title
                    procresult[NKeys.VERSION] = processor.version
                processor_data.append(procresult)
            doc_result = {
                NKeys.METADATA: metadata,
                NKeys.PROCESSORS: processor_data,
            }
            if process_request.include_text:
                doc_result[NKeys.TEXT] = text
            results.append(doc_result)
        response_info = {
            NKeys.CLIENT_JOB_ID: process_request.client_job_id,
            NKeys.RESULTS: results,
        }
        return self.create_response(
            status=HttpStatus.OK, extra_info=response_info
        )

    def put_in_queue(
        self, process_request: NlprpProcessRequest
    ) -> JsonObjectType:
        """
        Puts the document/processor pairs specified by the user into a Celery
        queue to be processed.

        Args:
            process_request: a :class:`NlprpProcessRequest`
        """
        # Generate a unique queue_id for the whole client request
        queue_id = make_unique_id()
        # Encrypt the password using reversible encryption, for passing to
        # the processors. We must pass the password to the task as a string,
        # because Celery won't let us pass a bytes object.
        crypt_pass = encrypt_password(self.password).decode()
        docprocrequest_ids = []  # type: List[str]
        with transaction.manager:  # one COMMIT for everything inside this
            # Iterate through documents...
            for doctext, metadata in process_request.gen_text_metadatastr():
                doc_id = make_unique_id()
                # PyCharm doesn't like the "deferred" columns, so:
                # noinspection PyArgumentList
                doc = Document(
                    document_id=doc_id,
                    doctext=doctext,
                    client_job_id=process_request.client_job_id,
                    queue_id=queue_id,
                    username=self.username,
                    client_metadata=metadata,
                    include_text=process_request.include_text,
                )
                dbsession.add(doc)  # add to database
                # Iterate through processors...
                for processor in process_request.processors:
                    # The combination of a document and a processor gives us
                    # a docproc.
                    docprocreq_id = make_unique_id()
                    docprocreq = DocProcRequest(
                        docprocrequest_id=docprocreq_id,
                        document_id=doc_id,
                        processor_id=processor.processor_id,
                    )
                    dbsession.add(docprocreq)  # add to database
                    docprocrequest_ids.append(docprocreq_id)
        # Now everything's in the database and committed, we can fire off
        # back-end jobs:
        for dpr_id in docprocrequest_ids:
            process_nlp_text.apply_async(
                # Unlike delay(), apply_async() lets us specify the task_id,
                # so the Celery task ID is the same as the DocProcRequest ID.
                args=(dpr_id,),  # docprocrequest_id
                kwargs=dict(username=self.username, crypt_pass=crypt_pass),
                task_id=dpr_id,  # for Celery
            )
        response_info = {NKeys.QUEUE_ID: queue_id}
        return self.create_response(
            status=HttpStatus.ACCEPTED, extra_info=response_info
        )
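
    # The queued round trip, in brief (a sketch of the flow implemented by
    # put_in_queue() above and fetch_from_queue() below; values illustrative):
    #
    #   1. "process" with args.queue = true
    #        -> 202 ACCEPTED, {"queue_id": "..."}
    #   2. "fetch_from_queue" with args.queue_id
    #        -> 202 ACCEPTED plus docproc counts, while work remains;
    #        -> 200 OK plus results once all docprocs are done (after which
    #           the server deletes the queue entry).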

    def fetch_from_queue(self) -> JsonObjectType:
        """
        Fetches results for all document/processor pairs for the queue_id
        supplied by the user, if processing is complete.
        """
        # ---------------------------------------------------------------------
        # Args
        # ---------------------------------------------------------------------
        args = json_get_toplevel_args(self.body)
        queue_id = json_get_str(args, NKeys.QUEUE_ID, required=True)
        # ---------------------------------------------------------------------
        # Start with the DocProcRequests, because if some are still busy,
        # we will return a "busy" response.
        # ---------------------------------------------------------------------
        q_dpr = (
            dbsession.query(DocProcRequest)
            .join(Document)
            .filter(Document.username == self.username)
            .filter(Document.queue_id == queue_id)
        )
        q_doc = (
            dbsession.query(Document)
            .filter(Document.username == self.username)
            .filter(Document.queue_id == queue_id)
        )
        dprs = q_dpr.all()  # type: List[DocProcRequest]
        if not dprs:
            raise mkerror(NOT_FOUND, "The queue_id given was not found")
        n = len(dprs)
        n_done = sum(dpr.done for dpr in dprs)
        busy = n_done < n
        if busy:
            return self.create_response(
                HttpStatus.ACCEPTED,
                {
                    NKeys.N_DOCPROCS: n,
                    NKeys.N_DOCPROCS_COMPLETED: n_done,
                },
            )
        # ---------------------------------------------------------------------
        # Make it easy to look up processors
        # ---------------------------------------------------------------------
        processor_cache = {}  # type: Dict[str, ServerProcessor]

        def get_processor_cached(_processor_id: str) -> ServerProcessor:
            """
            Cache lookups for speed. (All documents will share the same set
            of processors, so there'll be a fair bit of duplication.)
            """
            nonlocal processor_cache
            try:
                return processor_cache[_processor_id]
            except KeyError:
                _processor = ServerProcessor.get_processor_from_id(
                    _processor_id
                )  # may raise
                processor_cache[_processor_id] = _processor
                return _processor

        # ---------------------------------------------------------------------
        # Collect results by document
        # ---------------------------------------------------------------------
        doc_results = []  # type: JsonArrayType
        client_job_id = None  # type: Optional[str]
        docs = set(dpr.document for dpr in dprs)
        for doc in docs:
            if client_job_id is None:
                client_job_id = doc.client_job_id
            processor_data = []  # type: JsonArrayType
            # ... data for *all* the processors for this doc
            for dpr in doc.docprocrequests:
                procresult = json.loads(dpr.results)  # type: Dict[str, Any]
                if procresult[NKeys.NAME] is None:
                    processor = get_processor_cached(dpr.processor_id)
                    procresult[NKeys.NAME] = processor.name
                    procresult[NKeys.TITLE] = processor.title
                    procresult[NKeys.VERSION] = processor.version
                processor_data.append(procresult)
            metadata = json.loads(doc.client_metadata)
            doc_result = {
                NKeys.METADATA: metadata,
                NKeys.PROCESSORS: processor_data,
            }
            if doc.include_text:
                doc_result[NKeys.TEXT] = doc.doctext
            doc_results.append(doc_result)
        # ---------------------------------------------------------------------
        # Delete leftovers
        # ---------------------------------------------------------------------
        with sqla_transaction_commit():
            q_doc.delete(synchronize_session=False)
            # ... will also delete the DocProcRequests via a cascade
        response_info = {
            NKeys.CLIENT_JOB_ID: (
                client_job_id if client_job_id is not None else ""
            ),
            NKeys.RESULTS: doc_results,
        }
        return self.create_response(
            status=HttpStatus.OK, extra_info=response_info
        )

    # @do_cprofile
    def show_queue(self) -> JsonObjectType:
        """
        Finds the queue entries associated with the client, optionally
        restricted to one client job ID.
        """
        args = json_get_toplevel_args(self.body, required=False)
        if args:
            client_job_id = json_get_str(
                args, NKeys.CLIENT_JOB_ID, default="", required=False
            )
        else:
            client_job_id = ""
        # Queue IDs that are of interest
        queue_id_wheres = [
            Document.username == self.username
        ]  # type: List[ClauseElement]
        if client_job_id:
            queue_id_wheres.append(Document.client_job_id == client_job_id)
        # noinspection PyUnresolvedReferences
        queue_ids = fetch_all_first_values(
            dbsession,
            select([Document.queue_id])
            .select_from(Document.__table__)
            .where(and_(*queue_id_wheres))
            .distinct()
            .order_by(Document.queue_id),
        )  # type: List[str]
        queue_answer = []  # type: JsonArrayType
        for queue_id in queue_ids:
            # DocProcRequest objects that are of interest
            dprs = (
                dbsession.query(DocProcRequest)
                .join(Document)
                .filter(Document.queue_id == queue_id)
                .all()
            )  # type: List[DocProcRequest]
            # Check this before using dprs (e.g. max() on an empty sequence
            # would raise):
            assert dprs, "No DocProcRequests found; bug?"
            busy = not all(dpr.done for dpr in dprs)
            if busy:
                max_time = datetime.datetime.min
            else:
                max_time = max(dpr.when_done_utc for dpr in dprs)
            dt_submitted = dprs[0].document.datetime_submitted_pendulum
            queue_answer.append(
                {
                    NKeys.QUEUE_ID: queue_id,
                    NKeys.CLIENT_JOB_ID: client_job_id,
                    NKeys.STATUS: (
                        NlprpValues.BUSY if busy else NlprpValues.READY
                    ),
                    NKeys.DATETIME_SUBMITTED: pendulum_to_nlprp_datetime(
                        dt_submitted, to_utc=True
                    ),
                    NKeys.DATETIME_COMPLETED: (
                        None
                        if busy
                        else pendulum_to_nlprp_datetime(max_time, to_utc=True)
                    ),
                }
            )
        return self.create_response(
            status=HttpStatus.OK, extra_info={NKeys.QUEUE: queue_answer}
        )

    def delete_from_queue(self) -> JsonObjectType:
        """
        Deletes from the queue all entries specified by the client.
        """
        args = json_get_toplevel_args(self.body)
        delete_all = json_get_bool(args, NKeys.DELETE_ALL, default=False)
        client_job_ids = json_get_array_of_str(args, NKeys.CLIENT_JOB_IDS)
        # Establish what to cancel/delete
        q_dpr = (
            dbsession.query(DocProcRequest)
            .join(Document)
            .filter(Document.username == self.username)
        )
        if not delete_all:
            q_dpr = q_dpr.filter(Document.client_job_id.in_(client_job_ids))
        # Remove from the Celery queue (cancel ongoing jobs)
        task_ids_to_cancel = [dpr.docprocrequest_id for dpr in q_dpr.all()]
        # Quicker to use a ResultSet than to forget them all separately
        results = []  # type: List[AsyncResult]
        for task_id in task_ids_to_cancel:
            results.append(AsyncResult(id=task_id, app=celery_app))
        res_set = ResultSet(results=results, app=celery_app)
        log.debug("About to revoke jobs...")
        res_set.revoke()  # will hang if the backend is not operational
        log.debug("... jobs revoked.")
        q_docs = dbsession.query(Document).filter(
            Document.username == self.username
        )
        if not delete_all:
            q_docs = q_docs.filter(Document.client_job_id.in_(client_job_ids))
        with sqla_transaction_commit():
            # Delete the Document objects, which will cascade-delete the
            # DocProcRequest objects.
            q_docs.delete(synchronize_session=False)
        # Return response
        return self.create_response(status=HttpStatus.OK)
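
    # For reference, a "delete_from_queue" request body supplies either
    # args.delete_all or an array of client job IDs (a sketch; key names
    # follow NlprpKeys, values illustrative):
    #
    #   {
    #       "command": "delete_from_queue",
    #       "args": {"delete_all": false, "client_job_ids": ["my_job"]}
    #   }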