From 18b6e46768eb8a23d5d496bf6041ebfc99bbdd0d Mon Sep 17 00:00:00 2001
From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.>
Date: Mon, 9 Mar 2026 13:20:03 +0000
Subject: [PATCH 1/3] fix(medcat-trainer): Fix remote model service errors on
cache_project_model and call_remote_model_service
---
medcat-trainer/webapp/api/api/model_cache.py | 9 ++
medcat-trainer/webapp/api/api/utils.py | 123 ++++++++++++++++---
medcat-trainer/webapp/api/api/views.py | 72 ++++++-----
3 files changed, 156 insertions(+), 48 deletions(-)
diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py
index 861070dce..da9b31480 100644
--- a/medcat-trainer/webapp/api/api/model_cache.py
+++ b/medcat-trainer/webapp/api/api/model_cache.py
@@ -200,6 +200,15 @@ def get_cached_medcat(project, cat_map: Dict[str, CAT]=CAT_MAP):
if project.model_pack is not None:
cat_id = 'mp' + str(project.model_pack.id)
else:
+ # Guard against misconfigured projects that don't have a CDB/Vocab set
+ if project.use_model_service:
+ raise ValueError(
+ "get_cached_medcat should not be called for projects where use_model_service=True"
+ )
+ if project.concept_db is None or project.vocab is None:
+ raise Exception(
+ f"Project is misconfigured: concept_db is {project.concept_db} and vocab is {project.vocab}"
+ )
cdb_id = project.concept_db.id
vocab_id = project.vocab.id
cat_id = str(cdb_id) + "-" + str(vocab_id)
diff --git a/medcat-trainer/webapp/api/api/utils.py b/medcat-trainer/webapp/api/api/utils.py
index 71e2baa7c..1a09735e7 100644
--- a/medcat-trainer/webapp/api/api/utils.py
+++ b/medcat-trainer/webapp/api/api/utils.py
@@ -23,6 +23,7 @@
class RemoteEntity:
"""A simple class to mimic spaCy entity structure for remote API responses."""
+
def __init__(self, entity_data, text):
self.cui = entity_data.get('cui', '')
self.start_char_index = entity_data.get('start', 0)
@@ -41,6 +42,7 @@ def get_addon_data(self, key):
class RemoteSpacyDoc:
"""A simple class to mimic spaCy document structure for remote API responses."""
+
def __init__(self, linked_ents):
self.linked_ents = linked_ents
@@ -49,6 +51,23 @@ def call_remote_model_service(service_url, text):
"""
Call the remote MedCAT service API to process text.
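+ There are two service types ('spacy' and 'medcat'), with different input and output formats.
+ The REMOTE_MODEL_SERVICE_TYPE environment variable selects one (default: 'spacy').
+ This dispatch is temporary until we determine which one is meant to be used.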
+ """
+ service_type = os.getenv('REMOTE_MODEL_SERVICE_TYPE', 'spacy')
+ if service_type == 'spacy':
+ return call_remote_model_service_spacy(service_url, text)
+ elif service_type == 'medcat':
+ return call_remote_model_service_medcat(service_url, text)
+ else:
+ raise ValueError(f"Invalid service type: {service_type}")
+
+
+def call_remote_model_service_spacy(service_url, text):
+ """
+ Call the remote MedCAT service API to process text.
+
Args:
service_url: Base URL of the remote service (e.g., http://medcat-service:8000)
text: Text to process
@@ -68,6 +87,9 @@ def call_remote_model_service(service_url, text):
timeout = int(os.getenv('REMOTE_MODEL_SERVICE_TIMEOUT', '60'))
try:
+ logger.info(
+ f"Calling remote model service at {api_url} (text length: {len(payload['text'])} chars)"
+ )
response = requests.post(api_url, json=payload, timeout=timeout)
response.raise_for_status()
result = response.json()
@@ -88,6 +110,62 @@ def call_remote_model_service(service_url, text):
raise Exception(f"Failed to process remote model service response: {str(e)}") from e
+def call_remote_model_service_medcat(service_url, text):
+ """
+ Call the remote MedCAT service API to process text.
+ Uses the medcat-service response shape: { "medcat_info", "result": { "text", "annotations", ... } }.
+
+ Args:
+ service_url: Base URL of the remote service (e.g., http://medcat-service:8000)
+ text: Text to process
+
+ Returns:
+ RemoteSpacyDoc object with linked_ents
+ """
+ service_url = service_url.rstrip('/')
+ api_url = f"{service_url}/api/process"
+
+ payload = {
+ "content": {
+ "text": text
+ },
+ }
+
+ timeout = int(os.getenv('REMOTE_MODEL_SERVICE_TIMEOUT', '60'))
+
+ try:
+ logger.info(
+ f"Calling remote model service for medcat at {api_url} (text length: {len(payload['content']['text'])} chars)"
+ )
+ response = requests.post(api_url, json=payload, timeout=timeout)
+ response.raise_for_status()
+ body = response.json()
+
+ # API returns { "medcat_info": {...}, "result": { "text", "annotations", ... } }
+ data = body.get('result')
+ if data is None:
+ raise Exception("Remote model service response missing 'result'")
+ if 'errors' in data:
+ raise Exception(f"Remote model service returned errors: {data['errors']}")
+
+ result_text = data.get('text', text)
+ annotations = data.get('annotations', [])
+ linked_ents = []
+ for ann_item in annotations:
+ if not isinstance(ann_item, dict):
+ continue
+ for entity_data in ann_item.values():
+ linked_ents.append(RemoteEntity(entity_data, result_text))
+
+ return RemoteSpacyDoc(linked_ents)
+ except requests.exceptions.RequestException as e:
+ logger.error(f"Error calling remote model service at {api_url}: {e}")
+ raise Exception(f"Failed to call remote model service: {str(e)}") from e
+ except Exception as e:
+ logger.error(f"Error processing remote model service response: {e}")
+ raise Exception(f"Failed to process remote model service response: {str(e)}") from e
+
+
def remove_annotations(document, project, partial=False):
try:
if partial:
@@ -106,6 +184,7 @@ def remove_annotations(document, project, partial=False):
class SimpleFilters:
"""Simple filter object for remote service when cat is not available."""
+
def __init__(self, cuis=None, cuis_exclude=None):
self.cuis = cuis or set()
self.cuis_exclude = cuis_exclude or set()
@@ -127,23 +206,27 @@ def add_annotations(spacy_doc, user, project, document, existing_annotations, ca
"""
spacy_doc.linked_ents.sort(key=lambda x: len(x.text), reverse=True)
- tkns_in = []
ents = []
- existing_annos_intervals = [(ann.start_ind, ann.end_ind) for ann in existing_annotations]
+
+ # NOTE: The code that builds metatask2obj and metataskvals2obj is currently unused.
+ # If it is uncommented, it will raise an error for remote model services.
+ # It is kept commented out until it is actually needed.
+ # tkns_in = []
+ # existing_annos_intervals = [(ann.start_ind, ann.end_ind) for ann in existing_annotations]
# all MetaTasks and associated values
# that can be produced are expected to have available models
- try:
- metatask2obj = {task_name: MetaTask.objects.get(name=task_name)
- for task_name in spacy_doc.linked_ents[0].get_addon_data('meta_cat_meta_anns').keys()}
- metataskvals2obj = {task_name: {v.name: v for v in MetaTask.objects.get(name=task_name).values.all()}
- for task_name in spacy_doc.linked_ents[0].get_addon_data('meta_cat_meta_anns').keys()}
- except (AttributeError, IndexError, UnregisteredDataPathException):
- # IndexError: ignore if there are no annotations in this doc
- # AttributeError: ignore meta_anns that are not present - i.e. non model pack preds
- # or model pack preds with no meta_anns
- metatask2obj = {}
- metataskvals2obj = {}
- pass
+ # try:
+ # metatask2obj = {task_name: MetaTask.objects.get(name=task_name)
+ # for task_name in spacy_doc.linked_ents[0].get_addon_data('meta_cat_meta_anns').keys()}
+ # metataskvals2obj = {task_name: {v.name: v for v in MetaTask.objects.get(name=task_name).values.all()}
+ # for task_name in spacy_doc.linked_ents[0].get_addon_data('meta_cat_meta_anns').keys()}
+ # except (AttributeError, IndexError, UnregisteredDataPathException):
+ # # IndexError: ignore if there are no annotations in this doc
+ # # AttributeError: ignore meta_anns that are not present - i.e. non model pack preds
+ # # or model pack preds with no meta_anns
+ # metatask2obj = {}
+ # metataskvals2obj = {}
+ # pass
# Get filters and similarity threshold
if cat is not None:
@@ -177,10 +260,10 @@ def check_filters(cui, filters):
entity = Entity.objects.get(label=label)
ann_ent = AnnotatedEntity.objects.filter(project=project,
- document=document,
- entity=entity,
- start_ind=ent.start_char_index,
- end_ind=ent.end_char_index).first()
+ document=document,
+ entity=entity,
+ start_ind=ent.start_char_index,
+ end_ind=ent.end_char_index).first()
if ann_ent is None:
# If this entity doesn't exist already
ann_ent = AnnotatedEntity()
@@ -350,7 +433,8 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int):
logger.info('Using remote model service in bg process for project: %s', project.id)
filters = SimpleFilters(cuis=cuis)
for doc in docs:
- logger.info('Running remote MedCAT service for project %s:%s over doc: %s', project.id, project.name, doc.id)
+ logger.info('Running remote MedCAT service for project %s:%s over doc: %s',
+ project.id, project.name, doc.id)
spacy_doc = call_remote_model_service(project.model_service_url, doc.text)
anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project)
with transaction.atomic():
@@ -403,7 +487,6 @@ def save_project_anno(sender, instance, **kwargs):
post_save.connect(save_project_anno, sender=ProjectAnnotateEntities)
-
def env_str_to_bool(var: str, default: bool):
val = os.environ.get(var, default)
if isinstance(val, str):
diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py
index 43efddab4..5600c07e5 100644
--- a/medcat-trainer/webapp/api/api/views.py
+++ b/medcat-trainer/webapp/api/api/views.py
@@ -47,12 +47,15 @@
# Get the basic version of MedCAT
cat = None
+
def index(request):
return render(request, 'index.html')
class TextInFilter(drf.BaseInFilter, drf.CharFilter):
pass
+
+
class NumInFilter(drf.BaseInFilter, drf.NumberFilter):
pass
@@ -93,6 +96,7 @@ class Meta:
model = ProjectGroup
fields = ['id', 'name', 'description']
+
class ProjectGroupViewSet(viewsets.ModelViewSet):
permission_classes = [permissions.IsAuthenticated]
queryset = ProjectGroup.objects.all()
@@ -225,6 +229,7 @@ class DatasetViewSet(viewsets.ModelViewSet):
class ResetPasswordView(PasswordResetView):
email_template_name = 'password_reset_email.html'
subject_template_name = 'password_reset_subject.txt'
+
def post(self, request, *args, **kwargs):
try:
return super().post(request, *args, **kwargs)
@@ -233,9 +238,11 @@ def post(self, request, *args, **kwargs):
Please visit https://medcattrainer.readthedocs.io for more information to resolve this.
You can also ask a question at: https://discourse.cogstack.org/c/medcat/5''')
+
class ResetPasswordView(PasswordResetView):
email_template_name = 'password_reset_email.html'
subject_template_name = 'password_reset_subject.txt'
+
def post(self, request, *args, **kwargs):
try:
return super().post(request, *args, **kwargs)
@@ -244,6 +251,7 @@ def post(self, request, *args, **kwargs):
Please visit https://medcattrainer.readthedocs.io for more information to resolve this.
You can also ask a question at: https://discourse.cogstack.org/c/medcat/5''')
+
@api_view(http_method_names=['GET'])
def get_anno_tool_conf(_):
return Response({k: v for k, v in os.environ.items()})
@@ -275,9 +283,9 @@ def prepare_documents(request):
cuis.update(json.load(open(project.cuis_file.path)))
except FileNotFoundError:
return Response({'message': 'Missing CUI filter file',
- 'description': 'Missing CUI filter file, %s, cannot be found on the filesystem, '
- 'but is still set on the project. To fix remove and reset the '
- 'cui filter file' % project.cuis_file}, status=500)
+ 'description': 'Missing CUI filter file, %s, cannot be found on the filesystem, '
+ 'but is still set on the project. To fix remove and reset the '
+ 'cui filter file' % project.cuis_file}, status=500)
try:
for d_id in d_ids:
document = Document.objects.get(id=d_id)
@@ -338,7 +346,7 @@ def prepare_documents(request):
except Exception as e:
logger.warning('Error preparing documents for project %s', p_id, exc_info=e)
return Response({'message': e.args[0] if len(e.args) > 0 else 'Internal Server Error',
- 'description': e.args[1] if len(e.args) > 1 else '',}, status=500)
+ 'description': e.args[1] if len(e.args) > 1 else '', }, status=500)
return Response({'message': 'Documents prepared successfully'})
@@ -382,7 +390,8 @@ def prepare_docs_bg_task(request, proj_id):
try:
proj = ProjectAnnotateEntities.objects.get(id=proj_id)
prepd_docs_count = proj.prepared_documents.count()
- ds_total_count = Document.objects.filter(dataset=ProjectAnnotateEntities.objects.get(id=proj_id).dataset.id).count()
+ ds_total_count = Document.objects.filter(
+ dataset=ProjectAnnotateEntities.objects.get(id=proj_id).dataset.id).count()
return Response({'proj_id': proj_id, 'dataset_len': ds_total_count, 'prepd_docs_len': prepd_docs_count})
except ObjectDoesNotExist:
return HttpResponseBadRequest('No Project found for the given ID')
@@ -395,6 +404,7 @@ def prepare_docs_bg_task(request, proj_id):
else:
return HttpResponseBadRequest('Could not find running BG Process to stop')
+
@api_view(http_method_names=['POST'])
def add_annotation(request):
# Get project id
@@ -443,11 +453,10 @@ def add_concept(request):
if project.use_model_service:
# Use remote model service
- logger.error('Adding concepts is not supported for remote model service'\
+ logger.error('Adding concepts is not supported for remote model service '
'projects, you likely want to use a local model')
raise NotImplementedError('Adding concepts is not supported for remote model service projects')
-
cat = get_medcat(project=project)
if cui in cat.cdb.cui2info:
@@ -473,12 +482,12 @@ def add_concept(request):
end = start + len(source_val)
# Find tokens that overlap with the span [start, end)
# A token overlaps if: token_start < end AND token_end > start
- spacy_entity = [tkn for tkn in spacy_doc if tkn.char_index < end and (tkn.char_index + len(tkn.text)) > start]
+ spacy_entity = [tkn for tkn in spacy_doc if tkn.char_index <
+ end and (tkn.char_index + len(tkn.text)) > start]
# if len(spacy_entity) == 0:
# spacy_entity = None
cat.trainer.add_and_train_concept(cui=cui, name=name, name_status='P', mut_doc=spacy_doc, mut_entity=spacy_entity)
-
id = create_annotation(source_val=source_val,
selection_occurrence_index=sel_occur_idx,
cui=cui,
@@ -514,7 +523,7 @@ def _submit_document(project: ProjectAnnotateEntities, document: Document):
if project.use_model_service:
# TODO: Implement this, already available in CMS / gateway instances.
# interim model training not supported for remote model service projects
- logger.warning('Interim model training is not supported for remote model service projects')
+ logger.warning('Interim model training is not supported for remote model service projects')
else:
cat = get_medcat(project=project)
train_medcat(cat, project, document)
@@ -619,17 +628,17 @@ def update_meta_annotation(request):
meta_task_id = request.data['meta_task_id']
meta_task_value = request.data['meta_task_value']
- annotation = AnnotatedEntity.objects.filter(project= project_id, entity=entity_id, document=document_id)[0]
+ annotation = AnnotatedEntity.objects.filter(project=project_id, entity=entity_id, document=document_id)[0]
annotation.correct = True
annotation.validated = True
logger.debug(annotation)
annotation.save()
- meta_task = MetaTask.objects.filter(id = meta_task_id)[0]
- meta_task_value = MetaTaskValue.objects.filter(id = meta_task_value)[0]
+ meta_task = MetaTask.objects.filter(id=meta_task_id)[0]
+ meta_task_value = MetaTaskValue.objects.filter(id=meta_task_value)[0]
- meta_annotation_list = MetaAnnotation.objects.filter(annotated_entity = annotation)
+ meta_annotation_list = MetaAnnotation.objects.filter(annotated_entity=annotation)
logger.debug(meta_annotation_list)
@@ -806,14 +815,14 @@ def upload_deployment(request):
try:
upload_projects_export(deployment_upload,
- cdb_id,
- vocab_id,
- modelpack_id,
- project_name_suffix,
- cdb_search_filter_id,
- members,
- import_project_name_suffix,
- set_validated_docs)
+ cdb_id,
+ vocab_id,
+ modelpack_id,
+ project_name_suffix,
+ cdb_search_filter_id,
+ members,
+ import_project_name_suffix,
+ set_validated_docs)
return Response("successfully uploaded", 200)
except Exception as e:
logger.error(f"Failed to upload projects export: {e}", exc_info=e)
@@ -824,6 +833,11 @@ def upload_deployment(request):
def cache_project_model(request, project_id):
try:
project = ProjectAnnotateEntities.objects.get(id=project_id)
+ # For projects using a remote MedCAT service, there is no local model
+ # cache to warm or clear; treat cache operations as no-ops.
+ if project.use_model_service:
+ return Response('success', 200)
+
is_loaded = is_model_loaded(project)
if request.method == 'GET':
if not is_loaded:
@@ -838,6 +852,7 @@ def cache_project_model(request, project_id):
except ProjectAnnotateEntities.DoesNotExist:
return Response(f'Project with id:{project_id} does not exist', 404)
except Exception as e:
+ logger.error('cache_project_model failed for project_id=%s: %s', project_id, e, exc_info=e)
return Response({'message': f'{str(e)}'}, 500)
@@ -856,10 +871,10 @@ def cache_modelpack(request, modelpack_id: int):
except ModelPack.DoesNotExist:
return Response(f'ModelPack with id:{modelpack_id} does not exist', 404)
except Exception as e:
+ logger.error('cache_modelpack failed for modelpack_id=%s: %s', modelpack_id, e, exc_info=e)
return Response({'message': f'{str(e)}'}, 500)
-
@api_view(http_method_names=['GET'])
def model_loaded(_):
models_loaded = {}
@@ -954,7 +969,8 @@ def view_metrics(request, report_id):
running_pending_report = Task.objects.filter(id=report_id, queue='metrics').first()
completed_report = CompletedTask.objects.filter(id=report_id, queue='metrics').first()
if running_pending_report is None and completed_report is None:
- HttpResponseBadRequest(f'Cannot find report_id:{report_id} in either pending, running or complete report lists. ')
+ HttpResponseBadRequest(
+ f'Cannot find report_id:{report_id} in either pending, running or complete report lists. ')
elif running_pending_report is not None:
HttpResponseBadRequest(f'Cannot view a running or pending metrics report with id:{report_id}')
pm_obj = ProjectMetrics.objects.filter(report_name_generated=completed_report.verbose_name).first()
@@ -1024,7 +1040,7 @@ def generate_concept_filter_flat_json(request):
for cui in cuis:
ch_nodes = get_all_ch(cui, cdb)
final_filter += [n for n in ch_nodes if n not in excluded_nodes]
- final_filter = {cui:1 for cui in final_filter}.keys()
+ final_filter = {cui: 1 for cui in final_filter}.keys()
filter_json = json.dumps(final_filter)
response = HttpResponse(filter_json, content_type='application/json')
response['Content-Disposition'] = 'attachment; filename=filter.json'
@@ -1157,9 +1173,9 @@ def project_admin_detail(request, project_id):
# Convert string booleans to actual booleans
boolean_fields = ['project_locked', 'annotation_classification', 'require_entity_validation',
- 'train_model_on_submit', 'add_new_entities', 'restrict_concept_lookup',
- 'terminate_available', 'irrelevant_available', 'enable_entity_annotation_comments',
- 'use_model_service']
+ 'train_model_on_submit', 'add_new_entities', 'restrict_concept_lookup',
+ 'terminate_available', 'irrelevant_available', 'enable_entity_annotation_comments',
+ 'use_model_service']
for field in boolean_fields:
if field in data:
if isinstance(data[field], str):
From 37cd4b320dbdb2f4f97399c3765a724b88d990dc Mon Sep 17 00:00:00 2001
From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.>
Date: Mon, 9 Mar 2026 14:06:53 +0000
Subject: [PATCH 2/3] test(medcat-trainer): Add e2e test for remote model
service
---
.../tests/test_remote_model_service_e2e.py | 171 ++++++++++++++++++
1 file changed, 171 insertions(+)
create mode 100644 medcat-trainer/webapp/api/api/tests/test_remote_model_service_e2e.py
diff --git a/medcat-trainer/webapp/api/api/tests/test_remote_model_service_e2e.py b/medcat-trainer/webapp/api/api/tests/test_remote_model_service_e2e.py
new file mode 100644
index 000000000..833e2bac3
--- /dev/null
+++ b/medcat-trainer/webapp/api/api/tests/test_remote_model_service_e2e.py
@@ -0,0 +1,171 @@
+"""End-to-end happy-path test for projects using a remote MedCAT model service.
+
+This test mirrors the UI flow when a user works with a remote-model project:
+1. Set up a project configured with use_model_service=True and a dataset containing one document.
+2. Call GET /api/cache-project-model/<project_id>/ and assert 200 (no local model to load; endpoint returns success).
+3. Call POST /api/prepare-documents/ with project_id and document_ids; the view calls the remote
+ MedCAT service to get annotations. Only the HTTP call to the remote service is stubbed (requests.post);
+ the rest of the stack (auth, DB, add_annotations, prepared_documents) runs for real.
+
+Assertions include: all three endpoints return 200, the stub was called with the expected URL and document text,
+the document is added to the project's prepared_documents, and annotated-entities returns the expected results.
+"""
+
+import json
+import os
+from unittest.mock import MagicMock, patch
+
+from django.contrib.auth.models import User
+from django.core.files.uploadedfile import SimpleUploadedFile
+from django.test import TestCase
+from rest_framework.test import APIClient
+
+from ..models import Dataset, Document, ProjectAnnotateEntities
+
+
+class RemoteModelServiceE2ETestCase(TestCase):
+ """Remote project + one-document dataset; each test calls cache, prepare-documents and annotated-entities."""
+
+ def setUp(self):
+ self.user = User.objects.create_user(username="testuser", password="testpass")
+ csv_content = b"name,text\ndoc1,Patient had acute kidney failure."
+ self.dataset = Dataset(
+ name="Test Remote Dataset",
+ original_file=SimpleUploadedFile("test.csv", csv_content, content_type="text/csv"),
+ )
+ self.dataset.save()
+ self.document = Document.objects.create(
+ dataset=self.dataset,
+ name="doc1",
+ text="Patient had acute kidney failure.",
+ )
+ self.project = ProjectAnnotateEntities.objects.create(
+ name="Test Remote Project",
+ dataset=self.dataset,
+ use_model_service=True,
+ model_service_url="http://medcat-service:8000",
+ cuis="",
+ )
+ self.project.members.add(self.user)
+
+ def _run_cache_and_prepare_then_assert_annotated_entities(
+ self, mock_json_return_value, expected_annotated_entities_str
+ ):
+ """Shared flow: stub medcat-service with given response, call cache + prepare-documents + annotated-entities, assert response matches expected JSON string."""
+ mock_response = MagicMock()
+ mock_response.raise_for_status = MagicMock()
+ mock_response.json.return_value = mock_json_return_value
+
+ with patch.dict(os.environ, {"REMOTE_MODEL_SERVICE_TYPE": "medcat"}):
+ with patch("api.utils.requests.post", return_value=mock_response) as mock_post:
+ client = APIClient()
+ client.force_authenticate(user=self.user)
+
+ cache_resp = client.get(f"/api/cache-project-model/{self.project.id}/")
+ self.assertEqual(cache_resp.status_code, 200)
+
+ prepare_resp = client.post(
+ "/api/prepare-documents/",
+ data={"project_id": self.project.id, "document_ids": [self.document.id]},
+ format="json",
+ )
+ self.assertEqual(prepare_resp.status_code, 200)
+ self.assertEqual(prepare_resp.json().get("message"), "Documents prepared successfully")
+
+ mock_post.assert_called_once()
+ call_args, call_kwargs = mock_post.call_args
+ self.assertEqual(call_args[0], f"{self.project.model_service_url.rstrip('/')}/api/process")
+ self.assertEqual(call_kwargs["json"], {"content": {"text": self.document.text}})
+ self.assertIn("timeout", call_kwargs)
+
+ self.project.refresh_from_db()
+ self.assertIn(self.document, self.project.prepared_documents.all())
+
+ ann_resp = client.get(
+ "/api/annotated-entities/",
+ data={"project": self.project.id, "document": self.document.id},
+ )
+ self.assertEqual(ann_resp.status_code, 200)
+ expected = json.loads(expected_annotated_entities_str)
+ actual = ann_resp.json()
+ self.assertEqual(actual["count"], expected["count"])
+ self.assertEqual(actual["next"], expected["next"])
+ self.assertEqual(actual["previous"], expected["previous"])
+ self.assertEqual(len(actual["results"]), len(expected["results"]))
+ for i, exp_result in enumerate(expected["results"]):
+ for key in exp_result:
+ self.assertEqual(
+ actual["results"][i].get(key), exp_result[key], f"results[{i}].{key}"
+ )
+
+ def test_cache_and_prepare_documents_remote_project_empty_annotations(self):
+ """GET cache-project-model returns 200; POST prepare-documents with stubbed medcat-service returns 200."""
+ mock_json = {
+ "result": {
+ "text": self.document.text,
+ "annotations": [],
+ "success": True,
+ "timestamp": "",
+ "elapsed_time": 0,
+ "footer": None,
+ }
+ }
+ expected_str = """
+ {
+ "count": 0,
+ "next": null,
+ "previous": null,
+ "results": []
+ }
+ """
+ self._run_cache_and_prepare_then_assert_annotated_entities(mock_json, expected_str)
+
+ def test_cache_and_prepare_documents_remote_project_with_annotations(self):
+ """Same flow but mock returns one annotation; assert annotated-entities list includes it."""
+ mock_json = {
+ "result": {
+ "text": self.document.text,
+ "annotations": [
+ {
+ "0": {
+ "cui": "C0022660",
+ "start": 10,
+ "end": 30,
+ "source_value": "acute kidney failure",
+ "detected_name": "acute~kidney~failure",
+ "acc": 0.99,
+ "context_similarity": 0.99,
+ "meta_anns": {},
+ }
+ }
+ ],
+ "success": True,
+ "timestamp": "",
+ "elapsed_time": 0,
+ "footer": None,
+ }
+ }
+ expected_str = """
+ {
+ "count": 1,
+ "next": null,
+ "previous": null,
+ "results": [
+ {
+ "value": "acute~kidney~failure",
+ "start_ind": 10,
+ "end_ind": 30,
+ "acc": 0.99,
+ "comment": null,
+ "validated": false,
+ "correct": false,
+ "alternative": false,
+ "manually_created": false,
+ "deleted": false,
+ "killed": false,
+ "irrelevant": false
+ }
+ ]
+ }
+ """
+ self._run_cache_and_prepare_then_assert_annotated_entities(mock_json, expected_str)
From 5807ba6ccbd0d19f5fe60c8a1d04563a522f8f44 Mon Sep 17 00:00:00 2001
From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.>
Date: Mon, 9 Mar 2026 14:12:52 +0000
Subject: [PATCH 3/3] chore(medcat-trainer): Use medcat remote service type in dev compose
---
medcat-trainer/docker-compose-dev.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/medcat-trainer/docker-compose-dev.yml b/medcat-trainer/docker-compose-dev.yml
index f90fce63f..908093ac7 100644
--- a/medcat-trainer/docker-compose-dev.yml
+++ b/medcat-trainer/docker-compose-dev.yml
@@ -25,6 +25,7 @@ services:
environment:
- MCT_VERSION=latest
- MCT_DEV_LIVERELOAD=1
+ - REMOTE_MODEL_SERVICE_TYPE=medcat
# OIDC Settings
- USE_OIDC=0
- KEYCLOAK_URL=http://keycloak.cogstack.localhost