CogStack · tomolopolis · Dec 17, 2025 · Dec 10, 2025 · Dec 16, 2025 · Dec 17, 2025
diff --git a/medcat-trainer/webapp/api/api/admin/models.py b/medcat-trainer/webapp/api/api/admin/models.py
@@ -9,7 +9,7 @@
 
 _PROJECT_ANNO_ENTS_SETTINGS_FIELD_ORDER = (
     'concept_db', 'vocab', 'model_pack', 'cdb_search_filter', 'deid_model_annotation', 'require_entity_validation', 'train_model_on_submit',
-    'add_new_entities', 'restrict_concept_lookup', 'terminate_available', 'irrelevant_available',
+    'use_model_service', 'model_service_url', 'add_new_entities', 'restrict_concept_lookup', 'terminate_available', 'irrelevant_available',
     'enable_entity_annotation_comments', 'tasks', 'relations'
 )
 
@@ -177,7 +177,7 @@ class ModelPackAdmin(admin.ModelAdmin):
 
     def metacats(self, obj):
         return ", ".join(str(m_c) for m_c in obj.meta_cats.all())
-    
+
     def save_model(self, request, obj, form, change):
         obj.last_modified_by = request.user
         super().save_model(request, obj, form, change)

diff --git a/medcat-trainer/webapp/api/api/migrations/0093_add_remote_model_service_fields.py b/medcat-trainer/webapp/api/api/migrations/0093_add_remote_model_service_fields.py
@@ -0,0 +1,33 @@
+# Generated by Django 5.1.11 on 2025-12-11 11:47
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('api', '0092_exportedproject_cdb_search_filter_id_and_more'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='projectannotateentities',
+            name='model_service_url',
+            field=models.CharField(blank=True, help_text='URL of the remote MedCAT service API (e.g., http://medcat-service:8000)', max_length=500, null=True),
+        ),
+        migrations.AddField(
+            model_name='projectannotateentities',
+            name='use_model_service',
+            field=models.BooleanField(default=False, help_text='Use a remote MedCAT service API for document processing instead of local models'),
+        ),
+        migrations.AddField(
+            model_name='projectgroup',
+            name='model_service_url',
+            field=models.CharField(blank=True, help_text='URL of the remote MedCAT service API (e.g., http://medcat-service:8000)', max_length=500, null=True),
+        ),
+        migrations.AddField(
+            model_name='projectgroup',
+            name='use_model_service',
+            field=models.BooleanField(default=False, help_text='Use a remote MedCAT service API for document processing instead of local models'),
+        ),
+    ]
diff --git a/medcat-trainer/webapp/api/api/models.py b/medcat-trainer/webapp/api/api/models.py
@@ -458,15 +458,24 @@ class Meta:
                                              'if a model pack is used for the project')
     relations = models.ManyToManyField('Relation', blank=True, default=None,
                                        help_text='Relations that will be available for this project')
+    use_model_service = models.BooleanField(default=False,
+                                            help_text='Use a remote MedCAT service API for document processing instead of local models'\
+                                                      '(note: interim model training is not supported for remote model service projects)')
+    model_service_url = models.CharField(max_length=500, blank=True, null=True,
+                                         help_text='URL of the remote MedCAT service API (e.g., http://medcat-service:8003)')
 
     def save(self, *args, **kwargs):
-        if self.model_pack is None and (self.concept_db is None or self.vocab is None):
-            raise ValidationError('Must set at least the ModelPack or a Concept Database and Vocab Pair')
-        if self.model_pack and (self.concept_db is not None or self.vocab is not None):
-            raise ValidationError('Cannot set model pack and ConceptDB or a Vocab. You must use one or the other.')
-        if self.deid_model_annotation and self.model_pack is None:
-            raise ValidationError('Must set a DeID ModelPack for De-ID Model Annotation, cannot only set a cdb / vocab pair as'
-                                  ' not be a DeId model')
+        # If using remote model service, skip local model validation
+        if not self.use_model_service:
+            if self.model_pack is None and (self.concept_db is None or self.vocab is None):
+                raise ValidationError('Must set at least the ModelPack or a Concept Database and Vocab Pair')
+            if self.model_pack and (self.concept_db is not None or self.vocab is not None):
+                raise ValidationError('Cannot set model pack and ConceptDB or a Vocab. You must use one or the other.')
+            if self.deid_model_annotation and self.model_pack is None:
+                raise ValidationError('Must set a DeID ModelPack for De-ID Model Annotation, cannot only set a cdb / vocab pair as'
+                                      ' not be a DeId model')
+        elif self.use_model_service and not self.model_service_url:
+            raise ValidationError('When using model service, model_service_url must be set')
         super().save(*args, **kwargs)
 
 

diff --git a/medcat-trainer/webapp/api/api/utils.py b/medcat-trainer/webapp/api/api/utils.py
@@ -3,6 +3,7 @@
 import os
 from typing import List
 
+import requests
 from background_task import background
 from django.contrib.auth.models import User
 from django.db import transaction
@@ -20,6 +21,70 @@
 logger = logging.getLogger('trainer')
 
 
+class RemoteEntity:
+    """A simple class to mimic spaCy entity structure for remote API responses."""
+    def __init__(self, entity_data, text):
+        self.cui = entity_data.get('cui', '')
+        self.start_char_index = entity_data.get('start', 0)
+        self.end_char_index = entity_data.get('end', 0)
+        self.text = entity_data.get('detected_name') or entity_data.get('source_value', '')
+        self.context_similarity = entity_data.get('context_similarity', entity_data.get('acc', 0.0))
+        self._meta_anns = entity_data.get('meta_anns', {})
+        self._text = text
+
+    def get_addon_data(self, key):
+        """Mimic get_addon_data for meta_cat_meta_anns."""
+        if key == 'meta_cat_meta_anns':
+            return self._meta_anns
+        return {}
+
+
+class RemoteSpacyDoc:
+    """A simple class to mimic spaCy document structure for remote API responses."""
+    def __init__(self, linked_ents):
+        self.linked_ents = linked_ents
+
+
+def call_remote_model_service(service_url, text):
+    """
+    Call the remote MedCAT service API to process text.
+
+    Args:
+        service_url: Base URL of the remote service (e.g., http://medcat-service:8000)
+        text: Text to process
+
+    Returns:
+        RemoteSpacyDoc object with linked_ents
+    """
+    # Ensure service_url doesn't end with /
+    service_url = service_url.rstrip('/')
+    api_url = f"{service_url}/api/process"
+
+    payload = {
+        "text": text
+    }
+
+    try:
+        response = requests.post(api_url, json=payload, timeout=60)
+        response.raise_for_status()
+        result = response.json()
+
+        # Extract entities from the response
+        entities_data = result.get('entities', {})
+        linked_ents = []
+
+        for _, entity_data in entities_data.items():
+            linked_ents.append(RemoteEntity(entity_data, text))
+
+        return RemoteSpacyDoc(linked_ents)
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Error calling remote model service at {api_url}: {e}")
+        raise Exception(f"Failed to call remote model service: {str(e)}") from e
+    except Exception as e:
+        logger.error(f"Error processing remote model service response: {e}")
+        raise Exception(f"Failed to process remote model service response: {str(e)}") from e
+
+
 def remove_annotations(document, project, partial=False):
     try:
         if partial:
@@ -36,7 +101,27 @@ def remove_annotations(document, project, partial=False):
         logger.debug(f"Something went wrong: {e}")
 
 
-def add_annotations(spacy_doc, user, project, document, existing_annotations, cat):
+class SimpleFilters:
+    """Simple filter object for remote service when cat is not available."""
+    def __init__(self, cuis=None, cuis_exclude=None):
+        self.cuis = cuis or set()
+        self.cuis_exclude = cuis_exclude or set()
+
+
+def add_annotations(spacy_doc, user, project, document, existing_annotations, cat=None, filters=None, similarity_threshold=0.3):
+    """
+    Add annotations from spacy_doc to the database.
+
+    Args:
+        spacy_doc: spaCy document with linked_ents or RemoteSpacyDoc
+        user: User object
+        project: ProjectAnnotateEntities object
+        document: Document object
+        existing_annotations: List of existing AnnotatedEntity objects
+        cat: CAT object (optional, required if filters not provided)
+        filters: SimpleFilters object (optional, used when cat is None)
+        similarity_threshold: float (optional, default 0.3, used when cat is None)
+    """
     spacy_doc.linked_ents.sort(key=lambda x: len(x.text), reverse=True)
 
     tkns_in = []
@@ -57,9 +142,13 @@ def add_annotations(spacy_doc, user, project, document, existing_annotations, ca
         metataskvals2obj = {}
         pass
 
-    def check_ents(ent):
-        return any((ea[0] < ent.start_char_index < ea[1]) or
-                   (ea[0] < ent.end_char_index < ea[1]) for ea in existing_annos_intervals)
+    # Get filters and similarity threshold
+    if cat is not None:
+        filters_obj = cat.config.components.linking.filters
+        MIN_ACC = cat.config.components.linking.similarity_threshold
+    else:
+        filters_obj = filters or SimpleFilters()
+        MIN_ACC = similarity_threshold
 
     def check_filters(cui, filters):
         if cui in filters.cuis or not filters.cuis:
@@ -68,15 +157,8 @@ def check_filters(cui, filters):
             return False
 
     for ent in spacy_doc.linked_ents:
-        if not check_ents(ent) and check_filters(ent.cui, cat.config.components.linking.filters):
-            to_add = True
-            for tkn in ent:
-                if tkn in tkns_in:
-                    to_add = False
-            if to_add:
-                for tkn in ent:
-                    tkns_in.append(tkn)
-                ents.append(ent)
+        if check_filters(ent.cui, filters_obj):
+            ents.append(ent)
 
     logger.debug('Found %s annotations to store', len(ents))
     for ent in ents:
@@ -93,6 +175,7 @@ def check_filters(cui, filters):
 
         ann_ent = AnnotatedEntity.objects.filter(project=project,
                                                   document=document,
+                                                  entity=entity,
                                                   start_ind=ent.start_char_index,
                                                   end_ind=ent.end_char_index).first()
         if ann_ent is None:
@@ -107,7 +190,6 @@ def check_filters(cui, filters):
             ann_ent.end_ind = ent.end_char_index
             ann_ent.acc = ent.context_similarity
 
-            MIN_ACC = cat.config.components.linking.similarity_threshold
             if ent.context_similarity < MIN_ACC:
                 ann_ent.deleted = True
                 ann_ent.validated = True
@@ -157,7 +239,7 @@ def get_create_cdb_infos(cdb, concept, cui, cui_info_prop, code_prop, desc_prop,
 
 
 def create_annotation(source_val: str, selection_occurrence_index: int, cui: str, user: User,
-                      project: ProjectAnnotateEntities, document, cat: CAT):
+                      project: ProjectAnnotateEntities, document: Document):
     text = document.text
     id = None
 
@@ -251,29 +333,58 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int):
     project = ProjectAnnotateEntities.objects.get(id=project_id)
     docs = Document.objects.filter(id__in=doc_ids)
 
-    logger.info('Loading CAT object in bg process for project: %s', project.id)
-    cat = get_medcat(project=project)
-
-    # Set CAT filters
-    cat.config.components.linking.filters.cuis = project.cuis
-
-    for doc in docs:
-        logger.info(f'Running MedCAT model for project {project.id}:{project.name} over doc: {doc.id}')
-        if not project.deid_model_annotation:
-            spacy_doc = cat(doc.text)
-        else:
-            deid = DeIdModel(cat)
-            spacy_doc = deid(doc.text)
-        anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project)
-        with transaction.atomic():
-            add_annotations(spacy_doc=spacy_doc,
-                            user=user,
-                            project=project,
-                            document=doc,
-                            cat=cat,
-                            existing_annotations=anns)
-            # add doc to prepared_documents
-        project.prepared_documents.add(doc)
+    # Get CUI filters
+    cuis = set()
+    if project.cuis is not None and project.cuis:
+        cuis = set([str(cui).strip() for cui in project.cuis.split(",")])
+    if project.cuis_file is not None and project.cuis_file:
+        try:
+            cuis.update(json.load(open(project.cuis_file.path)))
+        except FileNotFoundError:
+            logger.warning('Missing CUI filter file for project %s', project.id)
+
+    if project.use_model_service:
+        # Use remote model service
+        logger.info('Using remote model service in bg process for project: %s', project.id)
+        filters = SimpleFilters(cuis=cuis)
+        for doc in docs:
+            logger.info('Running remote MedCAT service for project %s:%s over doc: %s', project.id, project.name, doc.id)
+            spacy_doc = call_remote_model_service(project.model_service_url, doc.text)
+            anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project)
+            with transaction.atomic():
+                add_annotations(spacy_doc=spacy_doc,
+                                user=user,
+                                project=project,
+                                document=doc,
+                                cat=None,
+                                filters=filters,
+                                similarity_threshold=0.3,
+                                existing_annotations=anns)
+            project.prepared_documents.add(doc)
+    else:
+        # Use local medcat model
+        logger.info('Loading CAT object in bg process for project: %s', project.id)
+        cat = get_medcat(project=project)
+
+        # Set CAT filters
+        cat.config.components.linking.filters.cuis = cuis
+
+        for doc in docs:
+            logger.info('Running MedCAT model for project %s:%s over doc: %s', project.id, project.name, doc.id)
+            if not project.deid_model_annotation:
+                spacy_doc = cat(doc.text)
+            else:
+                deid = DeIdModel(cat)
+                spacy_doc = deid(doc.text)
+            anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project)
+            with transaction.atomic():
+                add_annotations(spacy_doc=spacy_doc,
+                                user=user,
+                                project=project,
+                                document=doc,
+                                cat=cat,
+                                existing_annotations=anns)
+            project.prepared_documents.add(doc)
     project.save()
     logger.info('Prepared all docs for project: %s, docs processed: %s',
                 project.id, project.prepared_documents)