From aa7195b31eb1799b3d771fd07e1f3ff4b6886d8a Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Wed, 10 Dec 2025 21:02:57 +0000 Subject: [PATCH 1/3] CU-869bacykn: medcat-trainer(feat): fix overlapping annos styling and for more than one annotation --- medcat-trainer/webapp/api/api/utils.py | 16 +- medcat-trainer/webapp/api/api/views.py | 2 - .../src/components/common/ClinicalText.vue | 331 +++++++++++++++--- .../webapp/frontend/src/styles/_common.scss | 21 -- 4 files changed, 290 insertions(+), 80 deletions(-) diff --git a/medcat-trainer/webapp/api/api/utils.py b/medcat-trainer/webapp/api/api/utils.py index f88cb5a9c..32f58b1a0 100644 --- a/medcat-trainer/webapp/api/api/utils.py +++ b/medcat-trainer/webapp/api/api/utils.py @@ -57,10 +57,6 @@ def add_annotations(spacy_doc, user, project, document, existing_annotations, ca metataskvals2obj = {} pass - def check_ents(ent): - return any((ea[0] < ent.start_char_index < ea[1]) or - (ea[0] < ent.end_char_index < ea[1]) for ea in existing_annos_intervals) - def check_filters(cui, filters): if cui in filters.cuis or not filters.cuis: return cui not in filters.cuis_exclude @@ -68,15 +64,8 @@ def check_filters(cui, filters): return False for ent in spacy_doc.linked_ents: - if not check_ents(ent) and check_filters(ent.cui, cat.config.components.linking.filters): - to_add = True - for tkn in ent: - if tkn in tkns_in: - to_add = False - if to_add: - for tkn in ent: - tkns_in.append(tkn) - ents.append(ent) + if check_filters(ent.cui, cat.config.components.linking.filters): + ents.append(ent) logger.debug('Found %s annotations to store', len(ents)) for ent in ents: @@ -93,6 +82,7 @@ def check_filters(cui, filters): ann_ent = AnnotatedEntity.objects.filter(project=project, document=document, + entity=entity, start_ind=ent.start_char_index, end_ind=ent.end_char_index).first() if ann_ent is None: diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index c079e7547..991de25c7 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -291,8 +291,6 @@ def prepare_documents(request): deid = DeIdModel(cat) spacy_doc = deid(document.text) - spacy_doc = cat(document.text) - add_annotations(spacy_doc=spacy_doc, user=user, project=project, diff --git a/medcat-trainer/webapp/frontend/src/components/common/ClinicalText.vue b/medcat-trainer/webapp/frontend/src/components/common/ClinicalText.vue index 667c80c86..61872d4d1 100644 --- a/medcat-trainer/webapp/frontend/src/components/common/ClinicalText.vue +++ b/medcat-trainer/webapp/frontend/src/components/common/ClinicalText.vue @@ -66,7 +66,8 @@ export default { name: 'Add Term' } ], - selection: null + selection: null, + openPopoverId: null // Track which popover is open } }, computed: { @@ -84,7 +85,12 @@ export default { if (a.ent.start_ind !== b.ent.start_ind) { return a.ent.start_ind - b.ent.start_ind } - return b.ent.end_ind - a.ent.end_ind // Longer spans first when same start + if (a.ent.end_ind !== b.ent.end_ind) { + return b.ent.end_ind - a.ent.end_ind // Longer spans first when same start + } + // For exactly overlapping annotations (same start and end), use original index + // as tiebreaker to ensure stable, consistent ordering + return a.origIdx - b.origIdx }) const taskHighlightDefault = 'highlight-task-default' @@ -106,12 +112,45 @@ export default { if (a.type !== b.type) { return a.type === 'start' ? -1 : 1 } - return 0 + // For events at same position and same type (exact overlaps), + // use entIndex as tiebreaker to ensure stable, consistent ordering + // For starts: open in order (lower index first) + // For ends: close in reverse order (higher index first) to maintain proper nesting + if (a.type === 'start') { + return a.entIndex - b.entIndex + } else { + return b.entIndex - a.entIndex + } + }) + + // Pre-compute overlapping groups: for each annotation, find all annotations that overlap with it + const overlappingGroups = new Map() // Map from origIndex to set of all overlapping origIndices + sortedEnts.forEach((entData1, i1) => { + const origIdx1 = entData1.origIdx + const start1 = entData1.ent.start_ind + const end1 = entData1.ent.end_ind + const group = new Set([origIdx1]) + + sortedEnts.forEach((entData2, i2) => { + if (i1 === i2) return + const start2 = entData2.ent.start_ind + const end2 = entData2.ent.end_ind + // Check if annotations overlap (they overlap if one starts before the other ends) + if (!(end1 <= start2 || end2 <= start1)) { + group.add(entData2.origIdx) + } + }) + + // Store sorted array of indices for consistent ID generation + const sortedGroup = Array.from(group).sort((a, b) => a - b) + overlappingGroups.set(origIdx1, sortedGroup) }) let formattedText = '' let currentPos = 0 const activeEnts = [] // Stack of active entities (ordered by when they were opened) + const createdPopovers = new Set() // Track which popover IDs have been created to avoid duplicates + const createdBadges = new Set() // Track which popover IDs have had badges created to avoid multiple badges // Helper function to get style class for an entity const getStyleClass = (ent, origIndex) => { @@ -140,13 +179,48 @@ export default { return `` } - // Helper function to build closing span tag with optional remove button - const buildCloseSpan = (ent, origIndex, isInnermost) => { + // Helper function to build closing span tag with optional remove button and overlap indicator + const buildCloseSpan = (ent, origIndex, isInnermost, overlappingEnts = []) => { let removeButtonEl = '' if (isInnermost && ent.manually_created) { removeButtonEl = `` } - return `${removeButtonEl}` + + // Add overlap indicator only on innermost span and only if there are overlapping annotations + // Use pre-computed overlapping groups to get the complete group, not just current subset + let overlapIndicator = '' + if (isInnermost) { + // Get the complete overlapping group for this annotation + const completeGroup = overlappingGroups.get(origIndex) || [] + if (completeGroup.length > 1) { + // Create popover ID based on complete group + const popoverId = `popover-${completeGroup.join('-')}` + const overlapCount = completeGroup.length + + // Only create badge if we haven't created one for this popover ID yet + if (!createdBadges.has(popoverId)) { + createdBadges.add(popoverId) + + // Get entity names for all annotations in the complete group + const entityNames = completeGroup.map(idx => { + const entData = sortedEnts.find(e => e.origIdx === idx) + const name = entData ? (entData.ent.pretty_name || 'Unknown') : 'Unknown' + return `
${_.escape(name)}
` + }).join('') + + // Only create the popover HTML if it hasn't been created yet for this group + const popoverHtml = createdPopovers.has(popoverId) ? '' : `
${entityNames}
` + + if (!createdPopovers.has(popoverId)) { + createdPopovers.add(popoverId) + } + + overlapIndicator = `${overlapCount}${popoverHtml}` + } + } + } + + return `${removeButtonEl}${overlapIndicator}` } for (const event of events) { @@ -167,8 +241,15 @@ export default { // Close the span (in reverse order to maintain nesting) const index = activeEnts.findIndex(ae => ae.entIndex === event.entIndex) if (index !== -1) { + // Check if this is the last annotation ending at this position + // (i.e., all remaining activeEnts also end at this position) + const allEndAtSamePos = activeEnts.every(ae => { + const entEndPos = sortedEnts[ae.entIndex].ent.end_ind + return entEndPos === event.pos + }) + // If this is not the innermost span, we need to handle overlapping text - if (index < activeEnts.length - 1) { + if (index < activeEnts.length - 1 && !allEndAtSamePos) { // Add text up to the end position while all spans are still active // This text is inside all active spans including this one if (event.pos > currentPos) { @@ -194,8 +275,8 @@ export default { formattedText += buildOpenSpan(innerData.ent, innerData.origIndex) } } else { - // This is the innermost span at its final end position - // Add text then close it with remove button if needed + // This is either the innermost span, or all remaining spans end at the same position + // (exactly overlapping case). Add text then close it with remove button if needed if (event.pos > currentPos) { const textSegment = this.text.slice(currentPos, event.pos) if (textSegment.length > 0) { @@ -203,8 +284,18 @@ export default { } currentPos = event.pos } - // Only add remove button when closing at the actual end position - formattedText += buildCloseSpan(event.ent, event.origIndex, true) + // For exactly overlapping annotations, only the innermost (last to close) gets the remove button + const isInnermost = index === activeEnts.length - 1 + // Get all overlapping annotations (all activeEnts at this position) + // Include this span in the overlapping list + const overlappingEnts = activeEnts.map(ae => ({ + ent: ae.ent, + origIndex: ae.origIndex + })) + // Show badge on innermost span when there are overlapping annotations + // The popover HTML will only be created once per group (tracked by createdPopovers) + const shouldShowBadge = isInnermost && (overlappingEnts.length > 1) + formattedText += buildCloseSpan(event.ent, event.origIndex, isInnermost, shouldShowBadge ? overlappingEnts : []) } activeEnts.splice(index, 1) } @@ -221,7 +312,15 @@ export default { for (let j = activeEnts.length - 1; j >= 0; j--) { const activeData = activeEnts[j] const isInnermost = j === activeEnts.length - 1 - formattedText += buildCloseSpan(activeData.ent, activeData.origIndex, isInnermost) + // Get all overlapping annotations (all remaining activeEnts) + const overlappingEnts = activeEnts.map(ae => ({ + ent: ae.ent, + origIndex: ae.origIndex + })) + // Show badge on innermost span when there are overlapping annotations + // The popover HTML will only be created once per group (tracked by createdPopovers) + const shouldShowBadge = isInnermost && (overlappingEnts.length > 1) + formattedText += buildCloseSpan(activeData.ent, activeData.origIndex, isInnermost, shouldShowBadge ? overlappingEnts : []) } } @@ -247,7 +346,26 @@ export default { }, showCtxMenu (event) { const selection = window.getSelection() - const selStr = selection.toString().trim() + // Get selected text while excluding badge content + let selStr = '' + if (selection.rangeCount > 0) { + const range = selection.getRangeAt(0) + // Clone the range to avoid modifying the original selection + const clonedRange = range.cloneRange() + // Create a temporary container to get text content + const tempDiv = document.createElement('div') + tempDiv.appendChild(clonedRange.cloneContents()) + // Remove all badge elements from the cloned content + const badges = tempDiv.querySelectorAll('.overlap-badge') + badges.forEach(badge => badge.remove()) + const popovers = tempDiv.querySelectorAll('.overlap-popover') + popovers.forEach(popover => popover.remove()) + // Get text content without badges + selStr = tempDiv.textContent || tempDiv.innerText || '' + } else { + selStr = selection.toString() + } + selStr = selStr.trim() const anchor = selection.anchorNode const focus = selection.focusNode @@ -319,7 +437,71 @@ export default { }, removeNewAnno (idx) { this.$emit('remove:newAnno', idx) + }, + togglePopover (popoverId) { + // Close all other popovers first + const allPopovers = document.querySelectorAll('.overlap-popover') + allPopovers.forEach(pop => { + if (pop.id !== popoverId) { + pop.setAttribute('data-popover-open', 'false') + pop.classList.remove('popover-open') + } + }) + + // Toggle the clicked popover + const popover = document.getElementById(popoverId) + if (popover) { + const isOpen = popover.getAttribute('data-popover-open') === 'true' + popover.setAttribute('data-popover-open', isOpen ? 'false' : 'true') + if (isOpen) { + popover.classList.remove('popover-open') + this.openPopoverId = null + } else { + popover.classList.add('popover-open') + this.openPopoverId = popoverId + + // If any entities have "Unknown" as their name, call selectEnt to populate the name + // Extract entity indices from popover ID (format: popover-15-16-17-18) + const indicesStr = popoverId.replace('popover-', '') + const entityIndices = indicesStr.split('-').map(idx => parseInt(idx, 10)) + + // Check each entity and call selectEnt if it's unknown + entityIndices.forEach(origIndex => { + if (this.ents && this.ents[origIndex]) { + const ent = this.ents[origIndex] + const name = ent.pretty_name || '' + // If name is empty or "Unknown", call selectEnt to populate it + if (!name || name.trim() === '' || name === 'Unknown') { + this.selectEnt(origIndex) + } + } + }) + } + } + }, + handleOutsideClick (event) { + // Close popover if clicking outside of it + if (this.openPopoverId) { + const popover = document.getElementById(this.openPopoverId) + const badge = document.querySelector(`[data-popover-id="${this.openPopoverId}"]`) + if (popover && badge) { + // Check if click is outside both popover and badge + if (!popover.contains(event.target) && !badge.contains(event.target)) { + popover.setAttribute('data-popover-open', 'false') + popover.classList.remove('popover-open') + this.openPopoverId = null + } + } + } } + }, + mounted () { + // Close popovers when clicking outside + document.addEventListener('click', this.handleOutsideClick) + }, + beforeUnmount () { + // Clean up event listener + document.removeEventListener('click', this.handleOutsideClick) } } @@ -344,48 +526,109 @@ export default { white-space: pre-wrap; line-height: 1.6; // Base line height for normal text - // Increase line height when there are 3 or more nested underlines - // to prevent underlines from overlapping with next line - [class^="highlight-task-"] [class^="highlight-task-"] [class^="highlight-task-"] { - line-height: 2.2; // Increased line height for 3+ levels of nesting - padding-bottom: 4px; // Extra padding to push next line down - display: inline-block; // Ensure padding applies - } - - // Also handle when default is deeply nested - .highlight-task-default [class^="highlight-task-"] [class^="highlight-task-"] { - line-height: 2.2; - padding-bottom: 4px; - display: inline-block; - } } .highlight-task-default { + --underline-base-offset: 3px; + --underline-thickness: 2px; + text-decoration: underline; text-decoration-color: lightgrey; - text-decoration-thickness: 3px; - text-underline-offset: 3px; // Moved down 1px to avoid descender breaks + text-decoration-thickness: var(--underline-thickness); + text-underline-offset: var(--underline-base-offset); + cursor: pointer; + position: relative; + display: inline-block; +} + +// Overlap badge and popover styles +.overlap-badge { + position: absolute; + top: -12px; + right: -12px; + padding: 4px 8px; + background-color: rgba(245, 245, 245, 0.5); + color: #666; + border-radius: 12px; + font-size: 14px; + font-weight: bold; cursor: pointer; + z-index: 10; + user-select: none; + -webkit-user-select: none; + pointer-events: auto; + // Make badge larger and easier to click + min-width: 24px; + min-height: 24px; + display: inline-flex; + align-items: center; + justify-content: center; + box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); + transition: background-color 0.2s, transform 0.1s; + + &:hover { + background-color: #e0e0e0; + transform: scale(1.1); + } - // Stack underlines when nested - each nested level gets a larger offset with clear spacing - [class^="highlight-task-"] { - text-underline-offset: 7px; // Second level underline (4px spacing from first, moved down 1px) + &:active { + transform: scale(0.95); } +} + +.overlap-popover { + // Completely remove from document flow when hidden + position: absolute; + top: calc(100% + 4px); + right: 0; + z-index: 1000; - [class^="highlight-task-"] [class^="highlight-task-"] { - text-underline-offset: 11px; // Third level underline (4px spacing from second, moved down 1px) - // Increase line height for 3+ levels to prevent overlap with next line - line-height: 2.2; - padding-bottom: 4px; - display: inline-block; + // When hidden - display: none removes element from layout completely + display: none; + pointer-events: none; + + // Base styles (applied when visible) + background: white; + border: 1px solid #ddd; + border-radius: 8px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); + min-width: 220px; + max-width: 300px; + max-height: 300px; + overflow-y: auto; + margin: 0; + padding: 0; + + &.popover-open { + display: block; + pointer-events: auto; } - [class^="highlight-task-"] [class^="highlight-task-"] [class^="highlight-task-"] { - text-underline-offset: 15px; // Fourth level underline (4px spacing from third, moved down 1px) - // Further increase line height for 4+ levels - line-height: 2.4; - padding-bottom: 6px; - display: inline-block; + .popover-content { + padding: 4px 0; + } + + .popover-entity-item { + padding: 10px 12px; + cursor: pointer; + border-bottom: 1px solid #eee; + transition: background-color 0.15s ease; + color: #333; + font-size: 14px; + line-height: 1.4; + + &:hover { + background-color: #f0f7ff; + color: #0066cc; + } + + &:active { + background-color: #e0f0ff; + } + + &:last-child { + border-bottom: none; + } } } diff --git a/medcat-trainer/webapp/frontend/src/styles/_common.scss b/medcat-trainer/webapp/frontend/src/styles/_common.scss index 889b6ddf6..7f5e9be86 100644 --- a/medcat-trainer/webapp/frontend/src/styles/_common.scss +++ b/medcat-trainer/webapp/frontend/src/styles/_common.scss @@ -67,27 +67,6 @@ $blur-radius: 10px; border-radius: 2px; color: white; } - - // Stack underlines when nested - each nested level gets a larger offset with clear spacing - [class^="highlight-task-"] { - text-underline-offset: 7px; // Second level underline (4px spacing from first, moved down 1px) - } - - [class^="highlight-task-"] [class^="highlight-task-"] { - text-underline-offset: 11px; // Third level underline (4px spacing from second, moved down 1px) - // Increase line height for 3+ levels to prevent overlap with next line - line-height: 2.2; - padding-bottom: 4px; - display: inline-block; - } - - [class^="highlight-task-"] [class^="highlight-task-"] [class^="highlight-task-"] { - text-underline-offset: 15px; // Fourth level underline (4px spacing from third, moved down 1px) - // Further increase line height for 4+ levels - line-height: 2.4; - padding-bottom: 6px; - display: inline-block; - } } } From 70f3fd92543cea628400ecbba5d06f9dda263fb3 Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Tue, 16 Dec 2025 16:54:03 +0000 Subject: [PATCH 2/3] feat(medcat-trainer): CU-869bgx7m2: remote service functionality for running inference on remote models for prepare_doc. Online learning not yet supported for this project setup type --- medcat-trainer/webapp/api/api/admin/models.py | 4 +- .../0093_add_remote_model_service_fields.py | 33 ++++ medcat-trainer/webapp/api/api/models.py | 23 ++- medcat-trainer/webapp/api/api/utils.py | 175 +++++++++++++++--- medcat-trainer/webapp/api/api/views.py | 67 ++++--- medcat-trainer/webapp/requirements.txt | 1 + 6 files changed, 244 insertions(+), 59 deletions(-) create mode 100644 medcat-trainer/webapp/api/api/migrations/0093_add_remote_model_service_fields.py diff --git a/medcat-trainer/webapp/api/api/admin/models.py b/medcat-trainer/webapp/api/api/admin/models.py index 5d44a5a09..50cae265d 100644 --- a/medcat-trainer/webapp/api/api/admin/models.py +++ b/medcat-trainer/webapp/api/api/admin/models.py @@ -9,7 +9,7 @@ _PROJECT_ANNO_ENTS_SETTINGS_FIELD_ORDER = ( 'concept_db', 'vocab', 'model_pack', 'cdb_search_filter', 'deid_model_annotation', 'require_entity_validation', 'train_model_on_submit', - 'add_new_entities', 'restrict_concept_lookup', 'terminate_available', 'irrelevant_available', + 'use_model_service', 'model_service_url', 'add_new_entities', 'restrict_concept_lookup', 'terminate_available', 'irrelevant_available', 'enable_entity_annotation_comments', 'tasks', 'relations' ) @@ -177,7 +177,7 @@ class ModelPackAdmin(admin.ModelAdmin): def metacats(self, obj): return ", ".join(str(m_c) for m_c in obj.meta_cats.all()) - + def save_model(self, request, obj, form, change): obj.last_modified_by = request.user super().save_model(request, obj, form, change) diff --git a/medcat-trainer/webapp/api/api/migrations/0093_add_remote_model_service_fields.py b/medcat-trainer/webapp/api/api/migrations/0093_add_remote_model_service_fields.py new file mode 100644 index 000000000..bf3a8b760 --- /dev/null +++ b/medcat-trainer/webapp/api/api/migrations/0093_add_remote_model_service_fields.py @@ -0,0 +1,33 @@ +# Generated by Django 5.1.11 on 2025-12-11 11:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('api', '0092_exportedproject_cdb_search_filter_id_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='projectannotateentities', + name='model_service_url', + field=models.CharField(blank=True, help_text='URL of the remote MedCAT service API (e.g., http://medcat-service:8000)', max_length=500, null=True), + ), + migrations.AddField( + model_name='projectannotateentities', + name='use_model_service', + field=models.BooleanField(default=False, help_text='Use a remote MedCAT service API for document processing instead of local models'), + ), + migrations.AddField( + model_name='projectgroup', + name='model_service_url', + field=models.CharField(blank=True, help_text='URL of the remote MedCAT service API (e.g., http://medcat-service:8000)', max_length=500, null=True), + ), + migrations.AddField( + model_name='projectgroup', + name='use_model_service', + field=models.BooleanField(default=False, help_text='Use a remote MedCAT service API for document processing instead of local models'), + ), + ] diff --git a/medcat-trainer/webapp/api/api/models.py b/medcat-trainer/webapp/api/api/models.py index 7e62cc33b..d8df36223 100644 --- a/medcat-trainer/webapp/api/api/models.py +++ b/medcat-trainer/webapp/api/api/models.py @@ -458,15 +458,24 @@ class Meta: 'if a model pack is used for the project') relations = models.ManyToManyField('Relation', blank=True, default=None, help_text='Relations that will be available for this project') + use_model_service = models.BooleanField(default=False, + help_text='Use a remote MedCAT service API for document processing instead of local models'\ + '(note: interim model training is not supported for remote model service projects)') + model_service_url = models.CharField(max_length=500, blank=True, null=True, + help_text='URL of the remote MedCAT service API (e.g., http://medcat-service:8003)') def save(self, *args, **kwargs): - if self.model_pack is None and (self.concept_db is None or self.vocab is None): - raise ValidationError('Must set at least the ModelPack or a Concept Database and Vocab Pair') - if self.model_pack and (self.concept_db is not None or self.vocab is not None): - raise ValidationError('Cannot set model pack and ConceptDB or a Vocab. You must use one or the other.') - if self.deid_model_annotation and self.model_pack is None: - raise ValidationError('Must set a DeID ModelPack for De-ID Model Annotation, cannot only set a cdb / vocab pair as' - ' not be a DeId model') + # If using remote model service, skip local model validation + if not self.use_model_service: + if self.model_pack is None and (self.concept_db is None or self.vocab is None): + raise ValidationError('Must set at least the ModelPack or a Concept Database and Vocab Pair') + if self.model_pack and (self.concept_db is not None or self.vocab is not None): + raise ValidationError('Cannot set model pack and ConceptDB or a Vocab. You must use one or the other.') + if self.deid_model_annotation and self.model_pack is None: + raise ValidationError('Must set a DeID ModelPack for De-ID Model Annotation, cannot only set a cdb / vocab pair as' + ' not be a DeId model') + elif self.use_model_service and not self.model_service_url: + raise ValidationError('When using model service, model_service_url must be set') super().save(*args, **kwargs) diff --git a/medcat-trainer/webapp/api/api/utils.py b/medcat-trainer/webapp/api/api/utils.py index 32f58b1a0..2141eb097 100644 --- a/medcat-trainer/webapp/api/api/utils.py +++ b/medcat-trainer/webapp/api/api/utils.py @@ -3,6 +3,7 @@ import os from typing import List +import requests from background_task import background from django.contrib.auth.models import User from django.db import transaction @@ -20,6 +21,70 @@ logger = logging.getLogger('trainer') +class RemoteEntity: + """A simple class to mimic spaCy entity structure for remote API responses.""" + def __init__(self, entity_data, text): + self.cui = entity_data.get('cui', '') + self.start_char_index = entity_data.get('start', 0) + self.end_char_index = entity_data.get('end', 0) + self.text = entity_data.get('detected_name') or entity_data.get('source_value', '') + self.context_similarity = entity_data.get('context_similarity', entity_data.get('acc', 0.0)) + self._meta_anns = entity_data.get('meta_anns', {}) + self._text = text + + def get_addon_data(self, key): + """Mimic get_addon_data for meta_cat_meta_anns.""" + if key == 'meta_cat_meta_anns': + return self._meta_anns + return {} + + +class RemoteSpacyDoc: + """A simple class to mimic spaCy document structure for remote API responses.""" + def __init__(self, linked_ents): + self.linked_ents = linked_ents + + +def call_remote_model_service(service_url, text): + """ + Call the remote MedCAT service API to process text. + + Args: + service_url: Base URL of the remote service (e.g., http://medcat-service:8000) + text: Text to process + + Returns: + RemoteSpacyDoc object with linked_ents + """ + # Ensure service_url doesn't end with / + service_url = service_url.rstrip('/') + api_url = f"{service_url}/api/process" + + payload = { + "text": text + } + + try: + response = requests.post(api_url, json=payload, timeout=60) + response.raise_for_status() + result = response.json() + + # Extract entities from the response + entities_data = result.get('entities', {}) + linked_ents = [] + + for _, entity_data in entities_data.items(): + linked_ents.append(RemoteEntity(entity_data, text)) + + return RemoteSpacyDoc(linked_ents) + except requests.exceptions.RequestException as e: + logger.error(f"Error calling remote model service at {api_url}: {e}") + raise Exception(f"Failed to call remote model service: {str(e)}") from e + except Exception as e: + logger.error(f"Error processing remote model service response: {e}") + raise Exception(f"Failed to process remote model service response: {str(e)}") from e + + def remove_annotations(document, project, partial=False): try: if partial: @@ -36,7 +101,27 @@ def remove_annotations(document, project, partial=False): logger.debug(f"Something went wrong: {e}") -def add_annotations(spacy_doc, user, project, document, existing_annotations, cat): +class SimpleFilters: + """Simple filter object for remote service when cat is not available.""" + def __init__(self, cuis=None, cuis_exclude=None): + self.cuis = cuis or set() + self.cuis_exclude = cuis_exclude or set() + + +def add_annotations(spacy_doc, user, project, document, existing_annotations, cat=None, filters=None, similarity_threshold=0.3): + """ + Add annotations from spacy_doc to the database. + + Args: + spacy_doc: spaCy document with linked_ents or RemoteSpacyDoc + user: User object + project: ProjectAnnotateEntities object + document: Document object + existing_annotations: List of existing AnnotatedEntity objects + cat: CAT object (optional, required if filters not provided) + filters: SimpleFilters object (optional, used when cat is None) + similarity_threshold: float (optional, default 0.3, used when cat is None) + """ spacy_doc.linked_ents.sort(key=lambda x: len(x.text), reverse=True) tkns_in = [] @@ -57,6 +142,14 @@ def add_annotations(spacy_doc, user, project, document, existing_annotations, ca metataskvals2obj = {} pass + # Get filters and similarity threshold + if cat is not None: + filters_obj = cat.config.components.linking.filters + MIN_ACC = cat.config.components.linking.similarity_threshold + else: + filters_obj = filters or SimpleFilters() + MIN_ACC = similarity_threshold + def check_filters(cui, filters): if cui in filters.cuis or not filters.cuis: return cui not in filters.cuis_exclude @@ -64,7 +157,7 @@ def check_filters(cui, filters): return False for ent in spacy_doc.linked_ents: - if check_filters(ent.cui, cat.config.components.linking.filters): + if check_filters(ent.cui, filters_obj): ents.append(ent) logger.debug('Found %s annotations to store', len(ents)) @@ -97,7 +190,6 @@ def check_filters(cui, filters): ann_ent.end_ind = ent.end_char_index ann_ent.acc = ent.context_similarity - MIN_ACC = cat.config.components.linking.similarity_threshold if ent.context_similarity < MIN_ACC: ann_ent.deleted = True ann_ent.validated = True @@ -147,7 +239,7 @@ def get_create_cdb_infos(cdb, concept, cui, cui_info_prop, code_prop, desc_prop, def create_annotation(source_val: str, selection_occurrence_index: int, cui: str, user: User, - project: ProjectAnnotateEntities, document, cat: CAT): + project: ProjectAnnotateEntities, document: Document): text = document.text id = None @@ -241,29 +333,58 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int): project = ProjectAnnotateEntities.objects.get(id=project_id) docs = Document.objects.filter(id__in=doc_ids) - logger.info('Loading CAT object in bg process for project: %s', project.id) - cat = get_medcat(project=project) - - # Set CAT filters - cat.config.components.linking.filters.cuis = project.cuis - - for doc in docs: - logger.info(f'Running MedCAT model for project {project.id}:{project.name} over doc: {doc.id}') - if not project.deid_model_annotation: - spacy_doc = cat(doc.text) - else: - deid = DeIdModel(cat) - spacy_doc = deid(doc.text) - anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project) - with transaction.atomic(): - add_annotations(spacy_doc=spacy_doc, - user=user, - project=project, - document=doc, - cat=cat, - existing_annotations=anns) - # add doc to prepared_documents - project.prepared_documents.add(doc) + # Get CUI filters + cuis = set() + if project.cuis is not None and project.cuis: + cuis = set([str(cui).strip() for cui in project.cuis.split(",")]) + if project.cuis_file is not None and project.cuis_file: + try: + cuis.update(json.load(open(project.cuis_file.path))) + except FileNotFoundError: + logger.warning('Missing CUI filter file for project %s', project.id) + + if project.use_model_service: + # Use remote model service + logger.info('Using remote model service in bg process for project: %s', project.id) + filters = SimpleFilters(cuis=cuis) + for doc in docs: + logger.info(f'Running remote MedCAT service for project {project.id}:{project.name} over doc: {doc.id}') + spacy_doc = call_remote_model_service(project.model_service_url, doc.text) + anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project) + with transaction.atomic(): + add_annotations(spacy_doc=spacy_doc, + user=user, + project=project, + document=doc, + cat=None, + filters=filters, + similarity_threshold=0.3, + existing_annotations=anns) + project.prepared_documents.add(doc) + else: + # Use local medcat model + logger.info('Loading CAT object in bg process for project: %s', project.id) + cat = get_medcat(project=project) + + # Set CAT filters + cat.config.components.linking.filters.cuis = cuis + + for doc in docs: + logger.info(f'Running MedCAT model for project {project.id}:{project.name} over doc: {doc.id}') + if not project.deid_model_annotation: + spacy_doc = cat(doc.text) + else: + deid = DeIdModel(cat) + spacy_doc = deid(doc.text) + anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project) + with transaction.atomic(): + add_annotations(spacy_doc=spacy_doc, + user=user, + project=project, + document=doc, + cat=cat, + existing_annotations=anns) + project.prepared_documents.add(doc) project.save() logger.info('Prepared all docs for project: %s, docs processed: %s', project.id, project.prepared_documents) diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index 991de25c7..46520d686 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -278,25 +278,40 @@ def prepare_documents(request): with transaction.atomic(): # If the document is not already annotated, annotate it if (len(anns) == 0 and not is_validated) or update: - # Based on the project id get the right medcat - cat = get_medcat(project=project) - logger.info('loaded medcat model for project: %s', project.id) - - # Set CAT filters - cat.config.components.linking.filters.cuis = cuis - - if not project.deid_model_annotation: - spacy_doc = cat(document.text) + if project.use_model_service: + # Use remote model service + logger.info('Using remote model service for project: %s', project.id) + from .utils import call_remote_model_service, SimpleFilters + spacy_doc = call_remote_model_service(project.model_service_url, document.text) + filters = SimpleFilters(cuis=cuis) + add_annotations(spacy_doc=spacy_doc, + user=user, + project=project, + document=document, + cat=None, + filters=filters, + similarity_threshold=0.3, + existing_annotations=anns) else: - deid = DeIdModel(cat) - spacy_doc = deid(document.text) - - add_annotations(spacy_doc=spacy_doc, - user=user, - project=project, - document=document, - cat=cat, - existing_annotations=anns) + # Use local medcat model + cat = get_medcat(project=project) + logger.info('loaded medcat model for project: %s', project.id) + + # Set CAT filters + cat.config.components.linking.filters.cuis = cuis + + if not project.deid_model_annotation: + spacy_doc = cat(document.text) + else: + deid = DeIdModel(cat) + spacy_doc = deid(document.text) + + add_annotations(spacy_doc=spacy_doc, + user=user, + project=project, + document=document, + cat=cat, + existing_annotations=anns) # add doc to prepared_documents project.prepared_documents.add(document) @@ -380,15 +395,12 @@ def add_annotation(request): user = request.user project = ProjectAnnotateEntities.objects.get(id=p_id) document = Document.objects.get(id=d_id) - - cat = get_medcat(project=project) id = create_annotation(source_val=source_val, selection_occurrence_index=sel_occur_idx, cui=cui, user=user, project=project, - document=document, - cat=cat) + document=document) logger.debug('Annotation added.') return Response({'message': 'Annotation added successfully', 'id': id}) @@ -412,6 +424,14 @@ def add_concept(request): user = request.user project = ProjectAnnotateEntities.objects.get(id=p_id) document = Document.objects.get(id=d_id) + + if project.use_model_service: + # Use remote model service + logger.error('Adding concepts is not supported for remote model service'\ + 'projects, you likely want to use a local model') + raise NotImplementedError('Adding concepts is not supported for remote model service projects') + + cat = get_medcat(project=project) if cui in cat.cdb.cui2names: @@ -460,7 +480,8 @@ def import_cdb_concepts(request): def _submit_document(project: ProjectAnnotateEntities, document: Document): - if project.train_model_on_submit: + if project.train_model_on_submit and not project.use_model_service: + # interim model training not supported for remote model service projects cat = get_medcat(project=project) train_medcat(cat, project, document) diff --git a/medcat-trainer/webapp/requirements.txt b/medcat-trainer/webapp/requirements.txt index f2bbf1f4f..a910d7814 100644 --- a/medcat-trainer/webapp/requirements.txt +++ b/medcat-trainer/webapp/requirements.txt @@ -11,3 +11,4 @@ psycopg[binary,pool]==3.2.9 cryptography==45.0.* drf-oidc-auth==3.0.0 django-health-check==3.20.0 +requests==2.31.* From 378634b078d4436a1de21cd777bb4dc0b4cd04a4 Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Wed, 17 Dec 2025 11:38:30 +0000 Subject: [PATCH 3/3] review comments --- medcat-trainer/webapp/api/api/utils.py | 4 ++-- medcat-trainer/webapp/api/api/views.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/medcat-trainer/webapp/api/api/utils.py b/medcat-trainer/webapp/api/api/utils.py index 2141eb097..dde42cc4f 100644 --- a/medcat-trainer/webapp/api/api/utils.py +++ b/medcat-trainer/webapp/api/api/utils.py @@ -348,7 +348,7 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int): logger.info('Using remote model service in bg process for project: %s', project.id) filters = SimpleFilters(cuis=cuis) for doc in docs: - logger.info(f'Running remote MedCAT service for project {project.id}:{project.name} over doc: {doc.id}') + logger.info('Running remote MedCAT service for project %s:%s over doc: %s', project.id, project.name, doc.id) spacy_doc = call_remote_model_service(project.model_service_url, doc.text) anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project) with transaction.atomic(): @@ -370,7 +370,7 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int): cat.config.components.linking.filters.cuis = cuis for doc in docs: - logger.info(f'Running MedCAT model for project {project.id}:{project.name} over doc: {doc.id}') + logger.info('Running MedCAT model for project %s:%s over doc: %s', project.id, project.name, doc.id) if not project.deid_model_annotation: spacy_doc = cat(doc.text) else: diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index 46520d686..f9d9e0d36 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -480,10 +480,14 @@ def import_cdb_concepts(request): def _submit_document(project: ProjectAnnotateEntities, document: Document): - if project.train_model_on_submit and not project.use_model_service: - # interim model training not supported for remote model service projects - cat = get_medcat(project=project) - train_medcat(cat, project, document) + if project.train_model_on_submit: + if project.use_model_service: + # TODO: Implement this, already available in CMS / gateway instances. + # interim model training not supported for remote model service projects + logger.warning('Interim model training is not supported for remote model service projects') + else: + cat = get_medcat(project=project) + train_medcat(cat, project, document) # Add cuis to filter if they did not exist cuis = []