diff --git a/database/10_topicTables.sql b/database/10_topicTables.sql
index d0ec4cf8..dfe2cb8d 100644
--- a/database/10_topicTables.sql
+++ b/database/10_topicTables.sql
@@ -4,7 +4,12 @@ CREATE TABLE IF NOT EXISTS sentencetopics (
     sentence_id BIGINT,
     topicinstance_id BIGINT, -- refers to topicvaluebase.id
     topiclabel VARCHAR(255), -- refers to topicvaluebase.value
-    thetast DOUBLE PRECISION
+    thetast DOUBLE PRECISION,
+    model_id BIGINT -- NOTE(review): presumably the id of the topic model that produced the row; confirm which table it refers to
 );
+
+-- CREATE TABLE IF NOT EXISTS leaves an already existing table untouched, so a
+-- database created before model_id was introduced needs the column added here.
+ALTER TABLE sentencetopics ADD COLUMN IF NOT EXISTS model_id BIGINT;
 CREATE TABLE IF NOT EXISTS documenttopicsraw (
     document_id BIGINT,
diff --git a/docker-compose.yaml b/docker-compose.yaml index c8b383ab..97e1468a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -98,6 +98,7 @@ services: - app_net healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] + start_period: 5s interval: 5s timeout: 5s start_interval: 1s diff --git a/uce.portal/resources/templates/corpus/components/documents.ftl b/uce.portal/resources/templates/corpus/components/documents.ftl index 0f5afd33..5c9080b9 100644 --- a/uce.portal/resources/templates/corpus/components/documents.ftl +++ b/uce.portal/resources/templates/corpus/components/documents.ftl @@ -3,6 +3,50 @@
<#assign searchId = ""> <#include '*/search/components/documentCardContent.ftl' > + +
+ +
+
- \ No newline at end of file + + + \ No newline at end of file diff --git a/uce.portal/resources/templates/corpus/corpusInspector.ftl b/uce.portal/resources/templates/corpus/corpusInspector.ftl index 484c6790..e453b716 100644 --- a/uce.portal/resources/templates/corpus/corpusInspector.ftl +++ b/uce.portal/resources/templates/corpus/corpusInspector.ftl @@ -17,6 +17,27 @@ +
+ + + +
diff --git a/uce.portal/resources/templates/css/corpus-inspector.css b/uce.portal/resources/templates/css/corpus-inspector.css index e3d5367b..fae4b47b 100644 --- a/uce.portal/resources/templates/css/corpus-inspector.css +++ b/uce.portal/resources/templates/css/corpus-inspector.css @@ -84,4 +84,13 @@ width: 100%; border-bottom: lightgray 1px solid; padding:16px; +} +.corpus-inspector .annotation-entry:not(:has(input[type="checkbox"]:checked)) > div { + background: #e3e3e3; + border: 1px solid #cfcfcf !important; + border-radius: 6px; +} + +.corpus-inspector .annotation-entry:not(:has(input[type="checkbox"]:checked)) > div label { + color: #666; } \ No newline at end of file diff --git a/uce.portal/resources/templates/css/document-reader.css b/uce.portal/resources/templates/css/document-reader.css index 1e6c42f5..ae60e0d2 100644 --- a/uce.portal/resources/templates/css/document-reader.css +++ b/uce.portal/resources/templates/css/document-reader.css @@ -655,7 +655,9 @@ body { } .tab-content .tab-pane.active { - display: block; + display: flex; + flex-direction: column; + height: calc(100vh - 50px); } .side-bar.visualization-expanded { width: 150vw !important; @@ -665,7 +667,7 @@ body { .tab-pane .visualization-wrapper { display: flex; flex-direction: column; - height: 100%; + flex: 1; position: relative; } .visualization-wrapper .visualization-content { @@ -710,15 +712,16 @@ body { /* Bottom Navigation */ .tab-pane .viz-bottom-nav { - position: fixed; - right: 0%; - bottom: 30px; - transform: translateX(-50%); - width: auto; - min-width: 320px; - max-width: 200vw; + position: absolute; + bottom: 80px; + left: 16px; + width: max-content; + max-width: calc(100% - 32px); + align-self: flex-end; display: flex; - justify-content: space-around; + justify-content: center; + gap: 10px; + overflow: visible; border-radius: 24px; box-shadow: 0 4px 24px rgba(0,0,0,0.12); background: #fff; @@ -776,7 +779,7 @@ body { outline: none; } -#vp-3, #vp-4, #vp-5, #vp-2, #vp-1 { +#vp-4, 
#vp-5, #vp-2, #vp-1, #vp-6 { display: flex; align-items: center; justify-content: center; @@ -784,8 +787,20 @@ body { overflow: hidden; position: relative; } +#vp-3 { + display: flex; + flex-direction: column; + height: 100%; +} +#vp-3 > div[id$="-body"] { + flex: 1; +} +#vp-3 > .d-flex { + width: 100%; + padding: 0; +} .key-topic-settings-panel { position: absolute; top: 160px; @@ -951,6 +966,7 @@ body { border-radius: 4px; cursor: pointer; padding: 0; +} .paragraph .paragraph-header { border-radius: 16px;
@@ -964,5 +980,76 @@ body {
     background-color: white;
     color: var(--prime);
     border: 1px solid var(--prime);
+}
+
+    /* --- Dropdown navigation extension (hover) --- */
+    .tab-pane .viz-bottom-nav.viz-dropdown-nav{
+        justify-content: flex-start;
+        overflow: visible; /* required so that the menus are not clipped */
+        gap: 10px;
+    }
+
+    .tab-pane .viz-nav-group{
+        position: relative;
+    }
+
+    /* Parent buttons keep the look of .viz-nav-btn */
+    .tab-pane .viz-nav-parent{
+        width: auto; /* do not stretch to 100%, otherwise the groups become huge */
+        padding: 8px 14px;
+        display: inline-flex;
+        align-items: center;
+        gap: 8px;
+    }
+
+    /* Dropdown menu */
+    .tab-pane .viz-nav-menu{
+        display: none;
+        position: absolute;
+        left: 0;
+        bottom: calc(100% + 10px); /* opens upwards */
+        min-width: 240px;
+        background: #fff;
+        border: 1px solid #e0e0e0;
+        border-radius: 14px;
+        box-shadow: 0 10px 25px rgba(0,0,0,0.12);
+        padding: 6px;
+        z-index: 9999;
+    }
+
+    /* Open on hover */
+    .tab-pane .viz-nav-group:hover .viz-nav-menu{
+        display: block;
+    }
+
+    /* Menu items */
+    .tab-pane .viz-nav-item{
+        display: block;
+        padding: 8px 10px;
+        border-radius: 10px;
+        text-decoration: none;
+        color: #555;
+        font-weight: 600;
+        cursor: pointer;
+        white-space: nowrap;
+    }
+
+    .tab-pane .viz-nav-item:hover{
+        background: rgba(0,0,0,0.06);
+    }
+
+    /* Optional: disabled state */
+    .tab-pane .viz-nav-item.viz-disabled{
+        opacity: 0.6;
+        cursor: not-allowed;
+    }
 
-}
\ No newline at end of file
+    /* Optional: 10px strip below the menu that covers the gap left by
+       bottom: calc(100% + 10px), so the menu does not close immediately */
+    .tab-pane .viz-nav-menu::before{
+        content: "";
+        position: absolute;
+        left: 0;
+        right: 0;
+        bottom: -10px;
+        height: 10px;
+    }
\ No newline at end of file
diff --git a/uce.portal/resources/templates/css/site.css b/uce.portal/resources/templates/css/site.css index c4edeeb0..1e189c9f 100644 --- a/uce.portal/resources/templates/css/site.css +++ b/uce.portal/resources/templates/css/site.css @@ -1147,6 +1147,13 @@ nav .selected-nav-btn.text::before { border-color: var(--prime) !important; } +/* lighter blue for already checked checkboxes/flags (Import Modal) */ +.custom-control-input:checked:disabled ~ .custom-control-label::before { + background-color: #6da2bc !important; + border-color: #6da2bc !important; + opacity: 1 !important; +} + /*Custom button switches */ /* UCE Map styles */ @@ -1240,3 +1247,27 @@ nav .selected-nav-btn.text::before { } /* UCE Map styles end */ + +/* Import Loading Bar Styles */ +#importProgressWrapper{ + display: none; + position: fixed; + bottom: 20px; + right: 20px; + width: 300px; + z-index: 1050; +} + +#importProgressWrapper:hover #allImportsList{ + display: block !important; +} + +#allImportsList{ + display: none; + position: absolute; + bottom: 100%; + right: 0; + width: 300px; + max-height: 400px; + overflow-y: auto; +} diff --git a/uce.portal/resources/templates/index.ftl b/uce.portal/resources/templates/index.ftl index 4523a6ac..9793ce68 100644 --- a/uce.portal/resources/templates/index.ftl +++ b/uce.portal/resources/templates/index.ftl @@ -221,6 +221,7 @@
+ +<#--Import Loading Box --> +
+
+
+
+ Importing... +
+
+
+
+
+
+
+
+
+
+ + + diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index 8b7dd880..af043631 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -3,7 +3,11 @@ let searchTokens = ""; let currentSelectedTopic = null; let currentTopicIndex = -1; let matchingTopics = []; - +let selectedTopicModelId = null; +let selectedTopicModelName = null; +let selectedTopicVizType = 'overview'; +let selectedEmotionModelId = null; +let selectedEmotionVizType = 'radar'; let defaultTopicColorMap = getDefaultTopicColorMap(); let defaultTopicSettings = { topicCount: 10, @@ -782,7 +786,7 @@ function minimapToDocumentPosition(minimapPos, dimensions) { function createMinimapMarker(options) { const { top, height, color, elementId, topic, className } = options; - + const $marker = $('
') .addClass('minimap-marker') .addClass(className || '') @@ -848,6 +852,22 @@ function updateFloatingUIPositions() { window.addEventListener('resize', updateFloatingUIPositions); window.addEventListener('DOMContentLoaded', updateFloatingUIPositions); +function activateVisualizationPanel(target, $button) { + clearTopicColoring(); + hideTopicNavButtons(); + $('.scrollbar-minimap').hide(); + + $('.viz-nav-btn').removeClass('active'); + $('.viz-nav-parent').removeClass('active'); + + if ($button && $button.length) { + $button.addClass('active'); + } + + $('.viz-panel').removeClass('active'); + $(target).addClass('active'); +} + document.querySelectorAll('.tab-btn').forEach(btn => { btn.addEventListener('click', async () => { const targetId = btn.getAttribute('data-tab'); @@ -867,58 +887,119 @@ document.querySelectorAll('.tab-btn').forEach(btn => { $('.scrollbar-minimap').hide(); sideBar.classList.add('visualization-expanded'); } else { - setTimeout(updateFloatingUIPositions,500) ; + setTimeout(updateFloatingUIPositions, 500); currentSelectedTopic = null; sideBar.classList.remove('visualization-expanded'); $('.scrollbar-minimap').show(); } + if (targetId === 'visualization-tab') { - setTimeout(() => renderTemporalExplorer('vp-1'), 500); - $('.viz-nav-btn').removeClass('active'); - $('.viz-nav-btn').first().addClass('active'); + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + + loadEmotionModels(docId); + loadSentimentMenu(docId); + loadTopicMenu(docId).then(function (topicState) { + if (topicState.models && topicState.models.length > 0) { + activateVisualizationPanel('#viz-panel-3', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + $('#vp-3').removeClass('rendered'); + setTimeout(() => renderTopicViz('vp-3'), 500); + } + }); - $('.viz-panel').removeClass('active'); - $('.viz-panel').first().addClass('active'); + loadOthersMenu(docId).then(function (othersState) { + }); } }); - }); -$(document).on('click', 
'.viz-nav-btn', function () { +$(document).on('click', '.viz-nav-item[data-target]:not(.others-menu-item)', function (e) { + e.preventDefault(); + const target = $(this).data('target'); - clearTopicColoring(); - hideTopicNavButtons(); - $('.scrollbar-minimap').hide(); + const $group = $(this).closest('.viz-nav-group'); - // Update active button - $('.viz-nav-btn').removeClass('active'); + activateVisualizationPanel(target, $group.find('.viz-nav-parent')); + + if (target === '#viz-panel-1') { + $('#vp-1').removeClass('rendered'); + setTimeout(() => renderTemporalExplorer('vp-1'), 500); + } + + if (target === '#viz-panel-2') { + $('#vp-2').removeClass('rendered'); + setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); + } +}); + +$(document).on('click', '.topic-model-item', function (e) { + e.preventDefault(); + + selectedTopicModelId = $(this).data('model-id'); + selectedTopicModelName = $.trim($(this).text()); + + $('.topic-model-item').removeClass('active'); $(this).addClass('active'); - // Update visible panel - $('.viz-panel').removeClass('active'); - $(target).addClass('active'); + activateVisualizationPanel('#viz-panel-3', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + $('#vp-3').removeClass('rendered'); + renderTopicViz('vp-3'); +}); +$(document).on('click', '.topic-viz-toggle-btn', function (e) { + e.preventDefault(); + + const nextVizType = $(this).data('viz-type'); + if (!nextVizType || nextVizType === selectedTopicVizType) return; + + selectedTopicVizType = nextVizType; + $('#vp-3').removeClass('rendered'); + renderTopicViz('vp-3'); +}); +$(document).on('click', '.emotion-model-item', function (e) { + e.preventDefault(); + + selectedEmotionModelId = $(this).data('model-id'); + $('.emotion-model-item').removeClass('active'); + $(this).addClass('active'); + + activateVisualizationPanel('#viz-panel-7', $('.viz-nav-group[data-category="emotion"] .viz-nav-parent')); + $('#vp-7').removeClass('rendered'); + renderEmotionViz('vp-7'); +}); 
+$(document).on('click', '.others-menu-item[data-target]', function (e) { + e.preventDefault(); + + const target = $(this).data('target'); + const $group = $(this).closest('.viz-nav-group'); + + activateVisualizationPanel(target, $group.find('.viz-nav-parent')); if (target === '#viz-panel-1') { + $('#vp-1').removeClass('rendered'); setTimeout(() => renderTemporalExplorer('vp-1'), 500); } if (target === '#viz-panel-2') { + $('#vp-2').removeClass('rendered'); setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); } - if (target === '#viz-panel-3') { - setTimeout(() => renderSentenceTopicNetwork('vp-3'), 500); - } if (target === '#viz-panel-4') { - $('.selector-container').hide(); + $('#vp-4').removeClass('rendered'); setTimeout(() => renderTopicSimilarityMatrix('vp-4'), 500); - } if (target === '#viz-panel-5') { + $('#vp-5').removeClass('rendered'); setTimeout(() => renderSentenceTopicSankey('vp-5'), 500); - } }); +$(document).on('click', '.emotion-viz-toggle-btn', function (e) { + e.preventDefault(); + const nextVizType = $(this).data('viz-type'); + if (!nextVizType || nextVizType === selectedEmotionVizType) return; + selectedEmotionVizType = nextVizType; + $('#vp-7').removeClass('rendered'); + renderEmotionViz('vp-7'); +}); function renderSentenceTopicNetwork(containerId) { const container = document.getElementById(containerId); if (!container || container.classList.contains('rendered')) return; @@ -1124,7 +1205,54 @@ function computeTopicSimilarityMatrix(data, type = "cosine") { matrix: matrix }; } +function renderTopicViz(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + if (!selectedTopicModelId) { + container.classList.remove('rendered'); + container.innerHTML = '
Please choose a topic model
'; + container.classList.add('rendered'); + return; + } + + const modelName = selectedTopicModelName || ('Model ' + selectedTopicModelId); + + const overviewBtnClass = selectedTopicVizType === 'overview' + ? 'btn btn-sm btn-primary topic-viz-toggle-btn' + : 'btn btn-sm btn-light topic-viz-toggle-btn'; + + const timelineBtnClass = selectedTopicVizType === 'timeline' + ? 'btn btn-sm btn-primary topic-viz-toggle-btn' + : 'btn btn-sm btn-light topic-viz-toggle-btn'; + + const heatmapBtnClass = selectedTopicVizType === 'heatmap' + ? 'btn btn-sm btn-primary topic-viz-toggle-btn' + : 'btn btn-sm btn-light topic-viz-toggle-btn'; + + container.classList.remove('rendered'); + container.innerHTML = '' + + '
' + + '
' + + '
Topic
' + + '
' + modelName + '
' + + '
' + + '
' + + '' + + '' + + '' + + '
' + + '
' + + '
'; + + if (selectedTopicVizType === 'timeline') { + renderTopicTimeline(containerId + '-body'); + } else if (selectedTopicVizType === 'heatmap') { + renderTopicHeatmap(containerId + '-body'); + } else { + renderTopicModelOverview(containerId + '-body'); + } +} function renderTopicSimilarityMatrix(containerId) { const container = document.getElementById(containerId); if (!container || container.classList.contains('rendered')){ @@ -1186,6 +1314,7 @@ function renderTopicEntityChordDiagram(containerId) { $.get('/api/document/page/topicEntityRelation', { documentId: docId }) + .catch(() => []) .then(data => { $('.visualization-spinner').hide() if (!data || !Array.isArray(data) || data.length === 0) { @@ -1433,13 +1562,17 @@ function renderTemporalExplorer(containerId) { $('.visualization-spinner').show() const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); - const taxonReq = $.get('/api/document/page/taxon', { documentId: docId }); - const topicReq = $.get('/api/document/page/topics', { documentId: docId }); - const entityReq = $.get('/api/document/page/namedEntities', { documentId: docId }); - const lemmaReq = $.get('/api/document/page/lemma', { documentId: docId }); - const geonameReq = $.get('/api/document/page/geoname', { documentId: docId }); - - Promise.all([taxonReq, topicReq, entityReq, lemmaReq, geonameReq]).then(([taxon, topics, entities, lemma, geoname]) => { + const emotionReq = $.get('/api/document/page/emotions', { + documentId: docId, + modelId: selectedEmotionModelId + }).then(d => d).catch(() => []); + const taxonReq = $.get('/api/document/page/taxon', { documentId: docId }).then(d => d).catch(() => []); + const topicReq = $.get('/api/document/page/topics', { documentId: docId }).then(d => d).catch(() => []); + const entityReq = $.get('/api/document/page/namedEntities', { documentId: docId }).then(d => d).catch(() => []); + const lemmaReq = $.get('/api/document/page/lemma', { documentId: docId }).then(d => 
d).catch(() => []); + const geonameReq = $.get('/api/document/page/geoname', { documentId: docId }).then(d => d).catch(() => []); + + Promise.all([taxonReq, topicReq, entityReq, lemmaReq, geonameReq, emotionReq]).then(([taxon, topics, entities, lemma, geoname, emotions]) => { $('.visualization-spinner').hide() if ((!taxon || taxon.length === 0) && (!topics || topics.length === 0) && (!entities || entities.length === 0) && (!lemma || lemma.length === 0 && !geoname || geoname.length === 0)) { const container = document.getElementById(containerId); @@ -1491,6 +1624,14 @@ function renderTemporalExplorer(containerId) { valueField: 'geonameValue', label: 'Geonames', color: '#c680ff', + }, + { + key: 'Emotions', + data: emotions, + pageField: 'pageId', + valueField: 'emotionLabel', + label: 'Emotions', + color: '#f5c542' } ]; @@ -1524,7 +1665,8 @@ function renderTemporalExplorer(containerId) { Topics: [], "Named Entities": [], Lemmas: [], - Geonames: [] + Geonames: [], + Emotions: [] }); } @@ -1615,7 +1757,936 @@ function renderTemporalExplorer(containerId) { console.error("Error loading or processing annotation data:", err); }); } +function loadEmotionModels(docId) { + return $.get('/api/document/emotionModels', { documentId: docId }) + .then((models) => { + const $menu = $('#emotion-model-menu'); + $menu.empty(); + + if (!models || models.length === 0) { + selectedEmotionModelId = null; + $menu.append('No models found'); + return; + } + + const hasSelectedModel = models.some(function (m) { + return String(m.modelId) === String(selectedEmotionModelId); + }); + + if (!hasSelectedModel) { + selectedEmotionModelId = models[0].modelId; + } + + models.forEach((m) => { + const isActive = String(m.modelId) === String(selectedEmotionModelId) ? ' active' : ''; + $menu.append( + '' + + (m.modelName ? 
m.modelName : ('Model ' + m.modelId)) + + '' + ); + }); + }) + .catch(() => { + selectedEmotionModelId = null; + $('#emotion-model-menu').html('Failed to load'); + }); +} +function loadTopicMenu(docId) { + const $menu = $('#topic-menu'); + const noDataLabel = $menu.attr('data-label-no-data') || 'No data available'; + + $menu.empty(); + + const topicModelsReq = $.get('/api/document/topicModels', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + return topicModelsReq.then(function (topicModels) { + const hasModels = Array.isArray(topicModels) && topicModels.length > 0; + + if (hasModels) { + const selectedStillExists = topicModels.some(function (m) { + return String(m.modelId) === String(selectedTopicModelId); + }); + + if (!selectedStillExists) { + selectedTopicModelId = topicModels[0].modelId; + selectedTopicModelName = topicModels[0].modelName || ('Model ' + topicModels[0].modelId); + } + + topicModels.forEach(function (m) { + const isActive = String(m.modelId) === String(selectedTopicModelId) ? ' active' : ''; + const label = m.modelName ? 
m.modelName : ('Model ' + m.modelId); + + $menu.append( + '' + + label + + '' + ); + }); + } + + if (!hasModels) { + $menu.append('No models found'); + } + + return { + models: topicModels + }; + }); +} +function loadOthersMenu(docId) { + const $menu = $('#others-menu'); + const noDataLabel = $menu.attr('data-label-no-data') || 'No data available'; + + $menu.empty(); + + const topicPageReq = $.get('/api/document/page/topics', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + const topicEntityReq = $.get('/api/document/page/topicEntityRelation', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + return Promise.all([topicPageReq, topicEntityReq]).then(function (results) { + const topicPageData = results[0] || []; + const topicEntityData = results[1] || []; + + const hasSemanticDensity = Array.isArray(topicPageData) && topicPageData.length > 0; + const hasTopicEntity = Array.isArray(topicEntityData) && topicEntityData.length > 0; + + $menu.append( + '' + + 'Semantic Density' + + '' + ); + + $menu.append( + '' + + 'Topic Entity' + + '' + ); + + $menu.append( + '' + + 'Topic Landscape' + + '' + ); + + $menu.append( + '' + + 'Topic Similarity' + + '' + ); + + $menu.append( + '' + + 'Sentence Topic Flow' + + '' + ); + + return { + hasSemanticDensity: hasSemanticDensity, + hasTopicEntity: hasTopicEntity + }; + }); +} +function loadSentimentMenu(docId) { + const $menu = $('#sentiment-menu'); + $menu.empty(); + + $.get('/api/document/page/sentiments', { documentId: docId }) + .then(function (data) { + if (Array.isArray(data) && data.length > 0) { + $menu.append( + '' + + 'Sentence Sentiment' + + '' + ); + } else { + $menu.append('No models found'); + } + }) + .catch(function () { + $menu.append('No models found'); + }); +} +function renderTopicModelOverview(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + if 
(!selectedTopicModelId) { + container.classList.remove('rendered'); + container.innerHTML = '
Please choose a topic model
'; + container.classList.add('rendered'); + return; + } + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + + $('.visualization-spinner').show(); + + $.get('/api/document/topicModelOverview', { + documentId: docId, + modelId: selectedTopicModelId + }).then(function (data) { + $('.visualization-spinner').hide(); + + if (!data || !Array.isArray(data) || data.length === 0) { + container.innerHTML = '
No topic data for this model
'; + container.classList.add('rendered'); + return; + } + + const sortedTopics = data + .filter(function (item) { + return item && item.label && String(item.label).trim() !== ''; + }) + .slice(0, 10); + + if (sortedTopics.length === 0) { + container.innerHTML = '
No topic data for this model
'; + container.classList.add('rendered'); + return; + } + + const labels = sortedTopics.map(function (item) { + return String(item.label).trim(); + }); + + const values = sortedTopics.map(function (item) { + return item.value || 0; + }); + + const maxValue = Math.max.apply(null, values); + + function formatTopicLabel(label) { + const text = String(label || ''); + if (text.length <= 28) return text; + return text.slice(0, 25) + '...'; + } + + const chartDom = document.getElementById(containerId + '-topic-model'); + const chart = echarts.init(chartDom); + + const option = { + title: { + text: 'Topic Overview', + left: 0, + top: 0 + }, + tooltip: { + trigger: 'item', + formatter: function (params) { + return '
' + labels[params.dataIndex] + '
' + + '
Occurrences: ' + params.value + '
'; + } + }, + grid: { + left: '15%', + right: '12%', + top: 45, + bottom: 60, + containLabel: false + }, + xAxis: { + type: 'value', + minInterval: 1, + max: maxValue < 5 ? 5 : null, + splitLine: { + show: true + }, + axisLine: { + show: false + }, + axisTick: { + show: false + }, + name: 'Count', + nameLocation: 'middle', + nameGap: 28 + }, + yAxis: { + type: 'category', + inverse: true, + data: labels, + axisLine: { + show: false + }, + axisTick: { + show: false + }, + axisLabel: { + width: 180, + overflow: 'truncate', + formatter: function (value) { + return formatTopicLabel(value); + } + } + }, + series: [{ + type: 'bar', + data: values, + barWidth: 22, + label: { + show: true, + position: 'right', + formatter: '{c}' + }, + emphasis: { + focus: 'series' + } + }] + }; + + chart.setOption(option); + + window.addEventListener('resize', function () { + chart.resize(); + }); + + container.classList.add('rendered'); + }).catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load topic model data
'; + container.classList.add('rendered'); + }); +} +function loadTopicModelPageCounts() { + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + + return $.get('/api/document/topicModelPageCounts', { + documentId: docId, + modelId: selectedTopicModelId + }).then(function (data) { + if (!data || !Array.isArray(data) || data.length === 0) { + return null; + } + + const rawPageIds = []; + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + if (!isNaN(pid)) rawPageIds.push(pid); + }); + + const uniqueSortedPageIds = Array.from(new Set(rawPageIds)).sort(function (a, b) { + return a - b; + }); + + const pageIdToPageNumber = new Map(); + uniqueSortedPageIds.forEach(function (pid, idx) { + pageIdToPageNumber.set(pid, idx + 1); + }); + + const pageTopicCounts = new Map(); + const totalTopicCounts = {}; + + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + const pageNumber = pageIdToPageNumber.get(pid); + const label = item.label ? String(item.label).trim() : ''; + const value = parseInt(item.value, 10) || 0; + + if (!pageNumber || !label) return; + + if (!pageTopicCounts.has(pageNumber)) { + pageTopicCounts.set(pageNumber, {}); + } + + pageTopicCounts.get(pageNumber)[label] = value; + totalTopicCounts[label] = (totalTopicCounts[label] || 0) + value; + }); + + const pages = Array.from(pageTopicCounts.keys()).sort(function (a, b) { + return a - b; + }); + + const topLabels = Object.keys(totalTopicCounts) + .sort(function (a, b) { return totalTopicCounts[b] - totalTopicCounts[a]; }) + .slice(0, 8); + + return { + pages: pages, + labels: topLabels, + pageTopicCounts: pageTopicCounts + }; + }); +} +function renderTopicTimeline(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + + loadTopicModelPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages.length || !result.labels.length) { + container.innerHTML = '
No topic timeline data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageTopicCounts = result.pageTopicCounts; + + const series = labels.map(function (label) { + return { + name: label, + type: 'line', + smooth: true, + symbol: 'circle', + symbolSize: 6, + data: pages.map(function (page) { + const counts = pageTopicCounts.get(page) || {}; + return counts[label] || 0; + }) + }; + }); + + const chart = echarts.init(document.getElementById(containerId + '-timeline')); + + chart.setOption({ + title: { + text: 'Topic Timeline', + left: 0, + top: 0 + }, + tooltip: { + trigger: 'axis' + }, + legend: { + type: 'scroll', + top: 30 + }, + grid: { + left: '12%', + right: '8%', + top: 85, + bottom: 50 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages + }, + yAxis: { + type: 'value', + name: 'Count' + }, + series: series + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load topic timeline
'; + container.classList.add('rendered'); + }); +} +function renderTopicHeatmap(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + loadTopicModelPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages.length || !result.labels.length) { + container.innerHTML = '
No topic heatmap data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageTopicCounts = result.pageTopicCounts; + + const heatmapData = []; + let maxValue = 0; + + pages.forEach(function (page, pageIndex) { + const counts = pageTopicCounts.get(page) || {}; + + labels.forEach(function (label, labelIndex) { + const value = counts[label] || 0; + if (value > maxValue) maxValue = value; + heatmapData.push([pageIndex, labelIndex, value]); + }); + }); + + const chart = echarts.init(document.getElementById(containerId + '-heatmap')); + + chart.setOption({ + title: { + text: 'Topic Heatmap', + left: 0, + top: 0 + }, + tooltip: { + position: 'top', + formatter: function (params) { + const page = pages[params.value[0]]; + const label = labels[params.value[1]]; + const value = params.value[2]; + return '
Page ' + page + '
' + label + ': ' + value + '
'; + } + }, + grid: { + left: 120, + right: 30, + top: 75, + bottom: 60 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages, + splitArea: { show: true } + }, + yAxis: { + type: 'category', + name: 'Topic', + data: labels, + splitArea: { show: true } + }, + visualMap: { + min: 0, + max: maxValue > 0 ? maxValue : 1, + calculable: true, + orient: 'horizontal', + left: 'center', + bottom: 10 + }, + series: [{ + name: 'Topic Count', + type: 'heatmap', + data: heatmapData, + emphasis: { + itemStyle: { + shadowBlur: 10, + shadowColor: 'rgba(0, 0, 0, 0.35)' + } + } + }] + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load topic heatmap
'; + container.classList.add('rendered'); + }); +} +function getSelectedEmotionModelName() { + const $activeModel = $('.emotion-model-item.active'); + if ($activeModel.length > 0) { + return $.trim($activeModel.text()); + } + + if (selectedEmotionModelId) { + return 'Model ' + selectedEmotionModelId; + } + + return 'Emotion model'; +} + +function renderEmotionViz(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + if (!selectedEmotionModelId) { + container.classList.remove('rendered'); + container.innerHTML = '
Please choose an emotion model
'; + container.classList.add('rendered'); + return; + } + + const modelName = getSelectedEmotionModelName(); + + const radarBtnClass = selectedEmotionVizType === 'radar' + ? 'btn btn-sm btn-primary emotion-viz-toggle-btn' + : 'btn btn-sm btn-light emotion-viz-toggle-btn'; + + const timelineBtnClass = selectedEmotionVizType === 'timeline' + ? 'btn btn-sm btn-primary emotion-viz-toggle-btn' + : 'btn btn-sm btn-light emotion-viz-toggle-btn'; + + const heatmapBtnClass = selectedEmotionVizType === 'heatmap' + ? 'btn btn-sm btn-primary emotion-viz-toggle-btn' + : 'btn btn-sm btn-light emotion-viz-toggle-btn'; + + container.classList.remove('rendered'); + container.innerHTML = '' + + '
' + + '
' + + '
Emotion
' + + '
' + modelName + '
' + + '
' + + '
' + + '' + + '' + + '' + + '
' + + '
' + + '
'; + + if (selectedEmotionVizType === 'timeline') { + renderEmotionTimeline(containerId + '-body'); + } else if (selectedEmotionVizType === 'heatmap') { + renderEmotionHeatmap(containerId + '-body'); + } else { + renderEmotionRadar(containerId + '-body'); + } +} +function renderEmotionRadar(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + const modelId = selectedEmotionModelId || null; + + $('.visualization-spinner').show(); + + $.get('/api/document/emotionRadar', { documentId: docId, modelId: modelId }) + .then(data => { + $('.visualization-spinner').hide(); + + if (!data || !Array.isArray(data) || data.length === 0) { + container.innerHTML = '
No emotion data for this model
'; + container.classList.add('rendered'); + return; + } + + const indicators = data.map(d => ({ name: d.label, max: 1 })); + const values = data.map(d => d.value); + + const chartDom = document.getElementById(containerId + '-radar'); + const chart = echarts.init(chartDom); + + const option = { + title: { text: 'Emotion Radar' }, + tooltip: {}, + radar: { + indicator: indicators, + radius: '65%' + }, + series: [{ + type: 'radar', + data: [{ + value: values, + name: 'Avg intensity' + }] + }] + }; + + chart.setOption(option); + container.classList.add('rendered'); + }) + .catch(() => { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load emotion radar
'; + container.classList.add('rendered'); + }); +} +function scrollToEmotionPage(pageNumber) { + const pageElement = document.querySelector('.page[data-id="' + pageNumber + '"]'); + if (pageElement) { + pageElement.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } +} + +function loadEmotionPageCounts() { + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + const modelId = selectedEmotionModelId || null; + + return $.get('/api/document/page/emotions', { documentId: docId, modelId: modelId }) + .then(function (data) { + if (!data || !Array.isArray(data) || data.length === 0) { + return null; + } + + const rawPageIds = []; + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + if (!isNaN(pid)) rawPageIds.push(pid); + }); + + const uniqueSortedPageIds = Array.from(new Set(rawPageIds)).sort(function (a, b) { + return a - b; + }); + + const pageIdToPageNumber = new Map(); + uniqueSortedPageIds.forEach(function (pid, idx) { + pageIdToPageNumber.set(pid, idx + 1); + }); + + const pageEmotionCounts = new Map(); + const totalEmotionCounts = {}; + + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + const pageNumber = pageIdToPageNumber.get(pid); + const label = item.emotionLabel ? 
String(item.emotionLabel).trim() : ''; + + if (!pageNumber || !label) return; + + if (!pageEmotionCounts.has(pageNumber)) { + pageEmotionCounts.set(pageNumber, {}); + } + + const currentPageMap = pageEmotionCounts.get(pageNumber); + currentPageMap[label] = (currentPageMap[label] || 0) + 1; + totalEmotionCounts[label] = (totalEmotionCounts[label] || 0) + 1; + }); + + const pages = Array.from(pageEmotionCounts.keys()).sort(function (a, b) { + return a - b; + }) + const topLabels = Object.keys(totalEmotionCounts) + .sort(function (a, b) { + return totalEmotionCounts[b] - totalEmotionCounts[a]; + }) + .slice(0, 6); + + return { + pages: pages, + labels: topLabels, + pageEmotionCounts: pageEmotionCounts, + totalEmotionCounts: totalEmotionCounts + }; + }); +} + +function renderEmotionTimeline(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + + loadEmotionPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages || result.pages.length === 0 || !result.labels || result.labels.length === 0) { + container.innerHTML = '
No emotion timeline data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageEmotionCounts = result.pageEmotionCounts; + + const series = labels.map(function (label) { + return { + name: label, + type: 'line', + smooth: true, + symbol: 'circle', + symbolSize: 6, + data: pages.map(function (page) { + const counts = pageEmotionCounts.get(page) || {}; + return counts[label] || 0; + }) + }; + }); + + const chartDom = document.getElementById(containerId + '-timeline'); + const chart = echarts.init(chartDom); + + const option = { + title: { text: 'Emotion Timeline' }, + tooltip: { + trigger: 'axis', + formatter: function (params) { + if (!params || params.length === 0) return ''; + + const page = params[0].axisValue; + let html = '
Page ' + page + '
'; + + params + .slice() + .sort(function (a, b) { return b.value - a.value; }) + .forEach(function (p) { + html += '
' + p.seriesName + ': ' + p.value + '
'; + }); + + return html; + } + }, + legend: { + type: 'scroll', + top: 30 + }, + grid: { + left: 50, + right: 20, + top: 80, + bottom: 50 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages + }, + yAxis: { + type: 'value', + name: 'Count' + }, + series: series + }; + + chart.setOption(option); + + chart.on('click', function (params) { + const pageNumber = parseInt(params.name, 10); + if (!isNaN(pageNumber)) { + scrollToEmotionPage(pageNumber); + } + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load emotion timeline
'; + container.classList.add('rendered'); + }); +} + +function renderEmotionHeatmap(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + + loadEmotionPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages || result.pages.length === 0 || !result.labels || result.labels.length === 0) { + container.innerHTML = '
No emotion heatmap data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageEmotionCounts = result.pageEmotionCounts; + + const heatmapData = []; + let maxValue = 0; + + pages.forEach(function (page, pageIndex) { + const counts = pageEmotionCounts.get(page) || {}; + + labels.forEach(function (label, labelIndex) { + const value = counts[label] || 0; + if (value > maxValue) maxValue = value; + heatmapData.push([pageIndex, labelIndex, value]); + }); + }); + + const chartDom = document.getElementById(containerId + '-heatmap'); + const chart = echarts.init(chartDom); + + const option = { + title: { text: 'Emotion Heatmap' }, + tooltip: { + position: 'top', + formatter: function (params) { + const page = pages[params.value[0]]; + const label = labels[params.value[1]]; + const value = params.value[2]; + return '
Page ' + page + '
' + label + ': ' + value + '
'; + } + }, + grid: { + left: 90, + right: 30, + top: 60, + bottom: 60 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages, + splitArea: { show: true } + }, + yAxis: { + type: 'category', + name: 'Emotion', + data: labels, + splitArea: { show: true } + }, + visualMap: { + min: 0, + max: maxValue > 0 ? maxValue : 1, + calculable: true, + orient: 'horizontal', + left: 'center', + bottom: 10 + }, + series: [{ + name: 'Emotion Count', + type: 'heatmap', + data: heatmapData, + label: { + show: false + }, + emphasis: { + itemStyle: { + shadowBlur: 10, + shadowColor: 'rgba(0, 0, 0, 0.35)' + } + } + }] + }; + + chart.setOption(option); + + chart.on('click', function (params) { + const pageIndex = params.value[0]; + const pageNumber = pages[pageIndex]; + if (pageNumber) { + scrollToEmotionPage(pageNumber); + } + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load emotion heatmap
'; + container.classList.add('rendered'); + }); +} function initializeTopicSettingsPanel() { if (topicSettings.colorMode === 'per-topic') { diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 98e75ac8..94b571d1 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -3,7 +3,8 @@
-
${uceConfig.getMeta().getName()?trim!"-"}
+
${uceConfig.getMeta().getName()?trim!"-"}
@@ -15,8 +16,21 @@
-

${languageResource.get("corpora")}

+
+

${languageResource.get("corpora")}

+ <#if uceConfig.settings.enablePathImport?? && uceConfig.settings.enablePathImport> + + + +
+ +
<#if corpora?size == 0>
@@ -33,7 +47,8 @@ data-id="${corpusVm.getCorpus().getId()}"> ${corpusVm.getCorpus().getName()?trim} -

${corpusVm.getCorpus().getAuthor()}

+

${corpusVm.getCorpus().getAuthor()}

+ + +
@@ -74,5 +94,459 @@
+ + +<#--Modal for uploading files--> + + + \ No newline at end of file diff --git a/uce.portal/resources/templates/reader/documentReaderView.ftl b/uce.portal/resources/templates/reader/documentReaderView.ftl index 8c18fdf2..3d46e779 100644 --- a/uce.portal/resources/templates/reader/documentReaderView.ftl +++ b/uce.portal/resources/templates/reader/documentReaderView.ftl @@ -315,14 +315,57 @@
+
+
+
+
+
+
-
- - - - - +
+ + +
+ + +
+
+
+ + +
+ + +
+
+ + +
+ + +
+
+ + +
+ + +
+
+
diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl index 7e012f0c..2fce37a3 100644 --- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl +++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl @@ -1,4 +1,23 @@ +<#if analysisId??> +
+ +
+ + <#if DUUI??> <#if DUUI.modelGroups?has_content> <#if DUUI.isTopic> @@ -7,7 +26,10 @@
<#list DUUI.textInformation.topicAVG as model>
-
${model.getModelInfo().getName()}
+
+ ${model.getModelInfo().getName()} + +
<#list model.topics as topic> <#assign opacity = topic.getScore()?string?replace(",", ".")> @@ -16,6 +38,9 @@
+
diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java index 3cb3d72a..e016d4ae 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java @@ -129,8 +129,8 @@ public JCas getLanguage(String inputText) throws Exception { public JCas getSentences(JCas cas) throws Exception { HashMap spacyUrls = new HashMap<>(); - spacyUrls.put("Spacy", "http://spacy-cohmetrix.service.component.duui.texttechnologylab.org"); -// spacyUrls.put("Spacy", "http://spacy.service.component.duui.texttechnologylab.org"); +// spacyUrls.put("Spacy", "http://spacy-cohmetrix.service.component.duui.texttechnologylab.org"); + spacyUrls.put("Spacy", "http://spacy.service.component.duui.texttechnologylab.org"); spacyUrls.put("Syntok", "http://paragraph-syntok.service.component.duui.texttechnologylab.org/"); DUUIComposer composer = setListComposer(spacyUrls); cas = runPipeline(cas, composer); diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java index 02687feb..c34c6f15 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java @@ -1,6 +1,8 @@ package org.texttechnologylab.uce.analysis; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; @@ -8,11 +10,41 @@ import org.texttechnologylab.uce.analysis.modules.*; 
import org.texttechnologylab.uce.analysis.typeClasses.TextClass; + + + +import java.time.Instant; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.io.InputStream; +import java.io.DataOutputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; + import java.util.*; public class RunDUUIPipeline { + private static final AnalysisCache analysisCache = new AnalysisCache(); + private static final ThreadLocal lastAnalysisIdTL = new ThreadLocal<>(); + private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class); + private static final ThreadLocal currentUserIdTL = new ThreadLocal<>(); + + + public static AnalysisSession getCachedSession(String analysisId) { + return analysisCache.get(analysisId); + } + public static void setThreadLocalUserId(String userId) { + currentUserIdTL.set(userId); + } + + private static String getCurrentUserId() { + // TODO: replace with your auth/session identity + + return currentUserIdTL.get(); + } public DUUIInformation getModelResources(List modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception { ModelResources modelResources = new ModelResources(); @@ -189,10 +221,13 @@ public DUUIInformation getModelResources(List modelGroups, String inputT newCas.setDocumentText(text); cas = newCas; + logger.info("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)"); + } // run pipeline DUUIComposer composer = pipeline.setComposer(modelInfosMap); JCas result = pipeline.runPipeline(cas, composer); + logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)"); // get results Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups); // print results @@ -232,9 +267,29 @@ public DUUIInformation getModelResources(List modelGroups, String inputT if (isCohmetrix) { 
duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups); } + String analysisId = UUID.randomUUID().toString(); + String userId = getCurrentUserId(); + logger.info("[USER] Running pipeline for User: " + userId); + String title = "Analysis " + Instant.now(); + + byte[] xmiBytes = toXmiBytes(result); + AnalysisSession session = new AnalysisSession( + analysisId, userId, title, /*externalId*/ null, + result, /*xmiBytes*/ xmiBytes + ); + analysisCache.put(session); + lastAnalysisIdTL.set(analysisId); + logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)"); return duuiInformation; } + public AnalysisResponse getModelResourcesWithHandle(List modelGroups, String inputText, String claim, + String coherenceText, String stanceText, String systemPrompt) throws Exception { + DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt); + String id = lastAnalysisIdTL.get(); + return new AnalysisResponse(id, info); + } + public static void main(String[] args) throws Exception { ModelResources modelResources = new ModelResources(); List modelGroups = modelResources.getGroupedModelObjects(); @@ -256,5 +311,195 @@ public static void main(String[] args) throws Exception { DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt); } + public static final class AnalysisResponse { + public final String analysisId; + public final DUUIInformation duuiInformation; + + public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) { + this.analysisId = analysisId; + this.duuiInformation = duuiInformation; + } + } + + + //AnalysisSession + public static final class AnalysisSession { + public final String analysisId; + public final String userId; + public final long createdAtMillis; + public final String title; + public final String externalId; + public final JCas jcas; + public final byte[] xmiBytes; 
+ + public AnalysisSession(String analysisId, String userId, String title, String externalId, + JCas jcas, byte[] xmiBytes) { + this.analysisId = analysisId; + this.userId = userId; + this.title = title; + this.externalId = externalId; + this.createdAtMillis = System.currentTimeMillis(); + this.jcas = jcas; + this.xmiBytes = xmiBytes; + } + } + + + // AnalysisCache + public static final class AnalysisCache { + private final Map map = new ConcurrentHashMap<>(); + private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes + + public void put(AnalysisSession s) { map.put(s.analysisId, s); } + + public AnalysisSession get(String id) { // Retrieve a session from the cache + AnalysisSession s = map.get(id); + if (s == null) return null; + + if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) { // If this session is older than 45 minutes -> expire it + map.remove(id); + return null; + } + return s; + } + +// public void remove(String id) { +// map.remove(id); +// } //Manually remove a session by ID +// +// +// public void cleanupExpired() { // cleanup all expired sessions +// long now = System.currentTimeMillis(); +// for (var entry : map.entrySet()) { +// AnalysisSession s = entry.getValue(); +// if (now - s.createdAtMillis > ttlMillis) { +// map.remove(entry.getKey()); +// logger.info("[CRON] Removed expired session: " + s.analysisId); +// } +// } +// } +// } +// private static final java.util.concurrent.ScheduledExecutorService scheduler = //Cron job for automatic cleanup every 5 minutes +// java.util.concurrent.Executors.newScheduledThreadPool(1); +// +// static { +// scheduler.scheduleAtFixedRate(() -> { +// try { +// analysisCache.cleanupExpired(); +// } catch (Exception e) { +// logger.error("[CACHE] Cache cleanup failed: " + e.getMessage()); +// } +// }, 5, 5, java.util.concurrent.TimeUnit.MINUTES); +// +// scheduler.scheduleAtFixedRate(() -> { +// logger.info("[CACHE] Running cache cleanup task..."); +// analysisCache.cleanupExpired(); // your 
cleanup method +// }, 1, 5, TimeUnit.MINUTES); +// +// + } + private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception { + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + org.apache.uima.cas.impl.XmiCasSerializer ser = + new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem()); + org.apache.uima.util.XMLSerializer xmlSer = + new org.apache.uima.util.XMLSerializer(bos, true); + xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1"); + ser.serialize(jcas.getCas(), xmlSer.getContentHandler()); + return bos.toByteArray(); + } + + + // When we send CAS to the importer via HTTP, we want to capture the response. + // This small class acts like a container for the HTTP response details + private static class HttpResult { + final int status; + final String body; + final String locationHeader; + HttpResult(int status, String body, String locationHeader) { + this.status = status; this.body = body; this.locationHeader = locationHeader; + } + } + + + // Send CAS via HTTP + private static HttpResult postMultipart(String urlStr, + Map fields, + String fileField, String filename, + String fileContentType, byte[] fileBytes) throws Exception { + String boundary = "----JAVA-" + UUID.randomUUID(); //Generate a boundary string to separate parts in multipart body + URL url = new URL(urlStr); //Open HTTP connection to the importer endpoint + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("POST"); + conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary); + + try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) { //Write request body + // text fields + for (var e : fields.entrySet()) { + out.writeBytes("--" + boundary + "\r\n"); + out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n"); + out.write(e.getValue().getBytes(StandardCharsets.UTF_8)); + 
out.writeBytes("\r\n"); + } + // file field + out.writeBytes("--" + boundary + "\r\n"); + out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n"); + out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n"); + out.write(fileBytes); + out.writeBytes("\r\n"); + out.writeBytes("--" + boundary + "--\r\n"); + out.flush(); + } + + int status = conn.getResponseCode(); //Read the HTTP response from the importer + String location = conn.getHeaderField("Location"); + String body; + + try (InputStream in = (status >= 200 && status < 400) ? conn.getInputStream() : conn.getErrorStream()) { + body = (in != null) ? new String(in.readAllBytes(), StandardCharsets.UTF_8) : ""; + } + conn.disconnect(); + return new HttpResult(status, body, location); + } + + public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached CAS to importer + String analysisId, + long corpusId, + String documentId, + String casView) throws Exception { + AnalysisSession s = getCachedSession(analysisId); + if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId); + + byte[] casBytes = toXmiBytes(s.jcas); + + Map fields = new LinkedHashMap<>(); // Form-data fields + fields.put("analysisId", analysisId); + fields.put("corpusId", Long.toString(corpusId)); + if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId); + if (casView != null && !casView.isBlank()) fields.put("casView", casView); + + + // Send multipart as XMI + String filename = "cas_" + analysisId + ".xmi"; + logger.info("[IMPORT][HTTP] POST " + importUrl + + " corpusId=" + corpusId + " analysisId=" + analysisId + + " documentId=" + documentId + " casView=" + casView + + " file=" + filename + " (" + casBytes.length + " bytes)"); + + HttpResult res = postMultipart( + importUrl, + fields, + "file", + filename, + "application/xml", + casBytes + ); + logger.info("[IMPORT][HTTP] status=" + res.status + + 
(res.locationHeader != null ? " Location=" + res.locationHeader : "") + + (res.body != null && !res.body.isBlank() ? " body=" + res.body : "")); + return res; + } + } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java index 03afde97..e2c3758c 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java @@ -5,12 +5,14 @@ import org.hibernate.boot.registry.StandardServiceRegistryBuilder; import org.springframework.context.annotation.Configuration; import org.springframework.transaction.annotation.EnableTransactionManagement; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.biofid.BiofidTaxon; import org.texttechnologylab.uce.common.models.biofid.GazetteerTaxon; import org.texttechnologylab.uce.common.models.biofid.GnFinderTaxon; import org.texttechnologylab.uce.common.models.corpus.*; import org.texttechnologylab.uce.common.models.corpus.emotion.Emotion; import org.texttechnologylab.uce.common.models.corpus.emotion.Feeling; +import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotions; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationLink; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationToDocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; @@ -19,10 +21,7 @@ import org.texttechnologylab.uce.common.models.imp.ImportLog; import org.texttechnologylab.uce.common.models.imp.UCEImport; import org.texttechnologylab.uce.common.models.negation.*; -import org.texttechnologylab.uce.common.models.topic.TopicValueBase; -import org.texttechnologylab.uce.common.models.topic.TopicValueBaseWithScore; -import 
org.texttechnologylab.uce.common.models.topic.TopicWord; -import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; +import org.texttechnologylab.uce.common.models.topic.*; import java.util.HashMap; @@ -55,6 +54,7 @@ public static SessionFactory buildSessionFactory() { metadataSources.addAnnotatedClass(Sentiment.class); metadataSources.addAnnotatedClass(Emotion.class); metadataSources.addAnnotatedClass(Feeling.class); + metadataSources.addAnnotatedClass(SentenceEmotions.class); metadataSources.addAnnotatedClass(GeoName.class); metadataSources.addAnnotatedClass(Paragraph.class); metadataSources.addAnnotatedClass(Sentence.class); @@ -86,6 +86,9 @@ public static SessionFactory buildSessionFactory() { metadataSources.addAnnotatedClass(TopicWord.class); metadataSources.addAnnotatedClass(TopicValueBase.class); metadataSources.addAnnotatedClass(TopicValueBaseWithScore.class); + metadataSources.addAnnotatedClass(SentenceTopic.class); + //models + metadataSources.addAnnotatedClass(ModelEntity.class); metadataSources.addAnnotatedClass(DocumentTopThreeTopics.class); var metadata = metadataSources.buildMetadata(); diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java index 14bc3e18..cf80c528 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java @@ -32,4 +32,5 @@ public class CorpusAnnotationConfig { private boolean scope; private boolean xscope; private boolean unifiedTopic; + public boolean topic; } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/ModelEntity.java 
b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/ModelEntity.java new file mode 100644 index 00000000..705aa1f9 --- /dev/null +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/ModelEntity.java @@ -0,0 +1,44 @@ +package org.texttechnologylab.uce.common.models; + +import lombok.Getter; +import lombok.Setter; +import javax.persistence.*; + +@Setter +@Getter +@Entity +@Table(name = "models") +public class ModelEntity { + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column(name = "model_key", unique = true, nullable = false) + private String modelKey; + + private String name; + + @Column(columnDefinition = "TEXT") + private String url; + + @Column(columnDefinition = "TEXT") + private String github; + + @Column(columnDefinition = "TEXT") + private String huggingface; + + @Column(columnDefinition = "TEXT") + private String paper; + + private String map; + private String variant; + + @Column(name = "main_tool") + private String mainTool; + + @Column(name = "model_type") + private String modelType; + + public ModelEntity() { + } +} diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java index 2d2d3740..7b611e4d 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java @@ -21,6 +21,7 @@ import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentToAnnotationLink; import org.texttechnologylab.uce.common.models.negation.*; +import org.texttechnologylab.uce.common.models.topic.SentenceTopic; import org.texttechnologylab.uce.common.models.topic.TopicValueBase; import 
org.texttechnologylab.uce.common.models.topic.TopicValueBaseWithScore; import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; @@ -234,6 +235,11 @@ public long getPrimaryDbIdentifier() { @JoinColumn(name = "document_Id") private List images; + @Getter + @Setter + @OneToMany(mappedBy = "document", cascade = CascadeType.ALL, orphanRemoval = false) + private List sentenceTopics = new ArrayList<>(); + public Document() { metadataTitleInfo = new MetadataTitleInfo(); } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java index 45c78768..936c335a 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java @@ -3,6 +3,7 @@ import lombok.Getter; import lombok.Setter; import org.texttechnologylab.uce.common.annotations.Typesystem; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.UIMAAnnotation; import org.texttechnologylab.uce.common.models.WikiModel; @@ -18,11 +19,14 @@ @Typesystem(types = {Emotion.class}) public class Emotion extends UIMAAnnotation implements WikiModel { - private String model; - @OneToMany(cascade = CascadeType.ALL) @JoinColumn(name = "emotion_id") private List feelings; + + @ManyToOne + @JoinColumn(name = "model_id") + private ModelEntity dbModel; + public String generateEmotionMarker() { var tooltip = ""; diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotions.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotions.java new file mode 100644 index 00000000..04c4eed1 --- /dev/null +++ 
b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotions.java @@ -0,0 +1,40 @@ +package org.texttechnologylab.uce.common.models.corpus.emotion; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.texttechnologylab.uce.common.models.ModelBase; +import org.texttechnologylab.uce.common.models.ModelEntity; +import org.texttechnologylab.uce.common.models.corpus.Document; +import org.texttechnologylab.uce.common.models.corpus.Sentence; + +import javax.persistence.*; + +@Getter +@Setter +@NoArgsConstructor +@Entity +@Table(name = "sentenceemotions") +public class SentenceEmotions extends ModelBase { + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "document_id", nullable = false) + private Document document; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "sentence_id", nullable = false) + private Sentence sentence; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "emotion_id", nullable = false) + private Emotion emotion; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "model_id", nullable = false) + private ModelEntity model; + + public SentenceEmotions(Sentence sentence, Emotion emotion, ModelEntity model) { + this.sentence = sentence; + this.emotion = emotion; + this.model = model; + } +} diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/topic/SentenceTopic.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/topic/SentenceTopic.java new file mode 100644 index 00000000..bac172e4 --- /dev/null +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/topic/SentenceTopic.java @@ -0,0 +1,46 @@ +package org.texttechnologylab.uce.common.models.topic; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.texttechnologylab.uce.common.models.ModelBase; +import 
org.texttechnologylab.uce.common.models.ModelEntity; +import org.texttechnologylab.uce.common.models.corpus.Document; +import org.texttechnologylab.uce.common.models.corpus.Sentence; +import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; + +import javax.persistence.*; + +@Getter +@Setter +@NoArgsConstructor +@Entity +@Table(name = "sentencetopics") +public class SentenceTopic extends ModelBase { + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "unifiedtopic_id") + private UnifiedTopic unifiedTopic; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "topicinstance_id") + private TopicValueBase topicInstance; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "document_id", nullable = false) + private Document document; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "sentence_id", nullable = false) + private Sentence sentence; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "model_id", nullable = false) + private ModelEntity model; + + @Column(name = "topiclabel", nullable = false) + private String topicLabel; + + @Column(name = "thetast", nullable = false) + private Double score; +} \ No newline at end of file diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java index f3c5f722..0ef5cf0a 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java @@ -15,6 +15,7 @@ import org.texttechnologylab.uce.common.models.imp.ImportLog; import org.texttechnologylab.uce.common.models.imp.UCEImport; import org.texttechnologylab.uce.common.models.search.*; +import org.texttechnologylab.uce.common.models.ModelEntity; import java.util.List; @@ -366,4 +367,34 @@ public DocumentSearchResult 
defaultSearchForDocuments(int skip, * @param corpus */ public void saveCorpus(Corpus corpus) throws DatabaseOperationException; + + /** + * Stores or updates a ModelEntity in the database + */ + public void saveOrUpdateModelEntity(ModelEntity model) throws DatabaseOperationException; + + /** + * Gets a Model based on its JSON-Key + */ + public ModelEntity getModelEntityByKey(String modelKey) throws DatabaseOperationException; + + /** + * Gets a Model based on its map column value + */ + public ModelEntity getModelEntityByMap(String mapString) throws DatabaseOperationException; + + /** + * Adds new emotions to an existing document in the database + */ + public void saveNewEmotionsForDocument(long documentId, List newEmotions) throws DatabaseOperationException; + + /** + * Updates a corpusJsonConfig in the database + */ + public void updateCorpusJsonConfig(long corpusId,String jsonConfig) throws DatabaseOperationException; + + /** + * Deletes a corpus by id + */ + public void deleteCorpusById(long corpusId) throws DatabaseOperationException; } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index 170525c0..8c67e41c 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -15,6 +15,7 @@ import org.texttechnologylab.uce.common.exceptions.ExceptionUtils; import org.texttechnologylab.uce.common.models.Linkable; import org.texttechnologylab.uce.common.models.ModelBase; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.UIMAAnnotation; import org.texttechnologylab.uce.common.models.biofid.BiofidTaxon; import 
org.texttechnologylab.uce.common.models.biofid.GazetteerTaxon; @@ -33,6 +34,7 @@ import org.texttechnologylab.uce.common.models.topic.TopicValueBase; import org.texttechnologylab.uce.common.models.topic.TopicWord; import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; +import org.texttechnologylab.uce.common.models.topic.SentenceTopic; import org.texttechnologylab.uce.common.models.util.HealthStatus; import org.texttechnologylab.uce.common.utils.ReflectionUtils; import org.texttechnologylab.uce.common.utils.StringUtils; @@ -42,6 +44,11 @@ import javax.persistence.criteria.Order; import javax.persistence.criteria.Path; import javax.persistence.criteria.Predicate; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.lang.reflect.Type; +import java.nio.charset.StandardCharsets; import java.sql.Array; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -65,6 +72,7 @@ private Session getCurrentSession() { public PostgresqlDataInterface_Impl() { sessionFactory = HibernateConf.buildSessionFactory(); TestConnection(); + initializeModelsFromJson(); } public void TestConnection() { @@ -1193,10 +1201,31 @@ public List findDocumentIdsByMetadata(String key, String value, UCEMetadat }); } + /** + * Deletes a document from the database + */ public void deleteDocumentById(long id) throws DatabaseOperationException { // NOTE this only cleans up everything directly connected to the document // TODO also remove embeddings and other data executeOperationSafely((session) -> { + List queries = List.of( + "DELETE FROM sentenceemotions WHERE emotion_id IN (SELECT id FROM emotion WHERE document_id = :did)", + "DELETE FROM sentencetopics WHERE unifiedtopic_id IN (SELECT id FROM unifiedtopic WHERE document_id = :did)", + "DELETE FROM sentenceemotions WHERE document_id = :did", + "DELETE FROM sentencetopics WHERE document_id = :did", + "DELETE FROM documenttopicsraw WHERE document_id = :did", + "DELETE FROM 
documenttopicwords WHERE document_id = :did", + "DELETE FROM feeling WHERE emotion_id IN (SELECT id FROM emotion WHERE document_id = :did)", + "DELETE FROM documentchunkembeddings WHERE document_id = :did", + "DELETE FROM documentembeddings WHERE document_id = :did", + "DELETE FROM documentsentenceembeddings WHERE document_id = :did" + ); + for (String sql : queries) { + session.createNativeQuery(sql) + .setParameter("did", id) + .executeUpdate(); + } + var doc = session.get(Document.class, id); if (doc != null) { session.delete(doc); @@ -1204,6 +1233,59 @@ public void deleteDocumentById(long id) throws DatabaseOperationException { return null; }); } + + /** + * Deletes a corpus from the database + */ + public void deleteCorpusById(long corpusId) throws DatabaseOperationException{ + executeOperationSafely((session) -> { + List queries = List.of( + "DELETE FROM sentenceemotions WHERE emotion_id IN (SELECT e.id FROM emotion e JOIN document d ON e.document_id = d.id WHERE d.corpusid = :cid)", + "DELETE FROM sentencetopics WHERE unifiedtopic_id IN (SELECT ut.id FROM unifiedtopic ut JOIN document d ON ut.document_id = d.id WHERE d.corpusid = :cid)", + "DELETE FROM sentenceemotions USING document WHERE sentenceemotions.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM sentencetopics USING document WHERE sentencetopics.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documenttopicsraw USING document WHERE documenttopicsraw.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documenttopicwords USING document WHERE documenttopicwords.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM corpustopicwords WHERE corpus_id = :cid", + "DELETE FROM feeling WHERE emotion_id IN (SELECT e.id FROM emotion e JOIN document d ON e.document_id = d.id WHERE d.corpusid = :cid)", + "DELETE FROM emotion USING document WHERE emotion.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM 
unifiedtopic USING document WHERE unifiedtopic.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM xscope USING document WHERE xscope.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM scope USING document WHERE scope.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM event USING document WHERE event.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM focus USING document WHERE focus.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM cue USING document WHERE cue.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM completenegation USING document WHERE completenegation.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentchunkembeddings USING document WHERE documentchunkembeddings.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentembeddings USING document WHERE documentembeddings.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentsentenceembeddings USING document WHERE documentsentenceembeddings.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM ucemetadata USING document WHERE ucemetadata.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM ucemetadatafilter WHERE ucemetadatafilter.corpusid = :cid", + "DELETE FROM biofidtaxon USING document WHERE biofidtaxon.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM lemma USING document WHERE lemma.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM namedentity USING document WHERE namedentity.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM geoname USING document WHERE geoname.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM time USING document WHERE time.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM srlink USING document WHERE srlink.document_id = document.id AND 
document.corpusid = :cid", + "DELETE FROM gazetteertaxon USING document WHERE gazetteertaxon.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM gnfindertaxon USING document WHERE gnfindertaxon.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM sentence USING document WHERE sentence.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentlink WHERE documentlink.corpusid = :cid", + "DELETE FROM annotationlink WHERE annotationlink.corpusid = :cid", + "DELETE FROM documenttoannotationlink WHERE documenttoannotationlink.corpusid = :cid", + "DELETE FROM annotationtodocumentlink WHERE annotationtodocumentlink.corpusid = :cid", + "DELETE FROM page USING document WHERE page.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM document WHERE corpusid = :cid", + "DELETE FROM corpus WHERE id = :cid" + ); + for (String sql : queries) { + session.createNativeQuery(sql) + .setParameter("cid", corpusId) + .executeUpdate(); + } + return null; + }); + } public List findDocumentIDsByTitle(String title, boolean like) throws DatabaseOperationException { return executeOperationSafely((session) -> { @@ -1295,6 +1377,7 @@ public Document getDocumentByCorpusAndDocumentId(long corpusId, String documentI Document doc = session.createQuery(criteriaQuery).uniqueResult(); if (doc != null) { + Hibernate.initialize(doc.getSentences()); //initializeCompleteDocument(doc, 0, 999999); } return doc; @@ -2020,81 +2103,162 @@ public List getGeonameByPage(long documentId) throws DatabaseOperation return query.getResultList(); }); } - - public List getTopicDistributionByPageForDocument(long documentId) throws DatabaseOperationException { + return getTopicDistributionByPageForDocument(documentId, null); + } + + public List getTopicDistributionByPageForDocument(long documentId, Long modelId) throws DatabaseOperationException { return executeOperationSafely((session) -> { String sql = """ - WITH best_topic_per_sentence AS ( 
- SELECT DISTINCT ON (st.document_id, st.sentence_id) - st.unifiedtopic_id, - st.document_id, - st.sentence_id, - st.topiclabel, - st.thetast - FROM - sentencetopics st - WHERE - st.document_id = :documentId - ORDER BY - st.document_id, st.sentence_id, st.thetast DESC - ) - SELECT - ut.page_id, - btp.topiclabel + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + st.unifiedtopic_id, + st.document_id, + st.sentence_id, + st.topiclabel, + st.thetast FROM - best_topic_per_sentence btp - JOIN - unifiedtopic ut ON btp.unifiedtopic_id = ut.id + sentencetopics st WHERE - ut.document_id = :documentId + st.document_id = :documentId + AND (:modelId IS NULL OR st.model_id = :modelId) ORDER BY - ut.page_id, btp.topiclabel - """; + st.document_id, st.sentence_id, st.thetast DESC + ) + SELECT + ut.page_id, + btp.topiclabel + FROM + best_topic_per_sentence btp + JOIN + unifiedtopic ut ON btp.unifiedtopic_id = ut.id + WHERE + ut.document_id = :documentId + ORDER BY + ut.page_id, btp.topiclabel + """; var query = session.createNativeQuery(sql) - .setParameter("documentId", documentId); + .setParameter("documentId", documentId) + .setParameter("modelId", modelId, org.hibernate.type.LongType.INSTANCE); return query.getResultList(); }); } + public List getTopicModelsForDocumentWithName(long documentId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + SELECT DISTINCT m.id AS model_id, m.name AS model_name + FROM sentencetopics st + JOIN models m ON m.id = st.model_id + WHERE st.document_id = :documentId + ORDER BY m.id + """; + + return session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .getResultList(); + }); + } + public List getTopicModelOverview(long documentId, long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + 
st.sentence_id, + st.topiclabel, + st.thetast + FROM sentencetopics st + WHERE st.document_id = :documentId + AND st.model_id = :modelId + ORDER BY st.document_id, st.sentence_id, st.thetast DESC + ) + SELECT + b.topiclabel, + COUNT(*) AS topic_count + FROM best_topic_per_sentence b + WHERE b.topiclabel IS NOT NULL + AND TRIM(b.topiclabel) <> '' + GROUP BY b.topiclabel + ORDER BY topic_count DESC, b.topiclabel + """; + + return session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .setParameter("modelId", modelId) + .getResultList(); + }); + } + public List getTopicModelPageCounts(long documentId, long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + st.sentence_id, + st.topiclabel, + st.thetast + FROM sentencetopics st + WHERE st.document_id = :documentId + AND st.model_id = :modelId + ORDER BY st.document_id, st.sentence_id, st.thetast DESC + ) + SELECT + s.page_id, + b.topiclabel, + COUNT(*) AS topic_count + FROM best_topic_per_sentence b + JOIN sentence s ON s.id = b.sentence_id + WHERE b.topiclabel IS NOT NULL + AND TRIM(b.topiclabel) <> '' + GROUP BY s.page_id, b.topiclabel + ORDER BY s.page_id, topic_count DESC, b.topiclabel + """; + + return session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .setParameter("modelId", modelId) + .getResultList(); + }); + } + public List getSentenceTopicsWithEntitiesByPageForDocument(long documentId) + throws DatabaseOperationException { - public List getSentenceTopicsWithEntitiesByPageForDocument(long documentId) throws DatabaseOperationException { return executeOperationSafely((session) -> { + String sql = """ - WITH best_topic_per_sentence AS ( - SELECT DISTINCT ON (st.document_id, st.sentence_id) - st.sentence_id, - st.topiclabel - FROM - sentencetopics st - WHERE - st.document_id = :document_id - ORDER BY - st.document_id, 
st.sentence_id, st.thetast DESC - ), - entities_in_sentences AS ( - SELECT DISTINCT - s.id AS sentence_id, - ne.typee AS entity_type - FROM - sentence s - JOIN namedentity ne ON - ne.document_id = s.document_id AND - ne.beginn >= s.beginn AND - ne.endd <= s.endd - WHERE - s.document_id = :document_id - ) - SELECT - btps.topiclabel, - eis.entity_type + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + st.sentence_id, + st.topiclabel + FROM + sentencetopics st + WHERE + st.document_id = :document_id + ORDER BY + st.document_id, st.sentence_id, st.thetast DESC + ), + entities_in_sentences AS ( + SELECT DISTINCT + s.id AS sentence_id, + ne.typee AS entity_type FROM - best_topic_per_sentence btps - JOIN entities_in_sentences eis ON btps.sentence_id = eis.sentence_id - ORDER BY - btps.sentence_id, eis.entity_type - """; + sentence s + JOIN namedentity ne ON + ne.document_id = s.document_id AND + ne.beginn >= s.beginn AND + ne.endd <= s.endd + WHERE + s.document_id = :document_id + ) + SELECT + btps.topiclabel, + eis.entity_type + FROM + best_topic_per_sentence btps + JOIN entities_in_sentences eis ON btps.sentence_id = eis.sentence_id + ORDER BY + btps.sentence_id, eis.entity_type + """; Query query = session.createNativeQuery(sql) .setParameter("document_id", documentId); @@ -2122,7 +2286,10 @@ public List getTopicWordsByDocumentId(long documentId) throws Database public Map getUnifiedTopicToSentenceMap(long documentId) throws DatabaseOperationException { return executeOperationSafely((session) -> { - String sql = "SELECT unifiedtopic_id, sentence_id FROM sentencetopics WHERE document_id = :documentId"; + String sql = "SELECT unifiedtopic_id, sentence_id " + + "FROM sentencetopics " + + "WHERE document_id = :documentId " + + "AND unifiedtopic_id IS NOT NULL"; var query = session.createNativeQuery(sql) .setParameter("documentId", documentId); @@ -2264,4 +2431,314 @@ private String escapeSql(String input) { return input.replace("(", 
"\\(").replace(")", "\\)").replace(":", "\\:").replace("|", "\\|"); } + /** + * Create unifiedtopic rows if missing for sentences that have sentencetopics + * Backfill sentencetopics.unifiedtopic_id + */ + public int ensureUnifiedTopicsForSentenceTopics(long documentId) throws DatabaseOperationException { + return executeOperationSafely(session -> { + + + String insertUnifiedTopics = + "INSERT INTO unifiedtopic (document_id, beginn, endd, coveredtext, islexicalized, page_id) " + + "SELECT DISTINCT s.document_id, s.beginn, s.endd, s.coveredtext, s.islexicalized, s.page_id " + + "FROM sentence s " + + "JOIN sentencetopics st ON st.sentence_id = s.id AND st.document_id = :docId " + + "LEFT JOIN unifiedtopic ut " + + " ON ut.document_id = s.document_id AND ut.beginn = s.beginn AND ut.endd = s.endd " + + "WHERE ut.id IS NULL"; + + session.createNativeQuery(insertUnifiedTopics) + .setParameter("docId", documentId) + .executeUpdate(); + + + String updateSentenceTopics = + "UPDATE sentencetopics st " + + "SET unifiedtopic_id = ut.id " + + "FROM sentence s " + + "JOIN unifiedtopic ut " + + " ON ut.document_id = s.document_id AND ut.beginn = s.beginn AND ut.endd = s.endd " + + "WHERE st.document_id = :docId " + + " AND st.sentence_id = s.id " + + " AND st.unifiedtopic_id IS NULL"; + + int updated = session.createNativeQuery(updateSentenceTopics) + .setParameter("docId", documentId) + .executeUpdate(); + + return updated; + }); + } + + public int createSentenceEmotions(long documentId) throws DatabaseOperationException { + return executeOperationSafely(session -> { + String createSentenceEmotions = + """ + INSERT INTO sentenceemotions (sentence_id, emotion_id, model_id, document_id) + SELECT s.id, e.id, e.model_id, s.document_id + FROM emotion e + JOIN sentence s + ON s.beginn = e.beginn AND s.endd = e.endd and s.document_id = :docId AND e.document_id = :docId + WHERE NOT EXISTS( + SELECT 1 FROM sentenceemotions se + WHERE se.sentence_id = s.id AND se.emotion_id = e.id + ); + 
"""; + + System.out.println(documentId); + return session.createNativeQuery(createSentenceEmotions) + .setParameter("docId", documentId) + .executeUpdate(); + }); + } + + /** + * Adds new emotions to an existing document in the database + */ + public void saveNewEmotionsForDocument(long documentId, List newEmotions) throws DatabaseOperationException { + executeOperationSafely((session) -> { + Document doc = session.get(Document.class, documentId); + if (doc != null) { + // Initialize to prevent a LazyInitializationException + Hibernate.initialize(doc.getEmotions()); + for (var emotion : newEmotions) { + if (emotion.getDbModel() != null) { + // Merge detached model entity intp the current active session + emotion.setDbModel((ModelEntity) session.merge(emotion.getDbModel())); + } + } + doc.getEmotions().addAll(newEmotions); + session.update(doc); + } + return null; + }); + } + + + + @Override + public void saveOrUpdateModelEntity(ModelEntity model) throws DatabaseOperationException{ + executeOperationSafely((session) -> { + session.saveOrUpdate(model); + return null; + }); + } + + @Override + public ModelEntity getModelEntityByKey(String modelKey) throws DatabaseOperationException{ + return executeOperationSafely((session) -> { + var cb = session.getCriteriaBuilder(); + var cq = cb.createQuery(ModelEntity.class); + var root = cq.from(ModelEntity.class); + + cq.select(root).where(cb.equal(root.get("modelKey"),modelKey)); + + var query = session.createQuery(cq); + query.setMaxResults(1); + return query.uniqueResult(); + }); + } + + @Override + public ModelEntity getModelEntityByMap(String mapString) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + var cb = session.getCriteriaBuilder(); + var cq = cb.createQuery(org.texttechnologylab.uce.common.models.ModelEntity.class); + var root = cq.from(org.texttechnologylab.uce.common.models.ModelEntity.class); + + cq.select(root).where(cb.equal(root.get("map"), mapString)); + + var query = 
session.createQuery(cq); + query.setMaxResults(1); + return query.uniqueResult(); + }); + } + + /** + * Creates a models table in the database and stores all models from models.json + */ + public void initializeModelsFromJson(){ + try(InputStream is = getClass().getClassLoader().getResourceAsStream("models.json"); + InputStreamReader reader = new InputStreamReader(is, StandardCharsets.UTF_8)){ + Type type = new TypeToken>>(){}.getType(); + Map> modelsMap = gson.fromJson(reader, type); + + if (modelsMap != null){ + for (Map.Entry> entry : modelsMap.entrySet()){ + String key = entry.getKey(); + Map info = entry.getValue(); + ModelEntity dbModel = getModelEntityByKey(key); + if(dbModel == null){ + dbModel = new ModelEntity(); + dbModel.setModelKey(key); + } + dbModel.setName(info.get("Name")); + dbModel.setUrl(info.get("url")); + dbModel.setGithub(info.get("github")); + dbModel.setHuggingface(info.get("huggingface")); + dbModel.setPaper(info.get("paper")); + dbModel.setMap(info.get("map")); + dbModel.setVariant(info.get("Variant")); + dbModel.setMainTool(info.get("Main Tool")); + dbModel.setModelType(info.get("type")); + + saveOrUpdateModelEntity(dbModel); + } + } + + } catch (IOException e) { + System.err.println("Error during initializing models from models.json"); + } catch (DatabaseOperationException e) { + System.err.println("Error during getting ModalEntity from database"); + } + } + /** + * Retrieves the strongest emotion per sentence and maps it to its page. + * Optionally filters by modelId. 
+ */ + public List getEmotionByPage(long documentId, Long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + + String sql; + var query = session.createNativeQuery(""); + + if (modelId == null) { + sql = """ + WITH best_emotion_per_sentence AS ( + SELECT DISTINCT ON (se.document_id, se.sentence_id) + se.sentence_id, + f.feeling AS emotion_label, + f.value AS emotion_value + FROM sentenceemotions se + JOIN emotion e ON e.id = se.emotion_id + JOIN feeling f ON f.emotion_id = e.id + WHERE se.document_id = :documentId + ORDER BY se.document_id, se.sentence_id, f.value DESC + ) + SELECT + s.page_id, + bes.emotion_label + FROM best_emotion_per_sentence bes + JOIN sentence s ON s.id = bes.sentence_id + WHERE s.document_id = :documentId + ORDER BY s.page_id, bes.emotion_label + """; + + query = session.createNativeQuery(sql) + .setParameter("documentId", documentId, LongType.INSTANCE); + } else { + sql = """ + WITH best_emotion_per_sentence AS ( + SELECT DISTINCT ON (se.document_id, se.sentence_id) + se.sentence_id, + f.feeling AS emotion_label, + f.value AS emotion_value + FROM sentenceemotions se + JOIN emotion e ON e.id = se.emotion_id + JOIN feeling f ON f.emotion_id = e.id + WHERE se.document_id = :documentId + AND se.model_id = :modelId + ORDER BY se.document_id, se.sentence_id, f.value DESC + ) + SELECT + s.page_id, + bes.emotion_label + FROM best_emotion_per_sentence bes + JOIN sentence s ON s.id = bes.sentence_id + WHERE s.document_id = :documentId + ORDER BY s.page_id, bes.emotion_label + """; + + query = session.createNativeQuery(sql) + .setParameter("documentId", documentId, LongType.INSTANCE) + .setParameter("modelId", modelId, LongType.INSTANCE); + } + + return query.getResultList(); + }); + } + public List getEmotionRadarForDocument(long documentId, Long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + + String sql = """ + SELECT + f.feeling AS feeling_label, + 
AVG(f.value) AS avg_value + FROM sentenceemotions se + JOIN emotion e ON e.id = se.emotion_id + JOIN feeling f ON f.emotion_id = e.id + WHERE se.document_id = :documentId + AND (:modelId IS NULL OR se.model_id = :modelId) + GROUP BY f.feeling + ORDER BY avg_value DESC + LIMIT 12 + """; + + var query = session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .setParameter("modelId", modelId); + + return query.getResultList(); + }); + } + public List getEmotionModelsForDocumentWithName(long documentId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + SELECT DISTINCT m.id AS model_id, m.name AS model_name + FROM sentenceemotions se + JOIN models m ON m.id = se.model_id + WHERE se.document_id = :documentId + ORDER BY m.id + """; + + return session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .getResultList(); + }); + } + + public void updateCorpusJsonConfig(long corpusId,String jsonConfig) throws DatabaseOperationException{ + executeOperationSafely((session) -> { + Corpus corpus = session.get(Corpus.class,corpusId); + if (corpus != null){ + corpus.setCorpusJsonConfig(jsonConfig); + session.update(corpus); + } + return null; + }); + } + + /** + * Saves new sentence-topic assignments for a document, + * linking them to the corresponding sentence and model. 
+ */ + public void saveNewSentenceTopicsForDocument(long documentId, List newSentenceTopics) + throws DatabaseOperationException { + + executeOperationSafely(session -> { + Document doc = session.get(Document.class, documentId); + + if (doc == null || newSentenceTopics == null || newSentenceTopics.isEmpty()) { + return null; + } + + for (SentenceTopic st : newSentenceTopics) { + st.setDocument(doc); + + if (st.getSentence() != null) { + st.setSentence(session.get(Sentence.class, st.getSentence().getId())); + } + + if (st.getModel() != null) { + st.setModel((ModelEntity) session.merge(st.getModel())); + } + + session.save(st); + } + + return null; + }); + } + } diff --git a/uce.portal/uce.common/src/main/resources/defaultUceConfig.json b/uce.portal/uce.common/src/main/resources/defaultUceConfig.json index 3ccf0a6a..3e5462bc 100644 --- a/uce.portal/uce.common/src/main/resources/defaultUceConfig.json +++ b/uce.portal/uce.common/src/main/resources/defaultUceConfig.json @@ -144,7 +144,7 @@ ] }, "analysis": { - "enableAnalysisEngine": false + "enableAnalysisEngine": true }, "authentication": { "isActivated": false, diff --git a/uce.portal/uce.common/src/main/resources/models.json b/uce.portal/uce.common/src/main/resources/models.json new file mode 100644 index 00000000..edb566b2 --- /dev/null +++ b/uce.portal/uce.common/src/main/resources/models.json @@ -0,0 +1,1201 @@ +{ + "Topic Tweet": { + "url": "http://tweentopic.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/cardiffnlp/tweet-topic-large-multilingual", + "paper": "https://arxiv.org/abs/2410.03075", + "map": "cardiffnlp/tweet-topic-large-multilingual", + "Name": "Topic Tweet", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic IPTC ": { + "url": "http://iptc.service.component.duui.texttechnologylab.org", + "github": 
"https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/classla/multilingual-IPTC-news-topic-classifier", + "paper": "", + "map": "classla/multilingual-IPTC-news-topic-classifier", + "Name": "Topic IPTC", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic Manifesto": { + "url": "http://topic-manifestoberta-xlm-roberta.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/manifesto-project/manifestoberta-xlm-roberta-56policy-topics-context-2023-1-1", + "paper": "https://doi.org/10.25522/manifesto.manifestoberta.56topics.context.2023.1.1", + "map": "manifesto-project/manifestoberta-xlm-roberta-56policy-topics-context-2023-1-1", + "Name": "Topic Manifesto", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic ParlaCAP": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "classla/ParlaCAP-Topic-Classifier", + "Name": "Topic ParlaCap", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic dstefa": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "dstefa/roberta-base_topic_classification_nyt_news", + "Name": "Topic dstefa", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic Cardiffnlp (EN)": { + "url": "http://topic-cardiffnlp-roberta-large-tweet-topic-single-all.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/cardiffnlp/roberta-large-tweet-topic-single-all", + "paper": "https://aclanthology.org/2022.coling-1.299/", + "map": "cardiffnlp/roberta-large-tweet-topic-single-all", + "Name": "Topic Cardiffnlp (EN)", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": 
"" + }, + "Topic WebOrganizer (EN)": { + "url": "http://topic-organize-web.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/WebOrganizer/TopicClassifier", + "paper": "https://arxiv.org/abs/2502.10341", + "map": "WebOrganizer/TopicClassifier", + "Name": "Topic WebOrganizer (EN)", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Sentiment CardiffNLP": { + "url": "http://sentiment-cardiffnlp.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment", + "paper": "https://arxiv.org/abs/2104.12250", + "map": "cardiffnlp/twitter-xlm-roberta-base-sentiment", + "Name": "Sentiment CardiffNLP", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment CitizenLab": { + "url": "http://sentiment-citizenlab.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/citizenlab/twitter-xlm-roberta-base-sentiment-finetunned", + "paper": "", + "map": "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned", + "Name": "Sentiment CitizenLab", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment Multilingual DistilBert Students": { + "url": "http://duui-transformers-sentiment-atomar-distilbert-student.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/lxyuan/distilbert-base-multilingual-cased-sentiments-student", + "paper": "", + "map": 
"lxyuan/distilbert-base-multilingual-cased-sentiments-student", + "Name": "Sentiment Multilingual DistilBert Students", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment Multilingual DistilBert": { + "url": "http://duui-transformers-sentiment-atomar-distilbert-multilingual.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/philschmid/distilbert-base-multilingual-cased-sentiment", + "paper": "", + "map": "philschmid/distilbert-base-multilingual-cased-sentiment", + "Name": "Sentiment Multilingual DistilBert", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment CardiffNLP (EN)": { + "url": "http://duui-transformers-sentiment-atomar-cardiffnlp-sentiment-en.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-sentiment-latest", + "Name": "Sentiment CardiffNLP (EN)", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment RoBERTa based (EN)": { + "url": "http://duui-transformers-sentiment-atomar-roberta-based-en.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/j-hartmann/sentiment-roberta-large-english-3-classes", + "paper": "", + "map": "j-hartmann/sentiment-roberta-large-english-3-classes", + "Name": "Sentiment RoBERTa based (EN)", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment Finance (DE)": { + "url": 
"http://duui-transformers-sentiment-atomar-finance-sentiment-de.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/bardsai/finance-sentiment-de-base", + "paper": "", + "map": "bardsai/finance-sentiment-de-base", + "Name": "Sentiment Finance (DE)", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Hate Cardiffnlp": { + "url": "http://cardiffnlp.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-hate-latest", + "paper": "https://aclanthology.org/2023.woah-1.25.pdf", + "map": "cardiffnlp/twitter-roberta-base-hate-latest", + "Name": "Hate Cardiffnlp", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate MeHate-RoBERTa": { + "url": "http://hate-l3cube.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/l3cube-pune/me-hate-roberta", + "paper": "https://aclanthology.org/2023.findings-ijcnlp.22.pdf", + "map": "l3cube-pune/me-hate-roberta", + "Name": "Hate MeHate-RoBERTa", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate Andrazp": { + "url": "http://hate-andrazp.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Andrazp/multilingual-hate-speech-robacofi", + "paper": "", + "map": "Andrazp/multilingual-hate-speech-robacofi", + "Name": "Hate Andrazp", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "HateBERT GroNLP (EN)": { + "url": "http://hate-groNLP.service.component.duui.texttechnologylab.org", + "github": 
"https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/GroNLP/hateBERT", + "paper": "https://aclanthology.org/2021.woah-1.3/", + "map": "GroNLP/hateBERT", + "Name": "HateBERT GroNLP (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (EN)": { + "url": "http://hate-cnerg.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-english", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-english", + "Name": "Hate-speech-CNERG (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (DE)": { + "url": "http://hate-cnergde.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-german", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-german", + "Name": "Hate-speech-CNERG (DE)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (PL)": { + "url": "http://hate-cnergpl.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-polish", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-polish", + "Name": "Hate-speech-CNERG (PL)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (ES)": { + "url": "http://hate-cnerges.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": 
"https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-spanish", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-spanish", + "Name": "Hate-speech-CNERG (ES)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (IT)": { + "url": "http://hate-cnergit.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-italian", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-italian", + "Name": "Hate-speech-CNERG (IT)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (PT)": { + "url": "http://hate-cnergpt.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-portuguese", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-portugese", + "Name": "Hate-speech-CNERG (PT)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (AR)": { + "url": "http://hate-cnergar.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-arabic", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-arabic", + "Name": "Hate-speech-CNERG (AR)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (FR)": { + "url": "http://hate-cnergfr.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": 
"https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-french", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-french", + "Name": "Hate-speech-CNERG (FR)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (ID)": { + "url": "http://hate-cnergid.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-indonesian", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-indonesian", + "Name": "Hate-speech-CNERG (ID)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-Alexandrainst (DA)": { + "url": "http://hate-alexandrainst.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/alexandrainst/da-hatespeech-detection-base", + "paper": "", + "map": "alexandrainst/da-hatespeech-detection-base", + "Name": "Hate-speech-Alexandrainst (DA)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate Deepset GermanBert (DE)": { + "url": "http://hate-deepset.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/deepset/bert-base-german-cased-hatespeech-GermEval18Coarse", + "paper": "", + "map": "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse", + "Name": "Hate Deepset GermanBert (DE)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate LFTW Facebook (EN)": { + "url": "http://hate-lftw-facebook.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": 
"https://huggingface.co/facebook/roberta-hate-speech-dynabench-r4-target", + "paper": "", + "map": "facebook/roberta-hate-speech-dynabench-r4-target", + "Name": "Hate LFTW Facebook (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate MetaHateBERT (EN)": { + "url": "http://hate-metahatebert.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/irlab-udc/MetaHateBERT", + "paper": "", + "map": "irlab-udc/MetaHateBERT", + "Name": "Hate MetaHateBERT (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate HateBERT hateval (EN)": { + "url": "http://hate-hatebert-hateval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://osf.io/tbd58/files/osfstorage?view_only=d90e681c672a494bb555de99fc7ae780", + "paper": "", + "map": "HateBERT_hateval", + "Name": "Hate HateBERT hateval (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate Check Eziisk (EN)": { + "url": "http://hate-hate-check-eziisk.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/EZiisk/EZ_finetune_Vidgen_model_RHS_Best", + "paper": "", + "map": "HateCheckEziisk", + "Name": "Hate Check Eziisk (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Detoxify": { + "url": "http://toxic-detoxify.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://github.com/unitaryai/detoxify", + "paper": "", + "map": "Detoxify", + "Name": "Detoxify", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "One for all 
Toxicity": { + "url": "http://toxic-one-for-all-toxicity-v3.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/FredZhang7/one-for-all-toxicity-v3", + "paper": "", + "map": "FredZhang7/one-for-all-toxicity-v3", + "Name": "One for all Toxicity", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxic Multilingual Distil-Bert": { + "url": "http://toxic-distilbert-base-multilingual-cased-toxicity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "citizenlab/distilbert-base-multilingual-cased-toxicity", + "paper": "", + "map": "citizenlab/distilbert-base-multilingual-cased-toxicity", + "Name": "Toxic Multilingual Distil-Bert", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxic Comment (EN)": { + "url": "http://toxic-toxic-comment-model.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "citizenlab/distilbert-base-multilingual-cased-toxicity", + "paper": "", + "map": "martin-ha/toxic-comment-model", + "Name": "Toxic Comment (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Roberta Toxicity Classifier (EN)": { + "url": "http://toxic-roberta-toxicity-classifier.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/s-nlp/roberta_toxicity_classifier", + "paper": "https://aclanthology.org/2022.acl-long.469", + "map": "s-nlp/roberta_toxicity_classifier", + "Name": "Roberta Toxicity Classifier (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "ToxiGen (EN)": { + "url": 
"http://toxic-toxigen.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/tomh/toxigen_roberta", + "paper": "https://aclanthology.org/2022.acl-long.234/", + "map": "tomh/toxigen_roberta", + "Name": "ToxiGen (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxicity German (DE)": { + "url": "http://toxic-german-toxicity-classifier-plus-v2.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/EIStakovskii/german_toxicity_classifier_plus_v2", + "paper": "", + "map": "EIStakovskii/german_toxicity_classifier_plus_v2", + "Name": "Toxicity German (DE)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxic XLM-Roberta": { + "url": "http://toxic-xlm-roberta.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "paper": "", + "map": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "Name": "Toxicity XLM-Roberta", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "RUSSE-2022 (RU)": { + "url": "http://toxic-russe-2022.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/s-nlp/russian_toxicity_classifier", + "paper": "", + "map": "s-nlp/russian_toxicity_classifier", + "Name": "RUSSE-2022 (RU)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Multilingual Binary Toxicity Classification": { + "url": 
"http://toxic-xlm-multi-toxic.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/malexandersalazar/xlm-roberta-large-binary-cls-toxicity", + "paper": "", + "map": "malexandersalazar/xlm-roberta-large-binary-cls-toxicity", + "Name": "Multilingual Binary Toxicity Classification", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "RuBert Toxic (RU)": { + "url": "http://toxic-rubert-toxic.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/sismetanin/rubert-toxic-pikabu-2ch", + "paper": "", + "map": "sismetanin/rubert-toxic-pikabu-2ch", + "Name": "RuBert Toxic (RU)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "TextDetox Glot500": { + "url": "http://toxic-textdetox-glot500.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/textdetox/glot500-toxicity-classifier", + "paper": "", + "map": "textdetox/glot500-toxicity-classifier", + "Name": "TextDetox Glot500", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "TextDetox BERT": { + "url": "http://toxic-textdetox-bert.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/textdetox/bert-multilingual-toxicity-classifier", + "paper": "", + "map": "textdetox/bert-multilingual-toxicity-classifier", + "Name": "TextDetox BERT", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxicity Classifier UK": { + "url": "http://toxic-toxicity-classifier-uk.service.component.duui.texttechnologylab.org", + 
"github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/dardem/xlm-roberta-large-uk-toxicity", + "paper": "", + "map": "dardem/xlm-roberta-large-uk-toxicity", + "Name": "Toxicity Classifier UK", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "ToxDect (EN)": { + "url": "http://toxic-toxdect.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/Xuhui/ToxDect-roberta-large", + "paper": "", + "map": "Xuhui/ToxDect-roberta-large", + "Name": "ToxDect (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "XLM-RoBertA Multilingual Toxic Classifier": { + "url": "http://toxic-multi-toxic-classifier-plus.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "paper": "", + "map": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "Name": "XLM-RoBertA Multilingual Toxic Classifier", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "CNERG HateExplain (EN)": { + "url": "http://offensive-cnerg-hatexplain.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/bert-base-uncased-hatexplain", + "paper": "", + "map": "Hate-speech-CNERG/bert-base-uncased-hatexplain", + "Name": "CNERG HateExplain (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "CNERG HateExplain-Rationale (EN)": { + "url": "http://offensive-cnerg-hatexplain-rationale.service.component.duui.texttechnologylab.org", + "github": 
"https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two", + "paper": "", + "map": "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two", + "Name": "CNERG HateExplain-Rationale (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "NaijaXLM-T-base Hate (EN,IG,YO,HA,PIGDIN)": { + "url": "http://offensive-naija-xlm-t-base-hate.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://huggingface.co/worldbank/naija-xlm-twitter-base-hate", + "paper": "", + "map": "worldbank/naija-xlm-twitter-base-hate", + "Name": "NaijaXLM-T-base Hate (EN,IG,YO,HA,PIGDIN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "HateBERT abuseval (EN)": { + "url": "http://offensive-hatebert-abuseval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://osf.io/tbd58/files/osfstorage?view_only=d90e681c672a494bb555de99fc7ae780", + "paper": "", + "map": "HateBERT_abuseval", + "Name": "HateBERT abuseval (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "HateBERT offenseval (EN)": { + "url": "http://offensive-hatebert-offenseval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://osf.io/tbd58/files/osfstorage?view_only=d90e681c672a494bb555de99fc7ae780", + "paper": "", + "map": "HateBERT_offenseval", + "Name": "HateBERT offenseval (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "BERTweet Hate Speech (EN)": { + "url": 
"http://offensive-bertweet-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/bertweet-hate-speech", + "paper": "", + "map": "pysentimiento/bertweet-hate-speech", + "Name": "BERTweet Hate Speech (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "RoBERTuito Hate Speech (ES)": { + "url": "http://offensive-robertuito-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/robertuito-hate-speech", + "paper": "", + "map": "pysentimiento/robertuito-hate-speech", + "Name": "RoBERTuito Hate Speech (ES)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "BERTabaporu Hate Speech (PT)": { + "url": "http://offensive-bertabaporu-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/bertabaporu-pt-hate-speech", + "paper": "", + "map": "pysentimiento/bertabaporu-pt-hate-speech", + "Name": "BERTabaporu Hate Speech (PT)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "BERT-IT Hate Speech (IT)": { + "url": "http://offensive-bert-it-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/bert-it-hate-speech", + "paper": "", + "map": "pysentimiento/bert-it-hate-speech", + "Name": "BERT-IT Hate Speech (IT)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech": { + "url": 
"http://offensive-imsypp-social-media.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_multilingual", + "paper": "", + "map": "IMSyPP/hate_speech_multilingual", + "Name": "IMSyPP Social Media Hate Speech", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (EN)": { + "url": "http://offensive-imsypp-social-media-en.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_en", + "paper": "", + "map": "IMSyPP/hate_speech_en", + "Name": "IMSyPP Social Media Hate Speech (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (IT)": { + "url": "http://offensive-imsypp-social-media-it.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_it", + "paper": "", + "map": "IMSyPP/hate_speech_it", + "Name": "IMSyPP Social Media Hate Speech (IT)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (NL)": { + "url": "http://offensive-imsypp-social-media-nl.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_nl", + "paper": "", + "map": "IMSyPP/hate_speech_nl", + "Name": "IMSyPP Social Media Hate Speech (NL)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (SLO)": { + "url": 
"http://offensive-imsypp-social-media-slo.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_SLO", + "paper": "", + "map": "IMSyPP/hate_speech_slo", + "Name": "IMSyPP Social Media Hate Speech (SLO)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "CardiffNLP Multiclass Hate (EN)": { + "url": "http://offensive-cardiffnlp-hate-multiclass.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/cardiffnlp/twitter-roberta-base-hate-multiclass-latest", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-hate-multiclass-latest", + "Name": "CardiffNLP Multiclass Hate (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "CardiffNLP Multilabel Sensitive (EN)": { + "url": "http://offensive-cardiffnlp-sensitive-multilabel.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/cardiffnlp/twitter-roberta-large-sensitive-multilabel", + "paper": "", + "map": "cardiffnlp/twitter-roberta-large-sensitive-multilabel", + "Name": "CardiffNLP Multilabel Sensitive (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "Emotion Twitter": { + "url": "http://emotion-finetuned-twitter-xlm-roberta-base-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/02shanky/finetuned-twitter-xlm-roberta-base-emotion", + "paper": "", + "map": "02shanky/finetuned-twitter-xlm-roberta-base-emotion", + "Name": "Emotion Twitter", + "Main Tool": "Emotion Recognition", + 
"Variant": "Emotion", + "type": "" + }, + "Emotion DReAMy": { + "url": "http://emotion-dreamy-xlm-roberta-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "paper": "https://www.sciencedirect.com/science/article/pii/S1389945723015186", + "map": "DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "Name": "Emotion DReAMy", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion XLM-EMO": { + "url": "http://emotion-xlm-emo-t.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "paper": "https://aclanthology.org/2022.wassa-1.18/", + "map": "MilaNLProc/xlm-emo-t", + "Name": "Emotion XLM-EMO", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion pol emo mDeBERTa": { + "url": "http://duui-transformers-emotion-pol-emo-mdeberta.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://github.com/tweedmann/pol_emo_mDeBERTa2 ", + "paper": "", + "map": "pol_emo_mDeBERTa", + "Name": "Emotion pol emo mDeBERTa", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion DistilRoBERTa-base (EN)": { + "url": "http://emotion-emotion-english-distilroberta-base.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "paper": "", + "map": "j-hartmann/emotion-english-distilroberta-base", + "Name": "Emotion 
DistilRoBERTa-base (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion BerTweet (EN)": { + "url": "http://emotion-bertweet-base-emotion-analysis.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/finiteautomata/bertweet-base-emotion-analysis", + "paper": "https://arxiv.org/abs/2106.09462", + "map": "finiteautomata/bertweet-base-emotion-analysis", + "Name": "Emotion BerTweet (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion DistilBert-base (EN)": { + "url": "http://emotion-distilbert-base-uncased-finetuned-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/ActivationAI/distilbert-base-uncased-finetuned-emotion", + "paper": "", + "map": "ActivationAI/distilbert-base-uncased-finetuned-emotion", + "Name": "Emotion DistilBert-base (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion T5-base (EN)": { + "url": "http://emotion-t5-base-finetuned-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/mrm8488/t5-base-finetuned-emotion", + "paper": "", + "map": "mrm8488/t5-base-finetuned-emotion", + "Name": "Emotion T5-base (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion EmoAtlas (EN)": { + "url": "http://emotion-emoatlas.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://github.com/massimostel/emoatlas", + "paper": "", + "map": "EmoAtlas", + "Name": "Emotion EmoAtlas (EN)", + "Main Tool": "Emotion 
Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion Text Classifier (EN)": { + "url": "http://emotion-emotion-text-classifier.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/michellejieli/emotion_text_classifier", + "paper": "", + "map": "michellejieli/emotion_text_classifier", + "Name": "Emotion Text Classifier (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion pysentimiento (EN,ES,IT,PT)": { + "url": "http://emotion-pysentimiento.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://github.com/pysentimiento/pysentimiento/", + "paper": "https://arxiv.org/abs/2106.09462", + "map": "pysentimiento", + "Name": "Emotion EmoAtlas (EN,ES,IT,PT)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "CardiffNLP Twitter (EN)": { + "url": "http://emotion-cardiffnlp-twitter.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-emotion", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-emotion", + "Name": "CardiffNLP Twitter (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Roberta Go (EN)": { + "url": "http://emotion-roberta-go.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/SamLowe/roberta-base-go_emotions", + "paper": "", + "map": "SamLowe/roberta-base-go_emotions", + "Name": "Roberta Go (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Exalt Baseline": { + "url": 
"http://emotion-exalt-baseline.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/pranaydeeps/EXALT-Baseline", + "paper": "", + "map": "pranaydeeps/EXALT-Baseline", + "Name": "Exalt Baseline", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "BERT-Emotion (EN)": { + "url": "http://emotion-bert-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/boltuix/bert-emotion", + "paper": "", + "map": "boltuix/bert-emotion", + "Name": "BERT-Emotion (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Feel IT (IT)": { + "url": "http://emotion-feel-it.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/MilaNLProc/feel-it-italian-emotion", + "paper": "", + "map": "MilaNLProc/feel-it-italian-emotion", + "Name": "Feel IT (IT)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "CardiffNLP Multilabel (EN)": { + "url": "http://emotion-cardiffnlp-multilabel.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-emotion-multilabel-latest", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-emotion-multilabel-latest", + "Name": "CardiffNLP Multilabel (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion Analysis in Spanish (ES)": { + "url": "http://emotion-beto-es-analysis.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + 
"huggingface": "https://huggingface.co/finiteautomata/beto-emotion-analysis", + "paper": "", + "map": "finiteautomata/beto-emotion-analysis", + "Name": "Emotion Analysis in Spanish (ES)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "twitter-XLM-roBERTa-base for Emotion Analysis (ES)": { + "url": "http://emotion-twitter-xlm-roberta.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/daveni/twitter-xlm-roberta-emotion-es", + "paper": "", + "map": "daveni/twitter-xlm-roberta-emotion-es", + "Name": "twitter-XLM-roBERTa-base for Emotion Analysis (ES)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "German-Emotions (DE)": { + "url": "http://emotion-german-emotions.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/daveni/ChrisLalk/German-Emotions", + "paper": "", + "map": "ChrisLalk/German-Emotions", + "Name": "German-Emotions (DE)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "XLM-R Multi-Emotion Classifier (AR, EN)": { + "url": "http://emotion-xlm-emo-multi.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/msgfrom96/xlm_emo_multi", + "paper": "", + "map": "msgfrom96/xlm_emo_multi", + "Name": "XLM-R Multi-Emotion Classifier (AR, EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "RuBert cedr Emotion (RU)": { + "url": "http://emotion-rubert-cedr-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": 
"https://huggingface.co/cointegrated/rubert-tiny2-cedr-emotion-detection", + "paper": "", + "map": "cointegrated/rubert-tiny2-cedr-emotion-detection", + "Name": "RuBert cedr Emotion (RU)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "RuBert-Tiny2-Russian-Emotion (RU)": { + "url": "http://emotion-rubert-tiny2-russian.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/Aniemore/rubert-tiny2-russian-emotion-detection", + "paper": "", + "map": "Aniemore/rubert-tiny2-russian-emotion-detection", + "Name": "RuBert-Tiny2-Russian-Emotion (RU)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Chinese-Emotion-Small (ZH)": { + "url": "http://emotion-chinese-emotion-small.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/Johnson8187/Chinese-Emotion-Small", + "paper": "", + "map": "Johnson8187/Chinese-Emotion-Small", + "Name": "Chinese-Emotion-Small (ZH)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Chinese-Emotion (ZH)": { + "url": "http://emotion-chinese-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/Johnson8187/Chinese-Emotion", + "paper": "", + "map": "Johnson8187/Chinese-Emotion", + "Name": "Chinese-Emotion-Small (ZH)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Multilingual Go Emotions": { + "url": "http://emotion-multi-go-emotions.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": 
"https://huggingface.co/AnasAlokla/multilingual_go_emotions", + "paper": "", + "map": "AnasAlokla/multilingual_go_emotions", + "Name": "Multilingual Go Emotions", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "FactChecking UniEval (EN)": { + "url": "http://factchecking-unieval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://github.com/maszhongming/UniEval", + "paper": "https://arxiv.org/abs/2210.07197", + "map": "unieval", + "Name": "FactChecking UniEval (EN)", + "Main Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "FactChecking NUBIA (EN)": { + "url": "http://factchecking-nubia.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://github.com/wl-research/nubia", + "paper": "https://aclanthology.org/2020.evalnlgeval-1.4/", + "map": "nubia", + "Name": "FactChecking NUBIA (EN)", + "Main Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "FactChecking FactCC (EN)": { + "url": "http://factchecking-factcc.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://huggingface.co/manueldeprada/FactCC", + "paper": "https://aclanthology.org/2020.emnlp-main.750/", + "map": "manueldeprada/FactCC", + "Name": "FactChecking FactCC (EN)", + "Main Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "FactChecking MiniCheck (EN)": { + "url": "http://factchecking-minicheck.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://github.com/Liyan06/MiniCheck", + "paper": "https://aclanthology.org/2020.evalnlgeval-1.4/", + "map": "MiniCheck", + "Name": "FactChecking MiniCheck (EN)", + "Main 
Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "Cohesion Intfloat": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "intfloat/multilingual-e5-base", + "Name": "Cohesion Intfloat", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BertSentence" + }, + "Cohesion BERT": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "google-bert/bert-base-multilingual-cased", + "Name": "Cohesion BERT", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion FaceBook": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "facebook/xlm-v-base", + "Name": "Cohesion FaceBook", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion FaceBook-XLM-Roberta": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "FacebookAI/xlm-roberta-large", + "Name": "Cohesion FaceBook-XLM-Roberta", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion CardiffNLP": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "cardiffnlp/twitter-xlm-roberta-base", + 
"Name": "Cohesion CardiffNLP", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion LEALLA": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "setu4993/LEALLA-small", + "Name": "Cohesion LEALLA", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BertSentence" + }, + "Cohesion Twitter": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "Twitter/twhin-bert-large", + "Name": "Cohesion Twitter", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "Bert" + }, + "Cohesion LABSE": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "sentence-transformers/LaBSE", + "Name": "Cohesion LABSE", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "Sentence" + }, + "Stance Trump Twitter US Election 2020 (EN)": { + "url": "http://stance-kornosk.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Stance", + "huggingface": "https://huggingface.co/kornosk/bert-election2020-twitter-stance-trump-KE-MLM", + "paper": "https://www.aclweb.org/anthology/2021.naacl-main.376", + "map": "kornosk", + "Name": "Stance Trump Twitter US Election 2020 (EN)", + "Main Tool": "Stance detection", + "Variant": "Stance", + "type": "Stance" + }, + "Stance ZeroShot PoliStance (EN)": { + "url": "http://stance-mlburnham.service.component.duui.texttechnologylab.org", + "github": 
"https://github.com/texttechnologylab/duui-uima/tree/main/duui-Stance", + "huggingface": "https://huggingface.co/mlburnham/deberta-v3-base-polistance-affect-v1.0", + "paper": "https://arxiv.org/abs/2409.02078", + "map": "mlburnham", + "Name": "Stance ZeroShot PoliStance (EN)", + "Main Tool": "Stance detection", + "Variant": "Stance", + "type": "Stance" + }, + "Readability (EN)": { + "url": "http://readability.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Readability", + "Name": "Readability (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + }, + "Textstat (EN)": { + "url": "http://readability-textstat.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Textstat", + "Name": "Textstat (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + }, + "Diversity (EN)": { + "url": "http://readability-diversity.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Diversity", + "Name": "Diversity (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + }, + "Readability Advance (EN)": { + "url": "http://readability-readability.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Readability", + "Name": "Readability Advance (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + } +} \ No newline at end of file diff --git a/uce.portal/uce.corpus-importer/logs/uce-corpus-importer-2025-11-27-1.log 
b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer-2025-11-27-1.log new file mode 100644 index 00000000..e03b5847 --- /dev/null +++ b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer-2025-11-27-1.log @@ -0,0 +1,66 @@ +2025-11-27 20:53:27.127 [main] INFO org.hibernate.Version - HHH000412: Hibernate ORM core version 5.6.15.Final +2025-11-27 20:53:27.228 [main] INFO org.hibernate.spatial.integration.SpatialService - HHH80000001: hibernate-spatial integration enabled : true +2025-11-27 20:53:27.265 [main] INFO org.hibernate.annotations.common.Version - HCANN000001: Hibernate Commons Annotations {5.1.2.Final} +2025-11-27 20:53:27.370 [main] WARN org.hibernate.orm.connections.pooling - HHH10001002: Using Hibernate built-in connection pool (not for production use!) +2025-11-27 20:53:27.370 [main] INFO org.hibernate.orm.connections.pooling - HHH10001005: using driver [null] at URL [jdbc:postgresql://localhost:8002/uce] +2025-11-27 20:53:27.371 [main] INFO org.hibernate.orm.connections.pooling - HHH10001001: Connection properties: {password=****, user=postgres} +2025-11-27 20:53:27.371 [main] INFO org.hibernate.orm.connections.pooling - HHH10001003: Autocommit mode: false +2025-11-27 20:53:27.374 [main] INFO org.hibernate.engine.jdbc.connections.internal.DriverManagerConnectionProviderImpl - HHH000115: Hibernate connection pool size: 20 (min=1) +2025-11-27 20:53:27.532 [main] INFO org.hibernate.dialect.Dialect - HHH000400: Using dialect: org.hibernate.dialect.PostgreSQL10Dialect +2025-11-27 20:53:28.416 [main] INFO org.hibernate.orm.connections.access - HHH10001501: Connection obtained from JdbcConnectionAccess [org.hibernate.engine.jdbc.env.internal.JdbcEnvironmentInitiator$ConnectionProviderJdbcConnectionAccess@1f7557fe] for (non-JTA) DDL execution was not in auto-commit mode; the Connection 'local transaction' will be committed and the Connection will be set into auto-commit mode. 
+2025-11-27 20:53:29.335 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Executing external database scripts from ../database/ +2025-11-27 20:53:29.337 [main] WARN org.texttechnologylab.uce.corpusimporter.App - Couldn't read the db scripts in the external database scripts folder; path wasn't found or other IO problems. +java.nio.file.NoSuchFileException: ..\database + at java.base/sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:85) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:108) ~[?:?] + at java.base/sun.nio.fs.WindowsDirectoryStream.(WindowsDirectoryStream.java:86) ~[?:?] + at java.base/sun.nio.fs.WindowsFileSystemProvider.newDirectoryStream(WindowsFileSystemProvider.java:541) ~[?:?] + at java.base/java.nio.file.Files.newDirectoryStream(Files.java:482) ~[?:?] + at java.base/java.nio.file.Files.list(Files.java:3785) ~[?:?] + at org.texttechnologylab.uce.common.utils.SystemStatus.executeExternalDatabaseScripts(SystemStatus.java:38) ~[classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.lambda$main$0(App.java:44) ~[classes/:?] + at org.texttechnologylab.uce.common.exceptions.ExceptionUtils.tryCatchLog(ExceptionUtils.java:30) [classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.main(App.java:43) [classes/:?] +2025-11-27 20:53:29.341 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Finished with executing external database scripts. 
+2025-11-27 20:55:05.236 [main] INFO org.hibernate.Version - HHH000412: Hibernate ORM core version 5.6.15.Final +2025-11-27 20:55:05.340 [main] INFO org.hibernate.spatial.integration.SpatialService - HHH80000001: hibernate-spatial integration enabled : true +2025-11-27 20:55:05.373 [main] INFO org.hibernate.annotations.common.Version - HCANN000001: Hibernate Commons Annotations {5.1.2.Final} +2025-11-27 20:55:05.475 [main] WARN org.hibernate.orm.connections.pooling - HHH10001002: Using Hibernate built-in connection pool (not for production use!) +2025-11-27 20:55:05.475 [main] INFO org.hibernate.orm.connections.pooling - HHH10001005: using driver [null] at URL [jdbc:postgresql://localhost:8002/uce] +2025-11-27 20:55:05.475 [main] INFO org.hibernate.orm.connections.pooling - HHH10001001: Connection properties: {password=****, user=postgres} +2025-11-27 20:55:05.475 [main] INFO org.hibernate.orm.connections.pooling - HHH10001003: Autocommit mode: false +2025-11-27 20:55:05.477 [main] INFO org.hibernate.engine.jdbc.connections.internal.DriverManagerConnectionProviderImpl - HHH000115: Hibernate connection pool size: 20 (min=1) +2025-11-27 20:55:05.624 [main] INFO org.hibernate.dialect.Dialect - HHH000400: Using dialect: org.hibernate.dialect.PostgreSQL10Dialect +2025-11-27 20:55:06.528 [main] INFO org.hibernate.orm.connections.access - HHH10001501: Connection obtained from JdbcConnectionAccess [org.hibernate.engine.jdbc.env.internal.JdbcEnvironmentInitiator$ConnectionProviderJdbcConnectionAccess@52963839] for (non-JTA) DDL execution was not in auto-commit mode; the Connection 'local transaction' will be committed and the Connection will be set into auto-commit mode. 
+2025-11-27 20:55:07.427 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Executing external database scripts from ../database/ +2025-11-27 20:55:07.430 [main] WARN org.texttechnologylab.uce.corpusimporter.App - Couldn't read the db scripts in the external database scripts folder; path wasn't found or other IO problems. +java.nio.file.NoSuchFileException: ..\database + at java.base/sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:85) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:108) ~[?:?] + at java.base/sun.nio.fs.WindowsDirectoryStream.(WindowsDirectoryStream.java:86) ~[?:?] + at java.base/sun.nio.fs.WindowsFileSystemProvider.newDirectoryStream(WindowsFileSystemProvider.java:541) ~[?:?] + at java.base/java.nio.file.Files.newDirectoryStream(Files.java:482) ~[?:?] + at java.base/java.nio.file.Files.list(Files.java:3785) ~[?:?] + at org.texttechnologylab.uce.common.utils.SystemStatus.executeExternalDatabaseScripts(SystemStatus.java:38) ~[classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.lambda$main$0(App.java:44) ~[classes/:?] + at org.texttechnologylab.uce.common.exceptions.ExceptionUtils.tryCatchLog(ExceptionUtils.java:30) [classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.main(App.java:43) [classes/:?] +2025-11-27 20:55:07.433 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Finished with executing external database scripts. 
+2025-11-27 20:55:07.524 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - + _ _ _____ _____ _____ _ +| | | / __ \| ___| |_ _| | | +| | | | / \/| |__ | | _ __ ___ _ __ ___ _ __| |_ +| | | | | | __| | || '_ ` _ \| '_ \ / _ \| '__| __| +| |_| | \__/\| |___ _| || | | | | | |_) | (_) | | | |_ + \___/ \____/\____/ \___/_| |_| |_| .__/ \___/|_| \__| + | | + |_| +2025-11-27 20:55:07.524 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Global Import Id: 07d96ad0-e619-4231-832a-e2202fc86c9d +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importer Number: 1 +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Used Threads: 1 +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importing from path: F:\Area51\UCE\corpora\my_first_corpus +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Reading view: null + + diff --git a/uce.portal/uce.corpus-importer/logs/uce-corpus-importer.log b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer.log new file mode 100644 index 00000000..060a6efa --- /dev/null +++ b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer.log @@ -0,0 +1,1650 @@ +2026-01-30 17:38:54.080 [main] INFO org.hibernate.Version - HHH000412: Hibernate ORM core version 5.6.15.Final +2026-01-30 17:38:54.188 [main] INFO org.hibernate.spatial.integration.SpatialService - HHH80000001: hibernate-spatial integration enabled : true +2026-01-30 17:38:54.225 [main] INFO org.hibernate.annotations.common.Version - HCANN000001: Hibernate Commons Annotations {5.1.2.Final} +2026-01-30 17:38:54.329 [main] WARN org.hibernate.orm.connections.pooling - HHH10001002: Using Hibernate built-in connection pool (not for production use!) 
+2026-01-30 17:38:54.329 [main] INFO org.hibernate.orm.connections.pooling - HHH10001005: using driver [null] at URL [jdbc:postgresql://localhost:8002/uce] +2026-01-30 17:38:54.342 [main] INFO org.hibernate.orm.connections.pooling - HHH10001001: Connection properties: {password=****, user=postgres} +2026-01-30 17:38:54.342 [main] INFO org.hibernate.orm.connections.pooling - HHH10001003: Autocommit mode: false +2026-01-30 17:38:54.345 [main] INFO org.hibernate.engine.jdbc.connections.internal.DriverManagerConnectionProviderImpl - HHH000115: Hibernate connection pool size: 20 (min=1) +2026-01-30 17:38:54.507 [main] INFO org.hibernate.dialect.Dialect - HHH000400: Using dialect: org.hibernate.dialect.PostgreSQL10Dialect +2026-01-30 17:38:55.386 [main] INFO org.hibernate.orm.connections.access - HHH10001501: Connection obtained from JdbcConnectionAccess [org.hibernate.engine.jdbc.env.internal.JdbcEnvironmentInitiator$ConnectionProviderJdbcConnectionAccess@42e4431] for (non-JTA) DDL execution was not in auto-commit mode; the Connection 'local transaction' will be committed and the Connection will be set into auto-commit mode. +2026-01-30 17:38:56.293 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Executing external database scripts from ../database/ +2026-01-30 17:38:56.295 [main] WARN org.texttechnologylab.uce.corpusimporter.App - Couldn't read the db scripts in the external database scripts folder; path wasn't found or other IO problems. +java.nio.file.NoSuchFileException: ..\database + at java.base/sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:85) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:108) ~[?:?] + at java.base/sun.nio.fs.WindowsDirectoryStream.(WindowsDirectoryStream.java:86) ~[?:?] 
+ at java.base/sun.nio.fs.WindowsFileSystemProvider.newDirectoryStream(WindowsFileSystemProvider.java:541) ~[?:?] + at java.base/java.nio.file.Files.newDirectoryStream(Files.java:482) ~[?:?] + at java.base/java.nio.file.Files.list(Files.java:3785) ~[?:?] + at org.texttechnologylab.uce.common.utils.SystemStatus.executeExternalDatabaseScripts(SystemStatus.java:38) ~[classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.lambda$main$0(App.java:44) ~[classes/:?] + at org.texttechnologylab.uce.common.exceptions.ExceptionUtils.tryCatchLog(ExceptionUtils.java:30) [classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.main(App.java:43) [classes/:?] +2026-01-30 17:38:56.299 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Finished with executing external database scripts. +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - + _ _ _____ _____ _____ _ +| | | / __ \| ___| |_ _| | | +| | | | / \/| |__ | | _ __ ___ _ __ ___ _ __| |_ +| | | | | | __| | || '_ ` _ \| '_ \ / _ \| '__| __| +| |_| | \__/\| |___ _| || | | | | | |_) | (_) | | | |_ + \___/ \____/\____/ \___/_| |_| |_| .__/ \___/|_| \__| + | | + |_| +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Global Import Id: f3ff8f20-2b53-4dc9-9b09-81964b9a3a8c +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importer Number: 1 +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Used Threads: 1 +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importing from path: F:\Area51\UCE\corpora\my_first_corpus +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Reading view: null + + +2026-01-30 17:38:56.345 [main] WARN org.hibernate.orm.deprecation - HHH90000022: Hibernate's legacy org.hibernate.Criteria API is deprecated; use 
the JPA javax.persistence.criteria.CriteriaQuery instead +2026-01-30 17:38:57.584 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:38:57.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:38:57.606 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:38:57.678 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:38:57.679 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:38:57.680 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:38:57.681 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:38:57.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:38:57.710 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:38:57.712 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:38:57.717 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:38:57.717 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:38:57.717 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id 23-year-old Ukrainian refugee killed on North Carolina transit system.json... +2026-01-30 17:38:59.940 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document 23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:38:59.940 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:38:59.946 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:38:59.950 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:39:00.025 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:00.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:00.050 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json +2026-01-30 17:39:00.054 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:00.054 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. 
Skipping this annotation then. +2026-01-30 17:39:00.054 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:00.055 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:00.056 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:00.061 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:00.061 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:00.064 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:00.064 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:00.064 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json... +2026-01-30 17:39:00.954 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:01.094 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.094 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:01.098 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.101 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:01.323 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:01.327 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Adams case and other Trump moves threaten to open corruption floodgates, experts say.json +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:01.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:01.362 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:01.363 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:01.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:01.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Adams case and other Trump moves threaten to open corruption floodgates, experts say.json... +2026-01-30 17:39:01.673 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:05.728 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.728 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:05.732 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.737 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.781 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:05.785 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:05.788 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json +2026-01-30 17:39:05.791 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:05.791 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:05.791 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:05.792 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:05.793 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:05.797 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:05.798 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:05.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:05.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json... +2026-01-30 17:39:07.020 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.020 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:07.024 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.028 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.095 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:07.099 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:07.124 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Agency that handles green cards and citizenship to hire armed agents who can make arrests.json +2026-01-30 17:39:07.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:07.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:07.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:07.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:07.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:07.148 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:07.149 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:07.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:07.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Agency that handles green cards and citizenship to hire armed agents who can make arrests.json... +2026-01-30 17:39:07.614 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:07.670 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:09.302 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.302 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:09.306 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.311 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.342 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:09.345 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:09.349 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json +2026-01-30 17:39:09.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:09.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:09.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:09.353 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:09.354 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:09.361 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:09.361 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:09.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:09.366 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json... +2026-01-30 17:39:10.270 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:10.623 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.624 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:10.628 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.632 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.668 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:10.670 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:10.672 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json +2026-01-30 17:39:10.674 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:10.681 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:10.681 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:10.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:10.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json... +2026-01-30 17:39:11.151 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:12.346 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.346 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:12.348 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.352 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.368 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:12.370 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:12.372 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:12.377 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:12.378 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:12.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:12.381 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json... +2026-01-30 17:39:12.982 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:13.154 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.155 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:13.156 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.162 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.204 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:13.206 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:13.209 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Biden administration has no plans to fine companies if TikTok ban goes into effect.json +2026-01-30 17:39:13.211 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:13.217 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:13.217 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:13.220 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.221 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:13.221 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Biden administration has no plans to fine companies if TikTok ban goes into effect.json... +2026-01-30 17:39:13.467 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:15.004 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.004 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:15.006 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.009 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:15.033 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:15.036 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:15.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:15.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:15.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:15.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:15.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json... +2026-01-30 17:39:15.745 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:16.236 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.236 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:16.238 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.241 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Bruce Springsteen opens U.K. 
tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:16.272 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:16.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:16.280 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:16.280 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:16.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:39:16.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json... +2026-01-30 17:39:16.745 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:17.126 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.126 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:17.130 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.132 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.157 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:17.159 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:39:17.161 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Civil rights agency sued over handling of trans worker discrimination complaints.json +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:17.165 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:17.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:17.168 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.168 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:17.168 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Civil rights agency sued over handling of trans worker discrimination complaints.json... +2026-01-30 17:39:17.507 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:18.438 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.438 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:18.441 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.444 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.465 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:18.466 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:18.470 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing DHS has begun performing polygraph tests on employees to find leakers.json +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:18.474 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:18.474 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:18.478 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.478 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:18.478 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id DHS has begun performing polygraph tests on employees to find leakers.json... +2026-01-30 17:39:19.007 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:19.246 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.246 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:19.248 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.251 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:19.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:19.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json +2026-01-30 17:39:19.280 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:19.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:19.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:19.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:19.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json... +2026-01-30 17:39:19.571 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:20.504 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.504 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:20.506 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.509 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Elon Musk turns on Nigel Farage, calls on him to step down as U.K. 
party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.560 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:20.562 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:20.564 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing How much money you should save for a comfortable retirement.json +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:20.568 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:20.568 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:20.571 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.571 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:39:20.571 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id How much money you should save for a comfortable retirement.json... +2026-01-30 17:39:20.952 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:23.767 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.767 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:23.771 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.774 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.796 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:23.797 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:23.800 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. 
+2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:23.804 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:23.804 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:23.807 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.807 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:23.807 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json... +2026-01-30 17:39:24.940 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:24.953 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:24.954 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:24.956 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:24.959 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:24.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:24.991 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:24.993 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Lakers star Luka Dončić says he took a month off from basketball to transform his body.json +2026-01-30 17:39:24.995 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:24.998 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:24.998 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:25.001 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:25.001 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:25.001 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Lakers star Luka Dončić says he took a month off from basketball to transform his body.json... +2026-01-30 17:39:25.347 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:26.238 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.238 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:26.241 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.243 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.251 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:26.252 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:26.255 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Musk's brain implant company filed as a 'disadvantaged business'.json +2026-01-30 17:39:26.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:26.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:26.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:26.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:26.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:26.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:26.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:26.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:26.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Musk's brain implant company filed as a 'disadvantaged business'.json... +2026-01-30 17:39:26.817 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:27.492 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.492 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:27.495 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.497 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.522 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:27.524 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:27.527 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:27.531 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:27.531 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:27.533 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:27.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json... +2026-01-30 17:39:28.206 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:29.243 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.243 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:29.246 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.249 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:29.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:29.260 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing New York Jets to sign QB Justin Fields, according to reports.json +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:29.263 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:29.265 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.265 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:29.265 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id New York Jets to sign QB Justin Fields, according to reports.json... +2026-01-30 17:39:29.789 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:29.825 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.826 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:29.829 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.832 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.848 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:29.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:29.852 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json +2026-01-30 17:39:29.853 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:29.853 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:29.853 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:29.854 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:29.854 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:29.855 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:29.855 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. 
+2026-01-30 17:39:29.858 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.858 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:29.858 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json... +2026-01-30 17:39:30.039 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:30.867 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.867 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:30.869 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.872 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.892 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:39:30.893 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:30.895 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:30.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:30.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:30.901 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.901 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:30.901 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json... 
+2026-01-30 17:39:31.310 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:31.988 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:31.988 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:31.990 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:31.992 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:32.022 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:32.023 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:32.026 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Sen. Michael Bennet will run for governor of Colorado in 2026.json +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. 
+2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:32.029 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:32.029 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:32.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:32.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:32.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Sen. Michael Bennet will run for governor of Colorado in 2026.json... +2026-01-30 17:39:32.455 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:33.698 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.698 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:33.700 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.703 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.755 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:33.757 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:33.760 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:33.763 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:33.765 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:33.766 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:33.769 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.769 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:33.769 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json... +2026-01-30 17:39:34.440 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:36.454 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.454 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:36.457 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.460 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.525 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:36.527 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:36.530 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Target says its holiday sales were better than expected — but its profits weren't.json +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:36.533 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:36.535 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:36.535 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:36.538 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.538 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:36.538 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Target says its holiday sales were better than expected — but its profits weren't.json... +2026-01-30 17:39:37.556 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:39.092 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.093 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:39.095 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.098 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:39.157 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:39.159 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 14 best toothpastes for clean, healthy teeth in 2025.json +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:39.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:39.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:39.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:39.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id The 14 best toothpastes for clean, healthy teeth in 2025.json... +2026-01-30 17:39:40.127 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:41.418 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.418 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:41.420 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.423 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.528 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:41.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:41.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:41.537 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:41.540 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:41.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. 
+2026-01-30 17:39:41.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:41.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json... +2026-01-30 17:39:42.345 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:46.996 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:46.996 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:46.999 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:47.002 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:47.034 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:47.035 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:47.037 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing These three LGBTQ women just made congressional 'herstory'.json +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:47.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:47.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:47.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:47.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:47.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id These three LGBTQ women just made congressional 'herstory'.json... +2026-01-30 17:39:48.227 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.227 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:48.229 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.232 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.248 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:39:48.248 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:48.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Thousands of U.S. seniors deal with the harsh realities of homelessness.json +2026-01-30 17:39:48.255 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:48.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:48.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:48.260 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.261 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:48.261 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Thousands of U.S. seniors deal with the harsh realities of homelessness.json... 
+2026-01-30 17:39:48.748 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:49.048 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.048 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:49.050 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.052 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.100 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:49.102 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:49.104 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json +2026-01-30 17:39:49.105 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. 
+2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:49.108 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:49.108 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:49.110 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.110 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:49.110 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json... +2026-01-30 17:39:49.195 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:49.358 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:51.251 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:51.251 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:51.255 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:51.257 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:51.328 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:51.329 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:51.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump wants automakers to move vehicle production to the U.S. It's not that simple..json +2026-01-30 17:39:51.333 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:51.333 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:51.334 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:51.334 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:51.334 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:51.336 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:51.337 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:51.339 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:51.339 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:51.339 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Trump wants automakers to move vehicle production to the U.S. It's not that simple..json... +2026-01-30 17:39:52.155 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:54.184 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:54.184 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:54.186 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:54.190 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:54.229 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:54.230 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:54.233 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json +2026-01-30 17:39:54.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:54.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:54.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:54.236 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:54.236 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:54.237 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:54.237 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:54.239 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:54.239 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:54.239 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json... +2026-01-30 17:39:55.349 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:55.742 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.742 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:55.745 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.748 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.774 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:55.775 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:55.777 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:55.780 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:55.780 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:55.782 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.782 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:55.782 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json... +2026-01-30 17:39:56.331 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:57.112 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.112 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:57.114 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.117 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\U.S. 
envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:57.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:57.132 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:57.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:57.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:57.137 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.137 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:39:57.137 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json... +2026-01-30 17:39:57.664 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:57.833 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.833 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:57.835 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.837 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.871 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:57.872 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:57.875 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Utah lawmakers said gender-affirming care is harmful to kids. 
Their own study contradicts that claim..json +2026-01-30 17:39:57.877 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:57.877 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:57.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:57.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:57.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:57.879 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:57.880 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:57.882 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:57.883 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:57.883 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json... +2026-01-30 17:39:58.114 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:59.606 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:59.606 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:59.608 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:59.610 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:59.630 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:59.631 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:59.634 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Waymo's robotaxis to start carrying passengers in Atlanta.json +2026-01-30 17:39:59.635 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:59.635 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:59.636 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:59.636 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:59.636 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:59.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:59.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:59.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:39:59.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:59.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Waymo's robotaxis to start carrying passengers in Atlanta.json... +2026-01-30 17:40:00.332 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:40:00.675 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.675 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:40:00.678 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.680 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.719 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:00.721 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:00.723 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:40:00.726 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:40:00.726 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:40:00.729 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.729 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:00.729 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json... +2026-01-30 17:40:01.094 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:40:02.532 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:02.532 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:40:02.534 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:02.537 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:02.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.572 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:40:02.576 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id 23-year-old Ukrainian refugee killed on North Carolina transit system.json already exists in the corpus 8. +2026-01-30 17:40:02.576 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.615 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.615 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.615 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:02.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.641 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json +2026-01-30 17:40:02.643 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json already exists in the corpus 8. +2026-01-30 17:40:02.643 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.754 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.758 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:02.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Adams case and other Trump moves threaten to open corruption floodgates, experts say.json +2026-01-30 17:40:02.764 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Adams case and other Trump moves threaten to open corruption floodgates, experts say.json already exists in the corpus 8. +2026-01-30 17:40:02.764 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.783 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.783 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.783 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.812 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.813 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.817 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json +2026-01-30 17:40:02.819 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json already exists in the corpus 8. +2026-01-30 17:40:02.819 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:02.837 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.837 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.837 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.879 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.882 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Agency that handles green cards and citizenship to hire armed agents who can make arrests.json +2026-01-30 17:40:02.884 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Agency that handles green cards and citizenship to hire armed agents who can make arrests.json already exists in the corpus 8. +2026-01-30 17:40:02.884 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.927 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:02.928 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.930 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json +2026-01-30 17:40:02.931 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json already exists in the corpus 8. +2026-01-30 17:40:02.931 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.942 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.942 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.942 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.976 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.976 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.980 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json +2026-01-30 17:40:02.982 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json already exists in the corpus 8. 
+2026-01-30 17:40:02.982 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.019 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.022 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json +2026-01-30 17:40:03.024 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json already exists in the corpus 8. +2026-01-30 17:40:03.024 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:03.085 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.086 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.088 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Biden administration has no plans to fine companies if TikTok ban goes into effect.json +2026-01-30 17:40:03.089 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Biden administration has no plans to fine companies if TikTok ban goes into effect.json already exists in the corpus 8. +2026-01-30 17:40:03.089 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.100 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.131 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.133 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json +2026-01-30 17:40:03.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Bruce Springsteen opens U.K. 
tour by calling Trump 'unfit' for office.json already exists in the corpus 8. +2026-01-30 17:40:03.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.147 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.147 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.147 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.170 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.170 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.172 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json +2026-01-30 17:40:03.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json already exists in the corpus 8. +2026-01-30 17:40:03.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.186 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.186 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.186 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:03.228 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.231 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.233 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Civil rights agency sued over handling of trans worker discrimination complaints.json +2026-01-30 17:40:03.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Civil rights agency sued over handling of trans worker discrimination complaints.json already exists in the corpus 8. +2026-01-30 17:40:03.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.249 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.249 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.249 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.269 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.269 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.272 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing DHS has begun performing polygraph tests on employees to find leakers.json +2026-01-30 17:40:03.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id DHS has begun performing polygraph tests on employees to find leakers.json already exists in the corpus 8. +2026-01-30 17:40:03.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.286 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.286 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.286 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.314 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.317 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json +2026-01-30 17:40:03.319 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json already exists in the corpus 8. +2026-01-30 17:40:03.319 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.330 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.330 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.330 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.375 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:40:03.386 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.387 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.390 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing How much money you should save for a comfortable retirement.json +2026-01-30 17:40:03.393 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id How much money you should save for a comfortable retirement.json already exists in the corpus 8. +2026-01-30 17:40:03.394 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.410 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.410 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.410 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.448 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.449 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.453 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json +2026-01-30 17:40:03.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:03.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.499 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.501 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Lakers star Luka Dončić says he took a month off from basketball to transform his body.json +2026-01-30 17:40:03.503 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Lakers star Luka Dončić says he took a month off from basketball to transform his body.json already exists in the corpus 8. +2026-01-30 17:40:03.503 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:03.513 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.513 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.513 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.537 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.540 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Musk's brain implant company filed as a 'disadvantaged business'.json +2026-01-30 17:40:03.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Musk's brain implant company filed as a 'disadvantaged business'.json already exists in the corpus 8. +2026-01-30 17:40:03.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.549 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.549 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.549 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.576 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.577 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.580 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json +2026-01-30 17:40:03.582 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json already exists in the corpus 8. +2026-01-30 17:40:03.582 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.589 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.589 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.589 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.605 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.606 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing New York Jets to sign QB Justin Fields, according to reports.json +2026-01-30 17:40:03.611 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id New York Jets to sign QB Justin Fields, according to reports.json already exists in the corpus 8. +2026-01-30 17:40:03.611 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.621 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.622 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.622 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.645 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.646 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.650 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json +2026-01-30 17:40:03.652 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json already exists in the corpus 8. +2026-01-30 17:40:03.652 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.662 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.662 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.662 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.690 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.691 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.694 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json +2026-01-30 17:40:03.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json already exists in the corpus 8. +2026-01-30 17:40:03.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.705 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.705 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.705 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.739 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.740 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.744 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Sen. Michael Bennet will run for governor of Colorado in 2026.json +2026-01-30 17:40:03.745 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Sen. Michael Bennet will run for governor of Colorado in 2026.json already exists in the corpus 8. +2026-01-30 17:40:03.745 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.753 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.753 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.753 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.804 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.805 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.808 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json +2026-01-30 17:40:03.810 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json already exists in the corpus 8. +2026-01-30 17:40:03.810 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.820 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.821 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.821 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.885 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.887 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.890 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Target says its holiday sales were better than expected — but its profits weren't.json +2026-01-30 17:40:03.892 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Target says its holiday sales were better than expected — but its profits weren't.json already exists in the corpus 8. +2026-01-30 17:40:03.892 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.903 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.903 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.903 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.952 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.953 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.956 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 14 best toothpastes for clean, healthy teeth in 2025.json +2026-01-30 17:40:03.957 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 14 best toothpastes for clean, healthy teeth in 2025.json already exists in the corpus 8. +2026-01-30 17:40:03.957 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.965 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.965 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.965 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.080 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.083 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.087 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json +2026-01-30 17:40:04.090 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json already exists in the corpus 8. +2026-01-30 17:40:04.090 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:04.132 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing These three LGBTQ women just made congressional 'herstory'.json +2026-01-30 17:40:04.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id These three LGBTQ women just made congressional 'herstory'.json already exists in the corpus 8. +2026-01-30 17:40:04.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.142 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.142 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.143 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.161 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Thousands of U.S. seniors deal with the harsh realities of homelessness.json +2026-01-30 17:40:04.165 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Thousands of U.S. seniors deal with the harsh realities of homelessness.json already exists in the corpus 8. +2026-01-30 17:40:04.165 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.173 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:04.173 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.173 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.225 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.226 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.229 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json +2026-01-30 17:40:04.230 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json already exists in the corpus 8. +2026-01-30 17:40:04.230 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.238 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.238 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.238 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.302 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.304 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:04.309 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump wants automakers to move vehicle production to the U.S. It's not that simple..json +2026-01-30 17:40:04.310 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump wants automakers to move vehicle production to the U.S. It's not that simple..json already exists in the corpus 8. +2026-01-30 17:40:04.310 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.366 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.370 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json +2026-01-30 17:40:04.372 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json already exists in the corpus 8. +2026-01-30 17:40:04.372 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:04.382 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.382 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.382 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.413 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.413 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.416 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json +2026-01-30 17:40:04.417 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json already exists in the corpus 8. +2026-01-30 17:40:04.417 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.424 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.424 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.424 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.442 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:04.442 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.445 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json +2026-01-30 17:40:04.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:04.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.455 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.455 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.455 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.492 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.493 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.497 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json +2026-01-30 17:40:04.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json already exists in the corpus 8. 
+2026-01-30 17:40:04.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.506 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.506 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.506 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.539 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.540 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Waymo's robotaxis to start carrying passengers in Atlanta.json +2026-01-30 17:40:04.545 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Waymo's robotaxis to start carrying passengers in Atlanta.json already exists in the corpus 8. +2026-01-30 17:40:04.545 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.598 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:04.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.602 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json +2026-01-30 17:40:04.604 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json already exists in the corpus 8. +2026-01-30 17:40:04.604 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.614 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.614 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.614 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.651 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.652 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:40:04.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id 23-year-old Ukrainian refugee killed on North Carolina transit system.json already exists in the corpus 8. 
+2026-01-30 17:40:04.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.665 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.665 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.665 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.706 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.707 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.711 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:40:04.712 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id 23-year-old Ukrainian refugee killed on North Carolina transit system.json already exists in the corpus 8. +2026-01-30 17:40:04.712 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.721 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.722 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.722 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.740 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:04.740 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.744 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json +2026-01-30 17:40:04.746 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json already exists in the corpus 8. +2026-01-30 17:40:04.746 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.756 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.756 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.756 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.835 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.836 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.840 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Adams case and other Trump moves threaten to open corruption floodgates, experts say.json +2026-01-30 17:40:04.841 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Adams case and other Trump moves threaten to open corruption floodgates, experts say.json already exists in the corpus 8. 
+2026-01-30 17:40:04.841 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.876 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.876 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.879 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json +2026-01-30 17:40:04.880 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json already exists in the corpus 8. +2026-01-30 17:40:04.880 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:04.926 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.927 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.931 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Agency that handles green cards and citizenship to hire armed agents who can make arrests.json +2026-01-30 17:40:04.933 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Agency that handles green cards and citizenship to hire armed agents who can make arrests.json already exists in the corpus 8. +2026-01-30 17:40:04.933 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.943 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.943 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.943 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.971 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.972 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:04.976 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json +2026-01-30 17:40:04.977 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json already exists in the corpus 8. +2026-01-30 17:40:04.977 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.026 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.027 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.029 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json +2026-01-30 17:40:05.030 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json already exists in the corpus 8. +2026-01-30 17:40:05.030 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:05.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.059 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.060 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.063 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json +2026-01-30 17:40:05.065 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json already exists in the corpus 8. +2026-01-30 17:40:05.065 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.071 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.071 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.071 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.115 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:05.115 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.119 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Biden administration has no plans to fine companies if TikTok ban goes into effect.json +2026-01-30 17:40:05.120 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Biden administration has no plans to fine companies if TikTok ban goes into effect.json already exists in the corpus 8. +2026-01-30 17:40:05.120 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.157 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.158 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.161 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json +2026-01-30 17:40:05.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json already exists in the corpus 8. 
+2026-01-30 17:40:05.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.195 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.196 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.199 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json +2026-01-30 17:40:05.201 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json already exists in the corpus 8. +2026-01-30 17:40:05.201 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.210 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.210 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.210 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:05.242 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.242 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.245 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Civil rights agency sued over handling of trans worker discrimination complaints.json +2026-01-30 17:40:05.247 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Civil rights agency sued over handling of trans worker discrimination complaints.json already exists in the corpus 8. +2026-01-30 17:40:05.247 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing DHS has begun performing polygraph tests on employees to find leakers.json +2026-01-30 17:40:05.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id DHS has begun performing polygraph tests on employees to find leakers.json already exists in the corpus 8. +2026-01-30 17:40:05.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.282 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.282 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.282 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.309 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.310 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.314 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json +2026-01-30 17:40:05.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json already exists in the corpus 8. +2026-01-30 17:40:05.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.371 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.378 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing How much money you should save for a comfortable retirement.json +2026-01-30 17:40:05.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id How much money you should save for a comfortable retirement.json already exists in the corpus 8. +2026-01-30 17:40:05.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.389 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.389 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.389 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.411 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.413 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.416 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json +2026-01-30 17:40:05.418 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:05.418 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.428 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.428 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.428 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.453 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.453 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.457 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Lakers star Luka Dončić says he took a month off from basketball to transform his body.json +2026-01-30 17:40:05.458 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Lakers star Luka Dončić says he took a month off from basketball to transform his body.json already exists in the corpus 8. +2026-01-30 17:40:05.458 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:05.468 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.468 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.468 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.490 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.490 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.493 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Musk's brain implant company filed as a 'disadvantaged business'.json +2026-01-30 17:40:05.495 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Musk's brain implant company filed as a 'disadvantaged business'.json already exists in the corpus 8. +2026-01-30 17:40:05.495 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.502 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.502 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.502 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.530 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.533 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json +2026-01-30 17:40:05.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json already exists in the corpus 8. +2026-01-30 17:40:05.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.542 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.542 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.560 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing New York Jets to sign QB Justin Fields, according to reports.json +2026-01-30 17:40:05.561 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id New York Jets to sign QB Justin Fields, according to reports.json already exists in the corpus 8. +2026-01-30 17:40:05.561 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.592 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.593 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.597 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json +2026-01-30 17:40:05.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json already exists in the corpus 8. +2026-01-30 17:40:05.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.641 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json +2026-01-30 17:40:05.644 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json already exists in the corpus 8. +2026-01-30 17:40:05.644 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.691 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.691 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.695 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Sen. Michael Bennet will run for governor of Colorado in 2026.json +2026-01-30 17:40:05.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Sen. Michael Bennet will run for governor of Colorado in 2026.json already exists in the corpus 8. +2026-01-30 17:40:05.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.704 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.704 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.704 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.754 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.755 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.758 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json +2026-01-30 17:40:05.759 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json already exists in the corpus 8. +2026-01-30 17:40:05.760 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.767 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.767 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.767 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.818 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.819 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.822 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Target says its holiday sales were better than expected — but its profits weren't.json +2026-01-30 17:40:05.824 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Target says its holiday sales were better than expected — but its profits weren't.json already exists in the corpus 8. +2026-01-30 17:40:05.824 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.833 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.833 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.833 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.881 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.882 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.886 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 14 best toothpastes for clean, healthy teeth in 2025.json +2026-01-30 17:40:05.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 14 best toothpastes for clean, healthy teeth in 2025.json already exists in the corpus 8. +2026-01-30 17:40:05.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.900 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.900 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.900 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.002 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.005 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.009 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json +2026-01-30 17:40:06.010 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json already exists in the corpus 8. +2026-01-30 17:40:06.010 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.047 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.048 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:06.051 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing These three LGBTQ women just made congressional 'herstory'.json +2026-01-30 17:40:06.052 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id These three LGBTQ women just made congressional 'herstory'.json already exists in the corpus 8. +2026-01-30 17:40:06.052 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.062 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.062 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.062 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.080 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.081 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.084 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Thousands of U.S. seniors deal with the harsh realities of homelessness.json +2026-01-30 17:40:06.087 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Thousands of U.S. seniors deal with the harsh realities of homelessness.json already exists in the corpus 8. +2026-01-30 17:40:06.087 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.096 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:06.096 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.096 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.148 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.149 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.152 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json +2026-01-30 17:40:06.153 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json already exists in the corpus 8. +2026-01-30 17:40:06.153 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.217 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.218 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:06.223 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump wants automakers to move vehicle production to the U.S. It's not that simple..json +2026-01-30 17:40:06.224 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump wants automakers to move vehicle production to the U.S. It's not that simple..json already exists in the corpus 8. +2026-01-30 17:40:06.224 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.234 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.234 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.234 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.267 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.268 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json +2026-01-30 17:40:06.273 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json already exists in the corpus 8. +2026-01-30 17:40:06.273 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:06.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.318 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.319 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json +2026-01-30 17:40:06.324 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json already exists in the corpus 8. +2026-01-30 17:40:06.324 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:06.353 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.355 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json +2026-01-30 17:40:06.357 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:06.357 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.401 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.402 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.406 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json +2026-01-30 17:40:06.408 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json already exists in the corpus 8. 
+2026-01-30 17:40:06.408 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.415 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.415 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.415 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.441 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.442 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.445 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Waymo's robotaxis to start carrying passengers in Atlanta.json +2026-01-30 17:40:06.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Waymo's robotaxis to start carrying passengers in Atlanta.json already exists in the corpus 8. +2026-01-30 17:40:06.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:06.500 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.503 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json +2026-01-30 17:40:06.505 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json already exists in the corpus 8. +2026-01-30 17:40:06.505 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.517 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.517 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.517 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.890 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing the Corpus GerParCor_Reichstag +2026-01-30 17:40:06.890 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done with the corpus postprocessing. +2026-01-30 17:40:06.890 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - + +================================= + Done with the corpus import. 
diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index bba58c2d..fecbf16e 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -35,12 +35,14 @@ import org.texttechnologylab.uce.common.config.CorpusConfig; import org.texttechnologylab.uce.common.exceptions.DatabaseOperationException; import org.texttechnologylab.uce.common.exceptions.ExceptionUtils; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.UIMAAnnotation; import org.texttechnologylab.uce.common.models.biofid.BiofidTaxon; import org.texttechnologylab.uce.common.models.biofid.GazetteerTaxon; import org.texttechnologylab.uce.common.models.biofid.GnFinderTaxon; import org.texttechnologylab.uce.common.models.corpus.*; import org.texttechnologylab.uce.common.models.corpus.emotion.Feeling; +import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotions; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationLink; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationToDocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; @@ -54,10 +56,7 @@ import org.texttechnologylab.uce.common.models.negation.*; import org.texttechnologylab.uce.common.models.rag.DocumentChunkEmbedding; import org.texttechnologylab.uce.common.models.rag.DocumentSentenceEmbedding; -import org.texttechnologylab.uce.common.models.topic.TopicValueBase; -import org.texttechnologylab.uce.common.models.topic.TopicValueBaseWithScore; -import org.texttechnologylab.uce.common.models.topic.TopicWord; -import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; +import 
org.texttechnologylab.uce.common.models.topic.*; import org.texttechnologylab.uce.common.services.*; import org.texttechnologylab.uce.common.utils.*; @@ -69,10 +68,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Executors; +import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -81,7 +77,8 @@ import java.util.zip.ZipInputStream; public class Importer { - + + public static final Map IMPORT_PROGRESS = new ConcurrentHashMap<>(); // Counter for UI Import Loading Bar private static final Gson gson = new Gson(); private static final Logger logger = LogManager.getLogger(Importer.class); private static final int BATCH_SIZE = 2000; @@ -256,6 +253,8 @@ public void storeCorpusFromFolderAsync(String folderName, int numThreads) throws var docInBatch = new AtomicInteger(0); var lock = new Object(); var batchLatch = new AtomicReference<>(new CountDownLatch(0)); + + IMPORT_PROGRESS.put(this.importId,new AtomicInteger(0)); try (var fileStream = Files.walk(inputFolderName)) { fileStream.filter(Files::isRegularFile) @@ -280,6 +279,9 @@ public void storeCorpusFromFolderAsync(String folderName, int numThreads) throws return doc; }) .thenAcceptAsync(doc -> { + if (Importer.IMPORT_PROGRESS != null && Importer.IMPORT_PROGRESS .containsKey(importId)){ + Importer.IMPORT_PROGRESS.get(importId).incrementAndGet(); + } if (doc != null) { logImportInfo("Stored document " + filePath.getFileName(), LogStatus.SAVED, filePath.toString(), 0); logger.info("Finished with the UIMA annotations - postprocessing the doc now."); @@ -361,6 +363,16 @@ public void storeCorpusFromFolderAsync(String folderName, int numThreads) throws ExceptionUtils.tryCatchLog( () -> postProccessCorpus(corpus1, 
corpusConfigFinal), (ex) -> logger.error("Error in the final postprocessing of the current corpus with id " + corpus1.getId())); + + // Setting Import-Status to FINISHED + if (this.importerNumber == 1){ + ExceptionUtils.tryCatchLog(()->{ + var finalUceImport = db.getUceImportByImportId(this.importId); + finalUceImport.setStatus(ImportStatus.FINISHED); + db.saveOrUpdateUceImport(finalUceImport); + return null; + },ex -> logger.error("Error when trying to set import-status to FINISHED")); + } logger.info("\n\n=================================\n Done with the corpus import."); executor.shutdown(); @@ -384,6 +396,8 @@ public static Corpus CreateDBCorpus(Corpus corpus, CorpusConfig corpusConfig, Po if (corpusConfig.isAddToExistingCorpus()) { var existingCorpus = db.getCorpusByName(corpusConfig.getName()); if (existingCorpus != null) { // If we have the corpus, use that. + existingCorpus.setCorpusJsonConfig(gson.toJson(corpusConfig)); + db.updateCorpusJsonConfig(existingCorpus.getId(),existingCorpus.getCorpusJsonConfig()); return existingCorpus; } throw new DatabaseOperationException("The corpus config specified to add to an existing corpus, " + @@ -529,6 +543,10 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String + " already exists in the corpus " + corpus.getId() + "."); logger.info("Checking if that document was also post-processed yet..."); var existingDoc = db.getDocumentByCorpusAndDocumentId(corpus.getId(), document.getDocumentId()); + + appendNewEmotionsToExistingDocument(existingDoc,jCas); + appendNewSentenceTopicsToExistingDocument(existingDoc, jCas); + if (!existingDoc.isPostProcessed()) { logger.info("Not yet post-processed. 
Doing that now."); postProccessDocument(existingDoc, corpus, filePath); @@ -605,9 +623,18 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String if (corpusConfig.getAnnotations().isEmotion()) ExceptionUtils.tryCatchLog( - () -> setEmotions(document, jCas), + () -> { + setEmotions(document, jCas); + }, (ex) -> logImportWarn("This file should have contained Emotion annotations, but selecting them caused an error.", ex, filePath)); + if (corpusConfig.getAnnotations().isTopic()) + ExceptionUtils.tryCatchLog( + () -> { + setSentenceTopics(document, jCas); + }, + (ex) -> logImportWarn("This file should have contained Topic annotations, but selecting them caused an error.", ex, filePath)); + if (corpusConfig.getAnnotations().isLemma()) ExceptionUtils.tryCatchLog( () -> setLemmata(document, jCas), @@ -679,7 +706,26 @@ private void setEmotions(Document document, JCas jCas) { var emotion = new org.texttechnologylab.uce.common.models.corpus.emotion.Emotion(e.getBegin(), e.getEnd()); emotion.setCoveredText(e.getCoveredText()); var meta = e.getModel(); - if (meta != null) emotion.setModel(meta.getModelName() + "__v::" + meta.getModelVersion()); + // set emotion model + // Usually getModel() returns the map (at least for the models we tested it with), so getModelEntityByKey might be redundant + ModelEntity foundModal = null; + if (meta != null){ + String modelNameFromXmi = meta.getModelName(); + logger.info("Searching for model " + modelNameFromXmi); + try{ + foundModal = db.getModelEntityByKey(meta.getModelName()); + if(foundModal == null){ + foundModal = db.getModelEntityByMap(modelNameFromXmi); + } + } catch (DatabaseOperationException ex) { + logger.error("Error when looking for model in database " + meta.getModelName()); + } + } + if (foundModal != null){ + emotion.setDbModel(foundModal); + }else{ + logger.warn("Modal not found in database"); + } var feelings = new ArrayList(); for (var annotationComment : e.getEmotions()) { @@ -1430,6 +1476,97 
@@ private void setNamedEntities(Document document, JCas jCas) { document.setNamedEntities(nes); logger.info("Setting Named-Entities done."); } + private void setSentenceTopics(Document document, JCas jCas) { + List newTopics = extractSentenceTopics(document, jCas); + + List sentenceTopics = document.getSentenceTopics(); + if (sentenceTopics == null) { + sentenceTopics = new ArrayList<>(); + document.setSentenceTopics(sentenceTopics); + } + + sentenceTopics.addAll(newTopics); + + logger.info("Setting SentenceTopics done. Count={}", sentenceTopics.size()); + } + + private List extractSentenceTopics(Document document, JCas jCas) { + List sentenceTopics = new ArrayList<>(); + + Map sentenceBySpan = document.getSentences().stream() + .collect(Collectors.toMap( + s -> s.getBegin() + "_" + s.getEnd(), + s -> s, + (a, b) -> a + )); + + for (org.texttechnologylab.annotation.Topic topicSpan : JCasUtil.select(jCas, org.texttechnologylab.annotation.Topic.class)) { + Sentence sentence = sentenceBySpan.get(topicSpan.getBegin() + "_" + topicSpan.getEnd()); + if (sentence == null) { + continue; + } + + String modelName = "unknown"; + try { + if (topicSpan.getModel() != null && topicSpan.getModel().getModelName() != null) { + modelName = topicSpan.getModel().getModelName(); + } + } catch (Exception ignored) { + } + + ModelEntity foundModel = null; + try { + foundModel = db.getModelEntityByKey(modelName); + if (foundModel == null) { + foundModel = db.getModelEntityByMap(modelName); + } + } catch (DatabaseOperationException ex) { + logger.error("Error when looking for topic model in database {}", modelName, ex); + } + + if (foundModel == null) { + logger.warn("Topic model not found in DB: {}", modelName); + continue; + } + + var topicsArr = topicSpan.getTopics(); + if (topicsArr == null || topicsArr.size() == 0) { + continue; + } + + for (int i = 0; i < topicsArr.size(); i++) { + var fs = topicsArr.get(i); + if (!(fs instanceof AnnotationComment comment)) { + continue; + } + + 
String label = comment.getKey(); + String valueStr = comment.getValue(); + if (label == null || label.isBlank() || valueStr == null || valueStr.isBlank()) { + continue; + } + + double score; + try { + score = Double.parseDouble(valueStr); + } catch (NumberFormatException ex) { + continue; + } + + SentenceTopic st = new SentenceTopic(); + st.setDocument(document); + st.setSentence(sentence); + st.setModel(foundModel); + st.setTopicLabel(label); + st.setScore(score); + //st.setUnifiedTopic(foundUnifiedTopic); + + sentenceTopics.add(st); + } + } + + return sentenceTopics; + } /** * Selects and sets the sentences to a document @@ -1792,6 +1929,14 @@ private void postProccessCorpus(Corpus corpus, CorpusConfig corpusConfig) { logger.info("Inserting into Document and Corpus Topic word tables..."); try { + Path insertDocumentTopicsFilePath = Paths.get(commonConfig.getDatabaseScriptsLocation(), "topic/2_updateDocumentTopics.sql"); + var insertDocumentTopicsScript = Files.readString(insertDocumentTopicsFilePath); + + ExceptionUtils.tryCatchLog( + () -> db.executeSqlWithoutReturn(insertDocumentTopicsScript), + (ex) -> logger.error("Error executing SQL script to populate documenttopicsraw table", ex) + ); + Path insertDocumentTopicWordFilePath = Paths.get(commonConfig.getDatabaseScriptsLocation(), "topic/3_updateDocumentTopicWord.sql"); var insertDocumentTopicWordScript = Files.readString(insertDocumentTopicWordFilePath); @@ -1826,6 +1971,18 @@ private void postProccessDocument(Document document, Corpus corpus, String fileP var start = System.currentTimeMillis(); var corpusConfig = corpus.getViewModel().getCorpusConfig(); + // build unifiedtopic + link sentencetopics.unifiedtopic_id + ExceptionUtils.tryCatchLog( + () -> db.ensureUnifiedTopicsForSentenceTopics(document.getId()), + (ex) -> logImportError("Error creating/linking unifiedtopic rows for sentence topics.", ex, filePath) + ); + + ExceptionUtils.tryCatchLog( + () -> db.createSentenceEmotions(document.getId()), + (ex) 
-> logImportError("Error creating/linking sentenceEmotion rows for sentences.", ex, filePath) + ); + + // Store simple connections between Time, Geonames and Annotation to approximate the question: // This annotation occurred in context with this location at this time. // TODO: This needs a check if the document already was linked before. Sometimes docs are preprocessed when they already exist. @@ -1989,13 +2146,14 @@ private void postProccessDocument(Document document, Corpus corpus, String fileP logger.info("Inserting Sentence and Document Topics..."); try { - Path insertSentenceTopicsFilePath = Paths.get(commonConfig.getDatabaseScriptsLocation(), "topic/1_updateSentenceTopics.sql"); + /**Path insertSentenceTopicsFilePath = Paths.get(commonConfig.getDatabaseScriptsLocation(), "topic/1_updateSentenceTopics.sql"); var insertSentenceTopicsScript = Files.readString(insertSentenceTopicsFilePath); ExceptionUtils.tryCatchLog( () -> db.executeSqlWithoutReturn(insertSentenceTopicsScript), (ex) -> logImportError("Error executing SQL script to populate sentencetopics table", ex, filePath) ); + */ Path insertDocumentTopicsFilePath = Paths.get(commonConfig.getDatabaseScriptsLocation(), "topic/2_updateDocumentTopics.sql"); var insertDocumentTopicsScript = Files.readString(insertDocumentTopicsFilePath); @@ -2158,4 +2316,90 @@ private void logImportError(String message, Exception ex, String file) { logger.error(message, ex); } + /** + * Extracts Emotion annotations from a given JCas, + * then appends them to an already existing Document. 
+ */ + private void appendNewEmotionsToExistingDocument(Document existingDoc, JCas jCas){ + var newEmotions = new ArrayList(); + JCasUtil.select(jCas, Emotion.class).forEach(e -> { + // extract emotion + var emotion = new org.texttechnologylab.uce.common.models.corpus.emotion.Emotion(e.getBegin(),e.getEnd()); + emotion.setCoveredText(e.getCoveredText()); + // extract model + var meta = e.getModel(); + ModelEntity foundModel = null; + if(meta!=null){ + String modelNameFromXmi = meta.getModelName(); + try{ + // Usually getModel() returns the map (at least for the models we tested it with), so getModelEntityByKey might be redundant + foundModel = db.getModelEntityByKey(modelNameFromXmi); + if (foundModel == null) foundModel = db.getModelEntityByMap(modelNameFromXmi); + } catch (DatabaseOperationException ex) { + logger.error("Error when looking for model in database when trying to append new emotions to an existing document" + modelNameFromXmi); + } + } + if(foundModel != null) emotion.setDbModel(foundModel); + + // extract feelings + var feelings = new ArrayList(); + for (var annotationComment : e.getEmotions()){ + var feeling = new Feeling(); + feeling.setEmotion(emotion); + ExceptionUtils.tryCatchLog(() -> feeling.setValue(Double.parseDouble(annotationComment.getValue())),(ex) -> {}); + feeling.setFeeling(annotationComment.getKey()); + feelings.add(feeling); + } + emotion.setFeelings(feelings); + newEmotions.add(emotion); + }); + + if(!newEmotions.isEmpty()){ + if(existingDoc.getEmotions() == null){ + existingDoc.setEmotions(new ArrayList<>()); + } + ExceptionUtils.tryCatchLog( + () -> db.saveNewEmotionsForDocument(existingDoc.getId(),newEmotions), + (ex) -> logger.error("Error when saving new emotions to existing document" + existingDoc.getId(), ex) + ); + ExceptionUtils.tryCatchLog( + () -> db.createSentenceEmotions(existingDoc.getId()), + (ex) -> logger.error("Error when creating sentence emotions after saving new emotions to existing document" + 
existingDoc.getId(), ex) + ); + } + + } + + /** + * Extracts and appends new sentence topics to an existing document, + * then ensures unified topics are updated accordingly. + */ + private void appendNewSentenceTopicsToExistingDocument(Document existingDoc, JCas jCas) { + List newSentenceTopics = extractSentenceTopics(existingDoc, jCas); + + if (newSentenceTopics.isEmpty()) { + return; + } + + ExceptionUtils.tryCatchLog( + () -> db.saveNewSentenceTopicsForDocument(existingDoc.getId(), newSentenceTopics), + (ex) -> logger.error( + "Error when saving new sentence topics to existing document {}", + existingDoc.getId(), + ex + ) + ); + ExceptionUtils.tryCatchLog( + () -> db.ensureUnifiedTopicsForSentenceTopics(existingDoc.getId()), + (ex) -> logger.error( + "Error when ensuring unified topics for existing document {}", + existingDoc.getId(), + ex + ) + ); + + logger.info("Added {} sentence topics to existing document {}", newSentenceTopics.size(), existingDoc.getId()); + } + + } diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java index 97534c64..da1c10e4 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java @@ -93,7 +93,7 @@ public static void main(String[] args) throws IOException { () -> new AnnotationConfigApplicationContext(SpringConfig.class), (ex) -> logger.fatal("========== [ABORT] ==========\nThe Application context couldn't be established. " + "This is very likely due to a missing/invalid database connection. 
UCE will have to shutdown.")); - if(context == null) return; + if (context == null) return; logger.info("Loaded application context and services."); // Execute the external database scripts @@ -115,7 +115,7 @@ public static void main(String[] args) throws IOException { logger.info(languageResource.get("search")); // Load in and test the model resources for the Analysis Engine - if(SystemStatus.UceConfig.getSettings().getAnalysis().isEnableAnalysisEngine()){ + if (SystemStatus.UceConfig.getSettings().getAnalysis().isEnableAnalysisEngine()) { var modelResources = new ModelResources(); var ttlabScorer = new TTLabScorerInfo(); var cohMetrixInfo = new CohMetrixInfo(); @@ -134,7 +134,7 @@ public static void main(String[] args) throws IOException { SystemStatus.LexiconIsCalculating = true; var lexiconService = context.getBean(LexiconService.class); var addedLexiconEntries = 0; - if(forceLexicalization) addedLexiconEntries = lexiconService.updateLexicon(true); + if (forceLexicalization) addedLexiconEntries = lexiconService.updateLexicon(true); else addedLexiconEntries = lexiconService.checkForUpdates(); logger.info("Finished updating the lexicon. Added new entries: " + addedLexiconEntries); SystemStatus.LexiconIsCalculating = false; @@ -142,23 +142,23 @@ public static void main(String[] args) throws IOException { logger.info("Checking if we can or should update any linkables... (this may take a moment depending on the time of the last update. Runs asynchronous.)"); CompletableFuture.runAsync(() -> { - try{ + try { var result = context.getBean(PostgresqlDataInterface_Impl.class).callLogicalLinksRefresh(); logger.info("Finished updating the linkables. Updated linkables: " + result); - } catch (Exception ex){ + } catch (Exception ex) { logger.error("There was an error trying to refresh linkables in the startup of the web app. App starts normally though."); } }); logger.info("Checking if we can or should update any geoname locations... 
(this may take a moment depending on the time of the last update. Runs asynchronous.)"); CompletableFuture.runAsync(() -> { - try{ + try { var result = context.getBean(PostgresqlDataInterface_Impl.class).callGeonameLocationRefresh(); logger.info("Finished updating the geoname locations. Updated locations: " + result); logger.info("Trying to refresh the timeline map cache..."); context.getBean(MapService.class).refreshCachedTimelineMap(false); logger.info("Finished refreshing the timeline map."); - } catch (Exception ex){ + } catch (Exception ex) { logger.error("There was an error trying to refresh geoname locations in the startup of the web app. App starts normally though."); } }); @@ -176,8 +176,7 @@ public static void main(String[] args) throws IOException { // We use the externalLocation method so that the files in the public folder are hot reloaded if (commonConfig.useExternalPublicLocation()) { config.staticFiles.add(commonConfig.getPublicLocation(), Location.EXTERNAL); - } - else { + } else { config.staticFiles.add("/public", Location.CLASSPATH); } logger.info("Setup FreeMarker templates and public folders."); @@ -200,8 +199,7 @@ public static void main(String[] args) throws IOException { ExceptionUtils.tryCatchLog(() -> initMCP(registry, config), (ex) -> logger.error("There was a problem initializing the MCP server, web service will be shut down.", ex)); logger.info("MCP server initialized."); - } - else { + } else { logger.info("MCP server is disabled and will not be initialized."); } config.jsonMapper(mapper); @@ -350,174 +348,184 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi List groups = modelResources.getGroupedModelObjects(); config.router.apiBuilder(() -> { - before(ctx -> { - ctx.res().setCharacterEncoding("UTF-8"); - // Setup and log all API calls with some information. We don't want to log file uploads, since it would - // destroy the file body stream. 
- if (!(ctx.contentType() != null && ctx.contentType().contains("multipart/form-data"))) { - ctx.attribute("id", UUID.randomUUID().toString()); - logger.info("Received API call: ID={}, IP={}, Method={}, URI={}, QUERY={}, BODY={}", - ctx.attribute("id"), ctx.ip(), ctx.method().name(), ctx.url(), ctx.queryString(), ctx.body()); - - // Should we log to db as well? - if (commonConfig.getLogToDb() && SystemStatus.PostgresqlDbStatus.isAlive()) { - var uceLog = new UCELog(ctx.ip(), ctx.method().name(), ctx.url(), ctx.body(), ctx.queryString()); - ExceptionUtils.tryCatchLog( - () -> context.getBean(PostgresqlDataInterface_Impl.class).saveUceLog(uceLog), - (ex) -> logger.error("Error storing a log to the database: ", ex)); - logger.info("Last log was also logged to the db with id " + uceLog.getId()); - } - } else { - // Else we have a form-data upload. We handle those explicitly. - // Set the multipart data configs for uploads - ctx.req().setAttribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/tmp")); - } - - // Always inject the current system config into all UI templates - RequestContextHolder.setUceConfigHolder(SystemStatus.UceConfig); - - // Check if the request contains a language parameter - var languageResources = LanguageResources.fromRequest(ctx); - ctx.header("Content-Language", languageResources.getDefaultLanguage()); - RequestContextHolder.setLanguageResources(languageResources); - - // Check if we have an authenticated user in the session and inject it into the template - if (SystemStatus.UceConfig.getSettings().getAuthentication().isActivated()) { - var user = SessionManager.getUserFromRequest(ctx); - RequestContextHolder.setAuthenticatedUceUser(user); - } - }); + before(ctx -> { + ctx.res().setCharacterEncoding("UTF-8"); + // Setup and log all API calls with some information. We don't want to log file uploads, since it would + // destroy the file body stream. 
+ if (!(ctx.contentType() != null && ctx.contentType().contains("multipart/form-data"))) { + ctx.attribute("id", UUID.randomUUID().toString()); + logger.info("Received API call: ID={}, IP={}, Method={}, URI={}, QUERY={}, BODY={}", + ctx.attribute("id"), ctx.ip(), ctx.method().name(), ctx.url(), ctx.queryString(), ctx.body()); + + // Should we log to db as well? + if (commonConfig.getLogToDb() && SystemStatus.PostgresqlDbStatus.isAlive()) { + var uceLog = new UCELog(ctx.ip(), ctx.method().name(), ctx.url(), ctx.body(), ctx.queryString()); + ExceptionUtils.tryCatchLog( + () -> context.getBean(PostgresqlDataInterface_Impl.class).saveUceLog(uceLog), + (ex) -> logger.error("Error storing a log to the database: ", ex)); + logger.info("Last log was also logged to the db with id " + uceLog.getId()); + } + } else { + // Else we have a form-data upload. We handle those explicitly. + // Set the multipart data configs for uploads + ctx.req().setAttribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/tmp")); + } + + // Always inject the current system config into all UI templates + RequestContextHolder.setUceConfigHolder(SystemStatus.UceConfig); + + // Check if the request contains a language parameter + var languageResources = LanguageResources.fromRequest(ctx); + ctx.header("Content-Language", languageResources.getDefaultLanguage()); + RequestContextHolder.setLanguageResources(languageResources); + + // Check if we have an authenticated user in the session and inject it into the template + if (SystemStatus.UceConfig.getSettings().getAuthentication().isActivated()) { + var user = SessionManager.getUserFromRequest(ctx); + RequestContextHolder.setAuthenticatedUceUser(user); + } + }); + + // Landing page + get("/", ctx -> { + var model = new HashMap(); + model.put("title", SystemStatus.UceConfig.getMeta().getName()); + model.put("corpora", context.getBean(PostgresqlDataInterface_Impl.class) + .getAllCorpora() + .stream().map(Corpus::getViewModel) + .toList()); 
+ model.put("commonConf", commonConfig); + model.put("isSparqlAlive", SystemStatus.JenaSparqlStatus.isAlive()); + model.put("isAuthAlive", SystemStatus.AuthenticationService.isAlive()); + model.put("isDbAlive", SystemStatus.PostgresqlDbStatus.isAlive()); + model.put("isRagAlive", SystemStatus.RagServiceStatus.isAlive()); + model.put("isS3StorageAlive", SystemStatus.S3StorageStatus.isAlive()); + model.put("isLexiconCalculating", SystemStatus.LexiconIsCalculating); + model.put("alphabetList", StringUtils.getAlphabetAsList()); + model.put("lexiconEntriesCount", context.getBean(LexiconService.class).countLexiconEntries()); + model.put("lexiconizableAnnotations", LexiconService.lexiconizableAnnotations); + model.put("uceVersion", commonConfig.getUceVersion()); + model.put("modelGroups", groups); + model.put("ttlabScorer", taInputMap); + model.put("cohMetrix", cohMetrixMap); + + // The vm files are located under the resources directory + ctx.render("index.ftl", model); + }); + + // Potential imprint + get("/imprint", ctx -> { + var model = new HashMap(); + model.put("imprint", SystemStatus.UceConfig.getCorporate().getImprint()); + ctx.render("imprint.ftl", model); + }); + + // A document reader view + get("/documentReader", (ctx) -> (registry.get(DocumentApi.class)).getSingleDocumentReadView(ctx)); + + // A corpus World View + get("/globe", (ctx) -> (registry.get(DocumentApi.class)).get3dGlobe(ctx)); + + + path("/auth", () -> { + get("/login", (ctx) -> (registry.get(AuthenticationApi.class)).loginCallback(ctx)); + get("/logout", (ctx) -> (registry.get(AuthenticationApi.class)).logoutCallback(ctx)); + }); + + // API routes + path("/api", () -> { + before("/*", (ctx) -> { + }); - // Landing page - get("/", ctx -> { - var model = new HashMap(); - model.put("title", SystemStatus.UceConfig.getMeta().getName()); - model.put("corpora", context.getBean(PostgresqlDataInterface_Impl.class) - .getAllCorpora() - .stream().map(Corpus::getViewModel) - .toList()); - 
model.put("commonConf", commonConfig); - model.put("isSparqlAlive", SystemStatus.JenaSparqlStatus.isAlive()); - model.put("isAuthAlive", SystemStatus.AuthenticationService.isAlive()); - model.put("isDbAlive", SystemStatus.PostgresqlDbStatus.isAlive()); - model.put("isRagAlive", SystemStatus.RagServiceStatus.isAlive()); - model.put("isS3StorageAlive", SystemStatus.S3StorageStatus.isAlive()); - model.put("isLexiconCalculating", SystemStatus.LexiconIsCalculating); - model.put("alphabetList", StringUtils.getAlphabetAsList()); - model.put("lexiconEntriesCount", context.getBean(LexiconService.class).countLexiconEntries()); - model.put("lexiconizableAnnotations", LexiconService.lexiconizableAnnotations); - model.put("uceVersion", commonConfig.getUceVersion()); - model.put("modelGroups", groups); - model.put("ttlabScorer", taInputMap); - model.put("cohMetrix", cohMetrixMap); - - // The vm files are located under the resources directory - ctx.render("index.ftl", model); + path("/ie", () -> { + post("/upload/uima", (ctx) -> (registry.get(ImportExportApi.class)).uploadUIMA(ctx)); + get("/download/uima", (ctx) -> (registry.get(ImportExportApi.class)).downloadUIMA(ctx)); + post("/import/upload",(ctx) -> (registry.get(ImportExportApi.class)).importCorpusFromUpload(ctx)); + get("import/status/{importId}",(ctx) -> (registry.get(ImportExportApi.class)).getImportStatus(ctx)); + }); + + path("/wiki", () -> { + get("/page", (ctx) -> (registry.get(WikiApi.class)).getPage(ctx)); + get("/annotation", (ctx) -> (registry.get(WikiApi.class)).getAnnotation(ctx)); + path("/linkable", () -> { + post("/node", (ctx) -> (registry.get(WikiApi.class)).getLinkableNode(ctx)); + }); + path("/lexicon", () -> { + post("/entries", (ctx) -> (registry.get(WikiApi.class)).getLexicon(ctx)); + post("/occurrences", (ctx) -> (registry.get(WikiApi.class)).getOccurrencesOfLexiconEntry(ctx)); }); + post("/queryOntology", (ctx) -> (registry.get(WikiApi.class)).queryOntology(ctx)); + }); - // Potential imprint - 
get("/imprint", ctx -> { - var model = new HashMap(); - model.put("imprint", SystemStatus.UceConfig.getCorporate().getImprint()); - ctx.render("imprint.ftl", model); + path("/corpus", () -> { + get("/inspector", (ctx) -> (registry.get(DocumentApi.class)).getCorpusInspectorView(ctx)); + get("/documentsList", (ctx) -> (registry.get(DocumentApi.class)).getDocumentListOfCorpus(ctx)); + path("/map", () -> { + post("/linkedOccurrences", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrences(ctx)); + post("/linkedOccurrenceClusters", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrenceClusters(ctx)); }); + delete("/delete",(ctx) -> (registry.get(DocumentApi.class)).deleteCorpus(ctx)); + }); - // A document reader view - get("/documentReader", (ctx) -> (registry.get(DocumentApi.class)).getSingleDocumentReadView(ctx)); + path("/search", () -> { + post("/default", (ctx) -> (registry.get(SearchApi.class)).search(ctx)); + post("/semanticRole", (ctx) -> (registry.get(SearchApi.class)).semanticRoleSearch(ctx)); + post("/layered", (ctx) -> (registry.get(SearchApi.class)).layeredSearch(ctx)); + get("/active/page", (ctx) -> (registry.get(SearchApi.class)).activeSearchPage(ctx)); + get("/active/sort", (ctx) -> (registry.get(SearchApi.class)).activeSearchSort(ctx)); + get("/semanticRole/builder", (ctx) -> (registry.get(SearchApi.class)).getSemanticRoleBuilderView(ctx)); + }); - // A corpus World View - get("/globe", (ctx) -> (registry.get(DocumentApi.class)).get3dGlobe(ctx)); + path("/analysis", () -> { + post("/runPipeline", (ctx) -> (registry.get(AnalysisApi.class)).runPipeline(ctx)); + get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx)); + post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx)); + post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx)); + post("/importCas", (registry.get(AnalysisApi.class)).importCas); //added the importCas path + }); + path("/corpusUniverse", () -> { + 
// Gets a corpus universe view + get("/new", (ctx) -> (registry.get(CorpusUniverseApi.class)).getCorpusUniverseView(ctx)); + post("/fromSearch", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromSearch(ctx)); + post("/fromCorpus", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromCorpus(ctx)); + get("/nodeInspectorContent", (ctx) -> (registry.get(CorpusUniverseApi.class)).getNodeInspectorContentView(ctx)); + }); - path("/auth", () -> { - get("/login", (ctx) -> (registry.get(AuthenticationApi.class)).loginCallback(ctx)); - get("/logout", (ctx) -> (registry.get(AuthenticationApi.class)).logoutCallback(ctx)); - }); + path("/document", () -> { + get("/reader/pagesList", (ctx) -> (registry.get(DocumentApi.class)).getPagesListView(ctx)); + get("/uceMetadata", (ctx) -> (registry.get(DocumentApi.class)).getUceMetadataOfDocument(ctx)); + get("/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopics(ctx)); + get("/page/taxon", (ctx) -> (registry.get(DocumentApi.class)).getTaxonCountByPage(ctx)); + get("/page/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopicDistributionByPage(ctx)); + get("/topicModels", (ctx) -> (registry.get(DocumentApi.class)).getTopicModels(ctx)); + get("/topicModelOverview", (ctx) -> (registry.get(DocumentApi.class)).getTopicModelOverview(ctx)); + get("/topicModelPageCounts", (ctx) -> (registry.get(DocumentApi.class)).getTopicModelPageCounts(ctx)); + get("/page/topicEntityRelation", (ctx) -> (registry.get(DocumentApi.class)).getSentenceTopicsWithEntities(ctx)); + get("/page/topicWords", (ctx) -> (registry.get(DocumentApi.class)).getTopicWordsByDocument(ctx)); + get("/unifiedTopicSentenceMap", (ctx) -> (registry.get(DocumentApi.class)).getUnifiedTopicToSentenceMap(ctx)); + get("/page/namedEntities", (ctx) -> (registry.get(DocumentApi.class)).getDocumentNamedEntitiesByPage(ctx)); + get("/page/lemma", (ctx) -> (registry.get(DocumentApi.class)).getDocumentLemmaByPage(ctx)); + get("/page/geoname", (ctx) -> 
(registry.get(DocumentApi.class)).getDocumentGeonameByPage(ctx)); + get("/page/emotions", (ctx) -> (registry.get(DocumentApi.class)).getDocumentEmotionsByPage(ctx)); + get("/emotionModels", (ctx) -> (registry.get(DocumentApi.class)).getEmotionModels(ctx)); // optional fürs Dropdown + get("/emotionRadar", (ctx) -> (registry.get(DocumentApi.class)).getEmotionRadar(ctx)); + delete("/delete", (ctx) -> (registry.get(DocumentApi.class)).deleteDocument(ctx)); + get("/findIdByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdByMetadata(ctx)); + get("/findIdsByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdsByMetadata(ctx)); + }); - // API routes - path("/api", () -> { - before("/*", (ctx) -> { - }); - - path("/ie", () -> { - post("/upload/uima", (ctx) -> (registry.get(ImportExportApi.class)).uploadUIMA(ctx)); - get("/download/uima", (ctx) -> (registry.get(ImportExportApi.class)).downloadUIMA(ctx)); - }); - - path("/wiki", () -> { - get("/page", (ctx) -> (registry.get(WikiApi.class)).getPage(ctx)); - get("/annotation", (ctx) -> (registry.get(WikiApi.class)).getAnnotation(ctx)); - path("/linkable", () -> { - post("/node", (ctx) -> (registry.get(WikiApi.class)).getLinkableNode(ctx)); - }); - path("/lexicon", () -> { - post("/entries", (ctx) -> (registry.get(WikiApi.class)).getLexicon(ctx)); - post("/occurrences", (ctx) -> (registry.get(WikiApi.class)).getOccurrencesOfLexiconEntry(ctx)); - }); - post("/queryOntology", (ctx) -> (registry.get(WikiApi.class)).queryOntology(ctx)); - }); - - path("/corpus", () -> { - get("/inspector", (ctx) -> (registry.get(DocumentApi.class)).getCorpusInspectorView(ctx)); - get("/documentsList", (ctx) -> (registry.get(DocumentApi.class)).getDocumentListOfCorpus(ctx)); - path("/map", () -> { - post("/linkedOccurrences", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrences(ctx)); - post("/linkedOccurrenceClusters", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrenceClusters(ctx)); - }); - }); - - 
path("/search", () -> { - post("/default", (ctx) -> (registry.get(SearchApi.class)).search(ctx)); - post("/semanticRole", (ctx) -> (registry.get(SearchApi.class)).semanticRoleSearch(ctx)); - post("/layered", (ctx) -> (registry.get(SearchApi.class)).layeredSearch(ctx)); - get("/active/page", (ctx) -> (registry.get(SearchApi.class)).activeSearchPage(ctx)); - get("/active/sort", (ctx) -> (registry.get(SearchApi.class)).activeSearchSort(ctx)); - get("/semanticRole/builder", (ctx) -> (registry.get(SearchApi.class)).getSemanticRoleBuilderView(ctx)); - }); - - path("/analysis", () -> { - post("/runPipeline", (ctx) -> (registry.get(AnalysisApi.class)).runPipeline(ctx)); - get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx)); - post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx)); - post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx)); - }); - - path("/corpusUniverse", () -> { - // Gets a corpus universe view - get("/new", (ctx) -> (registry.get(CorpusUniverseApi.class)).getCorpusUniverseView(ctx)); - post("/fromSearch", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromSearch(ctx)); - post("/fromCorpus", (ctx) -> (registry.get(CorpusUniverseApi.class)).fromCorpus(ctx)); - get("/nodeInspectorContent", (ctx) -> (registry.get(CorpusUniverseApi.class)).getNodeInspectorContentView(ctx)); - }); - - path("/document", () -> { - get("/reader/pagesList", (ctx) -> (registry.get(DocumentApi.class)).getPagesListView(ctx)); - get("/uceMetadata", (ctx) -> (registry.get(DocumentApi.class)).getUceMetadataOfDocument(ctx)); - get("/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopics(ctx)); - get("/page/taxon", (ctx) -> (registry.get(DocumentApi.class)).getTaxonCountByPage(ctx)); - get("/page/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopicDistributionByPage(ctx)); - get("/page/topicEntityRelation", (ctx) -> 
(registry.get(DocumentApi.class)).getSentenceTopicsWithEntities(ctx)); - get("/page/topicWords", (ctx) -> (registry.get(DocumentApi.class)).getTopicWordsByDocument(ctx)); - get("/unifiedTopicSentenceMap", (ctx) -> (registry.get(DocumentApi.class)).getUnifiedTopicToSentenceMap(ctx)); - get("/page/namedEntities", (ctx) -> (registry.get(DocumentApi.class)).getDocumentNamedEntitiesByPage(ctx)); - get("/page/lemma", (ctx) -> (registry.get(DocumentApi.class)).getDocumentLemmaByPage(ctx)); - get("/page/geoname", (ctx) -> (registry.get(DocumentApi.class)).getDocumentGeonameByPage(ctx)); - delete("/delete", (ctx) -> (registry.get(DocumentApi.class)).deleteDocument(ctx)); - get("/findIdByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdByMetadata(ctx)); - get("/findIdsByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdsByMetadata(ctx)); - }); - - path("/rag", () -> { - get("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); - // NOTE we allow also "post" here, as the system prompt can get quite long... - post("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); - post("/postUserMessage", (ctx) -> (registry.get(RAGApi.class)).postUserMessage(ctx)); - get("/messages", (ctx) -> (registry.get(RAGApi.class)).getMessagesForChat(ctx)); - get("/plotTsne", (ctx) -> (registry.get(RAGApi.class)).getTsnePlot(ctx)); - get("/sentenceEmbeddings", (ctx) -> (registry.get(RAGApi.class)).getSentenceEmbeddings(ctx)); - }); - }); + path("/rag", () -> { + get("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); + // NOTE we allow also "post" here, as the system prompt can get quite long... 
+ post("/new", (ctx) -> (registry.get(RAGApi.class)).getNewRAGChat(ctx)); + post("/postUserMessage", (ctx) -> (registry.get(RAGApi.class)).postUserMessage(ctx)); + get("/messages", (ctx) -> (registry.get(RAGApi.class)).getMessagesForChat(ctx)); + get("/plotTsne", (ctx) -> (registry.get(RAGApi.class)).getTsnePlot(ctx)); + get("/sentenceEmbeddings", (ctx) -> (registry.get(RAGApi.class)).getSentenceEmbeddings(ctx)); }); + }); + }); } private static JsonMapper getJsonMapper() { diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java index 866242bc..16fbd9eb 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java @@ -3,6 +3,7 @@ import com.google.gson.Gson; import freemarker.template.Configuration; import io.javalin.http.Context; +import io.javalin.http.Handler; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.context.ApplicationContext; @@ -10,8 +11,10 @@ import org.texttechnologylab.uce.analysis.RunDUUIPipeline; import org.texttechnologylab.uce.analysis.modules.DUUIInformation; import org.texttechnologylab.uce.common.annotations.auth.Authentication; +import org.texttechnologylab.uce.common.models.authentication.UceUser; import org.texttechnologylab.uce.common.models.dto.AnalysisRequestDto; import org.texttechnologylab.uce.common.models.dto.HistoryRequestDto; +import org.texttechnologylab.uce.web.SessionManager; import java.util.HashMap; import java.util.List; @@ -55,11 +58,19 @@ public void runPipeline(Context ctx) { model.put("inputStance", inputStance); model.put("inputLLM", inputLLM); + UceUser user = SessionManager.getUserFromRequest(ctx); + String userId = (user != null) ? 
user.getUsername() : "user-unknown"; + RunDUUIPipeline.setThreadLocalUserId(userId); + RunDUUIPipeline pipeline = new RunDUUIPipeline(); - DUUIInformation DataRequest = pipeline.getModelResources(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM); + RunDUUIPipeline.AnalysisResponse resp = + pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim, + inputCoherence, inputStance, inputLLM); + DUUIInformation DataRequest = resp.duuiInformation; model.put("DUUI", DataRequest); model.put("SuccessRequest", true); model.put("modelGroups", DataRequest.getModelGroups()); + model.put("analysisId", resp.analysisId); // set history history.addDuuiInformation(String.valueOf(counter), DataRequest); @@ -180,5 +191,38 @@ public void callHistoryText(Context ctx) { ctx.render("defaultError.ftl"); } } - + // IMPORT ROUTE + @Authentication(required = Authentication.Requirement.LOGGED_IN, + route = Authentication.RouteTypes.POST, + path = "/api/analysis/importCas" + ) + public Handler importCas = ctx -> { + try { + String analysisId = ctx.queryParam("analysisId"); + if (analysisId == null || analysisId.isBlank()) { + ctx.status(400).result("Missing analysisId"); + return; + } + + // Lookup cached session + RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(analysisId); + if (session == null) { + ctx.status(404).result("No cached CAS found for analysisId=" + analysisId); + return; + } + + // send to importer + long corpusId = Long.parseLong(ctx.queryParam("corpusId")); // from ?corpusId=... 
+ String importPath = "/api/ie/upload/uima"; + String importUrl = ctx.scheme() + "://" + ctx.host() + importPath; + + RunDUUIPipeline.sendToImporterViaHttp(importUrl, analysisId, corpusId, analysisId, null); + ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId); + } catch (NumberFormatException nfe) { + ctx.status(400).result("corpusId is required and must be a number"); + } catch (Exception e) { + logger.error("Error importing CAS", e); + ctx.status(500).result("Error importing CAS: " + e.getMessage()); + } + }; } diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java index f04ecfa4..89c254f6 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java @@ -270,10 +270,14 @@ public void findDocumentIdByMetadata(Context ctx) { } } + /** + * Delete Document from Database + */ public void deleteDocument(Context ctx) throws DatabaseOperationException { var id = ExceptionUtils.tryCatchLog(() -> ctx.queryParam("id"), (ex) -> logger.error("Error: document deletion requires an 'id' query parameter. 
", ex)); if (id == null) { + ctx.status(400); ctx.render("defaultError.ftl"); return; } @@ -282,11 +286,27 @@ public void deleteDocument(Context ctx) throws DatabaseOperationException { Map result = new HashMap<>(); result.put("status", "success"); - result.put("message", "NOTE Document deletion is not fully implemented yet."); + result.put("message", "Document successfully deleted"); ctx.json(result); } + /** + * Delete Corpus from Database + */ + public void deleteCorpus(Context ctx) throws DatabaseOperationException{ + var id = ExceptionUtils.tryCatchLog(() -> ctx.queryParam("corpusId"),ex -> logger.error("Error: corpus deletion required a corpusId parameter")); + if (id == null){ + ctx.status(400); + ctx.render("defaultError.ftl"); + return; + } + db.deleteCorpusById(Long.parseLong(id)); + Map result = new HashMap<>(); + result.put("status","success"); + result.put("message","Corpus successfully deleted"); + } + public void getPagesListView(Context ctx) { var model = new HashMap(); @@ -341,7 +361,9 @@ public void getDocumentTopics(Context ctx) { } catch (Exception ex) { logger.error("Error getting document topics.", ex); ctx.status(500); - ctx.render("defaultError.ftl", Map.of("information", "Error retrieving document topics.")); + var errorModel = new HashMap(); + errorModel.put("information", "Error retrieving document topics."); + ctx.render("defaultError.ftl", errorModel); } } @@ -383,10 +405,16 @@ public void getDocumentTopicDistributionByPage(Context ctx) { if (documentId == null) { ctx.status(400); ctx.render("defaultError.ftl", Map.of("information", "Missing documentId parameter")); + return; } + Long modelId = ExceptionUtils.tryCatchLog(() -> { + String modelParam = ctx.queryParam("modelId"); + return (modelParam == null || modelParam.isBlank()) ? null : Long.parseLong(modelParam); + }, (ex) -> logger.error("Error: couldn't determine the modelId for topics. 
", ex)); + try { - var topicDistPerPage = db.getTopicDistributionByPageForDocument(documentId); + var topicDistPerPage = db.getTopicDistributionByPageForDocument(documentId, modelId); var result = new ArrayList>(); for (Object[] row : topicDistPerPage) { @@ -403,7 +431,86 @@ public void getDocumentTopicDistributionByPage(Context ctx) { ctx.render("defaultError.ftl", Map.of("information", "Error retrieving document topics.")); } } + public void getTopicModels(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + List rows = db.getTopicModelsForDocumentWithName(documentId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + Number id = (Number) r[0]; + String name = (String) r[1]; + + Map obj = new HashMap<>(); + obj.put("modelId", id == null ? null : id.longValue()); + obj.put("modelName", (name == null || name.isBlank()) ? ("Model " + id) : name); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load topic models", "details", ex.getMessage())); + } + } + public void getTopicModelOverview(Context ctx) { + var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), + (ex) -> logger.error("Error: couldn't determine the documentId for topic model overview. ", ex)); + + var modelId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("modelId")), + (ex) -> logger.error("Error: couldn't determine the modelId for topic model overview. 
", ex)); + + if (documentId == null || modelId == null) { + ctx.status(400).json(Map.of("error", "Missing documentId or modelId")); + return; + } + + try { + var rows = db.getTopicModelOverview(documentId, modelId); + var result = new ArrayList>(); + + for (Object[] row : rows) { + var obj = new HashMap(); + obj.put("label", row[0]); + obj.put("value", ((Number) row[1]).longValue()); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + logger.error("Error getting topic model overview.", ex); + ctx.status(500).json(Map.of("error", "Failed to load topic model overview")); + } + } + public void getTopicModelPageCounts(Context ctx) { + var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), + (ex) -> logger.error("Error: couldn't determine the documentId for topic model page counts. ", ex)); + + var modelId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("modelId")), + (ex) -> logger.error("Error: couldn't determine the modelId for topic model page counts. ", ex)); + + if (documentId == null || modelId == null) { + ctx.status(400).json(Map.of("error", "Missing documentId or modelId")); + return; + } + + try { + var rows = db.getTopicModelPageCounts(documentId, modelId); + var result = new ArrayList>(); + for (Object[] row : rows) { + var obj = new HashMap(); + obj.put("pageId", row[0]); + obj.put("label", row[1]); + obj.put("value", ((Number) row[2]).longValue()); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + logger.error("Error getting topic model page counts.", ex); + ctx.status(500).json(Map.of("error", "Failed to load topic model page counts")); + } + } public void getDocumentNamedEntitiesByPage(Context ctx) { var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), (ex) -> logger.error("Error: couldn't determine the documentId for entities. 
", ex)); @@ -497,7 +604,6 @@ public void getDocumentGeonameByPage(Context ctx) { public void getSentenceTopicsWithEntities(Context ctx) { var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), (ex) -> logger.error("Error: couldn't determine the documentId for sentence topics with entities. ", ex)); - if (documentId == null) { ctx.status(400); ctx.render("defaultError.ftl", Map.of("information", "Missing documentId parameter for sentence topics with entities")); @@ -595,4 +701,73 @@ public void getUnifiedTopicToSentenceMap(Context ctx) { } } + public void getDocumentEmotionsByPage(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + + String modelParam = ctx.queryParam("modelId"); + Long modelId = (modelParam == null || modelParam.isBlank()) ? null : Long.parseLong(modelParam); + + List rows = db.getEmotionByPage(documentId, modelId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + Number pageId = (Number) r[0]; + String emotionLabel = (String) r[1]; + + Map obj = new HashMap<>(); + obj.put("pageId", pageId == null ? null : pageId.longValue()); + obj.put("emotionLabel", emotionLabel); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load emotions", "details", ex.getMessage())); + } + } + public void getEmotionRadar(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + String modelParam = ctx.queryParam("modelId"); + Long modelId = (modelParam == null || modelParam.isBlank()) ? null : Long.parseLong(modelParam); + + List rows = db.getEmotionRadarForDocument(documentId, modelId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + String label = (String) r[0]; + Number avg = (Number) r[1]; + + Map obj = new HashMap<>(); + obj.put("label", label); + obj.put("value", avg == null ? 
0.0 : avg.doubleValue()); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load emotion radar", "details", ex.getMessage())); + } + } + public void getEmotionModels(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + List rows = db.getEmotionModelsForDocumentWithName(documentId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + Number id = (Number) r[0]; + String name = (String) r[1]; + + Map obj = new HashMap<>(); + obj.put("modelId", id == null ? null : id.longValue()); + obj.put("modelName", (name == null || name.isBlank()) ? ("Model " + id) : name); + result.add(obj); + } + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load emotion models", "details", ex.getMessage())); + } + } } diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index a47013a9..f095cba8 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -1,35 +1,46 @@ package org.texttechnologylab.uce.web.routes; import com.google.gson.Gson; -import com.google.gson.JsonIOException; -import com.google.gson.JsonSyntaxException; +import com.google.gson.GsonBuilder; import io.javalin.http.Context; +import io.javalin.http.UploadedFile; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.context.ApplicationContext; import org.texttechnologylab.uce.common.config.CorpusConfig; +import org.texttechnologylab.uce.common.config.corpusConfig.CorpusAnnotationConfig; +import org.texttechnologylab.uce.common.config.corpusConfig.OtherConfig; +import 
org.texttechnologylab.uce.common.config.corpusConfig.TaxonConfig; import org.texttechnologylab.uce.common.exceptions.DatabaseOperationException; import org.texttechnologylab.uce.common.exceptions.ExceptionUtils; import org.texttechnologylab.uce.common.models.corpus.Corpus; +import org.texttechnologylab.uce.common.models.imp.ImportStatus; +import org.texttechnologylab.uce.common.models.imp.UCEImport; import org.texttechnologylab.uce.common.services.PostgresqlDataInterface_Impl; import org.texttechnologylab.uce.common.services.S3StorageService; import org.texttechnologylab.uce.common.utils.StringUtils; import org.texttechnologylab.uce.corpusimporter.Importer; +import java.io.IOException; +import java.io.InputStream; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; import java.util.HashMap; import java.util.Map; +import java.util.UUID; import java.util.concurrent.CompletableFuture; + public class ImportExportApi implements UceApi { + private static final Logger logger = LogManager.getLogger(PostgresqlDataInterface_Impl.class); private S3StorageService s3StorageService; private PostgresqlDataInterface_Impl db; private ApplicationContext serviceContext; - private static final Logger logger = LogManager.getLogger(PostgresqlDataInterface_Impl.class); - private static Gson gson = new Gson(); - public ImportExportApi(ApplicationContext serviceContext) { this.serviceContext = serviceContext; this.s3StorageService = serviceContext.getBean(S3StorageService.class); @@ -54,7 +65,7 @@ public void downloadUIMA(Context ctx) { ctx.res().setContentType(contentType); ctx.res().setHeader("Content-Disposition", "attachment; filename=\"" + objectName + "." 
+ StringUtils.getExtensionByContentType(contentType) + "\""); - var buffer = new byte[8192]; + var buffer = new byte[8192]; int bytesRead; while ((bytesRead = s3Stream.read(buffer)) != -1) { out.write(buffer, 0, bytesRead); @@ -74,9 +85,11 @@ public void uploadUIMA(Context ctx) { // First, we need to know which corpus this document should be added to. var corpusId = ExceptionUtils.tryCatchLog( () -> Long.parseLong(new String(ctx.req().getPart("corpusId").getInputStream().readAllBytes(), StandardCharsets.UTF_8)), - (ex) -> logger.error("Error getting the corpusId this document should be added to. Aborting.", ex)); + (ex) -> logger.error("Error getting corpusId from request.", ex)); + if (corpusId == null) { - ctx.result("Parameter corpusId didn't exist. Without it, the document cannot be uploaded."); + ctx.status(400); + ctx.result("Parameter corpusId didn't exist; cannot upload document."); return; } @@ -91,29 +104,12 @@ public void uploadUIMA(Context ctx) { var corpus = ExceptionUtils.tryCatchLog( () -> db.getCorpusById(corpusId), - (ex) -> logger.error("Couldn't fetch corpus when uploading new document to corpusId " + corpusId, ex)); + (ex) -> logger.error("Couldn't fetch corpus with id " + corpusId, ex)); + if (corpus == null) { - var corpusConfigRaw = ExceptionUtils.tryCatchLog( - () -> new String(ctx.req().getPart("corpusConfig").getInputStream().readAllBytes(), StandardCharsets.UTF_8), - (ex) -> logger.error("Error getting the corpusConfig that should be used for this document. 
Aborting.", ex)); - if (corpusConfigRaw == null) { - ctx.result("Corpus with id " + corpusId + " wasn't found in the database; no config was provided; can't upload document."); - return; - } - logger.info("Corpus with id " + corpusId + " wasn't found in the database; creating a new corpus with the provided config."); - try { - var corpusConfig = gson.fromJson(corpusConfigRaw, CorpusConfig.class); - corpus = new Corpus(); - var corpusReturn = Importer.CreateDBCorpus(corpus, corpusConfig, this.db); - if (corpusReturn != null) { - corpus = corpusReturn; - } - } catch (JsonIOException | JsonSyntaxException e) { - ctx.result("The corpusConfig provided is not properly formatted."); - } catch (DatabaseOperationException e) { - ctx.result("Error creating a new corpus in the database: " + e.getMessage()); - return; - } + ctx.status(404); + ctx.result("Corpus with id " + corpusId + " wasn't found in the database."); + return; } // TODO just use 1 as default? will throw an error if this is null otherwise... 
@@ -122,10 +118,9 @@ public void uploadUIMA(Context ctx) { try (var input = ctx.req().getPart("file").getInputStream()) { var fileName = ctx.req().getPart("file").getSubmittedFileName(); // Import the doc in the background - final Corpus corpus1 = corpus; var importFuture = CompletableFuture.supplyAsync(() -> { try { - return importer.storeUploadedXMIToCorpusAsync(input, corpus1, fileName, documentId); + return importer.storeUploadedXMIToCorpusAsync(input, corpus, fileName, documentId); } catch (DatabaseOperationException e) { throw new RuntimeException(e); } @@ -139,6 +134,7 @@ public void uploadUIMA(Context ctx) { if (acceptedContentType != null && acceptedContentType.equals("application/json")) { Map apiResult = new HashMap<>(); apiResult.put("document_id", newDocumentId); +// ctx.contentType("application/json"); //redundant ctx.json(apiResult); return; } @@ -149,6 +145,230 @@ public void uploadUIMA(Context ctx) { ctx.status(500); ctx.result("Error uploading a file: " + e.getMessage()); } - }; + } + + /** + * Handles the HTTP request to upload and import a corpus from user-provided files. + * Extracts files and configuration parameters, sets up a temporary directory, + * and triggers the background import process. + * + */ + public void importCorpusFromUpload(Context ctx) { + try{ + String customImportId = ctx.formParam("importId"); + String importId = (customImportId != null && !customImportId.isBlank() ? customImportId : UUID.randomUUID().toString()); + // Set up directory, folder structure + Path rootDir = java.nio.file.Paths.get(System.getProperty("java.io.tmpdir"), "uce_uploads", importId); + Path inputDir = rootDir.resolve("input"); + Files.createDirectories(inputDir); + + var validFiles = ctx.uploadedFiles("files").stream() + .filter(f -> f.size() > 0 && f.filename() != null && !f.filename().isBlank()) + .toList(); + if (validFiles.isEmpty()) { + ctx.status(400).result("No files selected. 
Please select at least one XMI file or archive."); + return; + } + + // put all files into the input directory + for(UploadedFile uploadedFile : ctx.uploadedFiles("files")){ + try(InputStream input = uploadedFile.content()){ + Files.copy(input,inputDir.resolve(uploadedFile.filename()), StandardCopyOption.REPLACE_EXISTING); + } + } + + /** + * Extract details and create a corpusConfig.json file + */ + String name = ctx.formParam("name"); + if (name == null || name.isBlank()){ + ctx.status(400).result("No corpus name given"); + } + String addToExistingParam = ctx.formParam("addToExistingCorpus"); + boolean addToExisting = addToExistingParam != null && Boolean.parseBoolean(addToExistingParam); + CorpusConfig config = null; + + Corpus existingCorpus = ExceptionUtils.tryCatchLog(() -> + db.getCorpusByName(name), + (ex) -> logger.warn("Could not fetch existing corpus by name " + name, ex) + + ); + // Check if it's either an import via new corpus or the user wants to import documents to an existing document + if(addToExisting){ + if (existingCorpus != null && existingCorpus.getCorpusJsonConfig() != null) + config = CorpusConfig.fromJson(existingCorpus.getCorpusJsonConfig()); + }else{ + if (existingCorpus != null){ + ctx.status(409).result("A corpus with that name already exists. 
Please Change the name or add these documents to the existing corpus!"); + try{ + org.apache.commons.io.FileUtils.deleteDirectory(rootDir.toFile()); + }catch (IOException ex){ + logger.warn("Could not delete temp dir after name conflict: " + rootDir,ex); + } + return; + } + } + + if (config == null){ + config = new CorpusConfig(); + config.setAnnotations(new CorpusAnnotationConfig()); + config.getAnnotations().setTaxon(new TaxonConfig()); + config.setOther(new OtherConfig()); + } else{ + if(config.getAnnotations() == null) config.setAnnotations(new CorpusAnnotationConfig()); + if(config.getAnnotations().getTaxon() == null) config.getAnnotations().setTaxon(new TaxonConfig()); + if(config.getOther() == null) config.setOther(new OtherConfig()); + } + + config.setName(name); + config.setAddToExistingCorpus(addToExisting); + String author = ctx.formParam("author"); + if (author != null && !author.isBlank()) config.setAuthor(author); + else if (config.getAuthor() == null) { + ctx.status(400).result("Corpus Author is required"); + return; + } + String language = ctx.formParam("language"); + if (language != null && !language.isBlank()) config.setLanguage(language); + else if (config.getLanguage() == null) { + ctx.status(400).result("Corpus Language is required."); + return; + } + String description = ctx.formParam("description"); + if (description != null && !description.isBlank()) config.setDescription(description); +// Annotations + CorpusAnnotationConfig ann = config.getAnnotations(); + ann.setSentence(ann.isSentence() || ctx.formParam("sentence") != null); + ann.setLemma(ann.isLemma() || ctx.formParam("lemma") != null); + ann.setNamedEntity(ann.isNamedEntity() || ctx.formParam("namedEntity") != null); + ann.setTopic(ann.isTopic() || ctx.formParam("topic") != null); + ann.setSentiment(ann.isSentiment() || ctx.formParam("sentiment") != null); + ann.setEmotion(ann.isEmotion() || ctx.formParam("emotion") != null); + ann.setTime(ann.isTime() || ctx.formParam("time") != 
null); + ann.setGeoNames(ann.isGeoNames() || ctx.formParam("geoNames") != null); + ann.setWikipediaLink(ann.isWikipediaLink() || ctx.formParam("wikipediaLink") != null); + ann.setImage(ann.isImage() || ctx.formParam("image") != null); + ann.setAnnotatorMetadata(ann.isAnnotatorMetadata() || ctx.formParam("annotatorMetadata") != null); + ann.setUceMetadata(ann.isUceMetadata() || ctx.formParam("uceMetadata") != null); + ann.setLogicalLinks(ann.isLogicalLinks() || ctx.formParam("logicalLinks") != null); + ann.setSrLink(ann.isSrLink() || ctx.formParam("srLink") != null); + ann.setUnifiedTopic(ann.isUnifiedTopic() || ctx.formParam("unifiedTopic") != null); + ann.setOCRPage(ann.isOCRPage() || ctx.formParam("OCRPage") != null); + ann.setOCRParagraph(ann.isOCRParagraph() || ctx.formParam("OCRParagraph") != null); + ann.setOCRBlock(ann.isOCRBlock() || ctx.formParam("OCRBlock") != null); + ann.setOCRLine(ann.isOCRLine() || ctx.formParam("OCRLine") != null); + ann.setCompleteNegation(ann.isCompleteNegation() || ctx.formParam("completeNegation") != null); + ann.setCue(ann.isCue() || ctx.formParam("cue") != null); + ann.setEvent(ann.isEvent() || ctx.formParam("event") != null); + ann.setFocus(ann.isFocus() || ctx.formParam("focus") != null); + ann.setScope(ann.isScope() || ctx.formParam("scope") != null); + ann.setXscope(ann.isXscope() || ctx.formParam("xscope") != null); + + + TaxonConfig taxonConfig = ann.getTaxon(); + taxonConfig.setAnnotated(taxonConfig.isAnnotated() || ctx.formParam("taxonAnnotated") != null); + taxonConfig.setBiofidOnthologyAnnotated(taxonConfig.isBiofidOnthologyAnnotated() || ctx.formParam("biofidOnthologyAnnotated") != null); +// Other Settings + OtherConfig otherConfig = config.getOther(); + otherConfig.setEnableEmbeddings(otherConfig.isEnableEmbeddings() || ctx.formParam("enableEmbeddings") != null); + otherConfig.setEnableRAGBot(otherConfig.isEnableRAGBot() || ctx.formParam("enableRAGBot") != null); + 
otherConfig.setIncludeKeywordDistribution(otherConfig.isIncludeKeywordDistribution()|| ctx.formParam("includeKeywordDistribution") != null); + otherConfig.setEnableS3Storage(otherConfig.isEnableS3Storage() || ctx.formParam("enableS3Storage") != null); + otherConfig.setAvailableOnFrankfurtUniversityCollection(otherConfig.isAvailableOnFrankfurtUniversityCollection() || ctx.formParam("availableOnFrankfurtUniversityCollection") != null); + + config.setAnnotations(ann); + config.setOther(otherConfig); + + // create corpusConfig.json inside the root directory + Gson gson = new GsonBuilder().setPrettyPrinting().create(); + String jsonString = gson.toJson(config); + Files.writeString(rootDir.resolve("corpusConfig.json"),jsonString,StandardCharsets.UTF_8); + + String numThreadStr = ctx.formParam("numThreads"); + int numThreads = (numThreadStr != null && !numThreadStr.isBlank()) ? Integer.parseInt(numThreadStr) : 1; + String casView = ctx.formParam("casView"); + if(casView != null && casView.isBlank()) casView = null; + int importerNumber = 1; + Importer importer = new Importer(serviceContext,rootDir.toString(),importerNumber,importId,casView); + + String logTitle = (addToExisting ? "ADD_TO:" : "UPLOAD_NEW:") + name; + UCEImport uceImport = new UCEImport(importId,logTitle,ImportStatus.STARTING); + Integer fileCount = ExceptionUtils.tryCatchLog(importer::getXMICountInPath, + (ex) -> logger.warn("IO Error counting upload files.",ex)); + uceImport.setTotalDocuments(fileCount == null ? 
-1 : fileCount); + db.saveOrUpdateUceImport(uceImport); + + // run import + CompletableFuture.runAsync(() -> { + try{ + importer.start(numThreads); + UCEImport finishedImport = db.getUceImportByImportId(importId); + if (finishedImport != null) { + finishedImport.setStatus(ImportStatus.FINISHED); + db.saveOrUpdateUceImport(finishedImport); + } + } catch (DatabaseOperationException e) { + logger.error("Error during asynchronous corpus uplaod import",e); + try { + UCEImport errImport = db.getUceImportByImportId(importId); + if (errImport != null) { + errImport.setStatus(ImportStatus.ERROR); + db.saveOrUpdateUceImport(errImport); + } + } catch (Exception ignored) {} + + }finally { + try { + org.apache.commons.io.FileUtils.deleteDirectory(rootDir.toFile()); + } catch (IOException ex) { + logger.warn("Could not delete temp upload dir: " + rootDir,ex); + } + } + }); + + ctx.status(200).result("Upload successful. Import started with ID: " + importId); + + } catch (IOException e) { + logger.error("Error handling file upload import", e); + ctx.status(500).result("Error during upload " + e.getMessage()); + } catch (DatabaseOperationException e) { + logger.error("Error saving/updating database during Uce Import", e); + ctx.status(500).result("Error during saving/updating database " + e.getMessage()); + } + } + + /** + * Retrieves the current progress and status of an import + * Sends back a JSON Object containing the count of total and processed documents + */ + public void getImportStatus(Context ctx){ + String importId = ctx.pathParam("importId"); + + try{ + UCEImport uceImport = db.getUceImportByImportId(importId); + if (uceImport == null){ + ctx.status(400).result("Import %s not found".formatted(importId)); + return; + } + + Map statusData = new HashMap<>(); + statusData.put("status",uceImport.getStatus().name()); + statusData.put("total",uceImport.getTotalDocuments()); + int processed = 0; + + if(uceImport.getStatus() == ImportStatus.FINISHED || uceImport.getStatus() == 
ImportStatus.ERROR){ + processed = uceImport.getTotalDocuments(); + Importer.IMPORT_PROGRESS.remove(importId); + }else if (Importer.IMPORT_PROGRESS.containsKey(importId)){ + processed = Importer.IMPORT_PROGRESS.get(importId).get(); + } + + statusData.put("processed",processed); + ctx.json((statusData)); + } catch (DatabaseOperationException e) { + logger.error("Error when trying to get import-status"); + ctx.status(500).result("Error when trying to get import-status"); + } + } } diff --git a/uce.portal/uce.web/src/main/resources/languageTranslations.json b/uce.portal/uce.web/src/main/resources/languageTranslations.json index f34ce979..185db241 100644 --- a/uce.portal/uce.web/src/main/resources/languageTranslations.json +++ b/uce.portal/uce.web/src/main/resources/languageTranslations.json @@ -538,5 +538,9 @@ "sentenceTopicFlow": { "de-DE": "Satz-Themenfluss", "en-EN": "Sentence Topic Flow" + }, + "sentenceSentiment": { + "de-DE": "Satz-Sentiment", + "en-EN": "Sentence Sentiment" } } \ No newline at end of file diff --git a/uce.portal/uce.web/src/main/resources/models.json b/uce.portal/uce.web/src/main/resources/models.json index b842dee9..10192f51 100644 --- a/uce.portal/uce.web/src/main/resources/models.json +++ b/uce.portal/uce.web/src/main/resources/models.json @@ -43,6 +43,28 @@ "Variant": "Topic", "type": "" }, + "Topic ParlaCAP": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "classla/ParlaCAP-Topic-Classifier", + "Name": "Topic ParlaCap", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic dstefa": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "dstefa/roberta-base_topic_classification_nyt_news", + "Name": "Topic dstefa", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, "Topic WebOrganizer (EN)": { "url": "http://topic-organize-web.service.component.duui.texttechnologylab.org", "github": 
"https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic",