From 10f4b2d26072a1a91a83418d4122347913a99b97 Mon Sep 17 00:00:00 2001 From: dua Date: Mon, 15 Sep 2025 11:25:25 +0200 Subject: [PATCH 01/47] added function for: analysis result send to the importer and save analysis result as cache --- .../templates/wiki/analysisResultFragment.ftl | 18 ++ .../uce/analysis/DUUIPipeline.java | 2 +- .../uce/analysis/RunDUUIPipeline.java | 257 ++++++++++++++++++ .../src/main/resources/defaultUceConfig.json | 2 +- .../org/texttechnologylab/uce/web/App.java | 1 + .../uce/web/routes/AnalysisApi.java | 47 ++++ 6 files changed, 325 insertions(+), 2 deletions(-) diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl index 7e012f0c..592ab90e 100644 --- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl +++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl @@ -1,4 +1,22 @@ +<#if analysisId??> +
+ +
+ + <#if DUUI??> <#if DUUI.modelGroups?has_content> <#if DUUI.isTopic> diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java index 3cb3d72a..8c666e80 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java @@ -120,7 +120,7 @@ public JCas getLanguage(String inputText) throws Exception { HashMap urls = new HashMap<>(); urls.put("LanguageDetection", "http://language.service.component.duui.texttechnologylab.org"); DUUIComposer composer = setListComposer(urls); - cas = runPipeline(cas, composer); + //cas = runPipeline(cas, composer); // Assuming the language detection component sets the language in the JCas String language = "en"; language = cas.getDocumentLanguage(); diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java index 02687feb..74a6bdc7 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java @@ -8,11 +8,36 @@ import org.texttechnologylab.uce.analysis.modules.*; import org.texttechnologylab.uce.analysis.typeClasses.TextClass; + + + +import java.time.Instant; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +// Added imports (save .jcas, HTTP import, logging) +//import java.nio.file.Path; +//import java.nio.file.Paths; +//import java.nio.file.Files; +//import java.io.OutputStream; +import java.io.InputStream; +import java.io.DataOutputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; + 
import java.util.*; public class RunDUUIPipeline { + private static final AnalysisCache analysisCache = new AnalysisCache(); + private static final ThreadLocal lastAnalysisIdTL = new ThreadLocal<>(); + public static AnalysisSession getCachedSession(String analysisId) { return analysisCache.get(analysisId); } + + private static String getCurrentUserId() { + // TODO: replace with your auth/session identity + return "user-unknown"; + } public DUUIInformation getModelResources(List modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception { ModelResources modelResources = new ModelResources(); @@ -189,10 +214,13 @@ public DUUIInformation getModelResources(List modelGroups, String inputT newCas.setDocumentText(text); cas = newCas; + System.out.println("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)"); + } // run pipeline DUUIComposer composer = pipeline.setComposer(modelInfosMap); JCas result = pipeline.runPipeline(cas, composer); + System.out.println("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)"); // get results Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups); // print results @@ -232,9 +260,28 @@ public DUUIInformation getModelResources(List modelGroups, String inputT if (isCohmetrix) { duuiInformation.setCohMetrixGroups(cohmetrixScorerGroups); } + String analysisId = UUID.randomUUID().toString(); + String userId = getCurrentUserId(); + String title = "Analysis " + Instant.now(); + + byte[] xmiBytes = toXmiBytes(result); + AnalysisSession session = new AnalysisSession( + analysisId, userId, title, /*externalId*/ null, + result, /*xmiBytes*/ xmiBytes + ); + analysisCache.put(session); + lastAnalysisIdTL.set(analysisId); + System.out.println("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)"); return duuiInformation; } + public AnalysisResponse 
getModelResourcesWithHandle(List modelGroups, String inputText, String claim, + String coherenceText, String stanceText, String systemPrompt) throws Exception { + DUUIInformation info = getModelResources(modelGroups, inputText, claim, coherenceText, stanceText, systemPrompt); + String id = lastAnalysisIdTL.get(); + return new AnalysisResponse(id, info); + } + public static void main(String[] args) throws Exception { ModelResources modelResources = new ModelResources(); List modelGroups = modelResources.getGroupedModelObjects(); @@ -256,5 +303,215 @@ public static void main(String[] args) throws Exception { DUUIInformation duuiInformation = new RunDUUIPipeline().getModelResources(modelGroupNames, inputText, claim, coherenceText, stanceText, systemPrompt); } + public static final class AnalysisResponse { + public final String analysisId; + public final DUUIInformation duuiInformation; + + public AnalysisResponse(String analysisId, DUUIInformation duuiInformation) { + this.analysisId = analysisId; + this.duuiInformation = duuiInformation; + } + } + + + //AnalysisSession + public static final class AnalysisSession { + public final String analysisId; + public final String userId; + public final long createdAtMillis; + public final String title; + public final String externalId; + public final JCas jcas; + public final byte[] xmiBytes; + + public AnalysisSession(String analysisId, String userId, String title, String externalId, + JCas jcas, byte[] xmiBytes) { + this.analysisId = analysisId; + this.userId = userId; + this.title = title; + this.externalId = externalId; + this.createdAtMillis = System.currentTimeMillis(); + this.jcas = jcas; + this.xmiBytes = xmiBytes; + } + } + + + // AnalysisCache + public static final class AnalysisCache { + private final Map map = new ConcurrentHashMap<>(); + private final long ttlMillis = 45 * 60 * 1000L; // 45 minutes + + public void put(AnalysisSession s) { map.put(s.analysisId, s); } + + public AnalysisSession get(String id) { // 
Retrieve a session from the cache + AnalysisSession s = map.get(id); + if (s == null) return null; + + if (System.currentTimeMillis() - s.createdAtMillis > ttlMillis) { // If this session is older than 45 minutes -> expire it + map.remove(id); + return null; + } + return s; + } + + public void remove(String id) { map.remove(id); } //Manually remove a session by ID + + + public void cleanupExpired() { // cleanup all expired sessions + long now = System.currentTimeMillis(); + for (var entry : map.entrySet()) { + AnalysisSession s = entry.getValue(); + if (now - s.createdAtMillis > ttlMillis) { + map.remove(entry.getKey()); + System.out.println("[CRON] Removed expired session: " + s.analysisId); + } + } + } + } + private static final java.util.concurrent.ScheduledExecutorService scheduler = //Cron job for automatic cleanup every 5 minutes + java.util.concurrent.Executors.newScheduledThreadPool(1); + + static { + scheduler.scheduleAtFixedRate(() -> { + try { + analysisCache.cleanupExpired(); + } catch (Exception e) { + System.err.println("[CACHE] Cache cleanup failed: " + e.getMessage()); + } + }, 5, 5, java.util.concurrent.TimeUnit.MINUTES); + + scheduler.scheduleAtFixedRate(() -> { + System.out.println("[CACHE] Running cache cleanup task..."); + analysisCache.cleanupExpired(); // your cleanup method + }, 1, 5, TimeUnit.MINUTES); + + + } + + + private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception { + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + org.apache.uima.cas.impl.XmiCasSerializer ser = + new org.apache.uima.cas.impl.XmiCasSerializer(jcas.getTypeSystem()); + org.apache.uima.util.XMLSerializer xmlSer = + new org.apache.uima.util.XMLSerializer(bos, true); + xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1"); + ser.serialize(jcas.getCas(), xmlSer.getContentHandler()); + return bos.toByteArray(); + } + + + // When we send CAS to the importer via HTTP, we want to capture the response. 
+ // This small class acts like a container for the HTTP response details + private static class HttpResult { + final int status; + final String body; + final String locationHeader; + HttpResult(int status, String body, String locationHeader) { + this.status = status; this.body = body; this.locationHeader = locationHeader; + } + } + + + // Send CAS via HTTP + private static HttpResult postMultipart(String urlStr, + Map fields, + String fileField, String filename, + String fileContentType, byte[] fileBytes) throws Exception { + String boundary = "----JAVA-" + UUID.randomUUID(); //Generate a boundary string to separate parts in multipart body + URL url = new URL(urlStr); //Open HTTP connection to the importer endpoint + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setDoOutput(true); + conn.setRequestMethod("POST"); + conn.setRequestProperty("Content-Type", "multipart/form-data; boundary=" + boundary); + + try (DataOutputStream out = new DataOutputStream(conn.getOutputStream())) { //Write request body + // text fields + for (var e : fields.entrySet()) { + out.writeBytes("--" + boundary + "\r\n"); + out.writeBytes("Content-Disposition: form-data; name=\"" + e.getKey() + "\"\r\n\r\n"); + out.write(e.getValue().getBytes(StandardCharsets.UTF_8)); + out.writeBytes("\r\n"); + } + // file field + out.writeBytes("--" + boundary + "\r\n"); + out.writeBytes("Content-Disposition: form-data; name=\"" + fileField + "\"; filename=\"" + filename + "\"\r\n"); + out.writeBytes("Content-Type: " + fileContentType + "\r\n\r\n"); + out.write(fileBytes); + out.writeBytes("\r\n"); + out.writeBytes("--" + boundary + "--\r\n"); + out.flush(); + } + + int status = conn.getResponseCode(); //Read the HTTP response from the importer + String location = conn.getHeaderField("Location"); + String body; + + try (InputStream in = (status >= 200 && status < 400) ? conn.getInputStream() : conn.getErrorStream()) { + body = (in != null) ? 
new String(in.readAllBytes(), StandardCharsets.UTF_8) : ""; + } + conn.disconnect(); + return new HttpResult(status, body, location); + } + + public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached CAS to importer + String analysisId, + long corpusId, + String documentId, + String casView) throws Exception { + AnalysisSession s = getCachedSession(analysisId); + if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId); + + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); // Convert JCas -> XMI bytes + org.apache.uima.cas.impl.XmiCasSerializer ser = + new org.apache.uima.cas.impl.XmiCasSerializer(s.jcas.getTypeSystem()); + org.apache.uima.util.XMLSerializer xmlSer = + new org.apache.uima.util.XMLSerializer(bos, /*prettyPrint*/ true); + xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1"); + ser.serialize(s.jcas.getCas(), xmlSer.getContentHandler()); + byte[] casBytes = bos.toByteArray(); + + Map fields = new LinkedHashMap<>(); // Form-data fields + fields.put("analysisId", analysisId); + fields.put("corpusId", Long.toString(corpusId)); + if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId); + if (casView != null && !casView.isBlank()) fields.put("casView", casView); + + String corpusConfigJson = System.getenv("UCE_CORPUS_CONFIG_JSON"); // Include corpusConfig + if (corpusConfigJson == null || corpusConfigJson.isBlank()) { + String cfgPath = System.getenv("UCE_CORPUS_CONFIG_PATH"); + if (cfgPath != null && !cfgPath.isBlank()) { + corpusConfigJson = java.nio.file.Files.readString( + java.nio.file.Path.of(cfgPath), + java.nio.charset.StandardCharsets.UTF_8 + ); + } + } + if (corpusConfigJson != null && !corpusConfigJson.isBlank()) { + fields.put("corpusConfig", corpusConfigJson); + } + + // Send multipart as XMI + String filename = "cas_" + analysisId + ".xmi"; + System.out.println("[IMPORT][HTTP] POST " + importUrl + + " corpusId=" + 
corpusId + " analysisId=" + analysisId + + " documentId=" + documentId + " casView=" + casView + + " file=" + filename + " (" + casBytes.length + " bytes)"); + + HttpResult res = postMultipart( + importUrl, + fields, + "file", + filename, + "application/xml", + casBytes + ); + System.out.println("[IMPORT][HTTP] status=" + res.status + + (res.locationHeader != null ? " Location=" + res.locationHeader : "") + + (res.body != null && !res.body.isBlank() ? " body=" + res.body : "")); + return res; + } + } diff --git a/uce.portal/uce.common/src/main/resources/defaultUceConfig.json b/uce.portal/uce.common/src/main/resources/defaultUceConfig.json index b9ff7a68..7280bb23 100644 --- a/uce.portal/uce.common/src/main/resources/defaultUceConfig.json +++ b/uce.portal/uce.common/src/main/resources/defaultUceConfig.json @@ -79,7 +79,7 @@ "enableAnalysisEngine": true }, "authentication": { - "isActivated": true, + "isActivated": false, "publicUrl": "http://localhost:8080", "redirectUrl": "http://localhost:4567/auth" }, diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java index 00766037..9652ae1e 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java @@ -469,6 +469,7 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi get("/setHistory", (ctx) -> (registry.get(AnalysisApi.class)).setHistory(ctx)); post("/callHistory", (ctx) -> (registry.get(AnalysisApi.class)).callHistory(ctx)); post("/callHistoryText", (ctx) -> (registry.get(AnalysisApi.class)).callHistoryText(ctx)); + post("/importCas", (registry.get(AnalysisApi.class)).importCas); //added the importCas path }); path("/corpusUniverse", () -> { diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java 
b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java index 866242bc..99d47a94 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java @@ -3,6 +3,7 @@ import com.google.gson.Gson; import freemarker.template.Configuration; import io.javalin.http.Context; +import io.javalin.http.Handler; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.context.ApplicationContext; @@ -56,10 +57,14 @@ public void runPipeline(Context ctx) { model.put("inputLLM", inputLLM); RunDUUIPipeline pipeline = new RunDUUIPipeline(); + RunDUUIPipeline.AnalysisResponse resp = + pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim, + inputCoherence, inputStance, inputLLM); DUUIInformation DataRequest = pipeline.getModelResources(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM); model.put("DUUI", DataRequest); model.put("SuccessRequest", true); model.put("modelGroups", DataRequest.getModelGroups()); + model.put("analysisId", resp.analysisId); // set history history.addDuuiInformation(String.valueOf(counter), DataRequest); @@ -180,5 +185,47 @@ public void callHistoryText(Context ctx) { ctx.render("defaultError.ftl"); } } + // NEW IMPORT ROUTE (Javalin) + @Authentication(required = Authentication.Requirement.LOGGED_IN, + route = Authentication.RouteTypes.POST, + path = "/api/analysis/importCas" + ) + public Handler importCas = ctx -> { + try { + String analysisId = ctx.queryParam("analysisId"); + if (analysisId == null || analysisId.isBlank()) { + ctx.status(400).result("Missing analysisId"); + return; + } + + // Lookup cached session + RunDUUIPipeline.AnalysisSession session = RunDUUIPipeline.getCachedSession(analysisId); + if (session == null) { + ctx.status(404).result("No cached CAS found for analysisId=" + analysisId); 
+ return; + } + + // send to importer + long corpusId = Long.parseLong(System.getenv().getOrDefault("UCE_IMPORT_CORPUS_ID", "1")); + String documentId = null; // String documentId = "doc-" + analysisId; + String casView = null; + + try { + RunDUUIPipeline.sendToImporterViaHttp( + "http://localhost:4567/api/ie/upload/uima", + analysisId, corpusId, documentId, casView + ); + } catch (Exception e) { + e.printStackTrace(); + ctx.status(500).result("Importer HTTP failed: " + e.getMessage()); + return; + } + + ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId); + } catch (Exception ex) { + logger.error("Error importing CAS", ex); + ctx.status(500).result("Error importing CAS: " + ex.getMessage()); + } + }; } From e68ec44a356b58ccee51785689fe59e84f43996b Mon Sep 17 00:00:00 2001 From: dua Date: Mon, 15 Sep 2025 12:43:31 +0200 Subject: [PATCH 02/47] enabled pipeline run again --- .../java/org/texttechnologylab/uce/analysis/DUUIPipeline.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java index 8c666e80..3cb3d72a 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/DUUIPipeline.java @@ -120,7 +120,7 @@ public JCas getLanguage(String inputText) throws Exception { HashMap urls = new HashMap<>(); urls.put("LanguageDetection", "http://language.service.component.duui.texttechnologylab.org"); DUUIComposer composer = setListComposer(urls); - //cas = runPipeline(cas, composer); + cas = runPipeline(cas, composer); // Assuming the language detection component sets the language in the JCas String language = "en"; language = cas.getDocumentLanguage(); From ea1b98786c9745ba17ff6e0a5ce7d804adf551b4 Mon Sep 17 00:00:00 2001 From: dua 
Date: Mon, 15 Sep 2025 15:02:42 +0200 Subject: [PATCH 03/47] replaced system.out with logger.info --- .../uce/analysis/RunDUUIPipeline.java | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java index 74a6bdc7..6cfaf868 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java @@ -1,6 +1,8 @@ package org.texttechnologylab.uce.analysis; import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.uima.fit.factory.JCasFactory; import org.apache.uima.fit.util.JCasUtil; import org.apache.uima.jcas.JCas; @@ -14,11 +16,6 @@ import java.time.Instant; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; -// Added imports (save .jcas, HTTP import, logging) -//import java.nio.file.Path; -//import java.nio.file.Paths; -//import java.nio.file.Files; -//import java.io.OutputStream; import java.io.InputStream; import java.io.DataOutputStream; import java.net.HttpURLConnection; @@ -31,6 +28,8 @@ public class RunDUUIPipeline { private static final AnalysisCache analysisCache = new AnalysisCache(); private static final ThreadLocal lastAnalysisIdTL = new ThreadLocal<>(); + private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class); + public static AnalysisSession getCachedSession(String analysisId) { return analysisCache.get(analysisId); } @@ -214,13 +213,13 @@ public DUUIInformation getModelResources(List modelGroups, String inputT newCas.setDocumentText(text); cas = newCas; - System.out.println("[CAS] Created secondary JCas for special models 
(fact/coherence/stance/LLM)"); + logger.info("[CAS] Created secondary JCas for special models (fact/coherence/stance/LLM)"); } // run pipeline DUUIComposer composer = pipeline.setComposer(modelInfosMap); JCas result = pipeline.runPipeline(cas, composer); - System.out.println("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)"); + logger.info("[CAS] Final result JCas created via pipeline.runPipeline(cas, composer)"); // get results Object[] results = pipeline.getJCasResults(result, modelInfosList, ttlabScorerGroups, cohmetrixScorerGroups); // print results @@ -271,7 +270,7 @@ public DUUIInformation getModelResources(List modelGroups, String inputT ); analysisCache.put(session); lastAnalysisIdTL.set(analysisId); - System.out.println("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)"); + logger.info("[CACHE] Added analysisId=" + analysisId + " (stored in memory; TTL=45min)"); return duuiInformation; } @@ -364,7 +363,7 @@ public void cleanupExpired() { // cleanup all expired sessions AnalysisSession s = entry.getValue(); if (now - s.createdAtMillis > ttlMillis) { map.remove(entry.getKey()); - System.out.println("[CRON] Removed expired session: " + s.analysisId); + logger.info("[CRON] Removed expired session: " + s.analysisId); } } } @@ -377,12 +376,12 @@ public void cleanupExpired() { // cleanup all expired sessions try { analysisCache.cleanupExpired(); } catch (Exception e) { - System.err.println("[CACHE] Cache cleanup failed: " + e.getMessage()); + logger.error("[CACHE] Cache cleanup failed: " + e.getMessage()); } }, 5, 5, java.util.concurrent.TimeUnit.MINUTES); scheduler.scheduleAtFixedRate(() -> { - System.out.println("[CACHE] Running cache cleanup task..."); + logger.info("[CACHE] Running cache cleanup task..."); analysisCache.cleanupExpired(); // your cleanup method }, 1, 5, TimeUnit.MINUTES); @@ -494,7 +493,7 @@ public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached C // Send multipart as XMI 
String filename = "cas_" + analysisId + ".xmi"; - System.out.println("[IMPORT][HTTP] POST " + importUrl + logger.info("[IMPORT][HTTP] POST " + importUrl + " corpusId=" + corpusId + " analysisId=" + analysisId + " documentId=" + documentId + " casView=" + casView + " file=" + filename + " (" + casBytes.length + " bytes)"); @@ -507,7 +506,7 @@ public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached C "application/xml", casBytes ); - System.out.println("[IMPORT][HTTP] status=" + res.status + logger.info("[IMPORT][HTTP] status=" + res.status + (res.locationHeader != null ? " Location=" + res.locationHeader : "") + (res.body != null && !res.body.isBlank() ? " body=" + res.body : "")); return res; From 9f1d81c7f5487b75dd5ab5b7e8beaa68620bf8e7 Mon Sep 17 00:00:00 2001 From: dua Date: Thu, 18 Sep 2025 13:08:35 +0200 Subject: [PATCH 04/47] Added input field for corpusId --- .../templates/wiki/analysisResultFragment.ftl | 38 ++++++++++--------- .../uce/web/routes/AnalysisApi.java | 31 ++++++--------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl index 592ab90e..4cbcec2c 100644 --- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl +++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl @@ -1,22 +1,26 @@ <#if analysisId??> -
- -
+
+ + + +
- - + + <#if DUUI??> <#if DUUI.modelGroups?has_content> <#if DUUI.isTopic> diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java index 99d47a94..b3952aa3 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java @@ -185,7 +185,7 @@ public void callHistoryText(Context ctx) { ctx.render("defaultError.ftl"); } } - // NEW IMPORT ROUTE (Javalin) + // IMPORT ROUTE @Authentication(required = Authentication.Requirement.LOGGED_IN, route = Authentication.RouteTypes.POST, path = "/api/analysis/importCas" @@ -206,26 +206,17 @@ public void callHistoryText(Context ctx) { } // send to importer - long corpusId = Long.parseLong(System.getenv().getOrDefault("UCE_IMPORT_CORPUS_ID", "1")); - String documentId = null; // String documentId = "doc-" + analysisId; - String casView = null; - - try { - RunDUUIPipeline.sendToImporterViaHttp( - "http://localhost:4567/api/ie/upload/uima", - analysisId, corpusId, documentId, casView - ); - } catch (Exception e) { - e.printStackTrace(); - ctx.status(500).result("Importer HTTP failed: " + e.getMessage()); - return; - } - + long corpusId = Long.parseLong(ctx.queryParam("corpusId")); // from ?corpusId=... 
+ RunDUUIPipeline.sendToImporterViaHttp( + "http://localhost:4567/api/ie/upload/uima", + analysisId, corpusId, analysisId, null + ); ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId); - - } catch (Exception ex) { - logger.error("Error importing CAS", ex); - ctx.status(500).result("Error importing CAS: " + ex.getMessage()); + } catch (NumberFormatException nfe) { + ctx.status(400).result("corpusId is required and must be a number"); + } catch (Exception e) { + logger.error("Error importing CAS", e); + ctx.status(500).result("Error importing CAS: " + e.getMessage()); } }; } From 38f76b057311659649bb7fbbbbe6d35d470b76a1 Mon Sep 17 00:00:00 2001 From: dua Date: Mon, 22 Sep 2025 12:18:23 +0200 Subject: [PATCH 05/47] derive importer URL from request host; remove hardcoded localhost removed un-used fields --- .../uce/analysis/RunDUUIPipeline.java | 22 +------------------ .../uce/web/routes/AnalysisApi.java | 10 ++++----- 2 files changed, 6 insertions(+), 26 deletions(-) diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java index 6cfaf868..5174f85d 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java @@ -462,14 +462,7 @@ public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached C AnalysisSession s = getCachedSession(analysisId); if (s == null) throw new IllegalArgumentException("No cached session for id: " + analysisId); - java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); // Convert JCas -> XMI bytes - org.apache.uima.cas.impl.XmiCasSerializer ser = - new org.apache.uima.cas.impl.XmiCasSerializer(s.jcas.getTypeSystem()); - org.apache.uima.util.XMLSerializer xmlSer = - new 
org.apache.uima.util.XMLSerializer(bos, /*prettyPrint*/ true); - xmlSer.setOutputProperty(javax.xml.transform.OutputKeys.VERSION, "1.1"); - ser.serialize(s.jcas.getCas(), xmlSer.getContentHandler()); - byte[] casBytes = bos.toByteArray(); + byte[] casBytes = toXmiBytes(s.jcas); Map fields = new LinkedHashMap<>(); // Form-data fields fields.put("analysisId", analysisId); @@ -477,19 +470,6 @@ public static HttpResult sendToImporterViaHttp(String importUrl, //Send cached C if (documentId != null && !documentId.isBlank()) fields.put("documentId", documentId); if (casView != null && !casView.isBlank()) fields.put("casView", casView); - String corpusConfigJson = System.getenv("UCE_CORPUS_CONFIG_JSON"); // Include corpusConfig - if (corpusConfigJson == null || corpusConfigJson.isBlank()) { - String cfgPath = System.getenv("UCE_CORPUS_CONFIG_PATH"); - if (cfgPath != null && !cfgPath.isBlank()) { - corpusConfigJson = java.nio.file.Files.readString( - java.nio.file.Path.of(cfgPath), - java.nio.charset.StandardCharsets.UTF_8 - ); - } - } - if (corpusConfigJson != null && !corpusConfigJson.isBlank()) { - fields.put("corpusConfig", corpusConfigJson); - } // Send multipart as XMI String filename = "cas_" + analysisId + ".xmi"; diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java index b3952aa3..26a0ba7b 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java @@ -60,7 +60,7 @@ public void runPipeline(Context ctx) { RunDUUIPipeline.AnalysisResponse resp = pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM); - DUUIInformation DataRequest = pipeline.getModelResources(selectedModels, inputText, inputClaim, inputCoherence, inputStance, inputLLM); + 
DUUIInformation DataRequest = resp.duuiInformation; model.put("DUUI", DataRequest); model.put("SuccessRequest", true); model.put("modelGroups", DataRequest.getModelGroups()); @@ -207,10 +207,10 @@ public void callHistoryText(Context ctx) { // send to importer long corpusId = Long.parseLong(ctx.queryParam("corpusId")); // from ?corpusId=... - RunDUUIPipeline.sendToImporterViaHttp( - "http://localhost:4567/api/ie/upload/uima", - analysisId, corpusId, analysisId, null - ); + String importPath = "/api/ie/upload/uima"; + String importUrl = ctx.scheme() + "://" + ctx.host() + importPath; + + RunDUUIPipeline.sendToImporterViaHttp(importUrl, analysisId, corpusId, analysisId, null); ctx.status(200).result("CAS imported successfully for analysisId=" + analysisId); } catch (NumberFormatException nfe) { ctx.status(400).result("corpusId is required and must be a number"); From f47df35f3e5c163be94877a1883d70b820f79a28 Mon Sep 17 00:00:00 2001 From: dua Date: Thu, 2 Oct 2025 11:00:43 +0200 Subject: [PATCH 06/47] Restored importer_cache_branch changes after stash --- .../src/main/resources/corpusConfig.json | 2 +- .../src/main/resources/corpusConfig2.json | 45 +++++++++++++++++++ .../uce/web/routes/ImportExportApi.java | 12 +++-- 3 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 uce.portal/uce.common/src/main/resources/corpusConfig2.json diff --git a/uce.portal/uce.common/src/main/resources/corpusConfig.json b/uce.portal/uce.common/src/main/resources/corpusConfig.json index ba5592b2..1df15fc7 100644 --- a/uce.portal/uce.common/src/main/resources/corpusConfig.json +++ b/uce.portal/uce.common/src/main/resources/corpusConfig.json @@ -3,7 +3,7 @@ "author": "[author/owner of the corpus]", "language": "[de-DE, en-EN, ...]", "description": "", - "addToExistingCorpus": false, + "addToExistingCorpus": true, "annotations": { "annotatorMetadata": false, diff --git a/uce.portal/uce.common/src/main/resources/corpusConfig2.json 
b/uce.portal/uce.common/src/main/resources/corpusConfig2.json new file mode 100644 index 00000000..3a18c818 --- /dev/null +++ b/uce.portal/uce.common/src/main/resources/corpusConfig2.json @@ -0,0 +1,45 @@ +{ + "name": "[corpus_2]", + "author": "[author/owner of the corpus]", + "language": "[de-DE, en-EN, ...]", + "description": "", + "addToExistingCorpus": false, + + "annotations": { + "annotatorMetadata": false, + "uceMetadata": false, + "logicalLinks": false, + + "OCRPage": false, + "OCRParagraph": false, + "OCRBlock": false, + "OCRLine": false, + + "srLink": false, + "namedEntity": false, + "sentiment": false, + "emotion": false, + "geoNames": false, + "lemma": false, + "sentence": false, + "taxon": { + "annotated": false, + "//comment": "[Are the taxons annotated with biofid onthologies through the 'identifier' property?]", + "biofidOnthologyAnnotated": false + }, + "time": false, + "wikipediaLink": false, + "completeNegation": false, + "unifiedTopic": false + + }, + "other": { + "//comment": "[Is this corpus also available on https://sammlungen.ub.uni-frankfurt.de/? Either true or false]", + "availableOnFrankfurtUniversityCollection": false, + + "includeKeywordDistribution": false, + "enableEmbeddings": false, + "enableRAGBot": false, + "enableS3Storage": false + } +} \ No newline at end of file diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index bca4b9f7..5124599c 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -68,9 +68,11 @@ public void uploadUIMA(Context ctx) { // First, we need to know which corpus this document should be added to. 
var corpusId = ExceptionUtils.tryCatchLog( () -> Long.parseLong(new String(ctx.req().getPart("corpusId").getInputStream().readAllBytes(), StandardCharsets.UTF_8)), - (ex) -> logger.error("Error getting the corpusId this document should be added to. Aborting.", ex)); + (ex) -> logger.error("Error getting corpusId from request.", ex)); + if (corpusId == null) { - ctx.result("Parameter corpusId didn't exist. Without it, the document cannot be uploaded."); + ctx.status(400); + ctx.result("Parameter corpusId didn't exist; cannot upload document."); return; } @@ -85,9 +87,11 @@ public void uploadUIMA(Context ctx) { var corpus = ExceptionUtils.tryCatchLog( () -> db.getCorpusById(corpusId), - (ex) -> logger.error("Couldn't fetch corpus when uploading new document to corpusId " + corpusId, ex)); + (ex) -> logger.error("Couldn't fetch corpus with id " + corpusId, ex)); + if (corpus == null) { - ctx.result("Corpus with id " + corpusId + " wasn't found in the database; can't upload document."); + ctx.status(404); + ctx.result("Corpus with id " + corpusId + " wasn't found in the database."); return; } From e46ae617648ee6bcf1fadfbc144858c99702c6da Mon Sep 17 00:00:00 2001 From: dua Date: Thu, 2 Oct 2025 13:33:53 +0200 Subject: [PATCH 07/47] Changed corpus ID input to dropdown --- .../templates/wiki/analysisResultFragment.ftl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl index 4cbcec2c..2db15fd2 100644 --- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl +++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl @@ -1,14 +1,19 @@ <#if analysisId??>
- - + + +
\ No newline at end of file diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl index 2db15fd2..1249253a 100644 --- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl +++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl @@ -1,19 +1,19 @@ <#if analysisId??>
- - -
+ <#if DUUI??> <#if DUUI.modelGroups?has_content> diff --git a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java index 5174f85d..c34c6f15 100644 --- a/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java +++ b/uce.portal/uce.analysis/src/main/java/org/texttechnologylab/uce/analysis/RunDUUIPipeline.java @@ -29,13 +29,21 @@ public class RunDUUIPipeline { private static final AnalysisCache analysisCache = new AnalysisCache(); private static final ThreadLocal lastAnalysisIdTL = new ThreadLocal<>(); private static final Logger logger = LogManager.getLogger(RunDUUIPipeline.class); + private static final ThreadLocal currentUserIdTL = new ThreadLocal<>(); - public static AnalysisSession getCachedSession(String analysisId) { return analysisCache.get(analysisId); } + public static AnalysisSession getCachedSession(String analysisId) { + return analysisCache.get(analysisId); + } + + public static void setThreadLocalUserId(String userId) { + currentUserIdTL.set(userId); + } private static String getCurrentUserId() { // TODO: replace with your auth/session identity - return "user-unknown"; + + return currentUserIdTL.get(); } public DUUIInformation getModelResources(List modelGroups, String inputText, String claim, String coherenceText, String stanceText, String systemPrompt) throws Exception { @@ -261,6 +269,7 @@ public DUUIInformation getModelResources(List modelGroups, String inputT } String analysisId = UUID.randomUUID().toString(); String userId = getCurrentUserId(); + logger.info("[USER] Running pipeline for User: " + userId); String title = "Analysis " + Instant.now(); byte[] xmiBytes = toXmiBytes(result); @@ -354,41 +363,41 @@ public AnalysisSession get(String id) { // Retrieve a session from the cache return s; } - public void remove(String id) { map.remove(id); } //Manually remove a session 
by ID - - - public void cleanupExpired() { // cleanup all expired sessions - long now = System.currentTimeMillis(); - for (var entry : map.entrySet()) { - AnalysisSession s = entry.getValue(); - if (now - s.createdAtMillis > ttlMillis) { - map.remove(entry.getKey()); - logger.info("[CRON] Removed expired session: " + s.analysisId); - } - } - } - } - private static final java.util.concurrent.ScheduledExecutorService scheduler = //Cron job for automatic cleanup every 5 minutes - java.util.concurrent.Executors.newScheduledThreadPool(1); - - static { - scheduler.scheduleAtFixedRate(() -> { - try { - analysisCache.cleanupExpired(); - } catch (Exception e) { - logger.error("[CACHE] Cache cleanup failed: " + e.getMessage()); - } - }, 5, 5, java.util.concurrent.TimeUnit.MINUTES); - - scheduler.scheduleAtFixedRate(() -> { - logger.info("[CACHE] Running cache cleanup task..."); - analysisCache.cleanupExpired(); // your cleanup method - }, 1, 5, TimeUnit.MINUTES); - - +// public void remove(String id) { +// map.remove(id); +// } //Manually remove a session by ID +// +// +// public void cleanupExpired() { // cleanup all expired sessions +// long now = System.currentTimeMillis(); +// for (var entry : map.entrySet()) { +// AnalysisSession s = entry.getValue(); +// if (now - s.createdAtMillis > ttlMillis) { +// map.remove(entry.getKey()); +// logger.info("[CRON] Removed expired session: " + s.analysisId); +// } +// } +// } +// } +// private static final java.util.concurrent.ScheduledExecutorService scheduler = //Cron job for automatic cleanup every 5 minutes +// java.util.concurrent.Executors.newScheduledThreadPool(1); +// +// static { +// scheduler.scheduleAtFixedRate(() -> { +// try { +// analysisCache.cleanupExpired(); +// } catch (Exception e) { +// logger.error("[CACHE] Cache cleanup failed: " + e.getMessage()); +// } +// }, 5, 5, java.util.concurrent.TimeUnit.MINUTES); +// +// scheduler.scheduleAtFixedRate(() -> { +// logger.info("[CACHE] Running cache cleanup task..."); 
+// analysisCache.cleanupExpired(); // your cleanup method +// }, 1, 5, TimeUnit.MINUTES); +// +// } - - private static byte[] toXmiBytes(org.apache.uima.jcas.JCas jcas) throws Exception { java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); org.apache.uima.cas.impl.XmiCasSerializer ser = diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java index 3d2671f7..1fcfd6d9 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/uceConfig/SettingsConfig.java @@ -12,4 +12,5 @@ public class SettingsConfig { private EmbeddingsConfig embeddings; private AuthConfig authentication; private MCPConfig mcp = new MCPConfig(); + private boolean enablePathImport = false; } diff --git a/uce.portal/uce.common/src/main/resources/corpusConfig2.json b/uce.portal/uce.common/src/main/resources/corpusConfig2.json deleted file mode 100644 index 3a18c818..00000000 --- a/uce.portal/uce.common/src/main/resources/corpusConfig2.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "name": "[corpus_2]", - "author": "[author/owner of the corpus]", - "language": "[de-DE, en-EN, ...]", - "description": "", - "addToExistingCorpus": false, - - "annotations": { - "annotatorMetadata": false, - "uceMetadata": false, - "logicalLinks": false, - - "OCRPage": false, - "OCRParagraph": false, - "OCRBlock": false, - "OCRLine": false, - - "srLink": false, - "namedEntity": false, - "sentiment": false, - "emotion": false, - "geoNames": false, - "lemma": false, - "sentence": false, - "taxon": { - "annotated": false, - "//comment": "[Are the taxons annotated with biofid onthologies through the 'identifier' property?]", - "biofidOnthologyAnnotated": false - }, - "time": false, - "wikipediaLink": 
false, - "completeNegation": false, - "unifiedTopic": false - - }, - "other": { - "//comment": "[Is this corpus also available on https://sammlungen.ub.uni-frankfurt.de/? Either true or false]", - "availableOnFrankfurtUniversityCollection": false, - - "includeKeywordDistribution": false, - "enableEmbeddings": false, - "enableRAGBot": false, - "enableS3Storage": false - } -} \ No newline at end of file diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java index 26a0ba7b..16fbd9eb 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/AnalysisApi.java @@ -11,8 +11,10 @@ import org.texttechnologylab.uce.analysis.RunDUUIPipeline; import org.texttechnologylab.uce.analysis.modules.DUUIInformation; import org.texttechnologylab.uce.common.annotations.auth.Authentication; +import org.texttechnologylab.uce.common.models.authentication.UceUser; import org.texttechnologylab.uce.common.models.dto.AnalysisRequestDto; import org.texttechnologylab.uce.common.models.dto.HistoryRequestDto; +import org.texttechnologylab.uce.web.SessionManager; import java.util.HashMap; import java.util.List; @@ -56,6 +58,10 @@ public void runPipeline(Context ctx) { model.put("inputStance", inputStance); model.put("inputLLM", inputLLM); + UceUser user = SessionManager.getUserFromRequest(ctx); + String userId = (user != null) ? 
user.getUsername() : "user-unknown"; + RunDUUIPipeline.setThreadLocalUserId(userId); + RunDUUIPipeline pipeline = new RunDUUIPipeline(); RunDUUIPipeline.AnalysisResponse resp = pipeline.getModelResourcesWithHandle(selectedModels, inputText, inputClaim, diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index 8a7243f8..62231580 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -120,7 +120,7 @@ public void uploadUIMA(Context ctx) { if (acceptedContentType != null && acceptedContentType.equals("application/json")) { Map apiResult = new HashMap<>(); apiResult.put("document_id", newDocumentId); - ctx.contentType("application/json"); +// ctx.contentType("application/json"); //redundant ctx.json(apiResult); return; } @@ -147,6 +147,7 @@ public void importCorpusFromPath(Context ctx) { if (path == null || path.isBlank()) { ctx.status(400).result("Path is required"); + return; } String importId = UUID.randomUUID().toString(); @@ -167,6 +168,8 @@ public void importCorpusFromPath(Context ctx) { ctx.status(200).result("Import started. Import ID: " + importId); } catch (DatabaseOperationException e) { logger.error("Error when creating saving/updating to database" + e); + ctx.status(500).result("Database error initiating corpus import" + e.getMessage()); + } catch (Exception e) { logger.error("Error initiating corpus import", e); ctx.status(500).result("Error initiating import: " + e.getMessage()); From 62877ff8e67901965de53ba8e582948dabfd8aed Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Mon, 26 Jan 2026 01:14:33 +0100 Subject: [PATCH 10/47] New Import where Users can select which files to upload to either a new corpora or a already uploaded corpora. 
--- .../templates/corpus/corpusInspector.ftl | 11 + .../resources/templates/landing-page.ftl | 231 ++++++++++++++++++ .../org/texttechnologylab/uce/web/App.java | 3 +- .../uce/web/routes/ImportExportApi.java | 132 ++++++++++ 4 files changed, 376 insertions(+), 1 deletion(-) diff --git a/uce.portal/resources/templates/corpus/corpusInspector.ftl b/uce.portal/resources/templates/corpus/corpusInspector.ftl index 484c6790..c074f490 100644 --- a/uce.portal/resources/templates/corpus/corpusInspector.ftl +++ b/uce.portal/resources/templates/corpus/corpusInspector.ftl @@ -17,6 +17,17 @@ +
+ +
diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 3637337b..987da127 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -25,6 +25,9 @@ +
@@ -88,6 +91,7 @@ +<#--Modal for importing files via a path-->
@@ -323,6 +326,7 @@ +
diff --git a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl index 00cd6d5c..2fce37a3 100644 --- a/uce.portal/resources/templates/wiki/analysisResultFragment.ftl +++ b/uce.portal/resources/templates/wiki/analysisResultFragment.ftl @@ -26,7 +26,10 @@
<#list DUUI.textInformation.topicAVG as model>
-
${model.getModelInfo().getName()}
+
+ ${model.getModelInfo().getName()} + +
<#list model.topics as topic> <#assign opacity = topic.getScore()?string?replace(",", ".")> @@ -35,6 +38,9 @@
+
diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java index 03afde97..3469904f 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java @@ -11,6 +11,7 @@ import org.texttechnologylab.uce.common.models.corpus.*; import org.texttechnologylab.uce.common.models.corpus.emotion.Emotion; import org.texttechnologylab.uce.common.models.corpus.emotion.Feeling; +import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotion; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationLink; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationToDocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; @@ -55,6 +56,7 @@ public static SessionFactory buildSessionFactory() { metadataSources.addAnnotatedClass(Sentiment.class); metadataSources.addAnnotatedClass(Emotion.class); metadataSources.addAnnotatedClass(Feeling.class); + metadataSources.addAnnotatedClass(SentenceEmotion.class); metadataSources.addAnnotatedClass(GeoName.class); metadataSources.addAnnotatedClass(Paragraph.class); metadataSources.addAnnotatedClass(Sentence.class); diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java index 45c78768..f649a78f 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java @@ -24,6 +24,10 @@ public class Emotion extends UIMAAnnotation implements WikiModel { @JoinColumn(name = 
"emotion_id") private List feelings; + @OneToMany(mappedBy = "emotion", cascade = CascadeType.ALL, orphanRemoval = true) + private List sentenceEmotions; + + public String generateEmotionMarker() { var tooltip = ""; if (this.feelings != null && !this.feelings.isEmpty()) { diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotion.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotion.java new file mode 100644 index 00000000..bb579170 --- /dev/null +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotion.java @@ -0,0 +1,79 @@ +package org.texttechnologylab.uce.common.models.corpus.emotion; + +import lombok.Getter; +import lombok.Setter; +import org.texttechnologylab.uce.common.models.corpus.Sentence; + +import javax.persistence.*; +import java.io.Serializable; +import java.util.Objects; + +@Getter +@Setter +@Entity +@Table(name = "sentenceemotion") +@IdClass(SentenceEmotion.SentenceEmotionId.class) +public class SentenceEmotion { + + @Id + @ManyToOne(fetch = FetchType.LAZY, optional = false) + @JoinColumn(name = "sentence_id", nullable = false) + private Sentence sentence; + + @Id + @ManyToOne(fetch = FetchType.LAZY, optional = false) + @JoinColumn(name = "emotion_id", nullable = false) + private Emotion emotion; + + @Id + @Column(name = "model", nullable = false, length = 255) + private String model; + + @Id + @Column(name = "feeling", nullable = false, length = 255) + private String feeling; + + @Column(name = "value") + private Double value; + + public SentenceEmotion() {} + + public SentenceEmotion(Sentence sentence, Emotion emotion, String model, String feeling, Double value) { + this.sentence = sentence; + this.emotion = emotion; + this.model = model; + this.feeling = feeling; + this.value = value; + } + + public static class SentenceEmotionId implements Serializable { + private Long 
sentence; // references Sentence.id + private Long emotion; // references Emotion.id + private String model; + private String feeling; + + public SentenceEmotionId() {} + + public SentenceEmotionId(Long sentence, Long emotion, String model, String feeling) { + this.sentence = sentence; + this.emotion = emotion; + this.model = model; + this.feeling = feeling; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof SentenceEmotionId that)) return false; + return Objects.equals(sentence, that.sentence) + && Objects.equals(emotion, that.emotion) + && Objects.equals(model, that.model) + && Objects.equals(feeling, that.feeling); + } + + @Override + public int hashCode() { + return Objects.hash(sentence, emotion, model, feeling); + } + } +} diff --git a/uce.portal/uce.common/src/main/resources/defaultUceConfig.json b/uce.portal/uce.common/src/main/resources/defaultUceConfig.json index 3ccf0a6a..3e5462bc 100644 --- a/uce.portal/uce.common/src/main/resources/defaultUceConfig.json +++ b/uce.portal/uce.common/src/main/resources/defaultUceConfig.json @@ -144,7 +144,7 @@ ] }, "analysis": { - "enableAnalysisEngine": false + "enableAnalysisEngine": true }, "authentication": { "isActivated": false, diff --git a/uce.portal/uce.corpus-importer/logs/uce-corpus-importer-2025-11-27-1.log b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer-2025-11-27-1.log new file mode 100644 index 00000000..e03b5847 --- /dev/null +++ b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer-2025-11-27-1.log @@ -0,0 +1,66 @@ +2025-11-27 20:53:27.127 [main] INFO org.hibernate.Version - HHH000412: Hibernate ORM core version 5.6.15.Final +2025-11-27 20:53:27.228 [main] INFO org.hibernate.spatial.integration.SpatialService - HHH80000001: hibernate-spatial integration enabled : true +2025-11-27 20:53:27.265 [main] INFO org.hibernate.annotations.common.Version - HCANN000001: Hibernate Commons Annotations {5.1.2.Final} +2025-11-27 20:53:27.370 [main] WARN 
org.hibernate.orm.connections.pooling - HHH10001002: Using Hibernate built-in connection pool (not for production use!) +2025-11-27 20:53:27.370 [main] INFO org.hibernate.orm.connections.pooling - HHH10001005: using driver [null] at URL [jdbc:postgresql://localhost:8002/uce] +2025-11-27 20:53:27.371 [main] INFO org.hibernate.orm.connections.pooling - HHH10001001: Connection properties: {password=****, user=postgres} +2025-11-27 20:53:27.371 [main] INFO org.hibernate.orm.connections.pooling - HHH10001003: Autocommit mode: false +2025-11-27 20:53:27.374 [main] INFO org.hibernate.engine.jdbc.connections.internal.DriverManagerConnectionProviderImpl - HHH000115: Hibernate connection pool size: 20 (min=1) +2025-11-27 20:53:27.532 [main] INFO org.hibernate.dialect.Dialect - HHH000400: Using dialect: org.hibernate.dialect.PostgreSQL10Dialect +2025-11-27 20:53:28.416 [main] INFO org.hibernate.orm.connections.access - HHH10001501: Connection obtained from JdbcConnectionAccess [org.hibernate.engine.jdbc.env.internal.JdbcEnvironmentInitiator$ConnectionProviderJdbcConnectionAccess@1f7557fe] for (non-JTA) DDL execution was not in auto-commit mode; the Connection 'local transaction' will be committed and the Connection will be set into auto-commit mode. +2025-11-27 20:53:29.335 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Executing external database scripts from ../database/ +2025-11-27 20:53:29.337 [main] WARN org.texttechnologylab.uce.corpusimporter.App - Couldn't read the db scripts in the external database scripts folder; path wasn't found or other IO problems. +java.nio.file.NoSuchFileException: ..\database + at java.base/sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:85) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:108) ~[?:?] 
+ at java.base/sun.nio.fs.WindowsDirectoryStream.(WindowsDirectoryStream.java:86) ~[?:?] + at java.base/sun.nio.fs.WindowsFileSystemProvider.newDirectoryStream(WindowsFileSystemProvider.java:541) ~[?:?] + at java.base/java.nio.file.Files.newDirectoryStream(Files.java:482) ~[?:?] + at java.base/java.nio.file.Files.list(Files.java:3785) ~[?:?] + at org.texttechnologylab.uce.common.utils.SystemStatus.executeExternalDatabaseScripts(SystemStatus.java:38) ~[classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.lambda$main$0(App.java:44) ~[classes/:?] + at org.texttechnologylab.uce.common.exceptions.ExceptionUtils.tryCatchLog(ExceptionUtils.java:30) [classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.main(App.java:43) [classes/:?] +2025-11-27 20:53:29.341 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Finished with executing external database scripts. +2025-11-27 20:55:05.236 [main] INFO org.hibernate.Version - HHH000412: Hibernate ORM core version 5.6.15.Final +2025-11-27 20:55:05.340 [main] INFO org.hibernate.spatial.integration.SpatialService - HHH80000001: hibernate-spatial integration enabled : true +2025-11-27 20:55:05.373 [main] INFO org.hibernate.annotations.common.Version - HCANN000001: Hibernate Commons Annotations {5.1.2.Final} +2025-11-27 20:55:05.475 [main] WARN org.hibernate.orm.connections.pooling - HHH10001002: Using Hibernate built-in connection pool (not for production use!) 
+2025-11-27 20:55:05.475 [main] INFO org.hibernate.orm.connections.pooling - HHH10001005: using driver [null] at URL [jdbc:postgresql://localhost:8002/uce] +2025-11-27 20:55:05.475 [main] INFO org.hibernate.orm.connections.pooling - HHH10001001: Connection properties: {password=****, user=postgres} +2025-11-27 20:55:05.475 [main] INFO org.hibernate.orm.connections.pooling - HHH10001003: Autocommit mode: false +2025-11-27 20:55:05.477 [main] INFO org.hibernate.engine.jdbc.connections.internal.DriverManagerConnectionProviderImpl - HHH000115: Hibernate connection pool size: 20 (min=1) +2025-11-27 20:55:05.624 [main] INFO org.hibernate.dialect.Dialect - HHH000400: Using dialect: org.hibernate.dialect.PostgreSQL10Dialect +2025-11-27 20:55:06.528 [main] INFO org.hibernate.orm.connections.access - HHH10001501: Connection obtained from JdbcConnectionAccess [org.hibernate.engine.jdbc.env.internal.JdbcEnvironmentInitiator$ConnectionProviderJdbcConnectionAccess@52963839] for (non-JTA) DDL execution was not in auto-commit mode; the Connection 'local transaction' will be committed and the Connection will be set into auto-commit mode. +2025-11-27 20:55:07.427 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Executing external database scripts from ../database/ +2025-11-27 20:55:07.430 [main] WARN org.texttechnologylab.uce.corpusimporter.App - Couldn't read the db scripts in the external database scripts folder; path wasn't found or other IO problems. +java.nio.file.NoSuchFileException: ..\database + at java.base/sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:85) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:108) ~[?:?] + at java.base/sun.nio.fs.WindowsDirectoryStream.(WindowsDirectoryStream.java:86) ~[?:?] 
+ at java.base/sun.nio.fs.WindowsFileSystemProvider.newDirectoryStream(WindowsFileSystemProvider.java:541) ~[?:?] + at java.base/java.nio.file.Files.newDirectoryStream(Files.java:482) ~[?:?] + at java.base/java.nio.file.Files.list(Files.java:3785) ~[?:?] + at org.texttechnologylab.uce.common.utils.SystemStatus.executeExternalDatabaseScripts(SystemStatus.java:38) ~[classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.lambda$main$0(App.java:44) ~[classes/:?] + at org.texttechnologylab.uce.common.exceptions.ExceptionUtils.tryCatchLog(ExceptionUtils.java:30) [classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.main(App.java:43) [classes/:?] +2025-11-27 20:55:07.433 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Finished with executing external database scripts. +2025-11-27 20:55:07.524 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - + _ _ _____ _____ _____ _ +| | | / __ \| ___| |_ _| | | +| | | | / \/| |__ | | _ __ ___ _ __ ___ _ __| |_ +| | | | | | __| | || '_ ` _ \| '_ \ / _ \| '__| __| +| |_| | \__/\| |___ _| || | | | | | |_) | (_) | | | |_ + \___/ \____/\____/ \___/_| |_| |_| .__/ \___/|_| \__| + | | + |_| +2025-11-27 20:55:07.524 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Global Import Id: 07d96ad0-e619-4231-832a-e2202fc86c9d +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importer Number: 1 +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Used Threads: 1 +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importing from path: F:\Area51\UCE\corpora\my_first_corpus +2025-11-27 20:55:07.525 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Reading view: null + + diff --git a/uce.portal/uce.corpus-importer/logs/uce-corpus-importer.log b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer.log new file 
mode 100644 index 00000000..060a6efa --- /dev/null +++ b/uce.portal/uce.corpus-importer/logs/uce-corpus-importer.log @@ -0,0 +1,1650 @@ +2026-01-30 17:38:54.080 [main] INFO org.hibernate.Version - HHH000412: Hibernate ORM core version 5.6.15.Final +2026-01-30 17:38:54.188 [main] INFO org.hibernate.spatial.integration.SpatialService - HHH80000001: hibernate-spatial integration enabled : true +2026-01-30 17:38:54.225 [main] INFO org.hibernate.annotations.common.Version - HCANN000001: Hibernate Commons Annotations {5.1.2.Final} +2026-01-30 17:38:54.329 [main] WARN org.hibernate.orm.connections.pooling - HHH10001002: Using Hibernate built-in connection pool (not for production use!) +2026-01-30 17:38:54.329 [main] INFO org.hibernate.orm.connections.pooling - HHH10001005: using driver [null] at URL [jdbc:postgresql://localhost:8002/uce] +2026-01-30 17:38:54.342 [main] INFO org.hibernate.orm.connections.pooling - HHH10001001: Connection properties: {password=****, user=postgres} +2026-01-30 17:38:54.342 [main] INFO org.hibernate.orm.connections.pooling - HHH10001003: Autocommit mode: false +2026-01-30 17:38:54.345 [main] INFO org.hibernate.engine.jdbc.connections.internal.DriverManagerConnectionProviderImpl - HHH000115: Hibernate connection pool size: 20 (min=1) +2026-01-30 17:38:54.507 [main] INFO org.hibernate.dialect.Dialect - HHH000400: Using dialect: org.hibernate.dialect.PostgreSQL10Dialect +2026-01-30 17:38:55.386 [main] INFO org.hibernate.orm.connections.access - HHH10001501: Connection obtained from JdbcConnectionAccess [org.hibernate.engine.jdbc.env.internal.JdbcEnvironmentInitiator$ConnectionProviderJdbcConnectionAccess@42e4431] for (non-JTA) DDL execution was not in auto-commit mode; the Connection 'local transaction' will be committed and the Connection will be set into auto-commit mode. 
+2026-01-30 17:38:56.293 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Executing external database scripts from ../database/ +2026-01-30 17:38:56.295 [main] WARN org.texttechnologylab.uce.corpusimporter.App - Couldn't read the db scripts in the external database scripts folder; path wasn't found or other IO problems. +java.nio.file.NoSuchFileException: ..\database + at java.base/sun.nio.fs.WindowsException.translateToIOException(WindowsException.java:85) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:103) ~[?:?] + at java.base/sun.nio.fs.WindowsException.rethrowAsIOException(WindowsException.java:108) ~[?:?] + at java.base/sun.nio.fs.WindowsDirectoryStream.(WindowsDirectoryStream.java:86) ~[?:?] + at java.base/sun.nio.fs.WindowsFileSystemProvider.newDirectoryStream(WindowsFileSystemProvider.java:541) ~[?:?] + at java.base/java.nio.file.Files.newDirectoryStream(Files.java:482) ~[?:?] + at java.base/java.nio.file.Files.list(Files.java:3785) ~[?:?] + at org.texttechnologylab.uce.common.utils.SystemStatus.executeExternalDatabaseScripts(SystemStatus.java:38) ~[classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.lambda$main$0(App.java:44) ~[classes/:?] + at org.texttechnologylab.uce.common.exceptions.ExceptionUtils.tryCatchLog(ExceptionUtils.java:30) [classes/:?] + at org.texttechnologylab.uce.corpusimporter.App.main(App.java:43) [classes/:?] +2026-01-30 17:38:56.299 [main] INFO org.texttechnologylab.uce.corpusimporter.App - Finished with executing external database scripts. 
+2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - + _ _ _____ _____ _____ _ +| | | / __ \| ___| |_ _| | | +| | | | / \/| |__ | | _ __ ___ _ __ ___ _ __| |_ +| | | | | | __| | || '_ ` _ \| '_ \ / _ \| '__| __| +| |_| | \__/\| |___ _| || | | | | | |_) | (_) | | | |_ + \___/ \____/\____/ \___/_| |_| |_| .__/ \___/|_| \__| + | | + |_| +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Global Import Id: f3ff8f20-2b53-4dc9-9b09-81964b9a3a8c +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importer Number: 1 +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Used Threads: 1 +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Importing from path: F:\Area51\UCE\corpora\my_first_corpus +2026-01-30 17:38:56.316 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - ===========> Reading view: null + + +2026-01-30 17:38:56.345 [main] WARN org.hibernate.orm.deprecation - HHH90000022: Hibernate's legacy org.hibernate.Criteria API is deprecated; use the JPA javax.persistence.criteria.CriteriaQuery instead +2026-01-30 17:38:57.584 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:38:57.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:38:57.606 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:38:57.678 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. 
+2026-01-30 17:38:57.679 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:38:57.680 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:38:57.681 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:38:57.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:38:57.710 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:38:57.712 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:38:57.717 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:38:57.717 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:38:57.717 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id 23-year-old Ukrainian refugee killed on North Carolina transit system.json... +2026-01-30 17:38:59.940 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document 23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:38:59.940 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:38:59.946 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:38:59.950 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\23-year-old Ukrainian refugee killed on North Carolina transit system.json.xmi.gz.xmi.gz +2026-01-30 17:39:00.025 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:00.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:00.050 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json +2026-01-30 17:39:00.054 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:00.054 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:00.054 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:00.055 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:00.056 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:00.061 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:00.061 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:00.064 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:00.064 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:00.064 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json... +2026-01-30 17:39:00.954 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:01.094 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.094 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:01.098 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.101 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:01.323 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:01.327 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Adams case and other Trump moves threaten to open corruption floodgates, experts say.json +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:01.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:01.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:01.362 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:01.363 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:01.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:01.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:01.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Adams case and other Trump moves threaten to open corruption floodgates, experts say.json... +2026-01-30 17:39:01.673 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:05.728 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.728 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:05.732 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.737 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Adams case and other Trump moves threaten to open corruption floodgates, experts say.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.781 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:05.785 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:05.788 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json +2026-01-30 17:39:05.791 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:05.791 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:05.791 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:05.792 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:05.793 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:05.797 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:05.798 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:05.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:05.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:05.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json... +2026-01-30 17:39:07.020 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.020 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:07.024 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.028 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.095 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:07.099 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:07.124 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Agency that handles green cards and citizenship to hire armed agents who can make arrests.json +2026-01-30 17:39:07.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:07.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:07.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:07.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:07.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:07.148 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:07.149 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:07.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:07.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:07.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Agency that handles green cards and citizenship to hire armed agents who can make arrests.json... +2026-01-30 17:39:07.614 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:07.670 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:09.302 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.302 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:09.306 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.311 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Agency that handles green cards and citizenship to hire armed agents who can make arrests.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.342 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:09.345 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:09.349 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json +2026-01-30 17:39:09.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:09.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:09.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:09.353 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:09.354 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:09.361 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:09.361 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:09.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:09.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:09.366 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json... +2026-01-30 17:39:10.270 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:10.623 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.624 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:10.628 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.632 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.668 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:10.670 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:10.672 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json +2026-01-30 17:39:10.674 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:10.675 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:10.681 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:10.681 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:10.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:10.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:10.684 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json... +2026-01-30 17:39:11.151 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:12.346 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.346 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:12.348 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.352 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.368 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:12.370 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:12.372 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:12.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:12.377 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:12.378 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:12.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:12.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:12.381 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json... +2026-01-30 17:39:12.982 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:13.154 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.155 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:13.156 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.162 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.204 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:13.206 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:13.209 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Biden administration has no plans to fine companies if TikTok ban goes into effect.json +2026-01-30 17:39:13.211 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:13.212 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:13.217 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:13.217 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:13.220 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:13.221 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:13.221 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Biden administration has no plans to fine companies if TikTok ban goes into effect.json... +2026-01-30 17:39:13.467 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:15.004 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.004 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:15.006 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.009 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Biden administration has no plans to fine companies if TikTok ban goes into effect.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:15.033 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:15.036 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:15.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:15.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:15.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:15.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:15.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:15.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:15.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json... +2026-01-30 17:39:15.745 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:16.236 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.236 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:16.238 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.241 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Bruce Springsteen opens U.K. 
tour by calling Trump 'unfit' for office.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:16.272 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:16.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:16.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:16.280 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:16.280 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:16.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:16.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:39:16.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json... +2026-01-30 17:39:16.745 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:17.126 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.126 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:17.130 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.132 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.157 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:17.159 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:39:17.161 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Civil rights agency sued over handling of trans worker discrimination complaints.json +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:17.163 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:17.165 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:17.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:17.168 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:17.168 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:17.168 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Civil rights agency sued over handling of trans worker discrimination complaints.json... +2026-01-30 17:39:17.507 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:18.438 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.438 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:18.441 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.444 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Civil rights agency sued over handling of trans worker discrimination complaints.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.465 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:18.466 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:18.470 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing DHS has begun performing polygraph tests on employees to find leakers.json +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:18.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:18.474 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:18.474 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:18.478 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:18.478 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:18.478 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id DHS has begun performing polygraph tests on employees to find leakers.json... +2026-01-30 17:39:19.007 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:19.246 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.246 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:19.248 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.251 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\DHS has begun performing polygraph tests on employees to find leakers.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:19.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:19.278 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json +2026-01-30 17:39:19.280 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:19.281 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:19.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:19.283 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:19.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:19.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:19.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json... +2026-01-30 17:39:19.571 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:20.504 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.504 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:20.506 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.509 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Elon Musk turns on Nigel Farage, calls on him to step down as U.K. 
party leader.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.560 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:20.562 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:20.564 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing How much money you should save for a comfortable retirement.json +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:20.566 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:20.568 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:20.568 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:20.571 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:20.571 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:39:20.571 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id How much money you should save for a comfortable retirement.json... +2026-01-30 17:39:20.952 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:23.767 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.767 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:23.771 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.774 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\How much money you should save for a comfortable retirement.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.796 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:23.797 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:23.800 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. 
+2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:23.802 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:23.804 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:23.804 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:23.807 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:23.807 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:23.807 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json... +2026-01-30 17:39:24.940 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:24.953 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:24.954 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:24.956 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:24.959 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:24.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:24.991 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:24.993 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Lakers star Luka Dončić says he took a month off from basketball to transform his body.json +2026-01-30 17:39:24.995 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:24.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:24.998 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:24.998 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:25.001 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:25.001 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:25.001 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Lakers star Luka Dončić says he took a month off from basketball to transform his body.json... +2026-01-30 17:39:25.347 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:26.238 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.238 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:26.241 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.243 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Lakers star Luka Dončić says he took a month off from basketball to transform his body.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.251 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:26.252 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:26.255 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Musk's brain implant company filed as a 'disadvantaged business'.json +2026-01-30 17:39:26.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:26.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:26.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:26.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:26.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:26.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:26.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:26.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:26.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:26.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Musk's brain implant company filed as a 'disadvantaged business'.json... +2026-01-30 17:39:26.817 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:27.492 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.492 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:27.495 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.497 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Musk's brain implant company filed as a 'disadvantaged business'.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.522 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:27.524 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:27.527 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:27.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:27.531 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:27.531 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:27.533 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:27.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:27.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json... +2026-01-30 17:39:28.206 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:29.243 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.243 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:29.246 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.249 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:29.257 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:29.260 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing New York Jets to sign QB Justin Fields, according to reports.json +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:29.262 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:29.263 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:29.265 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.265 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:29.265 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id New York Jets to sign QB Justin Fields, according to reports.json... +2026-01-30 17:39:29.789 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:29.825 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.826 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:29.829 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.832 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\New York Jets to sign QB Justin Fields, according to reports.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.848 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:29.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:29.852 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json +2026-01-30 17:39:29.853 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:29.853 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:29.853 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:29.854 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:29.854 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:29.855 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:29.855 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. 
+2026-01-30 17:39:29.858 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:29.858 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:29.858 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json... +2026-01-30 17:39:30.039 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:30.867 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.867 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:30.869 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.872 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.892 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:39:30.893 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:30.895 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:30.897 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:30.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:30.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:30.901 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:30.901 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:30.901 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json... 
+2026-01-30 17:39:31.310 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:31.988 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:31.988 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:31.990 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:31.992 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json.xmi.gz.xmi.gz +2026-01-30 17:39:32.022 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:32.023 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:32.026 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Sen. Michael Bennet will run for governor of Colorado in 2026.json +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. 
+2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:32.028 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:32.029 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:32.029 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:32.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:32.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:32.032 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Sen. Michael Bennet will run for governor of Colorado in 2026.json... +2026-01-30 17:39:32.455 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:33.698 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.698 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:33.700 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.703 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Sen. Michael Bennet will run for governor of Colorado in 2026.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.755 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:33.757 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:33.760 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:33.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:33.763 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:33.765 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:33.766 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:33.769 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:33.769 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:33.769 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json... +2026-01-30 17:39:34.440 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:36.454 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.454 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:36.457 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.460 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.525 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:36.527 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:36.530 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Target says its holiday sales were better than expected — but its profits weren't.json +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:36.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:36.533 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:36.535 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:36.535 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:36.538 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:36.538 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:36.538 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Target says its holiday sales were better than expected — but its profits weren't.json... +2026-01-30 17:39:37.556 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:39.092 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.093 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:39.095 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.098 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Target says its holiday sales were better than expected — but its profits weren't.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.155 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:39.157 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:39.159 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 14 best toothpastes for clean, healthy teeth in 2025.json +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:39.162 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:39.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:39.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:39.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:39.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:39.166 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id The 14 best toothpastes for clean, healthy teeth in 2025.json... +2026-01-30 17:39:40.127 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:41.418 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.418 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:41.420 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.423 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 14 best toothpastes for clean, healthy teeth in 2025.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.528 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:41.532 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:41.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:41.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:41.537 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:41.540 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:41.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. 
+2026-01-30 17:39:41.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:41.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:41.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json... +2026-01-30 17:39:42.345 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:46.996 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:46.996 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:46.999 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:47.002 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\The 50 best white t-shirts tested and ranked, according to NBC Select editors.json.xmi.gz.xmi.gz +2026-01-30 17:39:47.034 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:47.035 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:47.037 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing These three LGBTQ women just made congressional 'herstory'.json +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:47.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:47.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:47.041 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:47.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:47.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:47.044 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id These three LGBTQ women just made congressional 'herstory'.json... +2026-01-30 17:39:48.227 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.227 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:48.229 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.232 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\These three LGBTQ women just made congressional 'herstory'.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.248 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:39:48.248 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:48.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Thousands of U.S. seniors deal with the harsh realities of homelessness.json +2026-01-30 17:39:48.255 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:48.256 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:48.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:48.258 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:48.260 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:48.261 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:48.261 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Thousands of U.S. seniors deal with the harsh realities of homelessness.json... 
+2026-01-30 17:39:48.748 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:49.048 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.048 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:49.050 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.052 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Thousands of U.S. seniors deal with the harsh realities of homelessness.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.100 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:49.102 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:49.104 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json +2026-01-30 17:39:49.105 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. 
+2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:49.106 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:49.108 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:49.108 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:49.110 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:49.110 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:49.110 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json... +2026-01-30 17:39:49.195 [ForkJoinPool.commonPool-worker-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:49.358 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:51.251 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:51.251 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:51.255 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:51.257 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json.xmi.gz.xmi.gz +2026-01-30 17:39:51.328 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:51.329 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:51.331 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump wants automakers to move vehicle production to the U.S. It's not that simple..json +2026-01-30 17:39:51.333 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:51.333 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:51.334 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:51.334 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:51.334 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:51.336 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:51.337 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:51.339 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:51.339 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:51.339 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Trump wants automakers to move vehicle production to the U.S. It's not that simple..json... +2026-01-30 17:39:52.155 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:54.184 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:54.184 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:54.186 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:54.190 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump wants automakers to move vehicle production to the U.S. It's not that simple..json.xmi.gz.xmi.gz +2026-01-30 17:39:54.229 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:54.230 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:54.233 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json +2026-01-30 17:39:54.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:54.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:54.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:54.236 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:54.236 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:54.237 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:54.237 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:54.239 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:54.239 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:54.239 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json... +2026-01-30 17:39:55.349 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:55.742 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.742 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:39:55.745 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.748 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.774 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:55.775 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:55.777 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:55.779 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:55.780 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:39:55.780 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:55.782 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:55.782 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:55.782 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json... +2026-01-30 17:39:56.331 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:57.112 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.112 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:57.114 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.117 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\U.S. 
envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:57.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:57.132 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:57.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:57.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:57.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:57.137 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.137 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:39:57.137 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json... +2026-01-30 17:39:57.664 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:39:57.833 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.833 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:57.835 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.837 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json.xmi.gz.xmi.gz +2026-01-30 17:39:57.871 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:57.872 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:57.875 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Utah lawmakers said gender-affirming care is harmful to kids. 
Their own study contradicts that claim..json +2026-01-30 17:39:57.877 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:57.877 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:57.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:39:57.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:57.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:57.879 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:57.880 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:57.882 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:57.883 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:57.883 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json... +2026-01-30 17:39:58.114 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. 
+2026-01-30 17:39:59.606 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:59.606 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. +2026-01-30 17:39:59.608 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:59.610 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json.xmi.gz.xmi.gz +2026-01-30 17:39:59.630 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:39:59.631 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:39:59.634 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Waymo's robotaxis to start carrying passengers in Atlanta.json +2026-01-30 17:39:59.635 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:39:59.635 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:39:59.636 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. 
+2026-01-30 17:39:59.636 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:39:59.636 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:39:59.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. +2026-01-30 17:39:59.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:39:59.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:39:59.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:39:59.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id Waymo's robotaxis to start carrying passengers in Atlanta.json... +2026-01-30 17:40:00.332 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:40:00.675 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.675 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:40:00.678 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.680 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\Waymo's robotaxis to start carrying passengers in Atlanta.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.719 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:00.721 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:00.723 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting full text done. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - No DocumentAnnotation found. Skipping this annotation then. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting sentences done. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Named-Entities done. +2026-01-30 17:40:00.724 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Emotions done. +2026-01-30 17:40:00.726 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Lemmas done. 
+2026-01-30 17:40:00.726 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting synthetic pages done. +2026-01-30 17:40:00.729 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully extracted all annotations from F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:00.729 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:00.729 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Trying to store document with document id ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json... +2026-01-30 17:40:01.094 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:40:02.532 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Stored document ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:02.532 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with the UIMA annotations - postprocessing the doc now. 
+2026-01-30 17:40:02.534 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:02.537 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Successfully post processed document F:\Area51\UCE\corpora\my_first_corpus\input\emotion\bert-emotion\‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json.xmi.gz.xmi.gz +2026-01-30 17:40:02.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.572 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:40:02.576 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id 23-year-old Ukrainian refugee killed on North Carolina transit system.json already exists in the corpus 8. +2026-01-30 17:40:02.576 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.615 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.615 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.615 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:02.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.637 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.641 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json +2026-01-30 17:40:02.643 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json already exists in the corpus 8. +2026-01-30 17:40:02.643 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.754 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.758 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:02.762 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Adams case and other Trump moves threaten to open corruption floodgates, experts say.json +2026-01-30 17:40:02.764 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Adams case and other Trump moves threaten to open corruption floodgates, experts say.json already exists in the corpus 8. +2026-01-30 17:40:02.764 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.783 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.783 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.783 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.812 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.813 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.817 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json +2026-01-30 17:40:02.819 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json already exists in the corpus 8. +2026-01-30 17:40:02.819 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:02.837 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.837 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.837 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.878 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.879 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.882 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Agency that handles green cards and citizenship to hire armed agents who can make arrests.json +2026-01-30 17:40:02.884 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Agency that handles green cards and citizenship to hire armed agents who can make arrests.json already exists in the corpus 8. +2026-01-30 17:40:02.884 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.899 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.927 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:02.928 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.930 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json +2026-01-30 17:40:02.931 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json already exists in the corpus 8. +2026-01-30 17:40:02.931 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.942 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.942 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.942 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:02.976 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:02.976 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:02.980 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json +2026-01-30 17:40:02.982 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json already exists in the corpus 8. 
+2026-01-30 17:40:02.982 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:02.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:02.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:02.996 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.019 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.022 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json +2026-01-30 17:40:03.024 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json already exists in the corpus 8. +2026-01-30 17:40:03.024 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.039 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:03.085 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.086 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.088 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Biden administration has no plans to fine companies if TikTok ban goes into effect.json +2026-01-30 17:40:03.089 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Biden administration has no plans to fine companies if TikTok ban goes into effect.json already exists in the corpus 8. +2026-01-30 17:40:03.089 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.100 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.131 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.133 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json +2026-01-30 17:40:03.134 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Bruce Springsteen opens U.K. 
tour by calling Trump 'unfit' for office.json already exists in the corpus 8. +2026-01-30 17:40:03.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.147 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.147 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.147 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.170 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.170 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.172 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json +2026-01-30 17:40:03.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json already exists in the corpus 8. +2026-01-30 17:40:03.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.186 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.186 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.186 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:03.228 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.231 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.233 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Civil rights agency sued over handling of trans worker discrimination complaints.json +2026-01-30 17:40:03.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Civil rights agency sued over handling of trans worker discrimination complaints.json already exists in the corpus 8. +2026-01-30 17:40:03.235 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.249 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.249 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.249 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.269 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.269 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.272 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing DHS has begun performing polygraph tests on employees to find leakers.json +2026-01-30 17:40:03.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id DHS has begun performing polygraph tests on employees to find leakers.json already exists in the corpus 8. +2026-01-30 17:40:03.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.286 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.286 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.286 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.314 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.317 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json +2026-01-30 17:40:03.319 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json already exists in the corpus 8. +2026-01-30 17:40:03.319 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.330 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.330 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.330 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.375 [ForkJoinPool.commonPool-worker-2] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with import. +2026-01-30 17:40:03.386 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.387 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.390 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing How much money you should save for a comfortable retirement.json +2026-01-30 17:40:03.393 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id How much money you should save for a comfortable retirement.json already exists in the corpus 8. +2026-01-30 17:40:03.394 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.410 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.410 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.410 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.448 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.449 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.453 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json +2026-01-30 17:40:03.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:03.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.472 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.499 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.501 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Lakers star Luka Dončić says he took a month off from basketball to transform his body.json +2026-01-30 17:40:03.503 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Lakers star Luka Dončić says he took a month off from basketball to transform his body.json already exists in the corpus 8. +2026-01-30 17:40:03.503 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:03.513 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.513 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.513 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.536 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.537 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.540 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Musk's brain implant company filed as a 'disadvantaged business'.json +2026-01-30 17:40:03.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Musk's brain implant company filed as a 'disadvantaged business'.json already exists in the corpus 8. +2026-01-30 17:40:03.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.549 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.549 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.549 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.576 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.577 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.580 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json +2026-01-30 17:40:03.582 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json already exists in the corpus 8. +2026-01-30 17:40:03.582 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.589 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.589 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.589 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.605 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.606 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing New York Jets to sign QB Justin Fields, according to reports.json +2026-01-30 17:40:03.611 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id New York Jets to sign QB Justin Fields, according to reports.json already exists in the corpus 8. +2026-01-30 17:40:03.611 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.621 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.622 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.622 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.645 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.646 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.650 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json +2026-01-30 17:40:03.652 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json already exists in the corpus 8. +2026-01-30 17:40:03.652 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.662 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.662 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.662 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.690 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.691 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.694 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json +2026-01-30 17:40:03.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json already exists in the corpus 8. +2026-01-30 17:40:03.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.705 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.705 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.705 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.739 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.740 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.744 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Sen. Michael Bennet will run for governor of Colorado in 2026.json +2026-01-30 17:40:03.745 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Sen. Michael Bennet will run for governor of Colorado in 2026.json already exists in the corpus 8. +2026-01-30 17:40:03.745 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.753 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.753 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.753 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.804 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.805 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.808 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json +2026-01-30 17:40:03.810 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json already exists in the corpus 8. +2026-01-30 17:40:03.810 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.820 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.821 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.821 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.885 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.887 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:03.890 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Target says its holiday sales were better than expected — but its profits weren't.json +2026-01-30 17:40:03.892 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Target says its holiday sales were better than expected — but its profits weren't.json already exists in the corpus 8. +2026-01-30 17:40:03.892 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.903 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:03.903 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.903 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:03.952 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:03.953 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:03.956 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 14 best toothpastes for clean, healthy teeth in 2025.json +2026-01-30 17:40:03.957 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 14 best toothpastes for clean, healthy teeth in 2025.json already exists in the corpus 8. +2026-01-30 17:40:03.957 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:03.965 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:03.965 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:03.965 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.080 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.083 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.087 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json +2026-01-30 17:40:04.090 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json already exists in the corpus 8. +2026-01-30 17:40:04.090 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.101 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.129 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.130 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:04.132 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing These three LGBTQ women just made congressional 'herstory'.json +2026-01-30 17:40:04.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id These three LGBTQ women just made congressional 'herstory'.json already exists in the corpus 8. +2026-01-30 17:40:04.135 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.142 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.142 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.143 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.161 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Thousands of U.S. seniors deal with the harsh realities of homelessness.json +2026-01-30 17:40:04.165 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Thousands of U.S. seniors deal with the harsh realities of homelessness.json already exists in the corpus 8. +2026-01-30 17:40:04.165 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.173 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:04.173 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.173 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.225 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.226 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.229 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json +2026-01-30 17:40:04.230 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json already exists in the corpus 8. +2026-01-30 17:40:04.230 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.238 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.238 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.238 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.302 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.304 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:04.309 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump wants automakers to move vehicle production to the U.S. It's not that simple..json +2026-01-30 17:40:04.310 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump wants automakers to move vehicle production to the U.S. It's not that simple..json already exists in the corpus 8. +2026-01-30 17:40:04.310 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.366 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.367 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.370 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json +2026-01-30 17:40:04.372 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json already exists in the corpus 8. +2026-01-30 17:40:04.372 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:04.382 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.382 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.382 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.413 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.413 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.416 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json +2026-01-30 17:40:04.417 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json already exists in the corpus 8. +2026-01-30 17:40:04.417 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.424 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.424 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.424 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.442 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:04.442 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.445 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json +2026-01-30 17:40:04.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:04.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.455 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.455 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.455 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.492 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.493 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.497 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json +2026-01-30 17:40:04.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json already exists in the corpus 8. 
+2026-01-30 17:40:04.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.506 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.506 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.506 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.539 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.540 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.544 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Waymo's robotaxis to start carrying passengers in Atlanta.json +2026-01-30 17:40:04.545 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Waymo's robotaxis to start carrying passengers in Atlanta.json already exists in the corpus 8. +2026-01-30 17:40:04.545 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.598 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:04.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.602 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json +2026-01-30 17:40:04.604 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json already exists in the corpus 8. +2026-01-30 17:40:04.604 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.614 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.614 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.614 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.651 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.652 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:40:04.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id 23-year-old Ukrainian refugee killed on North Carolina transit system.json already exists in the corpus 8. 
+2026-01-30 17:40:04.657 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.665 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.665 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.665 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.706 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.707 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.711 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing 23-year-old Ukrainian refugee killed on North Carolina transit system.json +2026-01-30 17:40:04.712 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id 23-year-old Ukrainian refugee killed on North Carolina transit system.json already exists in the corpus 8. +2026-01-30 17:40:04.712 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.721 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.722 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.722 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.740 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:04.740 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.744 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json +2026-01-30 17:40:04.746 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id A 10-year-old boy in Tokyo ended up with Shohei Ohtani's first home run of the season.json already exists in the corpus 8. +2026-01-30 17:40:04.746 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.756 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.756 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.756 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.835 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.836 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.840 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Adams case and other Trump moves threaten to open corruption floodgates, experts say.json +2026-01-30 17:40:04.841 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Adams case and other Trump moves threaten to open corruption floodgates, experts say.json already exists in the corpus 8. 
+2026-01-30 17:40:04.841 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.849 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.876 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.876 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.879 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json +2026-01-30 17:40:04.880 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id After a couple struck a deer in Alabama, a fire chief who stopped to help was fatally shot.json already exists in the corpus 8. +2026-01-30 17:40:04.880 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:04.926 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.927 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:04.931 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Agency that handles green cards and citizenship to hire armed agents who can make arrests.json +2026-01-30 17:40:04.933 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Agency that handles green cards and citizenship to hire armed agents who can make arrests.json already exists in the corpus 8. +2026-01-30 17:40:04.933 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.943 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.943 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.943 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:04.971 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:04.972 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:04.976 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json +2026-01-30 17:40:04.977 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Amazon apologizes to Mandy Moore after package is delivered to ruins of in-laws' California home.json already exists in the corpus 8. +2026-01-30 17:40:04.977 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:04.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:04.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:04.989 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.026 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.027 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.029 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json +2026-01-30 17:40:05.030 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id As Trump considers ways to dismantle the Education Deparment, here's what to know about your student loans.json already exists in the corpus 8. +2026-01-30 17:40:05.030 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:05.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.038 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.059 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.060 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.063 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json +2026-01-30 17:40:05.065 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Belgium's future queen caught up in Trump administration's Harvard foreign student ban effort.json already exists in the corpus 8. +2026-01-30 17:40:05.065 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.071 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.071 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.071 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.115 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:05.115 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.119 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Biden administration has no plans to fine companies if TikTok ban goes into effect.json +2026-01-30 17:40:05.120 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Biden administration has no plans to fine companies if TikTok ban goes into effect.json already exists in the corpus 8. +2026-01-30 17:40:05.120 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.128 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.157 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.158 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.161 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json +2026-01-30 17:40:05.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Bruce Springsteen opens U.K. tour by calling Trump 'unfit' for office.json already exists in the corpus 8. 
+2026-01-30 17:40:05.164 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.174 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.195 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.196 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.199 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json +2026-01-30 17:40:05.201 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Charlie Javice, college financial aid startup founder, found guilty of defrauding JPMorgan.json already exists in the corpus 8. +2026-01-30 17:40:05.201 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.210 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.210 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.210 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. 
+ + + +2026-01-30 17:40:05.242 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.242 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.245 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Civil rights agency sued over handling of trans worker discrimination complaints.json +2026-01-30 17:40:05.247 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Civil rights agency sued over handling of trans worker discrimination complaints.json already exists in the corpus 8. +2026-01-30 17:40:05.247 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.254 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.274 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing DHS has begun performing polygraph tests on employees to find leakers.json +2026-01-30 17:40:05.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id DHS has begun performing polygraph tests on employees to find leakers.json already exists in the corpus 8. +2026-01-30 17:40:05.276 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.282 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.282 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.282 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.309 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.310 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.314 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json +2026-01-30 17:40:05.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Elon Musk turns on Nigel Farage, calls on him to step down as U.K. party leader.json already exists in the corpus 8. +2026-01-30 17:40:05.315 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.371 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.374 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.378 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing How much money you should save for a comfortable retirement.json +2026-01-30 17:40:05.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id How much money you should save for a comfortable retirement.json already exists in the corpus 8. +2026-01-30 17:40:05.380 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.389 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.389 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.389 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.411 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.413 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.416 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json +2026-01-30 17:40:05.418 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Israeli strikes kill 14 in Gaza in one day as negotiators work to uphold fragile ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:05.418 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.428 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.428 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.428 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.453 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.453 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.457 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Lakers star Luka Dončić says he took a month off from basketball to transform his body.json +2026-01-30 17:40:05.458 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Lakers star Luka Dončić says he took a month off from basketball to transform his body.json already exists in the corpus 8. +2026-01-30 17:40:05.458 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:05.468 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.468 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.468 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.490 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.490 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.493 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Musk's brain implant company filed as a 'disadvantaged business'.json +2026-01-30 17:40:05.495 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Musk's brain implant company filed as a 'disadvantaged business'.json already exists in the corpus 8. +2026-01-30 17:40:05.495 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.502 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.502 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.502 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.529 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.530 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.533 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json +2026-01-30 17:40:05.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Naomi Osaka tears up after first-round French Open loss to Paula Badosa.json already exists in the corpus 8. +2026-01-30 17:40:05.534 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.541 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.542 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.542 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.556 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.560 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing New York Jets to sign QB Justin Fields, according to reports.json +2026-01-30 17:40:05.561 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id New York Jets to sign QB Justin Fields, according to reports.json already exists in the corpus 8. +2026-01-30 17:40:05.561 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.570 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.592 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.593 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.597 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json +2026-01-30 17:40:05.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Phillies star Bryce Harper uses a blue bat in gender reveal for his child.json already exists in the corpus 8. +2026-01-30 17:40:05.599 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.609 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.638 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.641 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json +2026-01-30 17:40:05.644 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Russian strikes batter Ukraine as Zelenskyy accuses Putin of stalling peace talks.json already exists in the corpus 8. +2026-01-30 17:40:05.644 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.655 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.691 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.691 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.695 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Sen. Michael Bennet will run for governor of Colorado in 2026.json +2026-01-30 17:40:05.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Sen. Michael Bennet will run for governor of Colorado in 2026.json already exists in the corpus 8. +2026-01-30 17:40:05.697 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.704 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.704 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.704 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.754 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.755 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.758 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json +2026-01-30 17:40:05.759 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Suspect pleads guilty in Highland Park mass shooting at July Fourth parade.json already exists in the corpus 8. +2026-01-30 17:40:05.760 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.767 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.767 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.767 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.818 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.819 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:05.822 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Target says its holiday sales were better than expected — but its profits weren't.json +2026-01-30 17:40:05.824 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Target says its holiday sales were better than expected — but its profits weren't.json already exists in the corpus 8. +2026-01-30 17:40:05.824 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.833 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:05.833 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.833 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:05.881 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:05.882 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:05.886 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 14 best toothpastes for clean, healthy teeth in 2025.json +2026-01-30 17:40:05.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 14 best toothpastes for clean, healthy teeth in 2025.json already exists in the corpus 8. +2026-01-30 17:40:05.888 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:05.900 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:05.900 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:05.900 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.002 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.005 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.009 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json +2026-01-30 17:40:06.010 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id The 50+ best white t-shirts tested and ranked, according to NBC Select editors.json already exists in the corpus 8. +2026-01-30 17:40:06.010 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.018 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.047 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.048 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:06.051 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing These three LGBTQ women just made congressional 'herstory'.json +2026-01-30 17:40:06.052 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id These three LGBTQ women just made congressional 'herstory'.json already exists in the corpus 8. +2026-01-30 17:40:06.052 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.062 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.062 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.062 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.080 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.081 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.084 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Thousands of U.S. seniors deal with the harsh realities of homelessness.json +2026-01-30 17:40:06.087 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Thousands of U.S. seniors deal with the harsh realities of homelessness.json already exists in the corpus 8. +2026-01-30 17:40:06.087 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.096 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. 
+2026-01-30 17:40:06.096 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.096 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.148 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.149 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.152 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json +2026-01-30 17:40:06.153 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump administration uses King's 'Dream' speech to introduce executive orders cutting DEI.json already exists in the corpus 8. +2026-01-30 17:40:06.153 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.160 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.217 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.218 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. 
+2026-01-30 17:40:06.223 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump wants automakers to move vehicle production to the U.S. It's not that simple..json +2026-01-30 17:40:06.224 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump wants automakers to move vehicle production to the U.S. It's not that simple..json already exists in the corpus 8. +2026-01-30 17:40:06.224 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.234 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.234 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.234 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.267 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.268 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.271 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json +2026-01-30 17:40:06.273 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Trump's 'big, beautiful bill' includes these key tax changes for 2025 — what they mean for you.json already exists in the corpus 8. +2026-01-30 17:40:06.273 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... 
+2026-01-30 17:40:06.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.285 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.318 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.319 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.322 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json +2026-01-30 17:40:06.324 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id U.S. envoy Steve Witkoff will travel to Israel to address humanitarian crisis in Gaza.json already exists in the corpus 8. +2026-01-30 17:40:06.324 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.332 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.352 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:06.353 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.355 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json +2026-01-30 17:40:06.357 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Ukraine and allies discuss ways to pressure Russia into 30-day ceasefire.json already exists in the corpus 8. +2026-01-30 17:40:06.357 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.365 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.401 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.402 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.406 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json +2026-01-30 17:40:06.408 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Utah lawmakers said gender-affirming care is harmful to kids. Their own study contradicts that claim..json already exists in the corpus 8. 
+2026-01-30 17:40:06.408 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.415 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.415 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.415 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.441 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. =============================== +2026-01-30 17:40:06.442 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.445 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing Waymo's robotaxis to start carrying passengers in Atlanta.json +2026-01-30 17:40:06.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id Waymo's robotaxis to start carrying passengers in Atlanta.json already exists in the corpus 8. +2026-01-30 17:40:06.447 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.456 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.498 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - =============================== Importing a new CAS as a Document. 
=============================== +2026-01-30 17:40:06.500 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Setting Metadata done. +2026-01-30 17:40:06.503 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Importing ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json +2026-01-30 17:40:06.505 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document with id ‘Call Her Daddy’ host Alex Cooper claims college soccer coach sexually harassed her.json already exists in the corpus 8. +2026-01-30 17:40:06.505 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Checking if that document was also post-processed yet... +2026-01-30 17:40:06.517 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Document was already post-processed. +2026-01-30 17:40:06.517 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done. +2026-01-30 17:40:06.517 [pool-2-thread-1] INFO org.texttechnologylab.uce.corpusimporter.Importer - Finished with importing that CAS. + + + +2026-01-30 17:40:06.890 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - Postprocessing the Corpus GerParCor_Reichstag +2026-01-30 17:40:06.890 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - Done with the corpus postprocessing. +2026-01-30 17:40:06.890 [main] INFO org.texttechnologylab.uce.corpusimporter.Importer - + +================================= + Done with the corpus import. 
diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index 80b27310..4d98c653 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -41,6 +41,7 @@ import org.texttechnologylab.uce.common.models.biofid.GnFinderTaxon; import org.texttechnologylab.uce.common.models.corpus.*; import org.texttechnologylab.uce.common.models.corpus.emotion.Feeling; +import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotion; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationLink; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationToDocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; @@ -481,6 +482,34 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath) { return XMIToDocument(jCas, corpus, filePath, null); } + private void linkSentenceEmotions(Document document) { + if (document.getSentences() == null || document.getEmotions() == null) return; + + var sentenceBySpan = new java.util.HashMap(); + for (var s : document.getSentences()) { + sentenceBySpan.put(s.getBegin() + ":" + s.getEnd(), s); + } + + for (var e : document.getEmotions()) { + var s = sentenceBySpan.get(e.getBegin() + ":" + e.getEnd()); + if (s == null) continue; + + if (e.getSentenceEmotions() == null) { + e.setSentenceEmotions(new java.util.ArrayList<>()); + } + + var model = (e.getModel() != null) ? 
e.getModel() : "unknown"; + + if (e.getFeelings() == null) continue; + + for (var f : e.getFeelings()) { + e.getSentenceEmotions().add( + new SentenceEmotion(s, e, model, f.getFeeling(), f.getValue()) + ); + } + } + } + /** * Convert a UIMA jCas to an OCRDocument * @@ -605,7 +634,10 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String if (corpusConfig.getAnnotations().isEmotion()) ExceptionUtils.tryCatchLog( - () -> setEmotions(document, jCas), + () -> { + setEmotions(document, jCas); + linkSentenceEmotions(document); + }, (ex) -> logImportWarn("This file should have contained Emotion annotations, but selecting them caused an error.", ex, filePath)); if (corpusConfig.getAnnotations().isLemma()) diff --git a/uce.portal/uce.web/src/main/resources/languageTranslations.json b/uce.portal/uce.web/src/main/resources/languageTranslations.json index f34ce979..185db241 100644 --- a/uce.portal/uce.web/src/main/resources/languageTranslations.json +++ b/uce.portal/uce.web/src/main/resources/languageTranslations.json @@ -538,5 +538,9 @@ "sentenceTopicFlow": { "de-DE": "Satz-Themenfluss", "en-EN": "Sentence Topic Flow" + }, + "sentenceSentiment": { + "de-DE": "Satz-Sentiment", + "en-EN": "Sentence Sentiment" } } \ No newline at end of file From 6c909143a80503e927d094191aaeb03288df6f04 Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Mon, 2 Mar 2026 17:30:35 +0100 Subject: [PATCH 14/47] created a models table, connected emotions table with models table via model_id --- .../uce/common/config/HibernateConf.java | 3 + .../uce/common/models/ModelEntity.java | 44 ++++++++++ .../common/models/corpus/emotion/Emotion.java | 5 ++ .../uce/common/services/DataInterface.java | 16 ++++ .../PostgresqlDataInterface_Impl.java | 84 +++++++++++++++++++ .../uce/corpusimporter/Importer.java | 20 ++++- 6 files changed, 171 insertions(+), 1 deletion(-) create mode 100644 uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/ModelEntity.java diff 
--git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java index 3469904f..86cbd7ce 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java @@ -5,6 +5,7 @@ import org.hibernate.boot.registry.StandardServiceRegistryBuilder; import org.springframework.context.annotation.Configuration; import org.springframework.transaction.annotation.EnableTransactionManagement; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.biofid.BiofidTaxon; import org.texttechnologylab.uce.common.models.biofid.GazetteerTaxon; import org.texttechnologylab.uce.common.models.biofid.GnFinderTaxon; @@ -88,6 +89,8 @@ public static SessionFactory buildSessionFactory() { metadataSources.addAnnotatedClass(TopicWord.class); metadataSources.addAnnotatedClass(TopicValueBase.class); metadataSources.addAnnotatedClass(TopicValueBaseWithScore.class); + //models + metadataSources.addAnnotatedClass(ModelEntity.class); metadataSources.addAnnotatedClass(DocumentTopThreeTopics.class); var metadata = metadataSources.buildMetadata(); diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/ModelEntity.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/ModelEntity.java new file mode 100644 index 00000000..705aa1f9 --- /dev/null +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/ModelEntity.java @@ -0,0 +1,44 @@ +package org.texttechnologylab.uce.common.models; + +import lombok.Getter; +import lombok.Setter; +import javax.persistence.*; + +@Setter +@Getter +@Entity +@Table(name = "models") +public class ModelEntity { + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + 
private Long id; + + @Column(name = "model_key", unique = true, nullable = false) + private String modelKey; + + private String name; + + @Column(columnDefinition = "TEXT") + private String url; + + @Column(columnDefinition = "TEXT") + private String github; + + @Column(columnDefinition = "TEXT") + private String huggingface; + + @Column(columnDefinition = "TEXT") + private String paper; + + private String map; + private String variant; + + @Column(name = "main_tool") + private String mainTool; + + @Column(name = "model_type") + private String modelType; + + public ModelEntity() { + } +} diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java index f649a78f..8d7fc18e 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java @@ -3,6 +3,7 @@ import lombok.Getter; import lombok.Setter; import org.texttechnologylab.uce.common.annotations.Typesystem; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.UIMAAnnotation; import org.texttechnologylab.uce.common.models.WikiModel; @@ -26,6 +27,10 @@ public class Emotion extends UIMAAnnotation implements WikiModel { @OneToMany(mappedBy = "emotion", cascade = CascadeType.ALL, orphanRemoval = true) private List sentenceEmotions; + + @ManyToOne + @JoinColumn(name = "model_id") + private ModelEntity dbModel; public String generateEmotionMarker() { diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java index f3c5f722..8391cbd1 100644 --- 
a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java @@ -15,6 +15,7 @@ import org.texttechnologylab.uce.common.models.imp.ImportLog; import org.texttechnologylab.uce.common.models.imp.UCEImport; import org.texttechnologylab.uce.common.models.search.*; +import org.texttechnologylab.uce.common.models.ModelEntity; import java.util.List; @@ -366,4 +367,19 @@ public DocumentSearchResult defaultSearchForDocuments(int skip, * @param corpus */ public void saveCorpus(Corpus corpus) throws DatabaseOperationException; + + /** + * Stores or updates a ModelEntity in the database + */ + public void saveOrUpdateModelEntity(ModelEntity model) throws DatabaseOperationException; + + /** + * Gets a Model based on its JSON-Key + */ + public ModelEntity getModelEntityByKey(String modelKey) throws DatabaseOperationException; + + /** + * Gets a Model based on its map column value + */ + public ModelEntity getModelEntityByMap(String mapString) throws DatabaseOperationException; } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index 17dfb943..79781b04 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -15,6 +15,7 @@ import org.texttechnologylab.uce.common.exceptions.ExceptionUtils; import org.texttechnologylab.uce.common.models.Linkable; import org.texttechnologylab.uce.common.models.ModelBase; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.UIMAAnnotation; import 
org.texttechnologylab.uce.common.models.biofid.BiofidTaxon; import org.texttechnologylab.uce.common.models.biofid.GazetteerTaxon; @@ -42,6 +43,11 @@ import javax.persistence.criteria.Order; import javax.persistence.criteria.Path; import javax.persistence.criteria.Predicate; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.lang.reflect.Type; +import java.nio.charset.StandardCharsets; import java.sql.Array; import java.sql.PreparedStatement; import java.sql.ResultSet; @@ -65,6 +71,7 @@ private Session getCurrentSession() { public PostgresqlDataInterface_Impl() { sessionFactory = HibernateConf.buildSessionFactory(); TestConnection(); + initializeModelsFromJson(); } public void TestConnection() { @@ -2336,5 +2343,82 @@ public int ensureUnifiedTopicsForSentenceTopics(long documentId) throws Database return updated; }); } + + @Override + public void saveOrUpdateModelEntity(ModelEntity model) throws DatabaseOperationException{ + executeOperationSafely((session) -> { + session.saveOrUpdate(model); + return null; + }); + } + + @Override + public ModelEntity getModelEntityByKey(String modelKey) throws DatabaseOperationException{ + return executeOperationSafely((session) -> { + var cb = session.getCriteriaBuilder(); + var cq = cb.createQuery(ModelEntity.class); + var root = cq.from(ModelEntity.class); + + cq.select(root).where(cb.equal(root.get("modelKey"),modelKey)); + + var query = session.createQuery(cq); + query.setMaxResults(1); + return query.uniqueResult(); + }); + } + + @Override + public ModelEntity getModelEntityByMap(String mapString) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + var cb = session.getCriteriaBuilder(); + var cq = cb.createQuery(org.texttechnologylab.uce.common.models.ModelEntity.class); + var root = cq.from(org.texttechnologylab.uce.common.models.ModelEntity.class); + + cq.select(root).where(cb.equal(root.get("map"), mapString)); + + var query = 
session.createQuery(cq); + query.setMaxResults(1); + return query.uniqueResult(); + }); + } + + /** + * Creates a models table in the database and stores all models from models.json + */ + public void initializeModelsFromJson(){ + try(InputStream is = getClass().getClassLoader().getResourceAsStream("models.json"); + InputStreamReader reader = new InputStreamReader(is, StandardCharsets.UTF_8)){ + Type type = new TypeToken>>(){}.getType(); + Map> modelsMap = gson.fromJson(reader, type); + + if (modelsMap != null){ + for (Map.Entry> entry : modelsMap.entrySet()){ + String key = entry.getKey(); + Map info = entry.getValue(); + ModelEntity dbModel = getModelEntityByKey(key); + if(dbModel == null){ + dbModel = new ModelEntity(); + dbModel.setModelKey(key); + } + dbModel.setName(info.get("Name")); + dbModel.setUrl(info.get("url")); + dbModel.setGithub(info.get("github")); + dbModel.setHuggingface(info.get("huggingface")); + dbModel.setPaper(info.get("paper")); + dbModel.setMap(info.get("map")); + dbModel.setVariant(info.get("Variant")); + dbModel.setMainTool(info.get("Main Tool")); + dbModel.setModelType(info.get("type")); + + saveOrUpdateModelEntity(dbModel); + } + } + + } catch (IOException e) { + System.err.println("Error during initializing models from models.json"); + } catch (DatabaseOperationException e) { + System.err.println("Error during getting ModalEntity from database"); + } + } } diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index 4d98c653..543c8259 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -35,6 +35,7 @@ import org.texttechnologylab.uce.common.config.CorpusConfig; import 
org.texttechnologylab.uce.common.exceptions.DatabaseOperationException; import org.texttechnologylab.uce.common.exceptions.ExceptionUtils; +import org.texttechnologylab.uce.common.models.ModelEntity; import org.texttechnologylab.uce.common.models.UIMAAnnotation; import org.texttechnologylab.uce.common.models.biofid.BiofidTaxon; import org.texttechnologylab.uce.common.models.biofid.GazetteerTaxon; @@ -711,7 +712,24 @@ private void setEmotions(Document document, JCas jCas) { var emotion = new org.texttechnologylab.uce.common.models.corpus.emotion.Emotion(e.getBegin(), e.getEnd()); emotion.setCoveredText(e.getCoveredText()); var meta = e.getModel(); - if (meta != null) emotion.setModel(meta.getModelName() + "__v::" + meta.getModelVersion()); + ModelEntity foundModal = null; + if (meta != null){ + String modelNameFromXmi = meta.getModelName(); + logger.info("Searching for model " + modelNameFromXmi); + try{ + foundModal = db.getModelEntityByKey(meta.getModelName()); + if(foundModal == null){ + foundModal = db.getModelEntityByMap(modelNameFromXmi); + } + } catch (DatabaseOperationException ex) { + logger.error("Error when looking for model in database " + meta.getModelName()); + } + } + if (foundModal != null){ + emotion.setDbModel(foundModal); + }else{ + logger.warn("Modal not found in database"); + } var feelings = new ArrayList(); for (var annotationComment : e.getEmotions()) { From 78b6a753ca406900a8647088159b421af2616a52 Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Tue, 3 Mar 2026 14:50:20 +0100 Subject: [PATCH 15/47] added sentenceemotion table and implemented method for creating the rows --- .../uce/common/config/HibernateConf.java | 4 +- .../common/models/corpus/emotion/Emotion.java | 3 - .../corpus/emotion/SentenceEmotion.java | 79 ------------------- .../corpus/emotion/SentenceEmotions.java | 40 ++++++++++ .../PostgresqlDataInterface_Impl.java | 20 +++++ .../uce/corpusimporter/Importer.java | 36 ++------- 6 files changed, 68 insertions(+), 
114 deletions(-) delete mode 100644 uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotion.java create mode 100644 uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotions.java diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java index 86cbd7ce..7aa14783 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java @@ -12,7 +12,7 @@ import org.texttechnologylab.uce.common.models.corpus.*; import org.texttechnologylab.uce.common.models.corpus.emotion.Emotion; import org.texttechnologylab.uce.common.models.corpus.emotion.Feeling; -import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotion; +import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotions; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationLink; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationToDocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; @@ -57,7 +57,7 @@ public static SessionFactory buildSessionFactory() { metadataSources.addAnnotatedClass(Sentiment.class); metadataSources.addAnnotatedClass(Emotion.class); metadataSources.addAnnotatedClass(Feeling.class); - metadataSources.addAnnotatedClass(SentenceEmotion.class); + metadataSources.addAnnotatedClass(SentenceEmotions.class); metadataSources.addAnnotatedClass(GeoName.class); metadataSources.addAnnotatedClass(Paragraph.class); metadataSources.addAnnotatedClass(Sentence.class); diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java 
b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java index 8d7fc18e..c83d0108 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java @@ -24,9 +24,6 @@ public class Emotion extends UIMAAnnotation implements WikiModel { @OneToMany(cascade = CascadeType.ALL) @JoinColumn(name = "emotion_id") private List feelings; - - @OneToMany(mappedBy = "emotion", cascade = CascadeType.ALL, orphanRemoval = true) - private List sentenceEmotions; @ManyToOne @JoinColumn(name = "model_id") diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotion.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotion.java deleted file mode 100644 index bb579170..00000000 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotion.java +++ /dev/null @@ -1,79 +0,0 @@ -package org.texttechnologylab.uce.common.models.corpus.emotion; - -import lombok.Getter; -import lombok.Setter; -import org.texttechnologylab.uce.common.models.corpus.Sentence; - -import javax.persistence.*; -import java.io.Serializable; -import java.util.Objects; - -@Getter -@Setter -@Entity -@Table(name = "sentenceemotion") -@IdClass(SentenceEmotion.SentenceEmotionId.class) -public class SentenceEmotion { - - @Id - @ManyToOne(fetch = FetchType.LAZY, optional = false) - @JoinColumn(name = "sentence_id", nullable = false) - private Sentence sentence; - - @Id - @ManyToOne(fetch = FetchType.LAZY, optional = false) - @JoinColumn(name = "emotion_id", nullable = false) - private Emotion emotion; - - @Id - @Column(name = "model", nullable = false, length = 255) - private String model; - - @Id - @Column(name = "feeling", nullable = false, length = 255) - 
private String feeling; - - @Column(name = "value") - private Double value; - - public SentenceEmotion() {} - - public SentenceEmotion(Sentence sentence, Emotion emotion, String model, String feeling, Double value) { - this.sentence = sentence; - this.emotion = emotion; - this.model = model; - this.feeling = feeling; - this.value = value; - } - - public static class SentenceEmotionId implements Serializable { - private Long sentence; // references Sentence.id - private Long emotion; // references Emotion.id - private String model; - private String feeling; - - public SentenceEmotionId() {} - - public SentenceEmotionId(Long sentence, Long emotion, String model, String feeling) { - this.sentence = sentence; - this.emotion = emotion; - this.model = model; - this.feeling = feeling; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof SentenceEmotionId that)) return false; - return Objects.equals(sentence, that.sentence) - && Objects.equals(emotion, that.emotion) - && Objects.equals(model, that.model) - && Objects.equals(feeling, that.feeling); - } - - @Override - public int hashCode() { - return Objects.hash(sentence, emotion, model, feeling); - } - } -} diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotions.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotions.java new file mode 100644 index 00000000..04c4eed1 --- /dev/null +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/SentenceEmotions.java @@ -0,0 +1,40 @@ +package org.texttechnologylab.uce.common.models.corpus.emotion; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.texttechnologylab.uce.common.models.ModelBase; +import org.texttechnologylab.uce.common.models.ModelEntity; +import org.texttechnologylab.uce.common.models.corpus.Document; +import 
org.texttechnologylab.uce.common.models.corpus.Sentence; + +import javax.persistence.*; + +@Getter +@Setter +@NoArgsConstructor +@Entity +@Table(name = "sentenceemotions") +public class SentenceEmotions extends ModelBase { + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "document_id", nullable = false) + private Document document; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "sentence_id", nullable = false) + private Sentence sentence; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "emotion_id", nullable = false) + private Emotion emotion; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "model_id", nullable = false) + private ModelEntity model; + + public SentenceEmotions(Sentence sentence, Emotion emotion, ModelEntity model) { + this.sentence = sentence; + this.emotion = emotion; + this.model = model; + } +} diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index 79781b04..b6110349 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2343,6 +2343,26 @@ public int ensureUnifiedTopicsForSentenceTopics(long documentId) throws Database return updated; }); } + + public int createSentenceEmotions(long documentId) throws DatabaseOperationException { + return executeOperationSafely(session -> { + String createSentenceEmotions = + """ + INSERT INTO sentenceemotions (sentence_id, emotion_id, model_id, document_id) + SELECT s.id, e.id, e.model_id, s.document_id + FROM emotion e + JOIN sentence s + ON s.beginn = e.beginn AND s.endd = e.endd and s.document_id = :docId ; + """; + + System.out.println(documentId); + return 
session.createNativeQuery(createSentenceEmotions) + .setParameter("docId", documentId) + .executeUpdate(); + }); + } + + @Override public void saveOrUpdateModelEntity(ModelEntity model) throws DatabaseOperationException{ diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index 543c8259..deaf07fd 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -42,7 +42,7 @@ import org.texttechnologylab.uce.common.models.biofid.GnFinderTaxon; import org.texttechnologylab.uce.common.models.corpus.*; import org.texttechnologylab.uce.common.models.corpus.emotion.Feeling; -import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotion; +import org.texttechnologylab.uce.common.models.corpus.emotion.SentenceEmotions; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationLink; import org.texttechnologylab.uce.common.models.corpus.links.AnnotationToDocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; @@ -483,34 +483,6 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath) { return XMIToDocument(jCas, corpus, filePath, null); } - private void linkSentenceEmotions(Document document) { - if (document.getSentences() == null || document.getEmotions() == null) return; - - var sentenceBySpan = new java.util.HashMap(); - for (var s : document.getSentences()) { - sentenceBySpan.put(s.getBegin() + ":" + s.getEnd(), s); - } - - for (var e : document.getEmotions()) { - var s = sentenceBySpan.get(e.getBegin() + ":" + e.getEnd()); - if (s == null) continue; - - if (e.getSentenceEmotions() == null) { - e.setSentenceEmotions(new java.util.ArrayList<>()); - } - - var model = (e.getModel() 
!= null) ? e.getModel() : "unknown"; - - if (e.getFeelings() == null) continue; - - for (var f : e.getFeelings()) { - e.getSentenceEmotions().add( - new SentenceEmotion(s, e, model, f.getFeeling(), f.getValue()) - ); - } - } - } - /** * Convert a UIMA jCas to an OCRDocument * @@ -637,7 +609,6 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String ExceptionUtils.tryCatchLog( () -> { setEmotions(document, jCas); - linkSentenceEmotions(document); }, (ex) -> logImportWarn("This file should have contained Emotion annotations, but selecting them caused an error.", ex, filePath)); @@ -1945,6 +1916,11 @@ private void postProccessDocument(Document document, Corpus corpus, String fileP (ex) -> logImportError("Error creating/linking unifiedtopic rows for sentence topics.", ex, filePath) ); + ExceptionUtils.tryCatchLog( + () -> db.createSentenceEmotions(document.getId()), + (ex) -> logImportError("Error creating/linking sentenceEmotion rows for sentences.", ex, filePath) + ); + // Store simple connections between Time, Geonames and Annotation to approximate the question: // This annotation occurred in context with this location at this time. 
From 6c924efed035bb5b4e6ce0b13feebd5e3bb10966 Mon Sep 17 00:00:00 2001 From: Ph1l1ppGitHub Date: Tue, 3 Mar 2026 17:34:36 +0100 Subject: [PATCH 16/47] Update topic tables, docker config and importer logic --- database/10_topicTables.sql | 3 ++- docker-compose.yaml | 1 + .../services/PostgresqlDataInterface_Impl.java | 12 ++++++++---- .../uce/corpusimporter/Importer.java | 11 ++++++++++- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/database/10_topicTables.sql b/database/10_topicTables.sql index d0ec4cf8..ec4541ac 100644 --- a/database/10_topicTables.sql +++ b/database/10_topicTables.sql @@ -4,7 +4,8 @@ CREATE TABLE IF NOT EXISTS sentencetopics ( sentence_id BIGINT, topicinstance_id BIGINT, -- refers to topicvaluebase.id topiclabel VARCHAR(255), -- refers to topicvaluebase.value - thetast DOUBLE PRECISION + thetast DOUBLE PRECISION, + model VARCHAR(255) ); CREATE TABLE IF NOT EXISTS documenttopicsraw ( document_id BIGINT, diff --git a/docker-compose.yaml b/docker-compose.yaml index c8b383ab..97e1468a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -98,6 +98,7 @@ services: - app_net healthcheck: test: ["CMD-SHELL", "pg_isready -U postgres"] + start_period: 5s interval: 5s timeout: 5s start_interval: 1s diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index b6110349..60355423 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2279,19 +2279,22 @@ private String escapeSql(String input) { * This method matches a sentence by its begin and end * offsets within a given document and inserts a corresponding entry into the sentencetopics table */ - public int 
insertSentenceTopicBySpan(long documentId, int begin, int end, String topicLabel, double score) + public int insertSentenceTopicBySpan(long documentId, int begin, int end, + String topicLabel, double score, String modelName) throws DatabaseOperationException { return executeOperationSafely((session) -> { String sql = - "INSERT INTO sentencetopics (document_id, sentence_id, topiclabel, thetast) " + - "SELECT :docId, s.id, :label, :score " + + "INSERT INTO sentencetopics (document_id, sentence_id, topiclabel, thetast, model) " + + "SELECT :docId, s.id, :label, :score, :model " + "FROM sentence s " + "WHERE s.document_id = :docId AND s.beginn = :begin AND s.endd = :end " + "AND NOT EXISTS ( " + " SELECT 1 FROM sentencetopics st " + - " WHERE st.sentence_id = s.id AND st.topiclabel = :label " + + " WHERE st.sentence_id = s.id " + + " AND st.topiclabel = :label " + + " AND st.model = :model " + ")"; var query = session.createNativeQuery(sql); @@ -2300,6 +2303,7 @@ public int insertSentenceTopicBySpan(long documentId, int begin, int end, String query.setParameter("end", end); query.setParameter("label", topicLabel); query.setParameter("score", score); + query.setParameter("model", modelName); return query.executeUpdate(); }); diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index deaf07fd..c430c6be 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -531,6 +531,8 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String + " already exists in the corpus " + corpus.getId() + "."); logger.info("Checking if that document was also post-processed yet..."); var existingDoc = 
db.getDocumentByCorpusAndDocumentId(corpus.getId(), document.getDocumentId()); + importSentenceTopicsFromXmiIntoDb(document, filePath); + if (!existingDoc.isPostProcessed()) { logger.info("Not yet post-processed. Doing that now."); postProccessDocument(existingDoc, corpus, filePath); @@ -1871,6 +1873,12 @@ private void importSentenceTopicsFromXmiIntoDb(Document document, String xmiFile for (var topicSpan : topicAnnos) { int begin = topicSpan.getBegin(); int end = topicSpan.getEnd(); + String model = "unknown"; + try { + if (topicSpan.getModel() != null && topicSpan.getModel().getModelName() != null) { + model = topicSpan.getModel().getModelName(); + } + } catch (Exception ignored) { } var topicsArr = topicSpan.getTopics(); if (topicsArr == null || topicsArr.size() == 0) continue; @@ -1887,7 +1895,7 @@ private void importSentenceTopicsFromXmiIntoDb(Document document, String xmiFile try { score = Double.parseDouble(valueStr); } catch (NumberFormatException nfe) { continue; } - inserted += db.insertSentenceTopicBySpan(document.getId(), begin, end, label, score); + inserted += db.insertSentenceTopicBySpan(document.getId(), begin, end, label, score, model); } } @@ -1899,6 +1907,7 @@ private void importSentenceTopicsFromXmiIntoDb(Document document, String xmiFile } } + /** * Here we apply any postprocessing of a document that isn't DUUI and needs the document to be stored once like * the rag vector embeddings. 
From ab58275f6e5a6fdc012917630a458ba5288e7756 Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Wed, 4 Mar 2026 00:55:13 +0100 Subject: [PATCH 17/47] modelle werden nun in sentencetopics auch beim spaeterem importieren eingefuegt --- .../java/org/texttechnologylab/uce/corpusimporter/Importer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index c430c6be..e5261f40 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -531,7 +531,7 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String + " already exists in the corpus " + corpus.getId() + "."); logger.info("Checking if that document was also post-processed yet..."); var existingDoc = db.getDocumentByCorpusAndDocumentId(corpus.getId(), document.getDocumentId()); - importSentenceTopicsFromXmiIntoDb(document, filePath); + importSentenceTopicsFromXmiIntoDb(existingDoc, filePath); if (!existingDoc.isPostProcessed()) { logger.info("Not yet post-processed. 
Doing that now."); From 9800654b5f205ad23698be104c468fe340be3672 Mon Sep 17 00:00:00 2001 From: Ph1l1ppGitHub Date: Thu, 5 Mar 2026 13:05:30 +0100 Subject: [PATCH 18/47] Add model_id support and update topic model handling --- database/10_topicTables.sql | 2 +- .../PostgresqlDataInterface_Impl.java | 84 +- .../uce.common/src/main/resources/models.json | 1201 +++++++++++++++++ .../uce/corpusimporter/Importer.java | 8 +- 4 files changed, 1251 insertions(+), 44 deletions(-) create mode 100644 uce.portal/uce.common/src/main/resources/models.json diff --git a/database/10_topicTables.sql b/database/10_topicTables.sql index ec4541ac..dfe2cb8d 100644 --- a/database/10_topicTables.sql +++ b/database/10_topicTables.sql @@ -5,7 +5,7 @@ CREATE TABLE IF NOT EXISTS sentencetopics ( topicinstance_id BIGINT, -- refers to topicvaluebase.id topiclabel VARCHAR(255), -- refers to topicvaluebase.value thetast DOUBLE PRECISION, - model VARCHAR(255) + model_id BIGINT ); CREATE TABLE IF NOT EXISTS documenttopicsraw ( document_id BIGINT, diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index 60355423..b7e862dc 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2066,45 +2066,50 @@ SELECT DISTINCT ON (st.document_id, st.sentence_id) }); } - public List getSentenceTopicsWithEntitiesByPageForDocument(long documentId) throws DatabaseOperationException { + public List getSentenceTopicsWithEntitiesByPageForDocument(long documentId, int model_id) + throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ - WITH best_topic_per_sentence AS ( - SELECT DISTINCT ON 
(st.document_id, st.sentence_id) - st.sentence_id, - st.topiclabel - FROM - sentencetopics st - WHERE - st.document_id = :document_id - ORDER BY - st.document_id, st.sentence_id, st.thetast DESC - ), - entities_in_sentences AS ( - SELECT DISTINCT - s.id AS sentence_id, - ne.typee AS entity_type - FROM - sentence s - JOIN namedentity ne ON - ne.document_id = s.document_id AND - ne.beginn >= s.beginn AND - ne.endd <= s.endd - WHERE - s.document_id = :document_id - ) - SELECT - btps.topiclabel, - eis.entity_type + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + st.sentence_id, + st.topiclabel + FROM + sentencetopics st + WHERE + st.document_id = :document_id + AND st.model_id = :model_id + ORDER BY + st.document_id, st.sentence_id, st.thetast DESC + ), + entities_in_sentences AS ( + SELECT DISTINCT + s.id AS sentence_id, + ne.typee AS entity_type FROM - best_topic_per_sentence btps - JOIN entities_in_sentences eis ON btps.sentence_id = eis.sentence_id - ORDER BY - btps.sentence_id, eis.entity_type - """; + sentence s + JOIN namedentity ne ON + ne.document_id = s.document_id AND + ne.beginn >= s.beginn AND + ne.endd <= s.endd + WHERE + s.document_id = :document_id + ) + SELECT + btps.topiclabel, + eis.entity_type + FROM + best_topic_per_sentence btps + JOIN entities_in_sentences eis ON btps.sentence_id = eis.sentence_id + ORDER BY + btps.sentence_id, eis.entity_type + """; Query query = session.createNativeQuery(sql) - .setParameter("document_id", documentId); + .setParameter("document_id", documentId) + .setParameter("model_id", model_id); return query.getResultList(); }); @@ -2280,21 +2285,22 @@ private String escapeSql(String input) { * offsets within a given document and inserts a corresponding entry into the sentencetopics table */ public int insertSentenceTopicBySpan(long documentId, int begin, int end, - String topicLabel, double score, String modelName) + String topicLabel, double score, String modelMap) throws 
DatabaseOperationException { return executeOperationSafely((session) -> { String sql = - "INSERT INTO sentencetopics (document_id, sentence_id, topiclabel, thetast, model) " + - "SELECT :docId, s.id, :label, :score, :model " + + "INSERT INTO sentencetopics (document_id, sentence_id, topiclabel, thetast, model_id) " + + "SELECT :docId, s.id, :label, :score, m.id " + "FROM sentence s " + + "JOIN models m ON m.map = :modelMap " + "WHERE s.document_id = :docId AND s.beginn = :begin AND s.endd = :end " + "AND NOT EXISTS ( " + " SELECT 1 FROM sentencetopics st " + " WHERE st.sentence_id = s.id " + " AND st.topiclabel = :label " + - " AND st.model = :model " + + " AND st.model_id = m.id " + ")"; var query = session.createNativeQuery(sql); @@ -2303,7 +2309,7 @@ public int insertSentenceTopicBySpan(long documentId, int begin, int end, query.setParameter("end", end); query.setParameter("label", topicLabel); query.setParameter("score", score); - query.setParameter("model", modelName); + query.setParameter("modelMap", modelMap); return query.executeUpdate(); }); diff --git a/uce.portal/uce.common/src/main/resources/models.json b/uce.portal/uce.common/src/main/resources/models.json new file mode 100644 index 00000000..edb566b2 --- /dev/null +++ b/uce.portal/uce.common/src/main/resources/models.json @@ -0,0 +1,1201 @@ +{ + "Topic Tweet": { + "url": "http://tweentopic.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/cardiffnlp/tweet-topic-large-multilingual", + "paper": "https://arxiv.org/abs/2410.03075", + "map": "cardiffnlp/tweet-topic-large-multilingual", + "Name": "Topic Tweet", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic IPTC ": { + "url": "http://iptc.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + 
"huggingface": "https://huggingface.co/classla/multilingual-IPTC-news-topic-classifier", + "paper": "", + "map": "classla/multilingual-IPTC-news-topic-classifier", + "Name": "Topic IPTC", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic Manifesto": { + "url": "http://topic-manifestoberta-xlm-roberta.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/manifesto-project/manifestoberta-xlm-roberta-56policy-topics-context-2023-1-1", + "paper": "https://doi.org/10.25522/manifesto.manifestoberta.56topics.context.2023.1.1", + "map": "manifesto-project/manifestoberta-xlm-roberta-56policy-topics-context-2023-1-1", + "Name": "Topic Manifesto", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic ParlaCAP": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "classla/ParlaCAP-Topic-Classifier", + "Name": "Topic ParlaCap", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic dstefa": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "dstefa/roberta-base_topic_classification_nyt_news", + "Name": "Topic dstefa", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic Cardiffnlp (EN)": { + "url": "http://topic-cardiffnlp-roberta-large-tweet-topic-single-all.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/cardiffnlp/roberta-large-tweet-topic-single-all", + "paper": "https://aclanthology.org/2022.coling-1.299/", + "map": "cardiffnlp/roberta-large-tweet-topic-single-all", + "Name": "Topic Cardiffnlp (EN)", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic WebOrganizer (EN)": { + "url": 
"http://topic-organize-web.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", + "huggingface": "https://huggingface.co/WebOrganizer/TopicClassifier", + "paper": "https://arxiv.org/abs/2502.10341", + "map": "WebOrganizer/TopicClassifier", + "Name": "Topic WebOrganizer (EN)", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Sentiment CardiffNLP": { + "url": "http://sentiment-cardiffnlp.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-xlm-roberta-base-sentiment", + "paper": "https://arxiv.org/abs/2104.12250", + "map": "cardiffnlp/twitter-xlm-roberta-base-sentiment", + "Name": "Sentiment CardiffNLP", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment CitizenLab": { + "url": "http://sentiment-citizenlab.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/citizenlab/twitter-xlm-roberta-base-sentiment-finetunned", + "paper": "", + "map": "citizenlab/twitter-xlm-roberta-base-sentiment-finetunned", + "Name": "Sentiment CitizenLab", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment Multilingual DistilBert Students": { + "url": "http://duui-transformers-sentiment-atomar-distilbert-student.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/lxyuan/distilbert-base-multilingual-cased-sentiments-student", + "paper": "", + "map": "lxyuan/distilbert-base-multilingual-cased-sentiments-student", + "Name": "Sentiment Multilingual DistilBert 
Students", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment Multilingual DistilBert": { + "url": "http://duui-transformers-sentiment-atomar-distilbert-multilingual.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/philschmid/distilbert-base-multilingual-cased-sentiment", + "paper": "", + "map": "philschmid/distilbert-base-multilingual-cased-sentiment", + "Name": "Sentiment Multilingual DistilBert", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment CardiffNLP (EN)": { + "url": "http://duui-transformers-sentiment-atomar-cardiffnlp-sentiment-en.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment-latest", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-sentiment-latest", + "Name": "Sentiment CardiffNLP (EN)", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment RoBERTa based (EN)": { + "url": "http://duui-transformers-sentiment-atomar-roberta-based-en.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/j-hartmann/sentiment-roberta-large-english-3-classes", + "paper": "", + "map": "j-hartmann/sentiment-roberta-large-english-3-classes", + "Name": "Sentiment RoBERTa based (EN)", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Sentiment Finance (DE)": { + "url": "http://duui-transformers-sentiment-atomar-finance-sentiment-de.service.component.duui.texttechnologylab.org", + "github": 
"https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-sentiment-atomar", + "huggingface": "https://huggingface.co/bardsai/finance-sentiment-de-base", + "paper": "", + "map": "bardsai/finance-sentiment-de-base", + "Name": "Sentiment Finance (DE)", + "Main Tool": "Sentiment Analysis", + "Variant": "Sentiment", + "type": "" + }, + "Hate Cardiffnlp": { + "url": "http://cardiffnlp.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-hate-latest", + "paper": "https://aclanthology.org/2023.woah-1.25.pdf", + "map": "cardiffnlp/twitter-roberta-base-hate-latest", + "Name": "Hate Cardiffnlp", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate MeHate-RoBERTa": { + "url": "http://hate-l3cube.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/l3cube-pune/me-hate-roberta", + "paper": "https://aclanthology.org/2023.findings-ijcnlp.22.pdf", + "map": "l3cube-pune/me-hate-roberta", + "Name": "Hate MeHate-RoBERTa", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate Andrazp": { + "url": "http://hate-andrazp.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Andrazp/multilingual-hate-speech-robacofi", + "paper": "", + "map": "Andrazp/multilingual-hate-speech-robacofi", + "Name": "Hate Andrazp", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "HateBERT GroNLP (EN)": { + "url": "http://hate-groNLP.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/GroNLP/hateBERT", + 
"paper": "https://aclanthology.org/2021.woah-1.3/", + "map": "GroNLP/hateBERT", + "Name": "HateBERT GroNLP (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (EN)": { + "url": "http://hate-cnerg.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-english", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-english", + "Name": "Hate-speech-CNERG (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (DE)": { + "url": "http://hate-cnergde.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-german", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-german", + "Name": "Hate-speech-CNERG (DE)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (PL)": { + "url": "http://hate-cnergpl.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-polish", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-polish", + "Name": "Hate-speech-CNERG (PL)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (ES)": { + "url": "http://hate-cnerges.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-spanish", + "paper": "https://arxiv.org/abs/2004.06465", + "map": 
"Hate-speech-CNERG/dehatebert-mono-spanish", + "Name": "Hate-speech-CNERG (ES)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (IT)": { + "url": "http://hate-cnergit.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-italian", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-italian", + "Name": "Hate-speech-CNERG (IT)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (PT)": { + "url": "http://hate-cnergpt.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-portuguese", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-portugese", + "Name": "Hate-speech-CNERG (PT)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (AR)": { + "url": "http://hate-cnergar.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-arabic", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-arabic", + "Name": "Hate-speech-CNERG (AR)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (FR)": { + "url": "http://hate-cnergfr.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-french", + "paper": "https://arxiv.org/abs/2004.06465", + "map": 
"Hate-speech-CNERG/dehatebert-mono-french", + "Name": "Hate-speech-CNERG (FR)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-CNERG (ID)": { + "url": "http://hate-cnergid.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/dehatebert-mono-indonesian", + "paper": "https://arxiv.org/abs/2004.06465", + "map": "Hate-speech-CNERG/dehatebert-mono-indonesian", + "Name": "Hate-speech-CNERG (ID)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate-speech-Alexandrainst (DA)": { + "url": "http://hate-alexandrainst.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/alexandrainst/da-hatespeech-detection-base", + "paper": "", + "map": "alexandrainst/da-hatespeech-detection-base", + "Name": "Hate-speech-Alexandrainst (DA)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate Deepset GermanBert (DE)": { + "url": "http://hate-deepset.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/deepset/bert-base-german-cased-hatespeech-GermEval18Coarse", + "paper": "", + "map": "deepset/bert-base-german-cased-hatespeech-GermEval18Coarse", + "Name": "Hate Deepset GermanBert (DE)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate LFTW Facebook (EN)": { + "url": "http://hate-lftw-facebook.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/facebook/roberta-hate-speech-dynabench-r4-target", + "paper": "", + "map": 
"facebook/roberta-hate-speech-dynabench-r4-target", + "Name": "Hate LFTW Facebook (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate MetaHateBERT (EN)": { + "url": "http://hate-metahatebert.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/irlab-udc/MetaHateBERT", + "paper": "", + "map": "irlab-udc/MetaHateBERT", + "Name": "Hate MetaHateBERT (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate HateBERT hateval (EN)": { + "url": "http://hate-hatebert-hateval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://osf.io/tbd58/files/osfstorage?view_only=d90e681c672a494bb555de99fc7ae780", + "paper": "", + "map": "HateBERT_hateval", + "Name": "Hate HateBERT hateval (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Hate Check Eziisk (EN)": { + "url": "http://hate-hate-check-eziisk.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Hate", + "huggingface": "https://huggingface.co/EZiisk/EZ_finetune_Vidgen_model_RHS_Best", + "paper": "", + "map": "HateCheckEziisk", + "Name": "Hate Check Eziisk (EN)", + "Main Tool": "Hate Speech Detection", + "Variant": "Hate", + "type": "" + }, + "Detoxify": { + "url": "http://toxic-detoxify.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://github.com/unitaryai/detoxify", + "paper": "", + "map": "Detoxify", + "Name": "Detoxify", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "One for all Toxicity": { + "url": 
"http://toxic-one-for-all-toxicity-v3.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/FredZhang7/one-for-all-toxicity-v3", + "paper": "", + "map": "FredZhang7/one-for-all-toxicity-v3", + "Name": "One for all Toxicity", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxic Multilingual Distil-Bert": { + "url": "http://toxic-distilbert-base-multilingual-cased-toxicity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "citizenlab/distilbert-base-multilingual-cased-toxicity", + "paper": "", + "map": "citizenlab/distilbert-base-multilingual-cased-toxicity", + "Name": "Toxic Multilingual Distil-Bert", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxic Comment (EN)": { + "url": "http://toxic-toxic-comment-model.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "citizenlab/distilbert-base-multilingual-cased-toxicity", + "paper": "", + "map": "martin-ha/toxic-comment-model", + "Name": "Toxic Comment (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Roberta Toxicity Classifier (EN)": { + "url": "http://toxic-roberta-toxicity-classifier.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/s-nlp/roberta_toxicity_classifier", + "paper": "https://aclanthology.org/2022.acl-long.469", + "map": "s-nlp/roberta_toxicity_classifier", + "Name": "Roberta Toxicity Classifier (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "ToxiGen (EN)": { + "url": 
"http://toxic-toxigen.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/tomh/toxigen_roberta", + "paper": "https://aclanthology.org/2022.acl-long.234/", + "map": "tomh/toxigen_roberta", + "Name": "ToxiGen (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxicity German (DE)": { + "url": "http://toxic-german-toxicity-classifier-plus-v2.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/EIStakovskii/german_toxicity_classifier_plus_v2", + "paper": "", + "map": "EIStakovskii/german_toxicity_classifier_plus_v2", + "Name": "Toxicity German (DE)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxic XLM-Roberta": { + "url": "http://toxic-xlm-roberta.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "paper": "", + "map": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "Name": "Toxicity XLM-Roberta", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "RUSSE-2022 (RU)": { + "url": "http://toxic-russe-2022.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/s-nlp/russian_toxicity_classifier", + "paper": "", + "map": "s-nlp/russian_toxicity_classifier", + "Name": "RUSSE-2022 (RU)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Multilingual Binary Toxicity Classification": { + "url": 
"http://toxic-xlm-multi-toxic.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/malexandersalazar/xlm-roberta-large-binary-cls-toxicity", + "paper": "", + "map": "malexandersalazar/xlm-roberta-large-binary-cls-toxicity", + "Name": "Multilingual Binary Toxicity Classification", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "RuBert Toxic (RU)": { + "url": "http://toxic-rubert-toxic.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/sismetanin/rubert-toxic-pikabu-2ch", + "paper": "", + "map": "sismetanin/rubert-toxic-pikabu-2ch", + "Name": "RuBert Toxic (RU)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "TextDetox Glot500": { + "url": "http://toxic-textdetox-glot500.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/textdetox/glot500-toxicity-classifier", + "paper": "", + "map": "textdetox/glot500-toxicity-classifier", + "Name": "TextDetox Glot500", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "TextDetox BERT": { + "url": "http://toxic-textdetox-bert.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/textdetox/bert-multilingual-toxicity-classifier", + "paper": "", + "map": "textdetox/bert-multilingual-toxicity-classifier", + "Name": "TextDetox BERT", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "Toxicity Classifier UK": { + "url": "http://toxic-toxicity-classifier-uk.service.component.duui.texttechnologylab.org", + 
"github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/dardem/xlm-roberta-large-uk-toxicity", + "paper": "", + "map": "dardem/xlm-roberta-large-uk-toxicity", + "Name": "Toxicity Classifier UK", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "ToxDect (EN)": { + "url": "http://toxic-toxdect.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/Xuhui/ToxDect-roberta-large", + "paper": "", + "map": "Xuhui/ToxDect-roberta-large", + "Name": "ToxDect (EN)", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "XLM-RoBertA Multilingual Toxic Classifier": { + "url": "http://toxic-multi-toxic-classifier-plus.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-toxic", + "huggingface": "https://huggingface.co/EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "paper": "", + "map": "EIStakovskii/xlm_roberta_base_multilingual_toxicity_classifier_plus", + "Name": "XLM-RoBertA Multilingual Toxic Classifier", + "Main Tool": "Toxic Language Detection", + "Variant": "Toxic", + "type": "" + }, + "CNERG HateExplain (EN)": { + "url": "http://offensive-cnerg-hatexplain.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/bert-base-uncased-hatexplain", + "paper": "", + "map": "Hate-speech-CNERG/bert-base-uncased-hatexplain", + "Name": "CNERG HateExplain (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "CNERG HateExplain-Rationale (EN)": { + "url": "http://offensive-cnerg-hatexplain-rationale.service.component.duui.texttechnologylab.org", + "github": 
"https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://huggingface.co/Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two", + "paper": "", + "map": "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two", + "Name": "CNERG HateExplain-Rationale (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "NaijaXLM-T-base Hate (EN,IG,YO,HA,PIGDIN)": { + "url": "http://offensive-naija-xlm-t-base-hate.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://huggingface.co/worldbank/naija-xlm-twitter-base-hate", + "paper": "", + "map": "worldbank/naija-xlm-twitter-base-hate", + "Name": "NaijaXLM-T-base Hate (EN,IG,YO,HA,PIGDIN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "HateBERT abuseval (EN)": { + "url": "http://offensive-hatebert-abuseval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://osf.io/tbd58/files/osfstorage?view_only=d90e681c672a494bb555de99fc7ae780", + "paper": "", + "map": "HateBERT_abuseval", + "Name": "HateBERT abuseval (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "HateBERT offenseval (EN)": { + "url": "http://offensive-hatebert-offenseval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface": "https://osf.io/tbd58/files/osfstorage?view_only=d90e681c672a494bb555de99fc7ae780", + "paper": "", + "map": "HateBERT_offenseval", + "Name": "HateBERT offenseval (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "BERTweet Hate Speech (EN)": { + "url": 
"http://offensive-bertweet-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/bertweet-hate-speech", + "paper": "", + "map": "pysentimiento/bertweet-hate-speech", + "Name": "BERTweet Hate Speech (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "RoBERTuito Hate Speech (ES)": { + "url": "http://offensive-robertuito-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/robertuito-hate-speech", + "paper": "", + "map": "pysentimiento/robertuito-hate-speech", + "Name": "RoBERTuito Hate Speech (ES)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "BERTabaporu Hate Speech (PT)": { + "url": "http://offensive-bertabaporu-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/bertabaporu-pt-hate-speech", + "paper": "", + "map": "pysentimiento/bertabaporu-pt-hate-speech", + "Name": "BERTabaporu Hate Speech (PT)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "BERT-IT Hate Speech (IT)": { + "url": "http://offensive-bert-it-hate-speech.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/pysentimiento/bert-it-hate-speech", + "paper": "", + "map": "pysentimiento/bert-it-hate-speech", + "Name": "BERT-IT Hate Speech (IT)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech": { + "url": 
"http://offensive-imsypp-social-media.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_multilingual", + "paper": "", + "map": "IMSyPP/hate_speech_multilingual", + "Name": "IMSyPP Social Media Hate Speech", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (EN)": { + "url": "http://offensive-imsypp-social-media-en.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_en", + "paper": "", + "map": "IMSyPP/hate_speech_en", + "Name": "IMSyPP Social Media Hate Speech (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (IT)": { + "url": "http://offensive-imsypp-social-media-it.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_it", + "paper": "", + "map": "IMSyPP/hate_speech_it", + "Name": "IMSyPP Social Media Hate Speech (IT)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (NL)": { + "url": "http://offensive-imsypp-social-media-nl.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_nl", + "paper": "", + "map": "IMSyPP/hate_speech_nl", + "Name": "IMSyPP Social Media Hate Speech (NL)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "IMSyPP Social Media Hate Speech (SLO)": { + "url": 
"http://offensive-imsypp-social-media-slo.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/IMSyPP/hate_speech_SLO", + "paper": "", + "map": "IMSyPP/hate_speech_slo", + "Name": "IMSyPP Social Media Hate Speech (SLO)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "CardiffNLP Multiclass Hate (EN)": { + "url": "http://offensive-cardiffnlp-hate-multiclass.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/cardiffnlp/twitter-roberta-base-hate-multiclass-latest", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-hate-multiclass-latest", + "Name": "CardiffNLP Multiclass Hate (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "CardiffNLP Multilabel Sensitive (EN)": { + "url": "http://offensive-cardiffnlp-sensitive-multilabel.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-offensive", + "huggingface":"https://huggingface.co/cardiffnlp/twitter-roberta-large-sensitive-multilabel", + "paper": "", + "map": "cardiffnlp/twitter-roberta-large-sensitive-multilabel", + "Name": "CardiffNLP Multilabel Sensitive (EN)", + "Main Tool": "Multilabel Offensive Speech Detection", + "Variant": "Offensive", + "type": "" + }, + "Emotion Twitter": { + "url": "http://emotion-finetuned-twitter-xlm-roberta-base-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/02shanky/finetuned-twitter-xlm-roberta-base-emotion", + "paper": "", + "map": "02shanky/finetuned-twitter-xlm-roberta-base-emotion", + "Name": "Emotion Twitter", + "Main Tool": "Emotion Recognition", + 
"Variant": "Emotion", + "type": "" + }, + "Emotion DReAMy": { + "url": "http://emotion-dreamy-xlm-roberta-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "paper": "https://www.sciencedirect.com/science/article/pii/S1389945723015186", + "map": "DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "Name": "Emotion DReAMy", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion XLM-EMO": { + "url": "http://emotion-xlm-emo-t.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "paper": "https://aclanthology.org/2022.wassa-1.18/", + "map": "MilaNLProc/xlm-emo-t", + "Name": "Emotion XLM-EMO", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion pol emo mDeBERTa": { + "url": "http://duui-transformers-emotion-pol-emo-mdeberta.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://github.com/tweedmann/pol_emo_mDeBERTa2 ", + "paper": "", + "map": "pol_emo_mDeBERTa", + "Name": "Emotion pol emo mDeBERTa", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion DistilRoBERTa-base (EN)": { + "url": "http://emotion-emotion-english-distilroberta-base.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/DReAMy-lib/xlm-roberta-large-DreamBank-emotion-presence", + "paper": "", + "map": "j-hartmann/emotion-english-distilroberta-base", + "Name": "Emotion 
DistilRoBERTa-base (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion BerTweet (EN)": { + "url": "http://emotion-bertweet-base-emotion-analysis.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/finiteautomata/bertweet-base-emotion-analysis", + "paper": "https://arxiv.org/abs/2106.09462", + "map": "finiteautomata/bertweet-base-emotion-analysis", + "Name": "Emotion BerTweet (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion DistilBert-base (EN)": { + "url": "http://emotion-distilbert-base-uncased-finetuned-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/ActivationAI/distilbert-base-uncased-finetuned-emotion", + "paper": "", + "map": "ActivationAI/distilbert-base-uncased-finetuned-emotion", + "Name": "Emotion DistilBert-base (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion T5-base (EN)": { + "url": "http://emotion-t5-base-finetuned-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/mrm8488/t5-base-finetuned-emotion", + "paper": "", + "map": "mrm8488/t5-base-finetuned-emotion", + "Name": "Emotion T5-base (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion EmoAtlas (EN)": { + "url": "http://emotion-emoatlas.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://github.com/massimostel/emoatlas", + "paper": "", + "map": "EmoAtlas", + "Name": "Emotion EmoAtlas (EN)", + "Main Tool": "Emotion 
Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion Text Classifier (EN)": { + "url": "http://emotion-emotion-text-classifier.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/michellejieli/emotion_text_classifier", + "paper": "", + "map": "michellejieli/emotion_text_classifier", + "Name": "Emotion Text Classifier (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion pysentimiento (EN,ES,IT,PT)": { + "url": "http://emotion-pysentimiento.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://github.com/pysentimiento/pysentimiento/", + "paper": "https://arxiv.org/abs/2106.09462", + "map": "pysentimiento", + "Name": "Emotion EmoAtlas (EN,ES,IT,PT)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "CardiffNLP Twitter (EN)": { + "url": "http://emotion-cardiffnlp-twitter.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-emotion", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-emotion", + "Name": "CardiffNLP Twitter (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Roberta Go (EN)": { + "url": "http://emotion-roberta-go.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/SamLowe/roberta-base-go_emotions", + "paper": "", + "map": "SamLowe/roberta-base-go_emotions", + "Name": "Roberta Go (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Exalt Baseline": { + "url": 
"http://emotion-exalt-baseline.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/pranaydeeps/EXALT-Baseline", + "paper": "", + "map": "pranaydeeps/EXALT-Baseline", + "Name": "Exalt Baseline", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "BERT-Emotion (EN)": { + "url": "http://emotion-bert-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/boltuix/bert-emotion", + "paper": "", + "map": "boltuix/bert-emotion", + "Name": "BERT-Emotion (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Feel IT (IT)": { + "url": "http://emotion-feel-it.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/MilaNLProc/feel-it-italian-emotion", + "paper": "", + "map": "MilaNLProc/feel-it-italian-emotion", + "Name": "Feel IT (IT)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "CardiffNLP Multilabel (EN)": { + "url": "http://emotion-cardiffnlp-multilabel.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/cardiffnlp/twitter-roberta-base-emotion-multilabel-latest", + "paper": "", + "map": "cardiffnlp/twitter-roberta-base-emotion-multilabel-latest", + "Name": "CardiffNLP Multilabel (EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Emotion Analysis in Spanish (ES)": { + "url": "http://emotion-beto-es-analysis.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + 
"huggingface": "https://huggingface.co/finiteautomata/beto-emotion-analysis", + "paper": "", + "map": "finiteautomata/beto-emotion-analysis", + "Name": "Emotion Analysis in Spanish (ES)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "twitter-XLM-roBERTa-base for Emotion Analysis (ES)": { + "url": "http://emotion-twitter-xlm-roberta.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/daveni/twitter-xlm-roberta-emotion-es", + "paper": "", + "map": "daveni/twitter-xlm-roberta-emotion-es", + "Name": "twitter-XLM-roBERTa-base for Emotion Analysis (ES)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "German-Emotions (DE)": { + "url": "http://emotion-german-emotions.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/daveni/ChrisLalk/German-Emotions", + "paper": "", + "map": "ChrisLalk/German-Emotions", + "Name": "German-Emotions (DE)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "XLM-R Multi-Emotion Classifier (AR, EN)": { + "url": "http://emotion-xlm-emo-multi.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/msgfrom96/xlm_emo_multi", + "paper": "", + "map": "msgfrom96/xlm_emo_multi", + "Name": "XLM-R Multi-Emotion Classifier (AR, EN)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "RuBert cedr Emotion (RU)": { + "url": "http://emotion-rubert-cedr-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": 
"https://huggingface.co/cointegrated/rubert-tiny2-cedr-emotion-detection", + "paper": "", + "map": "cointegrated/rubert-tiny2-cedr-emotion-detection", + "Name": "RuBert cedr Emotion (RU)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "RuBert-Tiny2-Russian-Emotion (RU)": { + "url": "http://emotion-rubert-tiny2-russian.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/Aniemore/rubert-tiny2-russian-emotion-detection", + "paper": "", + "map": "Aniemore/rubert-tiny2-russian-emotion-detection", + "Name": "RuBert-Tiny2-Russian-Emotion (RU)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Chinese-Emotion-Small (ZH)": { + "url": "http://emotion-chinese-emotion-small.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/Johnson8187/Chinese-Emotion-Small", + "paper": "", + "map": "Johnson8187/Chinese-Emotion-Small", + "Name": "Chinese-Emotion-Small (ZH)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Chinese-Emotion (ZH)": { + "url": "http://emotion-chinese-emotion.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": "https://huggingface.co/Johnson8187/Chinese-Emotion", + "paper": "", + "map": "Johnson8187/Chinese-Emotion", + "Name": "Chinese-Emotion-Small (ZH)", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "Multilingual Go Emotions": { + "url": "http://emotion-multi-go-emotions.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Emotion", + "huggingface": 
"https://huggingface.co/AnasAlokla/multilingual_go_emotions", + "paper": "", + "map": "AnasAlokla/multilingual_go_emotions", + "Name": "Multilingual Go Emotions", + "Main Tool": "Emotion Recognition", + "Variant": "Emotion", + "type": "" + }, + "FactChecking UniEval (EN)": { + "url": "http://factchecking-unieval.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://github.com/maszhongming/UniEval", + "paper": "https://arxiv.org/abs/2210.07197", + "map": "unieval", + "Name": "FactChecking UniEval (EN)", + "Main Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "FactChecking NUBIA (EN)": { + "url": "http://factchecking-nubia.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://github.com/wl-research/nubia", + "paper": "https://aclanthology.org/2020.evalnlgeval-1.4/", + "map": "nubia", + "Name": "FactChecking NUBIA (EN)", + "Main Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "FactChecking FactCC (EN)": { + "url": "http://factchecking-factcc.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://huggingface.co/manueldeprada/FactCC", + "paper": "https://aclanthology.org/2020.emnlp-main.750/", + "map": "manueldeprada/FactCC", + "Name": "FactChecking FactCC (EN)", + "Main Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "FactChecking MiniCheck (EN)": { + "url": "http://factchecking-minicheck.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-FactChecking", + "huggingface": "https://github.com/Liyan06/MiniCheck", + "paper": "https://aclanthology.org/2020.evalnlgeval-1.4/", + "map": "MiniCheck", + "Name": "FactChecking MiniCheck (EN)", + "Main 
Tool": "Fact Checking", + "Variant": "Factchecking", + "type": "" + }, + "Cohesion Intfloat": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "intfloat/multilingual-e5-base", + "Name": "Cohesion Intfloat", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BertSentence" + }, + "Cohesion BERT": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "google-bert/bert-base-multilingual-cased", + "Name": "Cohesion BERT", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion FaceBook": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "facebook/xlm-v-base", + "Name": "Cohesion FaceBook", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion FaceBook-XLM-Roberta": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "FacebookAI/xlm-roberta-large", + "Name": "Cohesion FaceBook-XLM-Roberta", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion CardiffNLP": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "cardiffnlp/twitter-xlm-roberta-base", + 
"Name": "Cohesion CardiffNLP", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BERT" + }, + "Cohesion LEALLA": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "setu4993/LEALLA-small", + "Name": "Cohesion LEALLA", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "BertSentence" + }, + "Cohesion Twitter": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "Twitter/twhin-bert-large", + "Name": "Cohesion Twitter", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "Bert" + }, + "Cohesion LABSE": { + "url": "http://transformers-complexity.service.component.duui.texttechnologylab.org", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-transformers-Complexity", + "huggingface": "", + "paper": "", + "map": "sentence-transformers/LaBSE", + "Name": "Cohesion LABSE", + "Main Tool": "Text Cohesion Analysis", + "Variant": "Coherence", + "type": "Sentence" + }, + "Stance Trump Twitter US Election 2020 (EN)": { + "url": "http://stance-kornosk.service.component.duui.texttechnologylab.org", + "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-Stance", + "huggingface": "https://huggingface.co/kornosk/bert-election2020-twitter-stance-trump-KE-MLM", + "paper": "https://www.aclweb.org/anthology/2021.naacl-main.376", + "map": "kornosk", + "Name": "Stance Trump Twitter US Election 2020 (EN)", + "Main Tool": "Stance detection", + "Variant": "Stance", + "type": "Stance" + }, + "Stance ZeroShot PoliStance (EN)": { + "url": "http://stance-mlburnham.service.component.duui.texttechnologylab.org", + "github": 
"https://github.com/texttechnologylab/duui-uima/tree/main/duui-Stance", + "huggingface": "https://huggingface.co/mlburnham/deberta-v3-base-polistance-affect-v1.0", + "paper": "https://arxiv.org/abs/2409.02078", + "map": "mlburnham", + "Name": "Stance ZeroShot PoliStance (EN)", + "Main Tool": "Stance detection", + "Variant": "Stance", + "type": "Stance" + }, + "Readability (EN)": { + "url": "http://readability.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Readability", + "Name": "Readability (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + }, + "Textstat (EN)": { + "url": "http://readability-textstat.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Textstat", + "Name": "Textstat (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + }, + "Diversity (EN)": { + "url": "http://readability-diversity.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Diversity", + "Name": "Diversity (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + }, + "Readability Advance (EN)": { + "url": "http://readability-readability.service.component.duui.texttechnologylab.org/", + "github": "https://github.com/mevbagci/duui-uima/tree/main/duui-readability", + "huggingface": "", + "paper": "", + "map": "Readability", + "Name": "Readability Advance (EN)", + "Main Tool": "Text Readability", + "Variant": "Readability", + "type": "Readability" + } +} \ No newline at end of file diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java 
b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index e5261f40..b987fdde 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -1920,10 +1920,10 @@ private void postProccessDocument(Document document, Corpus corpus, String fileP importSentenceTopicsFromXmiIntoDb(document, filePath); // build unifiedtopic + link sentencetopics.unifiedtopic_id - ExceptionUtils.tryCatchLog( - () -> db.ensureUnifiedTopicsForSentenceTopics(document.getId()), - (ex) -> logImportError("Error creating/linking unifiedtopic rows for sentence topics.", ex, filePath) - ); + //ExceptionUtils.tryCatchLog( + // () -> db.ensureUnifiedTopicsForSentenceTopics(document.getId()), + // (ex) -> logImportError("Error creating/linking unifiedtopic rows for sentence topics.", ex, filePath) + //); ExceptionUtils.tryCatchLog( () -> db.createSentenceEmotions(document.getId()), From 2b1c2f272bd8d41d8b55557e824cba39d3692ff6 Mon Sep 17 00:00:00 2001 From: Ph1l1ppGitHub Date: Thu, 5 Mar 2026 13:17:10 +0100 Subject: [PATCH 19/47] Fix model_id parameter handling and update DocumentApi --- .../uce/common/services/PostgresqlDataInterface_Impl.java | 6 ++---- .../org/texttechnologylab/uce/web/routes/DocumentApi.java | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index b7e862dc..e53b8dd3 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2066,7 +2066,7 @@ SELECT 
DISTINCT ON (st.document_id, st.sentence_id) }); } - public List getSentenceTopicsWithEntitiesByPageForDocument(long documentId, int model_id) + public List getSentenceTopicsWithEntitiesByPageForDocument(long documentId) throws DatabaseOperationException { return executeOperationSafely((session) -> { @@ -2080,7 +2080,6 @@ SELECT DISTINCT ON (st.document_id, st.sentence_id) sentencetopics st WHERE st.document_id = :document_id - AND st.model_id = :model_id ORDER BY st.document_id, st.sentence_id, st.thetast DESC ), @@ -2108,8 +2107,7 @@ entities_in_sentences AS ( """; Query query = session.createNativeQuery(sql) - .setParameter("document_id", documentId) - .setParameter("model_id", model_id); + .setParameter("document_id", documentId); return query.getResultList(); }); diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java index f04ecfa4..f8bc9fe2 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java @@ -497,7 +497,6 @@ public void getDocumentGeonameByPage(Context ctx) { public void getSentenceTopicsWithEntities(Context ctx) { var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), (ex) -> logger.error("Error: couldn't determine the documentId for sentence topics with entities. 
", ex)); - if (documentId == null) { ctx.status(400); ctx.render("defaultError.ftl", Map.of("information", "Missing documentId parameter for sentence topics with entities")); From 1c43688cafd4da3cd92503c0a175fd098870cb80 Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Thu, 5 Mar 2026 13:39:49 +0100 Subject: [PATCH 20/47] in der emotions tabelle und sentenceemotions tabelle werden nun mehrere modelle eingespeichert --- .../uce/common/services/DataInterface.java | 7 +++ .../PostgresqlDataInterface_Impl.java | 23 ++++++++- .../uce/corpusimporter/Importer.java | 47 +++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java index 8391cbd1..9ae312d8 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java @@ -382,4 +382,11 @@ public DocumentSearchResult defaultSearchForDocuments(int skip, * Gets a Model based on its map column value */ public ModelEntity getModelEntityByMap(String mapString) throws DatabaseOperationException; + + /** + * Adds new emotions to an existing document in the database + */ + public void saveNewEmotionsForDocument(long documentId, List newEmotions) throws DatabaseOperationException; + + } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index 60355423..2c1fb923 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ 
b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2356,7 +2356,11 @@ INSERT INTO sentenceemotions (sentence_id, emotion_id, model_id, document_id) SELECT s.id, e.id, e.model_id, s.document_id FROM emotion e JOIN sentence s - ON s.beginn = e.beginn AND s.endd = e.endd and s.document_id = :docId ; + ON s.beginn = e.beginn AND s.endd = e.endd and s.document_id = :docId + WHERE NOT EXISTS( + SELECT 1 FROM sentenceemotions se + WHERE se.sentence_id = s.id AND se.emotion_id = e.id + ); """; System.out.println(documentId); @@ -2366,6 +2370,23 @@ INSERT INTO sentenceemotions (sentence_id, emotion_id, model_id, document_id) }); } + public void saveNewEmotionsForDocument(long documentId, List newEmotions) throws DatabaseOperationException { + executeOperationSafely((session) -> { + Document doc = session.get(Document.class, documentId); + if (doc != null) { + Hibernate.initialize(doc.getEmotions()); + for (var emotion : newEmotions) { + if (emotion.getDbModel() != null) { + emotion.setDbModel((ModelEntity) session.merge(emotion.getDbModel())); + } + } + doc.getEmotions().addAll(newEmotions); + session.update(doc); + } + return null; + }); + } + @Override diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index e5261f40..2f2b6f27 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -532,6 +532,7 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String logger.info("Checking if that document was also post-processed yet..."); var existingDoc = db.getDocumentByCorpusAndDocumentId(corpus.getId(), document.getDocumentId()); 
importSentenceTopicsFromXmiIntoDb(existingDoc, filePath); + appendNewEmotionsToExistingDocument(existingDoc,jCas); if (!existingDoc.isPostProcessed()) { logger.info("Not yet post-processed. Doing that now."); @@ -2263,5 +2264,51 @@ private void logImportError(String message, Exception ex, String file) { tryStoreUCEImportLog(importLog); logger.error(message, ex); } + + private void appendNewEmotionsToExistingDocument(Document existingDoc, JCas jCas){ + var newEmotions = new ArrayList(); + JCasUtil.select(jCas, Emotion.class).forEach(e -> { + var emotion = new org.texttechnologylab.uce.common.models.corpus.emotion.Emotion(e.getBegin(),e.getEnd()); + emotion.setCoveredText(e.getCoveredText()); + var meta = e.getModel(); + ModelEntity foundModel = null; + if(meta!=null){ + String modelNameFromXmi = meta.getModelName(); + try{ + foundModel = db.getModelEntityByKey(modelNameFromXmi); + if (foundModel == null) foundModel = db.getModelEntityByMap(modelNameFromXmi); + } catch (DatabaseOperationException ex) { + logger.error("Error when looking for model in database" + modelNameFromXmi); + } + } + if(foundModel != null) emotion.setDbModel(foundModel); + + var feelings = new ArrayList(); + for (var annotationComment : e.getEmotions()){ + var feeling = new Feeling(); + feeling.setEmotion(emotion); + ExceptionUtils.tryCatchLog(() -> feeling.setValue(Double.parseDouble(annotationComment.getValue())),(ex) -> {}); + feeling.setFeeling(annotationComment.getKey()); + feelings.add(feeling); + } + emotion.setFeelings(feelings); + newEmotions.add(emotion); + }); + + if(!newEmotions.isEmpty()){ + if(existingDoc.getEmotions() == null){ + existingDoc.setEmotions(new ArrayList<>()); + } + ExceptionUtils.tryCatchLog( + () -> db.saveNewEmotionsForDocument(existingDoc.getId(),newEmotions), + (ex) -> logger.error("Error when saving new emotions to existing document" + existingDoc.getId(), ex) + ); + ExceptionUtils.tryCatchLog( + () -> db.createSentenceEmotions(existingDoc.getId()), + (ex) -> 
logger.error("Error when creating sentence emotions after saving new emotions to existing document" + existingDoc.getId(), ex) + ); + } + + } } From 8efa8c350e845fb0b0db2bcc899fd0da596938d0 Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Thu, 5 Mar 2026 22:12:50 +0100 Subject: [PATCH 21/47] Visual update regarding buttons, deleted model coloumn in table emotion. --- .../templates/css/document-reader.css | 86 +++++++++++++++++-- .../templates/reader/documentReaderView.ftl | 58 +++++++++++-- .../common/models/corpus/emotion/Emotion.java | 2 - 3 files changed, 131 insertions(+), 15 deletions(-) diff --git a/uce.portal/resources/templates/css/document-reader.css b/uce.portal/resources/templates/css/document-reader.css index e8104f16..d2609393 100644 --- a/uce.portal/resources/templates/css/document-reader.css +++ b/uce.portal/resources/templates/css/document-reader.css @@ -711,14 +711,16 @@ body { /* Bottom Navigation */ .tab-pane .viz-bottom-nav { position: fixed; - right: 0%; + left: 50%; bottom: 30px; transform: translateX(-50%); - width: auto; - min-width: 320px; - max-width: 200vw; + right: auto; + width: max-content; + max-width: calc(100vw - 40px); display: flex; - justify-content: space-around; + justify-content: center; + gap: 10px; + overflow: visible; border-radius: 24px; box-shadow: 0 4px 24px rgba(0,0,0,0.12); background: #fff; @@ -951,6 +953,7 @@ body { border-radius: 4px; cursor: pointer; padding: 0; +} .paragraph .paragraph-header { border-radius: 16px; @@ -964,5 +967,76 @@ body { background-color: white; color: var(--prime); border: 1px solid var(--prime); +} + + /* --- Dropdown Navigation Erweiterung (Hover) --- */ + .tab-pane .viz-bottom-nav.viz-dropdown-nav{ + justify-content: center; /* statt space-around */ + gap: 10px; + overflow: visible; /* wichtig, damit Menüs nicht abgeschnitten werden */ + } + + .tab-pane .viz-nav-group{ + position: relative; + } + + /* Parent-Buttons bleiben optisch wie .viz-nav-btn */ + .tab-pane 
.viz-nav-parent{ + width: auto; /* nicht auf 100% ziehen, sonst werden Gruppen riesig */ + padding: 8px 14px; + display: inline-flex; + align-items: center; + gap: 8px; + } + + /* Dropdown-Menü */ + .tab-pane .viz-nav-menu{ + display: none; + position: absolute; + left: 0; + bottom: calc(100% + 10px); /* klappt nach oben auf */ + min-width: 240px; + background: #fff; + border: 1px solid #e0e0e0; + border-radius: 14px; + box-shadow: 0 10px 25px rgba(0,0,0,0.12); + padding: 6px; + z-index: 9999; + } + + /* Öffnen per Hover */ + .tab-pane .viz-nav-group:hover .viz-nav-menu{ + display: block; + } + + /* Menü-Items */ + .tab-pane .viz-nav-item{ + display: block; + padding: 8px 10px; + border-radius: 10px; + text-decoration: none; + color: #555; + font-weight: 600; + cursor: pointer; + white-space: nowrap; + } + + .tab-pane .viz-nav-item:hover{ + background: rgba(0,0,0,0.06); + } + + /* Optional: Disabled */ + .tab-pane .viz-nav-item.viz-disabled{ + opacity: 0.6; + cursor: not-allowed; + } -} \ No newline at end of file + /* Optional: kleiner “Puffer”, damit Menü nicht sofort schließt */ + .tab-pane .viz-nav-menu::before{ + content: ""; + position: absolute; + left: 0; + right: 0; + bottom: -10px; + height: 10px; + } \ No newline at end of file diff --git a/uce.portal/resources/templates/reader/documentReaderView.ftl b/uce.portal/resources/templates/reader/documentReaderView.ftl index 58eb198e..002a1460 100644 --- a/uce.portal/resources/templates/reader/documentReaderView.ftl +++ b/uce.portal/resources/templates/reader/documentReaderView.ftl @@ -320,13 +320,57 @@ -
- - - - - - +
diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java index c83d0108..936c335a 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/emotion/Emotion.java @@ -19,8 +19,6 @@ @Typesystem(types = {Emotion.class}) public class Emotion extends UIMAAnnotation implements WikiModel { - private String model; - @OneToMany(cascade = CascadeType.ALL) @JoinColumn(name = "emotion_id") private List feelings; From 53f8932ed49e30479eb27118ae201bb81a11ba14 Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Thu, 5 Mar 2026 23:38:11 +0100 Subject: [PATCH 22/47] models.json fuer mich gefixt --- .../uce.web/src/main/resources/models.json | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/uce.portal/uce.web/src/main/resources/models.json b/uce.portal/uce.web/src/main/resources/models.json index b842dee9..10192f51 100644 --- a/uce.portal/uce.web/src/main/resources/models.json +++ b/uce.portal/uce.web/src/main/resources/models.json @@ -43,6 +43,28 @@ "Variant": "Topic", "type": "" }, + "Topic ParlaCAP": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "classla/ParlaCAP-Topic-Classifier", + "Name": "Topic ParlaCap", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, + "Topic dstefa": { + "url": "", + "github": "", + "huggingface": "", + "paper": "", + "map": "dstefa/roberta-base_topic_classification_nyt_news", + "Name": "Topic dstefa", + "Main Tool": "Topic Modeling", + "Variant": "Topic", + "type": "" + }, "Topic WebOrganizer (EN)": { "url": "http://topic-organize-web.service.component.duui.texttechnologylab.org", "github": "https://github.com/texttechnologylab/duui-uima/tree/main/duui-transformers-topic", 
From f8e90c6be3e0b36ce1c295c3905eecee22442a3f Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Fri, 6 Mar 2026 00:28:39 +0100 Subject: [PATCH 23/47] Implement emotion model selection and ECharts radar rendering in document reader --- .../resources/templates/js/documentReader.js | 110 +++++++++++++++++- .../templates/reader/documentReaderView.ftl | 8 +- .../PostgresqlDataInterface_Impl.java | 71 +++++++++++ .../org/texttechnologylab/uce/web/App.java | 3 + .../uce/web/routes/DocumentApi.java | 72 ++++++++++++ 5 files changed, 255 insertions(+), 9 deletions(-) diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index 97fc11ba..df485e85 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -3,7 +3,7 @@ let searchTokens = ""; let currentSelectedTopic = null; let currentTopicIndex = -1; let matchingTopics = []; - +let selectedEmotionModelId = null; let defaultTopicColorMap = getDefaultTopicColorMap(); let defaultTopicSettings = { topicCount: 10, @@ -873,6 +873,8 @@ document.querySelectorAll('.tab-btn').forEach(btn => { $('.scrollbar-minimap').show(); } if (targetId === 'visualization-tab') { + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + loadEmotionModels(docId); setTimeout(() => renderTemporalExplorer('vp-1'), 500); $('.viz-nav-btn').removeClass('active'); $('.viz-nav-btn').first().addClass('active'); @@ -880,6 +882,17 @@ document.querySelectorAll('.tab-btn').forEach(btn => { $('.viz-panel').removeClass('active'); $('.viz-panel').first().addClass('active'); } + $(document).on('click', '.emotion-model-item', function (e) { + e.preventDefault(); + selectedEmotionModelId = $(this).data('model-id'); + + $('.emotion-model-item').removeClass('active'); + $(this).addClass('active'); + + // mittig anzeigen (der Container in der Visualization ist vp-1) + 
$('#vp-1').removeClass('rendered'); + renderEmotionRadar('vp-1'); + }); }); }); @@ -1437,13 +1450,17 @@ function renderTemporalExplorer(containerId) { $('.visualization-spinner').show() const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + const emotionReq = $.get('/api/document/page/emotions', { + documentId: docId, + modelId: selectedEmotionModelId + }); const taxonReq = $.get('/api/document/page/taxon', { documentId: docId }); const topicReq = $.get('/api/document/page/topics', { documentId: docId }); const entityReq = $.get('/api/document/page/namedEntities', { documentId: docId }); const lemmaReq = $.get('/api/document/page/lemma', { documentId: docId }); const geonameReq = $.get('/api/document/page/geoname', { documentId: docId }); - Promise.all([taxonReq, topicReq, entityReq, lemmaReq, geonameReq]).then(([taxon, topics, entities, lemma, geoname]) => { + Promise.all([taxonReq, topicReq, entityReq, lemmaReq, geonameReq, emotionReq]).then(([taxon, topics, entities, lemma, geoname, emotions]) => { $('.visualization-spinner').hide() if ((!taxon || taxon.length === 0) && (!topics || topics.length === 0) && (!entities || entities.length === 0) && (!lemma || lemma.length === 0 && !geoname || geoname.length === 0)) { const container = document.getElementById(containerId); @@ -1495,6 +1512,14 @@ function renderTemporalExplorer(containerId) { valueField: 'geonameValue', label: 'Geonames', color: '#c680ff', + }, + { + key: 'Emotions', + data: emotions, + pageField: 'pageId', + valueField: 'emotionLabel', + label: 'Emotions', + color: '#f5c542' } ]; @@ -1528,7 +1553,8 @@ function renderTemporalExplorer(containerId) { Topics: [], "Named Entities": [], Lemmas: [], - Geonames: [] + Geonames: [], + Emotions: [] }); } @@ -1619,7 +1645,85 @@ function renderTemporalExplorer(containerId) { console.error("Error loading or processing annotation data:", err); }); } +function loadEmotionModels(docId) { + return 
$.get('/api/document/emotionModels', { documentId: docId }) + .then((models) => { + const $menu = $('#emotion-model-menu'); + $menu.empty(); + + if (!models || models.length === 0) { + $menu.append('No models found'); + return; + } + + models.forEach((m) => { + $menu.append( + '' + + (m.modelName ? m.modelName : ('Model ' + m.modelId)) + + '' + ); + }); + }) + .catch(() => { + $('#emotion-model-menu').html('Failed to load'); + }); +} +function renderEmotionRadar(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + // jedes Mal neu zeichnen (bei Model-Wechsel) + container.classList.remove('rendered'); + container.innerHTML = '
'; + + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + const modelId = selectedEmotionModelId || null; + + $('.visualization-spinner').show(); + + $.get('/api/document/emotionRadar', { documentId: docId, modelId: modelId }) + .then(data => { + $('.visualization-spinner').hide(); + + if (!data || !Array.isArray(data) || data.length === 0) { + container.innerHTML = '
No emotion data for this model
'; + container.classList.add('rendered'); + return; + } + + // ECharts Radar expects indicators with max. + // feeling.value scheint bei dir oft 0..1 zu sein → max=1. + const indicators = data.map(d => ({ name: d.label, max: 1 })); + const values = data.map(d => d.value); + const chartDom = document.getElementById(containerId + '-radar'); + const chart = echarts.init(chartDom); + + const option = { + title: { text: 'Emotion Radar' }, + tooltip: {}, + radar: { + indicator: indicators, + radius: '65%' + }, + series: [{ + type: 'radar', + data: [{ + value: values, + name: 'Avg intensity' + }] + }] + }; + + chart.setOption(option); + container.classList.add('rendered'); + }) + .catch(() => { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load emotion radar
'; + container.classList.add('rendered'); + }); +} function initializeTopicSettingsPanel() { if (topicSettings.colorMode === 'per-topic') { diff --git a/uce.portal/resources/templates/reader/documentReaderView.ftl b/uce.portal/resources/templates/reader/documentReaderView.ftl index 002a1460..1599d77e 100644 --- a/uce.portal/resources/templates/reader/documentReaderView.ftl +++ b/uce.portal/resources/templates/reader/documentReaderView.ftl @@ -362,13 +362,9 @@
- + -
- No models yet -
+
diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index 1e89cb39..7d4f2520 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2469,5 +2469,76 @@ public void initializeModelsFromJson(){ System.err.println("Error during getting ModalEntity from database"); } } + public List getEmotionByPage(long documentId, Long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + + String sql = """ + WITH best_emotion_per_sentence AS ( + SELECT DISTINCT ON (se.document_id, se.sentence_id) + se.sentence_id, + f.feeling AS emotion_label, + f.value AS emotion_value + FROM sentenceemotions se + JOIN emotion e ON e.id = se.emotion_id + JOIN feeling f ON f.emotion_id = e.id + WHERE se.document_id = :documentId + AND (:modelId IS NULL OR se.model_id = :modelId) + ORDER BY se.document_id, se.sentence_id, f.value DESC + ) + SELECT + s.page_id, + bes.emotion_label + FROM best_emotion_per_sentence bes + JOIN sentence s ON s.id = bes.sentence_id + WHERE s.document_id = :documentId + ORDER BY s.page_id, bes.emotion_label + """; + + var query = session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .setParameter("modelId", modelId); + + return query.getResultList(); + }); + } + public List getEmotionRadarForDocument(long documentId, Long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + + String sql = """ + SELECT + f.feeling AS feeling_label, + AVG(f.value) AS avg_value + FROM sentenceemotions se + JOIN emotion e ON e.id = se.emotion_id + JOIN feeling f ON f.emotion_id = e.id + WHERE se.document_id = :documentId + 
AND (:modelId IS NULL OR se.model_id = :modelId) + GROUP BY f.feeling + ORDER BY avg_value DESC + LIMIT 12 + """; + + var query = session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .setParameter("modelId", modelId); + + return query.getResultList(); + }); + } + public List getEmotionModelsForDocumentWithName(long documentId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + SELECT DISTINCT m.id AS model_id, m.name AS model_name + FROM sentenceemotions se + JOIN models m ON m.id = se.model_id + WHERE se.document_id = :documentId + ORDER BY m.id + """; + + return session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .getResultList(); + }); + } } diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java index 6037916a..46602b0f 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java @@ -503,6 +503,9 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi get("/page/namedEntities", (ctx) -> (registry.get(DocumentApi.class)).getDocumentNamedEntitiesByPage(ctx)); get("/page/lemma", (ctx) -> (registry.get(DocumentApi.class)).getDocumentLemmaByPage(ctx)); get("/page/geoname", (ctx) -> (registry.get(DocumentApi.class)).getDocumentGeonameByPage(ctx)); + get("/page/emotions", (ctx) -> (registry.get(DocumentApi.class)).getDocumentEmotionsByPage(ctx)); + get("/emotionModels", (ctx) -> (registry.get(DocumentApi.class)).getEmotionModels(ctx)); // optional fürs Dropdown + get("/emotionRadar", (ctx) -> (registry.get(DocumentApi.class)).getEmotionRadar(ctx)); delete("/delete", (ctx) -> (registry.get(DocumentApi.class)).deleteDocument(ctx)); get("/findIdByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdByMetadata(ctx)); 
get("/findIdsByMetadata", (ctx) -> (registry.get(DocumentApi.class)).findDocumentIdsByMetadata(ctx)); diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java index f8bc9fe2..7b050bfa 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java @@ -594,4 +594,76 @@ public void getUnifiedTopicToSentenceMap(Context ctx) { } } + public void getDocumentEmotionsByPage(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + + // Optional: wenn du mehrere Emotion-Modelle hast + String modelParam = ctx.queryParam("modelId"); + Long modelId = (modelParam == null || modelParam.isBlank()) ? null : Long.parseLong(modelParam); + + // DB liefert rows: [page_id, emotion_label] + List rows = db.getEmotionByPage(documentId, modelId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + Number pageId = (Number) r[0]; + String emotionLabel = (String) r[1]; + + Map obj = new HashMap<>(); + obj.put("pageId", pageId == null ? null : pageId.longValue()); + obj.put("emotionLabel", emotionLabel); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load emotions", "details", ex.getMessage())); + } + } + public void getEmotionRadar(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + String modelParam = ctx.queryParam("modelId"); + Long modelId = (modelParam == null || modelParam.isBlank()) ? 
null : Long.parseLong(modelParam); + + List rows = db.getEmotionRadarForDocument(documentId, modelId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + String label = (String) r[0]; + Number avg = (Number) r[1]; + + Map obj = new HashMap<>(); + obj.put("label", label); + obj.put("value", avg == null ? 0.0 : avg.doubleValue()); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load emotion radar", "details", ex.getMessage())); + } + } + public void getEmotionModels(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + List rows = db.getEmotionModelsForDocumentWithName(documentId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + Number id = (Number) r[0]; + String name = (String) r[1]; + + Map obj = new HashMap<>(); + obj.put("modelId", id == null ? null : id.longValue()); + obj.put("modelName", (name == null || name.isBlank()) ? ("Model " + id) : name); + result.add(obj); + } + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load emotion models", "details", ex.getMessage())); + } + } + } From 1a882f252b1131e4e45039f3d89baa9676b3b3cf Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Fri, 6 Mar 2026 22:25:04 +0100 Subject: [PATCH 24/47] it's possible to now import a corpusConfig.json File, change annotation and rag-service flags on existing corporas --- .../templates/corpus/corpusInspector.ftl | 16 +- .../resources/templates/landing-page.ftl | 161 +++++++++++++++++- .../uce/common/services/DataInterface.java | 7 +- .../PostgresqlDataInterface_Impl.java | 11 ++ .../uce/corpusimporter/Importer.java | 2 + .../uce/web/routes/ImportExportApi.java | 110 +++++++----- 6 files changed, 256 insertions(+), 51 deletions(-) diff --git a/uce.portal/resources/templates/corpus/corpusInspector.ftl b/uce.portal/resources/templates/corpus/corpusInspector.ftl index c074f490..99357fad 100644 --- 
a/uce.portal/resources/templates/corpus/corpusInspector.ftl +++ b/uce.portal/resources/templates/corpus/corpusInspector.ftl @@ -19,12 +19,18 @@
diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 987da127..82b98dc3 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -155,6 +155,14 @@ Select one or more .xmi/.xml/.gz files. +
+ +
+ + +
+ Upload a CorpusConfig file +

@@ -189,7 +197,7 @@
@@ -308,7 +316,7 @@ $(document).ready(() => { $('#uploadCorpusModal').appendTo('body'); }); - $('body').on('change','#uploadFiles',function(){ + $('body').on('change','.custom-file-input',function(){ const fileNames = []; for (var i = 0; i< this.files.length; i++){ fileNames.push(this.files[i].name); @@ -318,8 +326,96 @@ if(labelText.length > maxLength){ labelText= labelText.substring(0,maxLength) + '...' } + if(labelText == ''){ + labelText = $(this).attr('id') === 'uploadConfigFile' ? 'Choose config file...' : 'Choose files...'; + } $(this).next('.custom-file-label').html(labelText); }) + $('body').on('change','#uploadConfigFile',function(event) { + const file = event.target.files[0]; + if (!file) return; + const reader = new FileReader(); + + reader.onload = function (e){ + try{ + const config = JSON.parse(e.target.result); + // UI Reset + $('#advancedSettings input[type="checkbox"]').prop('checked', false); + if (!$('#uploadCorpusName').prop('readonly')) $('#uploadCorpusName').val(''); + if (!$('#uploadCorpusAuthor').prop('readonly')) $('#uploadCorpusAuthor').val(''); + if (!$('#uploadCorpusLanguage').prop('readonly')) $('#uploadCorpusLanguage').val(''); + if (!$('#uploadCorpusDescription').prop('readonly')) $('#uploadCorpusDescription').val(''); + + if (config.addToExistingCorpus === true && $('#uploadAddToExisting').val() === 'false') { + $('#uploadResult').html( + `
+ + Note: Your uploaded config has addToExistingCorpus: true. + Since you are creating a new corpus, this will be automatically handled as false. +
` + ); + } else { + $('#uploadResult').html(''); + } + + if (config.name && !$('#uploadCorpusName').prop('readonly')) $('#uploadCorpusName').val(config.name); + if (config.author && !$('#uploadCorpusAuthor').prop('readonly')) $('#uploadCorpusAuthor').val(config.author); + if (config.language && !$('#uploadCorpusLanguage').prop('readonly')) $('#uploadCorpusLanguage').val(config.language); + if (config.description && !$('#uploadCorpusDescription').prop('readonly')) $('#uploadCorpusDescription').val(config.description); + + if (config.annotations) { + const ann = config.annotations; + if (ann.sentence) $('#annoSentence').prop('checked', true); + if (ann.lemma) $('#annoLemma').prop('checked', true); + if (ann.namedEntity) $('#annoNE').prop('checked', true); + if (ann.sentiment) $('#annoSentiment').prop('checked', true); + if (ann.emotion) $('#annoEmotion').prop('checked', true); + if (ann.time) $('#annoTime').prop('checked', true); + if (ann.geoNames) $('#annoGeo').prop('checked', true); + if (ann.wikipediaLink) $('#annoWiki').prop('checked', true); + if (ann.image) $('#annoImage').prop('checked', true); + if (ann.annotatorMetadata) $('#annoAnnotatorMeta').prop('checked', true); + if (ann.uceMetadata) $('#annoUceMeta').prop('checked', true); + if (ann.logicalLinks) $('#annoLogical').prop('checked', true); + if (ann.srLink) $('#annoSrLink').prop('checked', true); + if (ann.unifiedTopic) $('#annoUnifiedTopic').prop('checked', true); + if (ann.OCRPage) $('#annoOCRPage').prop('checked', true); + if (ann.OCRParagraph) $('#annoOCRPara').prop('checked', true); + if (ann.OCRBlock) $('#annoOCRBlock').prop('checked', true); + if (ann.OCRLine) $('#annoOCRLine').prop('checked', true); + if (ann.completeNegation) $('#annoNegation').prop('checked', true); + if (ann.cue) $('#annoCue').prop('checked', true); + if (ann.event) $('#annoEvent').prop('checked', true); + if (ann.focus) $('#annoFocus').prop('checked', true); + if (ann.scope) $('#annoScope').prop('checked', true); + if 
(ann.xscope) $('#annoXScope').prop('checked', true); + + if (ann.taxon) { + if (ann.taxon.annotated) $('#taxonAnnotated').prop('checked', true); + if (ann.taxon.biofidOnthologyAnnotated) $('#taxonBiofid').prop('checked', true); + } + } + + if (config.other) { + const oth = config.other; + if (oth.enableEmbeddings) $('#otherEmbeddings').prop('checked', true); + if (oth.enableRAGBot) $('#otherRAG').prop('checked', true); + if (oth.availableOnFrankfurtUniversityCollection) $('#otherGoethe').prop('checked', true); + if (oth.includeKeywordDistribution) $('#otherKeywords').prop('checked', true); + if (oth.enableS3Storage) $('#otherS3').prop('checked', true); + } + + if (!$('#advancedSettings').hasClass('show')) { + $('#advancedSettings').collapse('show'); + } + }catch (err) { + console.error("Error when parsing uploaded corpusConfig file", err); + alert("This corpusConfig file is invalid"); + } + }; + reader.readAsText(file); + }) + function openUploadForNewCorpora(){ const form = document.getElementById('uploadCorpusForm'); form.reset(); @@ -330,12 +426,66 @@ $('#uploadCorpusAuthor').prop('readonly',false); $('#uploadCorpusDescription').prop('readonly',false); $('#uploadFiles').next('.custom-file-label').html('Choose Files...'); + $('#uploadConfigFile').next('.custom-file-label').html('Choose corpusConfig file...'); $('#uploadResult').html(''); - $('#uploadCorpusModal').modal('show'); + + $('#uploadCorpusModal').modal('show'); + $('#configUploadGroup').show(); + } - function openUploadForExistingCorpora(corpusName,author,language,description){ + function openUploadForExistingCorpora(corpusName,author,language,description,configJsonStr){ const form = document.getElementById('uploadCorpusForm'); form.reset(); + $('#advancedSettings input[type="checkbox"]').prop('checked',false).prop('disabled',false); + if (configJsonStr){ + try{ + const config = typeof configJsonStr === 'string' ? 
JSON.parse(configJsonStr) : configJsonStr; + if (config.annotations) { + const ann = config.annotations; + if (ann.sentence) $('#annoSentence').prop('checked', true).prop('disabled', true); + if (ann.lemma) $('#annoLemma').prop('checked', true).prop('disabled', true); + if (ann.namedEntity) $('#annoNE').prop('checked', true).prop('disabled', true); + if (ann.sentiment) $('#annoSentiment').prop('checked', true).prop('disabled', true); + if (ann.emotion) $('#annoEmotion').prop('checked', true).prop('disabled', true); + if (ann.time) $('#annoTime').prop('checked', true).prop('disabled', true); + if (ann.geoNames) $('#annoGeo').prop('checked', true).prop('disabled', true); + if (ann.wikipediaLink) $('#annoWiki').prop('checked', true).prop('disabled', true); + if (ann.image) $('#annoImage').prop('checked', true).prop('disabled', true); + if (ann.annotatorMetadata) $('#annoAnnotatorMeta').prop('checked', true).prop('disabled', true); + if (ann.uceMetadata) $('#annoUceMeta').prop('checked', true).prop('disabled', true); + if (ann.logicalLinks) $('#annoLogical').prop('checked', true).prop('disabled', true); + if (ann.srLink) $('#annoSrLink').prop('checked', true).prop('disabled', true); + if (ann.unifiedTopic) $('#annoUnifiedTopic').prop('checked', true).prop('disabled', true); + if (ann.OCRPage) $('#annoOCRPage').prop('checked', true).prop('disabled', true); + if (ann.OCRParagraph) $('#annoOCRPara').prop('checked', true).prop('disabled', true); + if (ann.OCRBlock) $('#annoOCRBlock').prop('checked', true).prop('disabled', true); + if (ann.OCRLine) $('#annoOCRLine').prop('checked', true).prop('disabled', true); + if (ann.completeNegation) $('#annoNegation').prop('checked', true).prop('disabled', true); + if (ann.cue) $('#annoCue').prop('checked', true).prop('disabled', true); + if (ann.event) $('#annoEvent').prop('checked', true).prop('disabled', true); + if (ann.focus) $('#annoFocus').prop('checked', true).prop('disabled', true); + if (ann.scope) 
$('#annoScope').prop('checked', true).prop('disabled', true); + if (ann.xscope) $('#annoXScope').prop('checked', true).prop('disabled', true); + + if (ann.taxon) { + if (ann.taxon.annotated) $('#taxonAnnotated').prop('checked', true).prop('disabled', true); + if (ann.taxon.biofidOnthologyAnnotated) $('#taxonBiofid').prop('checked', true).prop('disabled', true); + } + if (config.other) { + const oth = config.other; + if (oth.enableEmbeddings) $('#otherEmbeddings').prop('checked', true).prop('disabled', true); + if (oth.enableRAGBot) $('#otherRAG').prop('checked', true).prop('disabled', true); + if (oth.availableOnFrankfurtUniversityCollection) $('#otherGoethe').prop('checked', true).prop('disabled', true); + if (oth.includeKeywordDistribution) $('#otherKeywords').prop('checked', true).prop('disabled', true); + if (oth.enableS3Storage) $('#otherS3').prop('checked', true).prop('disabled', true); + if (oth.availableOnFrankfurtUniversityCollection) $('#otherGoethe').prop('checked', true).prop('disabled', true); + } + } + }catch(e){ + console.error("Error parsing config string for locking UI Components") + } + } + $('#uploadAddToExisting').val('true'); $('#uploadModalTitle').html('Add Files to "' + corpusName + '"'); $('#uploadCorpusName').val(corpusName).prop('readonly',true); @@ -343,8 +493,11 @@ $('#uploadCorpusAuthor').val(author).prop('readonly',true); $('#uploadCorpusDescription').val(description) .prop('readonly',true); $('#uploadFiles').next('.custom-file-label').html('Choose Files...'); + $('#uploadConfigFile').prop('disabled', true); $('#uploadResult').html(''); + $('#uploadCorpusModal').modal('show'); + $('#configUploadGroup').hide(); } function submitCorpusUpload(){ const form = document.getElementById('uploadCorpusForm'); diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java index 9ae312d8..69a199ef 100644 
--- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java @@ -387,6 +387,9 @@ public DocumentSearchResult defaultSearchForDocuments(int skip, * Adds new emotions to an existing document in the database */ public void saveNewEmotionsForDocument(long documentId, List newEmotions) throws DatabaseOperationException; - - + + /** + * Updates a corpusJsonConfig in the database + */ + public void updateCorpusJsonConfig(long corpusId,String jsonConfig) throws DatabaseOperationException; } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index 7d4f2520..e431c876 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2540,5 +2540,16 @@ public List getEmotionModelsForDocumentWithName(long documentId) throw .getResultList(); }); } + + public void updateCorpusJsonConfig(long corpusId,String jsonConfig) throws DatabaseOperationException{ + executeOperationSafely((session) -> { + Corpus corpus = session.get(Corpus.class,corpusId); + if (corpus != null){ + corpus.setCorpusJsonConfig(jsonConfig); + session.update(corpus); + } + return null; + }); + } } diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index 2f750404..a48bfec7 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ 
b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -386,6 +386,8 @@ public static Corpus CreateDBCorpus(Corpus corpus, CorpusConfig corpusConfig, Po if (corpusConfig.isAddToExistingCorpus()) { var existingCorpus = db.getCorpusByName(corpusConfig.getName()); if (existingCorpus != null) { // If we have the corpus, use that. + existingCorpus.setCorpusJsonConfig(gson.toJson(corpusConfig)); + db.updateCorpusJsonConfig(existingCorpus.getId(),existingCorpus.getCorpusJsonConfig()); return existingCorpus; } throw new DatabaseOperationException("The corpus config specified to add to an existing corpus, " + diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index bc43a31e..3aa6ea5b 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -13,6 +13,7 @@ import org.texttechnologylab.uce.common.config.corpusConfig.TaxonConfig; import org.texttechnologylab.uce.common.exceptions.DatabaseOperationException; import org.texttechnologylab.uce.common.exceptions.ExceptionUtils; +import org.texttechnologylab.uce.common.models.corpus.Corpus; import org.texttechnologylab.uce.common.models.imp.ImportStatus; import org.texttechnologylab.uce.common.models.imp.UCEImport; import org.texttechnologylab.uce.common.services.PostgresqlDataInterface_Impl; @@ -212,61 +213,90 @@ public void importCorpusFromUpload(Context ctx){ } } - CorpusConfig config = new CorpusConfig(); String name = ctx.formParam("name"); if (name == null || name.isBlank()){ ctx.status(400).result("No corpus name given"); } + String addToExistingParam = ctx.formParam("addToExistingCorpus"); + boolean addToExisting = addToExistingParam != null && Boolean.parseBoolean(addToExistingParam); + 
CorpusConfig config = null; + if(addToExisting){ + Corpus existingCorpus = ExceptionUtils.tryCatchLog(() -> + db.getCorpusByName(name), + (ex) -> logger.warn("Could not fetch existing corpus config for merging",ex) + ); + if (existingCorpus != null && existingCorpus.getCorpusJsonConfig() != null) + config = CorpusConfig.fromJson(existingCorpus.getCorpusJsonConfig()); + } + if (config == null){ + config = new CorpusConfig(); + config.setAnnotations(new CorpusAnnotationConfig()); + config.getAnnotations().setTaxon(new TaxonConfig()); + config.setOther(new OtherConfig()); + } else{ + if(config.getAnnotations() == null) config.setAnnotations(new CorpusAnnotationConfig()); + if(config.getAnnotations().getTaxon() == null) config.getAnnotations().setTaxon(new TaxonConfig()); + if(config.getOther() == null) config.setOther(new OtherConfig()); + } + config.setName(name); + config.setAddToExistingCorpus(addToExisting); String author = ctx.formParam("author"); - if (author == null || author.isBlank()) { - ctx.status(400).result("Corpus Author is required."); + if (author != null && !author.isBlank()) config.setAuthor(author); + else if (config.getAuthor() == null) { + ctx.status(400).result("Corpus Author is required"); return; } - config.setAuthor(author); String language = ctx.formParam("language"); - if (language == null || language.isBlank()) { + if (language != null && !language.isBlank()) config.setLanguage(language); + else if (config.getLanguage() == null) { ctx.status(400).result("Corpus Language is required."); return; } - config.setLanguage(language); - config.setDescription(ctx.formParam("description")); - String addToExistingParam = ctx.formParam("addToExistingCorpus"); - boolean addToExisting = addToExistingParam != null && Boolean.parseBoolean(addToExistingParam); - config.setAddToExistingCorpus(addToExisting); - + String description = ctx.formParam("description"); + if (description != null && !description.isBlank()) config.setDescription(description); // 
Annotations - CorpusAnnotationConfig annotations = new CorpusAnnotationConfig(); - annotations.setSentence(ctx.formParam("sentence") != null); - annotations.setLemma(ctx.formParam("lemma") != null); - annotations.setNamedEntity(ctx.formParam("namedEntity") != null); - annotations.setSentiment(ctx.formParam("sentiment") != null); - annotations.setEmotion(ctx.formParam("emotion") != null); - annotations.setTime(ctx.formParam("time") != null); - annotations.setGeoNames(ctx.formParam("geoNames") != null); - annotations.setWikipediaLink(ctx.formParam("wikipediaLink") != null); - annotations.setImage(ctx.formParam("image") != null); - annotations.setUnifiedTopic(ctx.formParam("unifiedTopic") != null); - annotations.setOCRPage(ctx.formParam("OCRPage") != null); - annotations.setOCRParagraph(ctx.formParam("OCRParagraph") != null); - annotations.setOCRBlock(ctx.formParam("OCRBlock") != null); - annotations.setOCRLine(ctx.formParam("OCRLine") != null); - - TaxonConfig taxonConfig = new TaxonConfig(); - taxonConfig.setAnnotated(ctx.formParam("taxonAnnotated") != null); - taxonConfig.setBiofidOnthologyAnnotated(ctx.formParam("biofidOnthologyAnnotated") != null); - - annotations.setTaxon(taxonConfig); - config.setAnnotations(annotations); - + CorpusAnnotationConfig ann = config.getAnnotations(); + ann.setSentence(ann.isSentence() || ctx.formParam("sentence") != null); + ann.setLemma(ann.isLemma() || ctx.formParam("lemma") != null); + ann.setNamedEntity(ann.isNamedEntity() || ctx.formParam("namedEntity") != null); + ann.setSentiment(ann.isSentiment() || ctx.formParam("sentiment") != null); + ann.setEmotion(ann.isEmotion() || ctx.formParam("emotion") != null); + ann.setTime(ann.isTime() || ctx.formParam("time") != null); + ann.setGeoNames(ann.isGeoNames() || ctx.formParam("geoNames") != null); + ann.setWikipediaLink(ann.isWikipediaLink() || ctx.formParam("wikipediaLink") != null); + ann.setImage(ann.isImage() || ctx.formParam("image") != null); + 
ann.setAnnotatorMetadata(ann.isAnnotatorMetadata() || ctx.formParam("annotatorMetadata") != null); + ann.setUceMetadata(ann.isUceMetadata() || ctx.formParam("uceMetadata") != null); + ann.setLogicalLinks(ann.isLogicalLinks() || ctx.formParam("logicalLinks") != null); + ann.setSrLink(ann.isSrLink() || ctx.formParam("srLink") != null); + ann.setUnifiedTopic(ann.isUnifiedTopic() || ctx.formParam("unifiedTopic") != null); + ann.setOCRPage(ann.isOCRPage() || ctx.formParam("OCRPage") != null); + ann.setOCRParagraph(ann.isOCRParagraph() || ctx.formParam("OCRParagraph") != null); + ann.setOCRBlock(ann.isOCRBlock() || ctx.formParam("OCRBlock") != null); + ann.setOCRLine(ann.isOCRLine() || ctx.formParam("OCRLine") != null); + ann.setCompleteNegation(ann.isCompleteNegation() || ctx.formParam("completeNegation") != null); + ann.setCue(ann.isCue() || ctx.formParam("cue") != null); + ann.setEvent(ann.isEvent() || ctx.formParam("event") != null); + ann.setFocus(ann.isFocus() || ctx.formParam("focus") != null); + ann.setScope(ann.isScope() || ctx.formParam("scope") != null); + ann.setXscope(ann.isXscope() || ctx.formParam("xscope") != null); + + + TaxonConfig taxonConfig = ann.getTaxon(); + taxonConfig.setAnnotated(taxonConfig.isAnnotated() || ctx.formParam("taxonAnnotated") != null); + taxonConfig.setBiofidOnthologyAnnotated(taxonConfig.isBiofidOnthologyAnnotated() || ctx.formParam("biofidOnthologyAnnotated") != null); // Other Settings - OtherConfig otherConfig = new OtherConfig(); - otherConfig.setEnableEmbeddings(ctx.formParam("enableEmbeddings") != null); - otherConfig.setEnableRAGBot(ctx.formParam("enableRAGBot") != null); - otherConfig.setIncludeKeywordDistribution(ctx.formParam("includeKeywordDistribution") != null); - otherConfig.setEnableS3Storage(ctx.formParam("enableS3Storage") != null); + OtherConfig otherConfig = config.getOther(); + otherConfig.setEnableEmbeddings(otherConfig.isEnableEmbeddings() || ctx.formParam("enableEmbeddings") != null); + 
otherConfig.setEnableRAGBot(otherConfig.isEnableRAGBot() || ctx.formParam("enableRAGBot") != null); + otherConfig.setIncludeKeywordDistribution(otherConfig.isIncludeKeywordDistribution()|| ctx.formParam("includeKeywordDistribution") != null); + otherConfig.setEnableS3Storage(otherConfig.isEnableS3Storage() || ctx.formParam("enableS3Storage") != null); + otherConfig.setAvailableOnFrankfurtUniversityCollection(otherConfig.isAvailableOnFrankfurtUniversityCollection() || ctx.formParam("availableOnFrankfurtUniversityCollection") != null); + + config.setAnnotations(ann); config.setOther(otherConfig); - + Gson gson = new GsonBuilder().setPrettyPrinting().create(); String jsonString = gson.toJson(config); Files.writeString(rootDir.resolve("corpusConfig.json"),jsonString,StandardCharsets.UTF_8); From 03ee47c941869f630c7714f023340c8b9acde4bc Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Sun, 8 Mar 2026 18:17:37 +0100 Subject: [PATCH 25/47] Implement emotion visualization toggle with model-based radar, timeline, and heatmap views --- .../resources/templates/js/documentReader.js | 462 ++++++++++++++++-- .../templates/reader/documentReaderView.ftl | 7 +- 2 files changed, 434 insertions(+), 35 deletions(-) diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index df485e85..bd98632e 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -4,6 +4,7 @@ let currentSelectedTopic = null; let currentTopicIndex = -1; let matchingTopics = []; let selectedEmotionModelId = null; +let selectedEmotionVizType = 'radar'; let defaultTopicColorMap = getDefaultTopicColorMap(); let defaultTopicSettings = { topicCount: 10, @@ -848,6 +849,22 @@ function updateFloatingUIPositions() { window.addEventListener('resize', updateFloatingUIPositions); window.addEventListener('DOMContentLoaded', updateFloatingUIPositions); +function 
activateVisualizationPanel(target, $button) { + clearTopicColoring(); + hideTopicNavButtons(); + $('.scrollbar-minimap').hide(); + + $('.viz-nav-btn').removeClass('active'); + $('.viz-nav-parent').removeClass('active'); + + if ($button && $button.length) { + $button.addClass('active'); + } + + $('.viz-panel').removeClass('active'); + $(target).addClass('active'); +} + document.querySelectorAll('.tab-btn').forEach(btn => { btn.addEventListener('click', async () => { const targetId = btn.getAttribute('data-tab'); @@ -867,49 +884,81 @@ document.querySelectorAll('.tab-btn').forEach(btn => { $('.scrollbar-minimap').hide(); sideBar.classList.add('visualization-expanded'); } else { - setTimeout(updateFloatingUIPositions,500) ; + setTimeout(updateFloatingUIPositions, 500); currentSelectedTopic = null; sideBar.classList.remove('visualization-expanded'); $('.scrollbar-minimap').show(); } + if (targetId === 'visualization-tab') { const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); loadEmotionModels(docId); + activateVisualizationPanel('#viz-panel-1', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); setTimeout(() => renderTemporalExplorer('vp-1'), 500); - $('.viz-nav-btn').removeClass('active'); - $('.viz-nav-btn').first().addClass('active'); - - $('.viz-panel').removeClass('active'); - $('.viz-panel').first().addClass('active'); } - $(document).on('click', '.emotion-model-item', function (e) { - e.preventDefault(); - selectedEmotionModelId = $(this).data('model-id'); + }); +}); - $('.emotion-model-item').removeClass('active'); - $(this).addClass('active'); +$(document).on('click', '.viz-nav-item[data-target]', function (e) { + e.preventDefault(); - // mittig anzeigen (der Container in der Visualization ist vp-1) - $('#vp-1').removeClass('rendered'); - renderEmotionRadar('vp-1'); - }); - }); + const target = $(this).data('target'); + const $group = $(this).closest('.viz-nav-group'); + activateVisualizationPanel(target, 
$group.find('.viz-nav-parent')); + if (target === '#viz-panel-1') { + setTimeout(() => renderTemporalExplorer('vp-1'), 500); + } + if (target === '#viz-panel-2') { + setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); + } + if (target === '#viz-panel-3') { + setTimeout(() => renderSentenceTopicNetwork('vp-3'), 500); + } + if (target === '#viz-panel-4') { + $('.selector-container').hide(); + setTimeout(() => renderTopicSimilarityMatrix('vp-4'), 500); + } + if (target === '#viz-panel-5') { + setTimeout(() => renderSentenceTopicSankey('vp-5'), 500); + } + if (target === '#viz-panel-6') { + setTimeout(() => renderTemporalExplorer('vp-6'), 500); + } }); -$(document).on('click', '.viz-nav-btn', function () { - const target = $(this).data('target'); - clearTopicColoring(); - hideTopicNavButtons(); - $('.scrollbar-minimap').hide(); +$(document).on('click', '.emotion-model-item', function (e) { + e.preventDefault(); - // Update active button - $('.viz-nav-btn').removeClass('active'); + selectedEmotionModelId = $(this).data('model-id'); + $('.emotion-model-item').removeClass('active'); $(this).addClass('active'); - // Update visible panel - $('.viz-panel').removeClass('active'); - $(target).addClass('active'); + activateVisualizationPanel('#viz-panel-7', $('.viz-nav-group[data-category="emotion"] .viz-nav-parent')); + $('#vp-7').removeClass('rendered'); + renderEmotionViz('vp-7'); +}); + +$(document).on('click', '.emotion-viz-toggle-btn', function (e) { + e.preventDefault(); + + const nextVizType = $(this).data('viz-type'); + if (!nextVizType || nextVizType === selectedEmotionVizType) return; + + selectedEmotionVizType = nextVizType; + $('#vp-7').removeClass('rendered'); + renderEmotionViz('vp-7'); +}); + +$(document).on('click', '.viz-nav-btn', function (e) { + const target = $(this).data('target'); + + if (!target) { + return; + } + + e.preventDefault(); + activateVisualizationPanel(target, $(this)); if (target === '#viz-panel-1') { setTimeout(() => 
renderTemporalExplorer('vp-1'), 500); @@ -923,15 +972,12 @@ $(document).on('click', '.viz-nav-btn', function () { if (target === '#viz-panel-4') { $('.selector-container').hide(); setTimeout(() => renderTopicSimilarityMatrix('vp-4'), 500); - } if (target === '#viz-panel-5') { setTimeout(() => renderSentenceTopicSankey('vp-5'), 500); - } if (target === '#viz-panel-6') { setTimeout(() => renderTemporalExplorer('vp-6'), 500); - } }); @@ -1652,29 +1698,100 @@ function loadEmotionModels(docId) { $menu.empty(); if (!models || models.length === 0) { + selectedEmotionModelId = null; $menu.append('No models found'); return; } + const hasSelectedModel = models.some(function (m) { + return String(m.modelId) === String(selectedEmotionModelId); + }); + + if (!hasSelectedModel) { + selectedEmotionModelId = models[0].modelId; + } + models.forEach((m) => { + const isActive = String(m.modelId) === String(selectedEmotionModelId) ? ' active' : ''; $menu.append( - '' + + '' + (m.modelName ? m.modelName : ('Model ' + m.modelId)) + '' ); }); }) .catch(() => { + selectedEmotionModelId = null; $('#emotion-model-menu').html('Failed to load'); }); } +function getSelectedEmotionModelName() { + const $activeModel = $('.emotion-model-item.active'); + if ($activeModel.length > 0) { + return $.trim($activeModel.text()); + } + + if (selectedEmotionModelId) { + return 'Model ' + selectedEmotionModelId; + } + + return 'Emotion model'; +} + +function renderEmotionViz(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + if (!selectedEmotionModelId) { + container.classList.remove('rendered'); + container.innerHTML = '
Please choose an emotion model
'; + container.classList.add('rendered'); + return; + } + + const modelName = getSelectedEmotionModelName(); + + const radarBtnClass = selectedEmotionVizType === 'radar' + ? 'btn btn-sm btn-primary emotion-viz-toggle-btn' + : 'btn btn-sm btn-light emotion-viz-toggle-btn'; + + const timelineBtnClass = selectedEmotionVizType === 'timeline' + ? 'btn btn-sm btn-primary emotion-viz-toggle-btn' + : 'btn btn-sm btn-light emotion-viz-toggle-btn'; + + const heatmapBtnClass = selectedEmotionVizType === 'heatmap' + ? 'btn btn-sm btn-primary emotion-viz-toggle-btn' + : 'btn btn-sm btn-light emotion-viz-toggle-btn'; + + container.classList.remove('rendered'); + container.innerHTML = '' + + '
' + + '
' + + '
Emotion
' + + '
' + modelName + '
' + + '
' + + '
' + + '' + + '' + + '' + + '
' + + '
' + + '
'; + + if (selectedEmotionVizType === 'timeline') { + renderEmotionTimeline(containerId + '-body'); + } else if (selectedEmotionVizType === 'heatmap') { + renderEmotionHeatmap(containerId + '-body'); + } else { + renderEmotionRadar(containerId + '-body'); + } +} function renderEmotionRadar(containerId) { const container = document.getElementById(containerId); if (!container) return; - // jedes Mal neu zeichnen (bei Model-Wechsel) container.classList.remove('rendered'); - container.innerHTML = '
'; + container.innerHTML = '
'; const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); const modelId = selectedEmotionModelId || null; @@ -1691,8 +1808,6 @@ function renderEmotionRadar(containerId) { return; } - // ECharts Radar expects indicators with max. - // feeling.value scheint bei dir oft 0..1 zu sein → max=1. const indicators = data.map(d => ({ name: d.label, max: 1 })); const values = data.map(d => d.value); @@ -1724,6 +1839,285 @@ function renderEmotionRadar(containerId) { container.classList.add('rendered'); }); } +function scrollToEmotionPage(pageNumber) { + const pageElement = document.querySelector('.page[data-id="' + pageNumber + '"]'); + if (pageElement) { + pageElement.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } +} + +function loadEmotionPageCounts() { + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + const modelId = selectedEmotionModelId || null; + + return $.get('/api/document/page/emotions', { documentId: docId, modelId: modelId }) + .then(function (data) { + if (!data || !Array.isArray(data) || data.length === 0) { + return null; + } + + const rawPageIds = []; + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + if (!isNaN(pid)) rawPageIds.push(pid); + }); + + const uniqueSortedPageIds = Array.from(new Set(rawPageIds)).sort(function (a, b) { + return a - b; + }); + + const pageIdToPageNumber = new Map(); + uniqueSortedPageIds.forEach(function (pid, idx) { + pageIdToPageNumber.set(pid, idx + 1); + }); + + const pageEmotionCounts = new Map(); + const totalEmotionCounts = {}; + + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + const pageNumber = pageIdToPageNumber.get(pid); + const label = item.emotionLabel ? 
String(item.emotionLabel).trim() : ''; + + if (!pageNumber || !label) return; + + if (!pageEmotionCounts.has(pageNumber)) { + pageEmotionCounts.set(pageNumber, {}); + } + + const currentPageMap = pageEmotionCounts.get(pageNumber); + currentPageMap[label] = (currentPageMap[label] || 0) + 1; + totalEmotionCounts[label] = (totalEmotionCounts[label] || 0) + 1; + }); + + const pages = Array.from(pageEmotionCounts.keys()).sort(function (a, b) { + return a - b; + }) + const topLabels = Object.keys(totalEmotionCounts) + .sort(function (a, b) { + return totalEmotionCounts[b] - totalEmotionCounts[a]; + }) + .slice(0, 6); + + return { + pages: pages, + labels: topLabels, + pageEmotionCounts: pageEmotionCounts, + totalEmotionCounts: totalEmotionCounts + }; + }); +} + +function renderEmotionTimeline(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + + loadEmotionPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages || result.pages.length === 0 || !result.labels || result.labels.length === 0) { + container.innerHTML = '
No emotion timeline data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageEmotionCounts = result.pageEmotionCounts; + + const series = labels.map(function (label) { + return { + name: label, + type: 'line', + smooth: true, + symbol: 'circle', + symbolSize: 6, + data: pages.map(function (page) { + const counts = pageEmotionCounts.get(page) || {}; + return counts[label] || 0; + }) + }; + }); + + const chartDom = document.getElementById(containerId + '-timeline'); + const chart = echarts.init(chartDom); + + const option = { + title: { text: 'Emotion Timeline' }, + tooltip: { + trigger: 'axis', + formatter: function (params) { + if (!params || params.length === 0) return ''; + + const page = params[0].axisValue; + let html = '
Page ' + page + '
'; + + params + .slice() + .sort(function (a, b) { return b.value - a.value; }) + .forEach(function (p) { + html += '
' + p.seriesName + ': ' + p.value + '
'; + }); + + return html; + } + }, + legend: { + type: 'scroll', + top: 30 + }, + grid: { + left: 50, + right: 20, + top: 80, + bottom: 50 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages + }, + yAxis: { + type: 'value', + name: 'Count' + }, + series: series + }; + + chart.setOption(option); + + chart.on('click', function (params) { + const pageNumber = parseInt(params.name, 10); + if (!isNaN(pageNumber)) { + scrollToEmotionPage(pageNumber); + } + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load emotion timeline
'; + container.classList.add('rendered'); + }); +} + +function renderEmotionHeatmap(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + + loadEmotionPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages || result.pages.length === 0 || !result.labels || result.labels.length === 0) { + container.innerHTML = '
No emotion heatmap data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageEmotionCounts = result.pageEmotionCounts; + + const heatmapData = []; + let maxValue = 0; + + pages.forEach(function (page, pageIndex) { + const counts = pageEmotionCounts.get(page) || {}; + + labels.forEach(function (label, labelIndex) { + const value = counts[label] || 0; + if (value > maxValue) maxValue = value; + heatmapData.push([pageIndex, labelIndex, value]); + }); + }); + + const chartDom = document.getElementById(containerId + '-heatmap'); + const chart = echarts.init(chartDom); + + const option = { + title: { text: 'Emotion Heatmap' }, + tooltip: { + position: 'top', + formatter: function (params) { + const page = pages[params.value[0]]; + const label = labels[params.value[1]]; + const value = params.value[2]; + return '
Page ' + page + '
' + label + ': ' + value + '
'; + } + }, + grid: { + left: 90, + right: 30, + top: 60, + bottom: 60 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages, + splitArea: { show: true } + }, + yAxis: { + type: 'category', + name: 'Emotion', + data: labels, + splitArea: { show: true } + }, + visualMap: { + min: 0, + max: maxValue > 0 ? maxValue : 1, + calculable: true, + orient: 'horizontal', + left: 'center', + bottom: 10 + }, + series: [{ + name: 'Emotion Count', + type: 'heatmap', + data: heatmapData, + label: { + show: false + }, + emphasis: { + itemStyle: { + shadowBlur: 10, + shadowColor: 'rgba(0, 0, 0, 0.35)' + } + } + }] + }; + + chart.setOption(option); + + chart.on('click', function (params) { + const pageIndex = params.value[0]; + const pageNumber = pages[pageIndex]; + if (pageNumber) { + scrollToEmotionPage(pageNumber); + } + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load emotion heatmap
'; + container.classList.add('rendered'); + }); +} function initializeTopicSettingsPanel() { if (topicSettings.colorMode === 'per-topic') { diff --git a/uce.portal/resources/templates/reader/documentReaderView.ftl b/uce.portal/resources/templates/reader/documentReaderView.ftl index 1599d77e..b3446875 100644 --- a/uce.portal/resources/templates/reader/documentReaderView.ftl +++ b/uce.portal/resources/templates/reader/documentReaderView.ftl @@ -318,6 +318,9 @@
+
+
+
@@ -362,7 +365,9 @@
- +
From c7ebd4d9da9d2842e87d1080fb025fe8bb18d909 Mon Sep 17 00:00:00 2001 From: Ph1l1ppGitHub Date: Tue, 10 Mar 2026 00:13:40 +0100 Subject: [PATCH 26/47] Add SentenceTopic persistence and topic postprocessing integration with unifiedTopic processing --- .../uce/common/config/HibernateConf.java | 6 +- .../corpusConfig/CorpusAnnotationConfig.java | 1 + .../uce/common/models/corpus/Document.java | 6 + .../common/models/topic/SentenceTopic.java | 46 ++++++ .../PostgresqlDataInterface_Impl.java | 71 +++++++- .../uce/corpusimporter/Importer.java | 152 ++++++++++++++++-- 6 files changed, 262 insertions(+), 20 deletions(-) create mode 100644 uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/topic/SentenceTopic.java diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java index 7aa14783..e2c3758c 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/HibernateConf.java @@ -21,10 +21,7 @@ import org.texttechnologylab.uce.common.models.imp.ImportLog; import org.texttechnologylab.uce.common.models.imp.UCEImport; import org.texttechnologylab.uce.common.models.negation.*; -import org.texttechnologylab.uce.common.models.topic.TopicValueBase; -import org.texttechnologylab.uce.common.models.topic.TopicValueBaseWithScore; -import org.texttechnologylab.uce.common.models.topic.TopicWord; -import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; +import org.texttechnologylab.uce.common.models.topic.*; import java.util.HashMap; @@ -89,6 +86,7 @@ public static SessionFactory buildSessionFactory() { metadataSources.addAnnotatedClass(TopicWord.class); metadataSources.addAnnotatedClass(TopicValueBase.class); metadataSources.addAnnotatedClass(TopicValueBaseWithScore.class); + 
metadataSources.addAnnotatedClass(SentenceTopic.class); //models metadataSources.addAnnotatedClass(ModelEntity.class); diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java index 14bc3e18..cf80c528 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/config/corpusConfig/CorpusAnnotationConfig.java @@ -32,4 +32,5 @@ public class CorpusAnnotationConfig { private boolean scope; private boolean xscope; private boolean unifiedTopic; + public boolean topic; } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java index 2d2d3740..7b611e4d 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/corpus/Document.java @@ -21,6 +21,7 @@ import org.texttechnologylab.uce.common.models.corpus.links.DocumentLink; import org.texttechnologylab.uce.common.models.corpus.links.DocumentToAnnotationLink; import org.texttechnologylab.uce.common.models.negation.*; +import org.texttechnologylab.uce.common.models.topic.SentenceTopic; import org.texttechnologylab.uce.common.models.topic.TopicValueBase; import org.texttechnologylab.uce.common.models.topic.TopicValueBaseWithScore; import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; @@ -234,6 +235,11 @@ public long getPrimaryDbIdentifier() { @JoinColumn(name = "document_Id") private List images; + @Getter + @Setter + @OneToMany(mappedBy = "document", cascade = CascadeType.ALL, orphanRemoval = false) + 
private List sentenceTopics = new ArrayList<>(); + public Document() { metadataTitleInfo = new MetadataTitleInfo(); } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/topic/SentenceTopic.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/topic/SentenceTopic.java new file mode 100644 index 00000000..bac172e4 --- /dev/null +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/models/topic/SentenceTopic.java @@ -0,0 +1,46 @@ +package org.texttechnologylab.uce.common.models.topic; + +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; +import org.texttechnologylab.uce.common.models.ModelBase; +import org.texttechnologylab.uce.common.models.ModelEntity; +import org.texttechnologylab.uce.common.models.corpus.Document; +import org.texttechnologylab.uce.common.models.corpus.Sentence; +import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; + +import javax.persistence.*; + +@Getter +@Setter +@NoArgsConstructor +@Entity +@Table(name = "sentencetopics") +public class SentenceTopic extends ModelBase { + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "unifiedtopic_id") + private UnifiedTopic unifiedTopic; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "topicinstance_id") + private TopicValueBase topicInstance; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "document_id", nullable = false) + private Document document; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "sentence_id", nullable = false) + private Sentence sentence; + + @ManyToOne(fetch = FetchType.LAZY) + @JoinColumn(name = "model_id", nullable = false) + private ModelEntity model; + + @Column(name = "topiclabel", nullable = false) + private String topicLabel; + + @Column(name = "thetast", nullable = false) + private Double score; +} \ No newline at end of file diff --git 
a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index e431c876..040ba8d0 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -34,6 +34,7 @@ import org.texttechnologylab.uce.common.models.topic.TopicValueBase; import org.texttechnologylab.uce.common.models.topic.TopicWord; import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; +import org.texttechnologylab.uce.common.models.topic.SentenceTopic; import org.texttechnologylab.uce.common.models.util.HealthStatus; import org.texttechnologylab.uce.common.utils.ReflectionUtils; import org.texttechnologylab.uce.common.utils.StringUtils; @@ -1302,6 +1303,7 @@ public Document getDocumentByCorpusAndDocumentId(long corpusId, String documentI Document doc = session.createQuery(criteriaQuery).uniqueResult(); if (doc != null) { + Hibernate.initialize(doc.getSentences()); //initializeCompleteDocument(doc, 0, 999999); } return doc; @@ -2472,7 +2474,11 @@ public void initializeModelsFromJson(){ public List getEmotionByPage(long documentId, Long modelId) throws DatabaseOperationException { return executeOperationSafely((session) -> { - String sql = """ + String sql; + var query = session.createNativeQuery(""); + + if (modelId == null) { + sql = """ WITH best_emotion_per_sentence AS ( SELECT DISTINCT ON (se.document_id, se.sentence_id) se.sentence_id, @@ -2482,7 +2488,6 @@ SELECT DISTINCT ON (se.document_id, se.sentence_id) JOIN emotion e ON e.id = se.emotion_id JOIN feeling f ON f.emotion_id = e.id WHERE se.document_id = :documentId - AND (:modelId IS NULL OR se.model_id = :modelId) ORDER BY se.document_id, se.sentence_id, f.value DESC ) SELECT @@ -2492,11 
+2497,37 @@ SELECT DISTINCT ON (se.document_id, se.sentence_id) JOIN sentence s ON s.id = bes.sentence_id WHERE s.document_id = :documentId ORDER BY s.page_id, bes.emotion_label - """; + """; - var query = session.createNativeQuery(sql) - .setParameter("documentId", documentId) - .setParameter("modelId", modelId); + query = session.createNativeQuery(sql) + .setParameter("documentId", documentId, LongType.INSTANCE); + } else { + sql = """ + WITH best_emotion_per_sentence AS ( + SELECT DISTINCT ON (se.document_id, se.sentence_id) + se.sentence_id, + f.feeling AS emotion_label, + f.value AS emotion_value + FROM sentenceemotions se + JOIN emotion e ON e.id = se.emotion_id + JOIN feeling f ON f.emotion_id = e.id + WHERE se.document_id = :documentId + AND se.model_id = :modelId + ORDER BY se.document_id, se.sentence_id, f.value DESC + ) + SELECT + s.page_id, + bes.emotion_label + FROM best_emotion_per_sentence bes + JOIN sentence s ON s.id = bes.sentence_id + WHERE s.document_id = :documentId + ORDER BY s.page_id, bes.emotion_label + """; + + query = session.createNativeQuery(sql) + .setParameter("documentId", documentId, LongType.INSTANCE) + .setParameter("modelId", modelId, LongType.INSTANCE); + } return query.getResultList(); }); @@ -2552,4 +2583,32 @@ public void updateCorpusJsonConfig(long corpusId,String jsonConfig) throws Datab }); } + public void saveNewSentenceTopicsForDocument(long documentId, List newSentenceTopics) + throws DatabaseOperationException { + + executeOperationSafely(session -> { + Document doc = session.get(Document.class, documentId); + + if (doc == null || newSentenceTopics == null || newSentenceTopics.isEmpty()) { + return null; + } + + for (SentenceTopic st : newSentenceTopics) { + st.setDocument(doc); + + if (st.getSentence() != null) { + st.setSentence(session.get(Sentence.class, st.getSentence().getId())); + } + + if (st.getModel() != null) { + st.setModel((ModelEntity) session.merge(st.getModel())); + } + + session.save(st); + } + + 
return null; + }); + } + } diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index a48bfec7..319a7bc9 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -56,10 +56,7 @@ import org.texttechnologylab.uce.common.models.negation.*; import org.texttechnologylab.uce.common.models.rag.DocumentChunkEmbedding; import org.texttechnologylab.uce.common.models.rag.DocumentSentenceEmbedding; -import org.texttechnologylab.uce.common.models.topic.TopicValueBase; -import org.texttechnologylab.uce.common.models.topic.TopicValueBaseWithScore; -import org.texttechnologylab.uce.common.models.topic.TopicWord; -import org.texttechnologylab.uce.common.models.topic.UnifiedTopic; +import org.texttechnologylab.uce.common.models.topic.*; import org.texttechnologylab.uce.common.services.*; import org.texttechnologylab.uce.common.utils.*; @@ -533,8 +530,10 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String + " already exists in the corpus " + corpus.getId() + "."); logger.info("Checking if that document was also post-processed yet..."); var existingDoc = db.getDocumentByCorpusAndDocumentId(corpus.getId(), document.getDocumentId()); - importSentenceTopicsFromXmiIntoDb(existingDoc, filePath); + + //importSentenceTopicsFromXmiIntoDb(existingDoc, filePath); appendNewEmotionsToExistingDocument(existingDoc,jCas); + appendNewSentenceTopicsToExistingDocument(existingDoc, jCas); if (!existingDoc.isPostProcessed()) { logger.info("Not yet post-processed. 
Doing that now."); @@ -617,6 +616,13 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String }, (ex) -> logImportWarn("This file should have contained Emotion annotations, but selecting them caused an error.", ex, filePath)); + if (corpusConfig.getAnnotations().isTopic()) + ExceptionUtils.tryCatchLog( + () -> { + setSentenceTopics(document, jCas); + }, + (ex) -> logImportWarn("This file should have contained Topic annotations, but selecting them caused an error.", ex, filePath)); + if (corpusConfig.getAnnotations().isLemma()) ExceptionUtils.tryCatchLog( () -> setLemmata(document, jCas), @@ -1456,6 +1462,97 @@ private void setNamedEntities(Document document, JCas jCas) { document.setNamedEntities(nes); logger.info("Setting Named-Entities done."); } + private void setSentenceTopics(Document document, JCas jCas) { + List newTopics = extractSentenceTopics(document, jCas); + + List sentenceTopics = document.getSentenceTopics(); + if (sentenceTopics == null) { + sentenceTopics = new ArrayList<>(); + document.setSentenceTopics(sentenceTopics); + } + + sentenceTopics.addAll(newTopics); + + logger.info("Setting SentenceTopics done. 
Count={}", sentenceTopics.size()); + } + + private List extractSentenceTopics(Document document, JCas jCas) { + List sentenceTopics = new ArrayList<>(); + + Map sentenceBySpan = document.getSentences().stream() + .collect(Collectors.toMap( + s -> s.getBegin() + "_" + s.getEnd(), + s -> s, + (a, b) -> a + )); + + for (org.texttechnologylab.annotation.Topic topicSpan : JCasUtil.select(jCas, org.texttechnologylab.annotation.Topic.class)) { + Sentence sentence = sentenceBySpan.get(topicSpan.getBegin() + "_" + topicSpan.getEnd()); + if (sentence == null) { + continue; + } + + String modelName = "unknown"; + try { + if (topicSpan.getModel() != null && topicSpan.getModel().getModelName() != null) { + modelName = topicSpan.getModel().getModelName(); + } + } catch (Exception ignored) { + } + + ModelEntity foundModel = null; + try { + foundModel = db.getModelEntityByKey(modelName); + if (foundModel == null) { + foundModel = db.getModelEntityByMap(modelName); + } + } catch (DatabaseOperationException ex) { + logger.error("Error when looking for topic model in database {}", modelName, ex); + } + + if (foundModel == null) { + logger.warn("Topic model not found in DB: {}", modelName); + continue; + } + + var topicsArr = topicSpan.getTopics(); + if (topicsArr == null || topicsArr.size() == 0) { + continue; + } + + for (int i = 0; i < topicsArr.size(); i++) { + var fs = topicsArr.get(i); + if (!(fs instanceof AnnotationComment comment)) { + continue; + } + + String label = comment.getKey(); + String valueStr = comment.getValue(); + if (label == null || label.isBlank() || valueStr == null || valueStr.isBlank()) { + continue; + } + + double score; + try { + score = Double.parseDouble(valueStr); + } catch (NumberFormatException ex) { + continue; + } + + SentenceTopic st = new SentenceTopic(); + st.setDocument(document); + st.setSentence(sentence); + st.setModel(foundModel); + st.setTopicLabel(label); + st.setScore(score); + //st.setUnifiedTopic(foundUnifiedTopic); + + 
sentenceTopics.add(st); + } + } + + return sentenceTopics; + } /** * Selects and sets the sentences to a document @@ -1818,6 +1915,14 @@ private void postProccessCorpus(Corpus corpus, CorpusConfig corpusConfig) { logger.info("Inserting into Document and Corpus Topic word tables..."); try { + Path insertDocumentTopicsFilePath = Paths.get(commonConfig.getDatabaseScriptsLocation(), "topic/2_updateDocumentTopics.sql"); + var insertDocumentTopicsScript = Files.readString(insertDocumentTopicsFilePath); + + ExceptionUtils.tryCatchLog( + () -> db.executeSqlWithoutReturn(insertDocumentTopicsScript), + (ex) -> logger.error("Error executing SQL script to populate documenttopicsraw table", ex) + ); + Path insertDocumentTopicWordFilePath = Paths.get(commonConfig.getDatabaseScriptsLocation(), "topic/3_updateDocumentTopicWord.sql"); var insertDocumentTopicWordScript = Files.readString(insertDocumentTopicWordFilePath); @@ -1920,13 +2025,13 @@ private void postProccessDocument(Document document, Corpus corpus, String fileP var start = System.currentTimeMillis(); var corpusConfig = corpus.getViewModel().getCorpusConfig(); // Import sentence-level topic annotations (News XMI: annotation2:Topic + AnnotationComment) - importSentenceTopicsFromXmiIntoDb(document, filePath); + //importSentenceTopicsFromXmiIntoDb(document, filePath); // build unifiedtopic + link sentencetopics.unifiedtopic_id - //ExceptionUtils.tryCatchLog( - // () -> db.ensureUnifiedTopicsForSentenceTopics(document.getId()), - // (ex) -> logImportError("Error creating/linking unifiedtopic rows for sentence topics.", ex, filePath) - //); + ExceptionUtils.tryCatchLog( + () -> db.ensureUnifiedTopicsForSentenceTopics(document.getId()), + (ex) -> logImportError("Error creating/linking unifiedtopic rows for sentence topics.", ex, filePath) + ); ExceptionUtils.tryCatchLog( () -> db.createSentenceEmotions(document.getId()), @@ -2312,5 +2417,32 @@ private void appendNewEmotionsToExistingDocument(Document existingDoc, JCas jCas } } 
+ private void appendNewSentenceTopicsToExistingDocument(Document existingDoc, JCas jCas) { + List newSentenceTopics = extractSentenceTopics(existingDoc, jCas); + + if (newSentenceTopics.isEmpty()) { + return; + } + + ExceptionUtils.tryCatchLog( + () -> db.saveNewSentenceTopicsForDocument(existingDoc.getId(), newSentenceTopics), + (ex) -> logger.error( + "Error when saving new sentence topics to existing document {}", + existingDoc.getId(), + ex + ) + ); + ExceptionUtils.tryCatchLog( + () -> db.ensureUnifiedTopicsForSentenceTopics(existingDoc.getId()), + (ex) -> logger.error( + "Error when ensuring unified topics for existing document {}", + existingDoc.getId(), + ex + ) + ); + + logger.info("Added {} sentence topics to existing document {}", newSentenceTopics.size(), existingDoc.getId()); + } + } From 5610e827b524586ec35416b9435731da7b30b4b6 Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Tue, 10 Mar 2026 14:08:35 +0100 Subject: [PATCH 27/47] Implement Topic Visualization toggle with model-based Chrod diagram, timeline and heatmap views. Also added visualization for Annotations for better overview. 
--- .../templates/css/corpus-inspector.css | 9 + .../resources/templates/js/documentReader.js | 612 ++++++++++++++++-- .../templates/reader/documentReaderView.ftl | 21 +- .../PostgresqlDataInterface_Impl.java | 130 +++- .../org/texttechnologylab/uce/web/App.java | 3 + .../uce/web/routes/DocumentApi.java | 88 ++- 6 files changed, 769 insertions(+), 94 deletions(-) diff --git a/uce.portal/resources/templates/css/corpus-inspector.css b/uce.portal/resources/templates/css/corpus-inspector.css index e3d5367b..fae4b47b 100644 --- a/uce.portal/resources/templates/css/corpus-inspector.css +++ b/uce.portal/resources/templates/css/corpus-inspector.css @@ -84,4 +84,13 @@ width: 100%; border-bottom: lightgray 1px solid; padding:16px; +} +.corpus-inspector .annotation-entry:not(:has(input[type="checkbox"]:checked)) > div { + background: #e3e3e3; + border: 1px solid #cfcfcf !important; + border-radius: 6px; +} + +.corpus-inspector .annotation-entry:not(:has(input[type="checkbox"]:checked)) > div label { + color: #666; } \ No newline at end of file diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index bd98632e..66a67a05 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -3,6 +3,9 @@ let searchTokens = ""; let currentSelectedTopic = null; let currentTopicIndex = -1; let matchingTopics = []; +let selectedTopicModelId = null; +let selectedTopicModelName = null; +let selectedTopicVizType = 'overview'; let selectedEmotionModelId = null; let selectedEmotionVizType = 'radar'; let defaultTopicColorMap = getDefaultTopicColorMap(); @@ -783,7 +786,7 @@ function minimapToDocumentPosition(minimapPos, dimensions) { function createMinimapMarker(options) { const { top, height, color, elementId, topic, className } = options; - + const $marker = $('
') .addClass('minimap-marker') .addClass(className || '') @@ -892,9 +895,30 @@ document.querySelectorAll('.tab-btn').forEach(btn => { if (targetId === 'visualization-tab') { const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + loadEmotionModels(docId); - activateVisualizationPanel('#viz-panel-1', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); - setTimeout(() => renderTemporalExplorer('vp-1'), 500); + + loadTopicMenu(docId).then(function (topicState) { + if (topicState.hasSemanticDensity) { + activateVisualizationPanel('#viz-panel-1', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + $('#vp-1').removeClass('rendered'); + setTimeout(() => renderTemporalExplorer('vp-1'), 500); + return; + } + + if (topicState.hasTopicEntity) { + activateVisualizationPanel('#viz-panel-2', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + $('#vp-2').removeClass('rendered'); + setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); + return; + } + + if (topicState.models && topicState.models.length > 0) { + activateVisualizationPanel('#viz-panel-3', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + $('#vp-3').removeClass('rendered'); + setTimeout(() => renderTopicViz('vp-3'), 500); + } + }); } }); }); @@ -904,29 +928,43 @@ $(document).on('click', '.viz-nav-item[data-target]', function (e) { const target = $(this).data('target'); const $group = $(this).closest('.viz-nav-group'); + activateVisualizationPanel(target, $group.find('.viz-nav-parent')); if (target === '#viz-panel-1') { + $('#vp-1').removeClass('rendered'); setTimeout(() => renderTemporalExplorer('vp-1'), 500); } + if (target === '#viz-panel-2') { + $('#vp-2').removeClass('rendered'); setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); } - if (target === '#viz-panel-3') { - setTimeout(() => renderSentenceTopicNetwork('vp-3'), 500); - } - if (target === '#viz-panel-4') { - $('.selector-container').hide(); - 
setTimeout(() => renderTopicSimilarityMatrix('vp-4'), 500); - } - if (target === '#viz-panel-5') { - setTimeout(() => renderSentenceTopicSankey('vp-5'), 500); - } - if (target === '#viz-panel-6') { - setTimeout(() => renderTemporalExplorer('vp-6'), 500); - } }); +$(document).on('click', '.topic-model-item', function (e) { + e.preventDefault(); + + selectedTopicModelId = $(this).data('model-id'); + selectedTopicModelName = $.trim($(this).text()); + + $('.topic-model-item').removeClass('active'); + $(this).addClass('active'); + + activateVisualizationPanel('#viz-panel-3', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + $('#vp-3').removeClass('rendered'); + renderTopicViz('vp-3'); +}); +$(document).on('click', '.topic-viz-toggle-btn', function (e) { + e.preventDefault(); + + const nextVizType = $(this).data('viz-type'); + if (!nextVizType || nextVizType === selectedTopicVizType) return; + + selectedTopicVizType = nextVizType; + $('#vp-3').removeClass('rendered'); + renderTopicViz('vp-3'); +}); $(document).on('click', '.emotion-model-item', function (e) { e.preventDefault(); @@ -949,39 +987,6 @@ $(document).on('click', '.emotion-viz-toggle-btn', function (e) { $('#vp-7').removeClass('rendered'); renderEmotionViz('vp-7'); }); - -$(document).on('click', '.viz-nav-btn', function (e) { - const target = $(this).data('target'); - - if (!target) { - return; - } - - e.preventDefault(); - activateVisualizationPanel(target, $(this)); - - if (target === '#viz-panel-1') { - setTimeout(() => renderTemporalExplorer('vp-1'), 500); - } - if (target === '#viz-panel-2') { - setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); - } - if (target === '#viz-panel-3') { - setTimeout(() => renderSentenceTopicNetwork('vp-3'), 500); - } - if (target === '#viz-panel-4') { - $('.selector-container').hide(); - setTimeout(() => renderTopicSimilarityMatrix('vp-4'), 500); - } - if (target === '#viz-panel-5') { - setTimeout(() => renderSentenceTopicSankey('vp-5'), 500); - } - if 
(target === '#viz-panel-6') { - setTimeout(() => renderTemporalExplorer('vp-6'), 500); - } -}); - - function renderSentenceTopicNetwork(containerId) { const container = document.getElementById(containerId); if (!container || container.classList.contains('rendered')) return; @@ -1187,7 +1192,54 @@ function computeTopicSimilarityMatrix(data, type = "cosine") { matrix: matrix }; } +function renderTopicViz(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + if (!selectedTopicModelId) { + container.classList.remove('rendered'); + container.innerHTML = '
Please choose a topic model
'; + container.classList.add('rendered'); + return; + } + + const modelName = selectedTopicModelName || ('Model ' + selectedTopicModelId); + + const overviewBtnClass = selectedTopicVizType === 'overview' + ? 'btn btn-sm btn-primary topic-viz-toggle-btn' + : 'btn btn-sm btn-light topic-viz-toggle-btn'; + + const timelineBtnClass = selectedTopicVizType === 'timeline' + ? 'btn btn-sm btn-primary topic-viz-toggle-btn' + : 'btn btn-sm btn-light topic-viz-toggle-btn'; + + const heatmapBtnClass = selectedTopicVizType === 'heatmap' + ? 'btn btn-sm btn-primary topic-viz-toggle-btn' + : 'btn btn-sm btn-light topic-viz-toggle-btn'; + + container.classList.remove('rendered'); + container.innerHTML = '' + + '
' + + '
' + + '
Topic
' + + '
' + modelName + '
' + + '
' + + '
' + + '' + + '' + + '' + + '
' + + '
' + + '
'; + + if (selectedTopicVizType === 'timeline') { + renderTopicTimeline(containerId + '-body'); + } else if (selectedTopicVizType === 'heatmap') { + renderTopicHeatmap(containerId + '-body'); + } else { + renderTopicModelOverview(containerId + '-body'); + } +} function renderTopicSimilarityMatrix(containerId) { const container = document.getElementById(containerId); if (!container || container.classList.contains('rendered')){ @@ -1714,7 +1766,7 @@ function loadEmotionModels(docId) { models.forEach((m) => { const isActive = String(m.modelId) === String(selectedEmotionModelId) ? ' active' : ''; $menu.append( - '' + + '' + (m.modelName ? m.modelName : ('Model ' + m.modelId)) + '' ); @@ -1725,6 +1777,466 @@ function loadEmotionModels(docId) { $('#emotion-model-menu').html('Failed to load'); }); } +function loadTopicMenu(docId) { + const $menu = $('#topic-menu'); + const semanticLabel = $menu.attr('data-label-semantic-density') || 'Semantic Density'; + const entityLabel = $menu.attr('data-label-topic-entity') || 'Topic Entity'; + const noDataLabel = $menu.attr('data-label-no-data') || 'No data available'; + + $menu.empty(); + + const topicPageReq = $.get('/api/document/page/topics', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + const topicEntityReq = $.get('/api/document/page/topicEntityRelation', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + const topicModelsReq = $.get('/api/document/topicModels', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + return Promise.all([topicPageReq, topicEntityReq, topicModelsReq]).then(function (results) { + const topicPageData = results[0] || []; + const topicEntityData = results[1] || []; + const topicModels = results[2] || []; + + const hasSemanticDensity = Array.isArray(topicPageData) && topicPageData.length > 0; + const hasTopicEntity = 
Array.isArray(topicEntityData) && topicEntityData.length > 0; + const hasModels = Array.isArray(topicModels) && topicModels.length > 0; + + if (hasSemanticDensity) { + $menu.append( + '' + + semanticLabel + + '' + ); + } + + if (hasTopicEntity) { + $menu.append( + '' + + entityLabel + + '' + ); + } + + if (hasModels) { + const selectedStillExists = topicModels.some(function (m) { + return String(m.modelId) === String(selectedTopicModelId); + }); + + if (!selectedStillExists) { + selectedTopicModelId = topicModels[0].modelId; + selectedTopicModelName = topicModels[0].modelName || ('Model ' + topicModels[0].modelId); + } + + topicModels.forEach(function (m) { + const isActive = String(m.modelId) === String(selectedTopicModelId) ? ' active' : ''; + const label = m.modelName ? m.modelName : ('Model ' + m.modelId); + + $menu.append( + '' + + label + + '' + ); + }); + } + + if (!hasSemanticDensity && !hasTopicEntity && !hasModels) { + $menu.append('' + noDataLabel + ''); + } + + return { + hasSemanticDensity: hasSemanticDensity, + hasTopicEntity: hasTopicEntity, + models: topicModels + }; + }); +} +function renderTopicModelOverview(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + if (!selectedTopicModelId) { + container.classList.remove('rendered'); + container.innerHTML = '
Please choose a topic model
'; + container.classList.add('rendered'); + return; + } + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + + $('.visualization-spinner').show(); + + $.get('/api/document/topicModelOverview', { + documentId: docId, + modelId: selectedTopicModelId + }).then(function (data) { + $('.visualization-spinner').hide(); + + if (!data || !Array.isArray(data) || data.length === 0) { + container.innerHTML = '
No topic data for this model
'; + container.classList.add('rendered'); + return; + } + + const sortedTopics = data + .filter(function (item) { + return item && item.label && String(item.label).trim() !== ''; + }) + .slice(0, 10); + + if (sortedTopics.length === 0) { + container.innerHTML = '
No topic data for this model
'; + container.classList.add('rendered'); + return; + } + + const labels = sortedTopics.map(function (item) { + return String(item.label).trim(); + }); + + const values = sortedTopics.map(function (item) { + return item.value || 0; + }); + + const maxValue = Math.max.apply(null, values); + + function formatTopicLabel(label) { + const text = String(label || ''); + if (text.length <= 28) return text; + return text.slice(0, 25) + '...'; + } + + const chartDom = document.getElementById(containerId + '-topic-model'); + const chart = echarts.init(chartDom); + + const option = { + title: { + text: 'Topic Overview', + left: 0, + top: 0 + }, + tooltip: { + trigger: 'item', + formatter: function (params) { + return '
' + labels[params.dataIndex] + '
' + + '
Occurrences: ' + params.value + '
'; + } + }, + grid: { + left: '15%', + right: '12%', + top: 45, + bottom: 60, + containLabel: false + }, + xAxis: { + type: 'value', + minInterval: 1, + max: maxValue < 5 ? 5 : null, + splitLine: { + show: true + }, + axisLine: { + show: false + }, + axisTick: { + show: false + }, + name: 'Count', + nameLocation: 'middle', + nameGap: 28 + }, + yAxis: { + type: 'category', + inverse: true, + data: labels, + axisLine: { + show: false + }, + axisTick: { + show: false + }, + axisLabel: { + width: 180, + overflow: 'truncate', + formatter: function (value) { + return formatTopicLabel(value); + } + } + }, + series: [{ + type: 'bar', + data: values, + barWidth: 22, + label: { + show: true, + position: 'right', + formatter: '{c}' + }, + emphasis: { + focus: 'series' + } + }] + }; + + chart.setOption(option); + + window.addEventListener('resize', function () { + chart.resize(); + }); + + container.classList.add('rendered'); + }).catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load topic model data
'; + container.classList.add('rendered'); + }); +} +function loadTopicModelPageCounts() { + const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); + + return $.get('/api/document/topicModelPageCounts', { + documentId: docId, + modelId: selectedTopicModelId + }).then(function (data) { + if (!data || !Array.isArray(data) || data.length === 0) { + return null; + } + + const rawPageIds = []; + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + if (!isNaN(pid)) rawPageIds.push(pid); + }); + + const uniqueSortedPageIds = Array.from(new Set(rawPageIds)).sort(function (a, b) { + return a - b; + }); + + const pageIdToPageNumber = new Map(); + uniqueSortedPageIds.forEach(function (pid, idx) { + pageIdToPageNumber.set(pid, idx + 1); + }); + + const pageTopicCounts = new Map(); + const totalTopicCounts = {}; + + data.forEach(function (item) { + const pid = parseInt(item.pageId, 10); + const pageNumber = pageIdToPageNumber.get(pid); + const label = item.label ? String(item.label).trim() : ''; + const value = parseInt(item.value, 10) || 0; + + if (!pageNumber || !label) return; + + if (!pageTopicCounts.has(pageNumber)) { + pageTopicCounts.set(pageNumber, {}); + } + + pageTopicCounts.get(pageNumber)[label] = value; + totalTopicCounts[label] = (totalTopicCounts[label] || 0) + value; + }); + + const pages = Array.from(pageTopicCounts.keys()).sort(function (a, b) { + return a - b; + }); + + const topLabels = Object.keys(totalTopicCounts) + .sort(function (a, b) { return totalTopicCounts[b] - totalTopicCounts[a]; }) + .slice(0, 8); + + return { + pages: pages, + labels: topLabels, + pageTopicCounts: pageTopicCounts + }; + }); +} +function renderTopicTimeline(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + + loadTopicModelPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages.length || !result.labels.length) { + container.innerHTML = '
No topic timeline data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageTopicCounts = result.pageTopicCounts; + + const series = labels.map(function (label) { + return { + name: label, + type: 'line', + smooth: true, + symbol: 'circle', + symbolSize: 6, + data: pages.map(function (page) { + const counts = pageTopicCounts.get(page) || {}; + return counts[label] || 0; + }) + }; + }); + + const chart = echarts.init(document.getElementById(containerId + '-timeline')); + + chart.setOption({ + title: { + text: 'Topic Timeline', + left: 0, + top: 0 + }, + tooltip: { + trigger: 'axis' + }, + legend: { + type: 'scroll', + top: 30 + }, + grid: { + left: '12%', + right: '8%', + top: 85, + bottom: 50 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages + }, + yAxis: { + type: 'value', + name: 'Count' + }, + series: series + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load topic timeline
'; + container.classList.add('rendered'); + }); +} +function renderTopicHeatmap(containerId) { + const container = document.getElementById(containerId); + if (!container) return; + + container.classList.remove('rendered'); + container.innerHTML = '
'; + + $('.visualization-spinner').show(); + + loadTopicModelPageCounts() + .then(function (result) { + $('.visualization-spinner').hide(); + + if (!result || !result.pages.length || !result.labels.length) { + container.innerHTML = '
No topic heatmap data for this model
'; + container.classList.add('rendered'); + return; + } + + const pages = result.pages; + const labels = result.labels; + const pageTopicCounts = result.pageTopicCounts; + + const heatmapData = []; + let maxValue = 0; + + pages.forEach(function (page, pageIndex) { + const counts = pageTopicCounts.get(page) || {}; + + labels.forEach(function (label, labelIndex) { + const value = counts[label] || 0; + if (value > maxValue) maxValue = value; + heatmapData.push([pageIndex, labelIndex, value]); + }); + }); + + const chart = echarts.init(document.getElementById(containerId + '-heatmap')); + + chart.setOption({ + title: { + text: 'Topic Heatmap', + left: 0, + top: 0 + }, + tooltip: { + position: 'top', + formatter: function (params) { + const page = pages[params.value[0]]; + const label = labels[params.value[1]]; + const value = params.value[2]; + return '
Page ' + page + '
' + label + ': ' + value + '
'; + } + }, + grid: { + left: 120, + right: 30, + top: 75, + bottom: 60 + }, + xAxis: { + type: 'category', + name: 'Page', + data: pages, + splitArea: { show: true } + }, + yAxis: { + type: 'category', + name: 'Topic', + data: labels, + splitArea: { show: true } + }, + visualMap: { + min: 0, + max: maxValue > 0 ? maxValue : 1, + calculable: true, + orient: 'horizontal', + left: 'center', + bottom: 10 + }, + series: [{ + name: 'Topic Count', + type: 'heatmap', + data: heatmapData, + emphasis: { + itemStyle: { + shadowBlur: 10, + shadowColor: 'rgba(0, 0, 0, 0.35)' + } + } + }] + }); + + container.classList.add('rendered'); + }) + .catch(function () { + $('.visualization-spinner').hide(); + container.innerHTML = '
Failed to load topic heatmap
'; + container.classList.add('rendered'); + }); +} function getSelectedEmotionModelName() { const $activeModel = $('.emotion-model-item.active'); if ($activeModel.length > 0) { diff --git a/uce.portal/resources/templates/reader/documentReaderView.ftl b/uce.portal/resources/templates/reader/documentReaderView.ftl index b3446875..e043d027 100644 --- a/uce.portal/resources/templates/reader/documentReaderView.ftl +++ b/uce.portal/resources/templates/reader/documentReaderView.ftl @@ -331,22 +331,11 @@ Topic - diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index e431c876..ccf1e2f3 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2027,45 +2027,123 @@ public List getGeonameByPage(long documentId) throws DatabaseOperation return query.getResultList(); }); } - - public List getTopicDistributionByPageForDocument(long documentId) throws DatabaseOperationException { + return getTopicDistributionByPageForDocument(documentId, null); + } + + public List getTopicDistributionByPageForDocument(long documentId, Long modelId) throws DatabaseOperationException { return executeOperationSafely((session) -> { String sql = """ - WITH best_topic_per_sentence AS ( - SELECT DISTINCT ON (st.document_id, st.sentence_id) - st.unifiedtopic_id, - st.document_id, - st.sentence_id, - st.topiclabel, - st.thetast - FROM - sentencetopics st - WHERE - st.document_id = :documentId - ORDER BY - st.document_id, st.sentence_id, st.thetast DESC - ) - SELECT - ut.page_id, - btp.topiclabel + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + st.unifiedtopic_id, + st.document_id, + 
st.sentence_id, + st.topiclabel, + st.thetast FROM - best_topic_per_sentence btp - JOIN - unifiedtopic ut ON btp.unifiedtopic_id = ut.id + sentencetopics st WHERE - ut.document_id = :documentId + st.document_id = :documentId + AND (:modelId IS NULL OR st.model_id = :modelId) ORDER BY - ut.page_id, btp.topiclabel - """; + st.document_id, st.sentence_id, st.thetast DESC + ) + SELECT + ut.page_id, + btp.topiclabel + FROM + best_topic_per_sentence btp + JOIN + unifiedtopic ut ON btp.unifiedtopic_id = ut.id + WHERE + ut.document_id = :documentId + ORDER BY + ut.page_id, btp.topiclabel + """; var query = session.createNativeQuery(sql) - .setParameter("documentId", documentId); + .setParameter("documentId", documentId) + .setParameter("modelId", modelId); return query.getResultList(); }); } + public List getTopicModelsForDocumentWithName(long documentId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + SELECT DISTINCT m.id AS model_id, m.name AS model_name + FROM sentencetopics st + JOIN models m ON m.id = st.model_id + WHERE st.document_id = :documentId + ORDER BY m.id + """; + return session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .getResultList(); + }); + } + public List getTopicModelOverview(long documentId, long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + st.sentence_id, + st.topiclabel, + st.thetast + FROM sentencetopics st + WHERE st.document_id = :documentId + AND st.model_id = :modelId + ORDER BY st.document_id, st.sentence_id, st.thetast DESC + ) + SELECT + b.topiclabel, + COUNT(*) AS topic_count + FROM best_topic_per_sentence b + WHERE b.topiclabel IS NOT NULL + AND TRIM(b.topiclabel) <> '' + GROUP BY b.topiclabel + ORDER BY topic_count DESC, b.topiclabel + """; + + return session.createNativeQuery(sql) + 
.setParameter("documentId", documentId) + .setParameter("modelId", modelId) + .getResultList(); + }); + } + public List getTopicModelPageCounts(long documentId, long modelId) throws DatabaseOperationException { + return executeOperationSafely((session) -> { + String sql = """ + WITH best_topic_per_sentence AS ( + SELECT DISTINCT ON (st.document_id, st.sentence_id) + st.sentence_id, + st.topiclabel, + st.thetast + FROM sentencetopics st + WHERE st.document_id = :documentId + AND st.model_id = :modelId + ORDER BY st.document_id, st.sentence_id, st.thetast DESC + ) + SELECT + s.page_id, + b.topiclabel, + COUNT(*) AS topic_count + FROM best_topic_per_sentence b + JOIN sentence s ON s.id = b.sentence_id + WHERE b.topiclabel IS NOT NULL + AND TRIM(b.topiclabel) <> '' + GROUP BY s.page_id, b.topiclabel + ORDER BY s.page_id, topic_count DESC, b.topiclabel + """; + + return session.createNativeQuery(sql) + .setParameter("documentId", documentId) + .setParameter("modelId", modelId) + .getResultList(); + }); + } public List getSentenceTopicsWithEntitiesByPageForDocument(long documentId) throws DatabaseOperationException { diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java index 46602b0f..82d4adef 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java @@ -497,6 +497,9 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi get("/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopics(ctx)); get("/page/taxon", (ctx) -> (registry.get(DocumentApi.class)).getTaxonCountByPage(ctx)); get("/page/topics", (ctx) -> (registry.get(DocumentApi.class)).getDocumentTopicDistributionByPage(ctx)); + get("/topicModels", (ctx) -> (registry.get(DocumentApi.class)).getTopicModels(ctx)); + get("/topicModelOverview", (ctx) -> 
(registry.get(DocumentApi.class)).getTopicModelOverview(ctx)); + get("/topicModelPageCounts", (ctx) -> (registry.get(DocumentApi.class)).getTopicModelPageCounts(ctx)); get("/page/topicEntityRelation", (ctx) -> (registry.get(DocumentApi.class)).getSentenceTopicsWithEntities(ctx)); get("/page/topicWords", (ctx) -> (registry.get(DocumentApi.class)).getTopicWordsByDocument(ctx)); get("/unifiedTopicSentenceMap", (ctx) -> (registry.get(DocumentApi.class)).getUnifiedTopicToSentenceMap(ctx)); diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java index 7b050bfa..803f5fd5 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java @@ -383,10 +383,16 @@ public void getDocumentTopicDistributionByPage(Context ctx) { if (documentId == null) { ctx.status(400); ctx.render("defaultError.ftl", Map.of("information", "Missing documentId parameter")); + return; } + Long modelId = ExceptionUtils.tryCatchLog(() -> { + String modelParam = ctx.queryParam("modelId"); + return (modelParam == null || modelParam.isBlank()) ? null : Long.parseLong(modelParam); + }, (ex) -> logger.error("Error: couldn't determine the modelId for topics. 
", ex)); + try { - var topicDistPerPage = db.getTopicDistributionByPageForDocument(documentId); + var topicDistPerPage = db.getTopicDistributionByPageForDocument(documentId, modelId); var result = new ArrayList>(); for (Object[] row : topicDistPerPage) { @@ -403,7 +409,86 @@ public void getDocumentTopicDistributionByPage(Context ctx) { ctx.render("defaultError.ftl", Map.of("information", "Error retrieving document topics.")); } } + public void getTopicModels(Context ctx) { + try { + long documentId = Long.parseLong(ctx.queryParam("documentId")); + List rows = db.getTopicModelsForDocumentWithName(documentId); + + List> result = new ArrayList<>(); + for (Object[] r : rows) { + Number id = (Number) r[0]; + String name = (String) r[1]; + Map obj = new HashMap<>(); + obj.put("modelId", id == null ? null : id.longValue()); + obj.put("modelName", (name == null || name.isBlank()) ? ("Model " + id) : name); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + ctx.status(500).json(Map.of("error", "Failed to load topic models", "details", ex.getMessage())); + } + } + public void getTopicModelOverview(Context ctx) { + var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), + (ex) -> logger.error("Error: couldn't determine the documentId for topic model overview. ", ex)); + + var modelId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("modelId")), + (ex) -> logger.error("Error: couldn't determine the modelId for topic model overview. 
", ex)); + + if (documentId == null || modelId == null) { + ctx.status(400).json(Map.of("error", "Missing documentId or modelId")); + return; + } + + try { + var rows = db.getTopicModelOverview(documentId, modelId); + var result = new ArrayList>(); + + for (Object[] row : rows) { + var obj = new HashMap(); + obj.put("label", row[0]); + obj.put("value", ((Number) row[1]).longValue()); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + logger.error("Error getting topic model overview.", ex); + ctx.status(500).json(Map.of("error", "Failed to load topic model overview")); + } + } + public void getTopicModelPageCounts(Context ctx) { + var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), + (ex) -> logger.error("Error: couldn't determine the documentId for topic model page counts. ", ex)); + + var modelId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("modelId")), + (ex) -> logger.error("Error: couldn't determine the modelId for topic model page counts. ", ex)); + + if (documentId == null || modelId == null) { + ctx.status(400).json(Map.of("error", "Missing documentId or modelId")); + return; + } + + try { + var rows = db.getTopicModelPageCounts(documentId, modelId); + var result = new ArrayList>(); + + for (Object[] row : rows) { + var obj = new HashMap(); + obj.put("pageId", row[0]); + obj.put("label", row[1]); + obj.put("value", ((Number) row[2]).longValue()); + result.add(obj); + } + + ctx.json(result); + } catch (Exception ex) { + logger.error("Error getting topic model page counts.", ex); + ctx.status(500).json(Map.of("error", "Failed to load topic model page counts")); + } + } public void getDocumentNamedEntitiesByPage(Context ctx) { var documentId = ExceptionUtils.tryCatchLog(() -> Long.parseLong(ctx.queryParam("documentId")), (ex) -> logger.error("Error: couldn't determine the documentId for entities. 
", ex)); @@ -665,5 +750,4 @@ public void getEmotionModels(Context ctx) { ctx.status(500).json(Map.of("error", "Failed to load emotion models", "details", ex.getMessage())); } } - } From cb87296e9fe6a2e8d164d2888abb456d8de0c308 Mon Sep 17 00:00:00 2001 From: Ph1l1ppGitHub Date: Tue, 10 Mar 2026 14:54:48 +0100 Subject: [PATCH 28/47] Button annotation TOPIC in UI --- uce.portal/resources/templates/landing-page.ftl | 5 ++++- .../texttechnologylab/uce/web/routes/ImportExportApi.java | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 82b98dc3..471242ea 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -210,7 +210,7 @@
-
+
@@ -234,6 +234,9 @@
+
+
+
diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index 3aa6ea5b..f94da89d 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -260,6 +260,7 @@ else if (config.getLanguage() == null) { ann.setSentence(ann.isSentence() || ctx.formParam("sentence") != null); ann.setLemma(ann.isLemma() || ctx.formParam("lemma") != null); ann.setNamedEntity(ann.isNamedEntity() || ctx.formParam("namedEntity") != null); + ann.setTopic(ann.isNamedEntity() || ctx.formParam("topic") != null); ann.setSentiment(ann.isSentiment() || ctx.formParam("sentiment") != null); ann.setEmotion(ann.isEmotion() || ctx.formParam("emotion") != null); ann.setTime(ann.isTime() || ctx.formParam("time") != null); From eef34874bc298b8863259027319552d02d7ff4cb Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Tue, 10 Mar 2026 18:46:38 +0100 Subject: [PATCH 29/47] lighter blue for already checked flags --- uce.portal/resources/templates/css/site.css | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/uce.portal/resources/templates/css/site.css b/uce.portal/resources/templates/css/site.css index c4edeeb0..663f8e6a 100644 --- a/uce.portal/resources/templates/css/site.css +++ b/uce.portal/resources/templates/css/site.css @@ -1147,6 +1147,13 @@ nav .selected-nav-btn.text::before { border-color: var(--prime) !important; } +/* lighter blue for already checked checkboxes/flags (Import Modal) */ +.custom-control-input:checked:disabled ~ .custom-control-label::before { + background-color: #6da2bc !important; + border-color: #6da2bc !important; + opacity: 1 !important; +} + /*Custom button switches */ /* UCE Map styles */ From dfca1ad464160ea524a476266bc2c45adc75589b Mon Sep 17 00:00:00 2001 From: Francesco Da Silva 
Saporito Date: Tue, 10 Mar 2026 21:38:41 +0100 Subject: [PATCH 30/47] feat: add 'Others' category to visualization navigation bar - Added new 'Others' button to the viz navigation bar (alongside Topic, Sentiment, Emotion) - Moved Semantic Density, Topic Entity, Topic Landscape, Topic Similarity and Sentence Topic Flow from Topic category into Others - Refactored loadTopicMenu() to only handle Topic Models - Added new loadOthersMenu() function to dynamically populate the Others dropdown - Added click handler for others-menu-item to route to correct viz panels - Fixed positioning of viz navigation bar: now left-aligned within sidebar instead of overlapping document area --- .../templates/css/document-reader.css | 20 +-- .../resources/templates/js/documentReader.js | 145 ++++++++++++------ .../templates/reader/documentReaderView.ftl | 13 +- .../uce/corpusimporter/Importer.java | 70 --------- 4 files changed, 121 insertions(+), 127 deletions(-) diff --git a/uce.portal/resources/templates/css/document-reader.css b/uce.portal/resources/templates/css/document-reader.css index d2609393..930fcde2 100644 --- a/uce.portal/resources/templates/css/document-reader.css +++ b/uce.portal/resources/templates/css/document-reader.css @@ -655,7 +655,9 @@ body { } .tab-content .tab-pane.active { - display: block; + display: flex; + flex-direction: column; + height: calc(100vh - 50px); } .side-bar.visualization-expanded { width: 150vw !important; @@ -665,7 +667,7 @@ body { .tab-pane .visualization-wrapper { display: flex; flex-direction: column; - height: 100%; + flex: 1; position: relative; } .visualization-wrapper .visualization-content { @@ -710,13 +712,12 @@ body { /* Bottom Navigation */ .tab-pane .viz-bottom-nav { - position: fixed; - left: 50%; - bottom: 30px; - transform: translateX(-50%); - right: auto; + position: absolute; + bottom: 80px; + left: 16px; width: max-content; - max-width: calc(100vw - 40px); + max-width: calc(100% - 32px); + align-self: flex-end; display: flex; 
justify-content: center; gap: 10px; @@ -971,7 +972,8 @@ body { /* --- Dropdown Navigation Erweiterung (Hover) --- */ .tab-pane .viz-bottom-nav.viz-dropdown-nav{ - justify-content: center; /* statt space-around */ + justify-content: flex-start; + overflow: visible; gap: 10px; overflow: visible; /* wichtig, damit Menüs nicht abgeschnitten werden */ } diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index 66a67a05..0e50573d 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -899,25 +899,27 @@ document.querySelectorAll('.tab-btn').forEach(btn => { loadEmotionModels(docId); loadTopicMenu(docId).then(function (topicState) { - if (topicState.hasSemanticDensity) { - activateVisualizationPanel('#viz-panel-1', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + if (topicState.models && topicState.models.length > 0) { + activateVisualizationPanel('#viz-panel-3', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + $('#vp-3').removeClass('rendered'); + setTimeout(() => renderTopicViz('vp-3'), 500); + } + }); + + loadOthersMenu(docId).then(function (othersState) { + if (othersState.hasSemanticDensity) { + activateVisualizationPanel('#viz-panel-1', $('.viz-nav-group[data-category="others"] .viz-nav-parent')); $('#vp-1').removeClass('rendered'); setTimeout(() => renderTemporalExplorer('vp-1'), 500); return; } - if (topicState.hasTopicEntity) { - activateVisualizationPanel('#viz-panel-2', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); + if (othersState.hasTopicEntity) { + activateVisualizationPanel('#viz-panel-2', $('.viz-nav-group[data-category="others"] .viz-nav-parent')); $('#vp-2').removeClass('rendered'); setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); return; } - - if (topicState.models && topicState.models.length > 0) { - activateVisualizationPanel('#viz-panel-3', 
$('.viz-nav-group[data-category="topic"] .viz-nav-parent')); - $('#vp-3').removeClass('rendered'); - setTimeout(() => renderTopicViz('vp-3'), 500); - } }); } }); @@ -976,7 +978,31 @@ $(document).on('click', '.emotion-model-item', function (e) { $('#vp-7').removeClass('rendered'); renderEmotionViz('vp-7'); }); +$(document).on('click', '.others-menu-item[data-target]', function (e) { + e.preventDefault(); + + const target = $(this).data('target'); + const $group = $(this).closest('.viz-nav-group'); + + activateVisualizationPanel(target, $group.find('.viz-nav-parent')); + if (target === '#viz-panel-1') { + $('#vp-1').removeClass('rendered'); + setTimeout(() => renderTemporalExplorer('vp-1'), 500); + } + if (target === '#viz-panel-2') { + $('#vp-2').removeClass('rendered'); + setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); + } + if (target === '#viz-panel-4') { + $('#vp-4').removeClass('rendered'); + setTimeout(() => renderTopicSimilarityMatrix('vp-4'), 500); + } + if (target === '#viz-panel-5') { + $('#vp-5').removeClass('rendered'); + setTimeout(() => renderSentenceTopicSankey('vp-5'), 500); + } +}); $(document).on('click', '.emotion-viz-toggle-btn', function (e) { e.preventDefault(); @@ -1779,49 +1805,17 @@ function loadEmotionModels(docId) { } function loadTopicMenu(docId) { const $menu = $('#topic-menu'); - const semanticLabel = $menu.attr('data-label-semantic-density') || 'Semantic Density'; - const entityLabel = $menu.attr('data-label-topic-entity') || 'Topic Entity'; const noDataLabel = $menu.attr('data-label-no-data') || 'No data available'; $menu.empty(); - const topicPageReq = $.get('/api/document/page/topics', { documentId: docId }) - .then(function (data) { return data; }) - .catch(function () { return []; }); - - const topicEntityReq = $.get('/api/document/page/topicEntityRelation', { documentId: docId }) - .then(function (data) { return data; }) - .catch(function () { return []; }); - const topicModelsReq = 
$.get('/api/document/topicModels', { documentId: docId }) .then(function (data) { return data; }) .catch(function () { return []; }); - return Promise.all([topicPageReq, topicEntityReq, topicModelsReq]).then(function (results) { - const topicPageData = results[0] || []; - const topicEntityData = results[1] || []; - const topicModels = results[2] || []; - - const hasSemanticDensity = Array.isArray(topicPageData) && topicPageData.length > 0; - const hasTopicEntity = Array.isArray(topicEntityData) && topicEntityData.length > 0; + return topicModelsReq.then(function (topicModels) { const hasModels = Array.isArray(topicModels) && topicModels.length > 0; - if (hasSemanticDensity) { - $menu.append( - '' + - semanticLabel + - '' - ); - } - - if (hasTopicEntity) { - $menu.append( - '' + - entityLabel + - '' - ); - } - if (hasModels) { const selectedStillExists = topicModels.some(function (m) { return String(m.modelId) === String(selectedTopicModelId); @@ -1844,17 +1838,76 @@ function loadTopicMenu(docId) { }); } - if (!hasSemanticDensity && !hasTopicEntity && !hasModels) { + if (!hasModels) { $menu.append('' + noDataLabel + ''); } return { - hasSemanticDensity: hasSemanticDensity, - hasTopicEntity: hasTopicEntity, models: topicModels }; }); } +function loadOthersMenu(docId) { + const $menu = $('#others-menu'); + const noDataLabel = $menu.attr('data-label-no-data') || 'No data available'; + + $menu.empty(); + + const topicPageReq = $.get('/api/document/page/topics', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + const topicEntityReq = $.get('/api/document/page/topicEntityRelation', { documentId: docId }) + .then(function (data) { return data; }) + .catch(function () { return []; }); + + return Promise.all([topicPageReq, topicEntityReq]).then(function (results) { + const topicPageData = results[0] || []; + const topicEntityData = results[1] || []; + + const hasSemanticDensity = Array.isArray(topicPageData) && 
topicPageData.length > 0; + const hasTopicEntity = Array.isArray(topicEntityData) && topicEntityData.length > 0; + + if (hasSemanticDensity) { + $menu.append( + '' + + 'Semantic Density' + + '' + ); + } + + if (hasTopicEntity) { + $menu.append( + '' + + 'Topic Entity' + + '' + ); + } + + $menu.append( + '' + + 'Topic Landscape' + + '' + ); + + $menu.append( + '' + + 'Topic Similarity' + + '' + ); + + $menu.append( + '' + + 'Sentence Topic Flow' + + '' + ); + + return { + hasSemanticDensity: hasSemanticDensity, + hasTopicEntity: hasTopicEntity + }; + }); +} function renderTopicModelOverview(containerId) { const container = document.getElementById(containerId); if (!container) return; diff --git a/uce.portal/resources/templates/reader/documentReaderView.ftl b/uce.portal/resources/templates/reader/documentReaderView.ftl index e043d027..beb6163d 100644 --- a/uce.portal/resources/templates/reader/documentReaderView.ftl +++ b/uce.portal/resources/templates/reader/documentReaderView.ftl @@ -333,8 +333,6 @@
@@ -361,6 +359,17 @@
+ +
+ + +
+
+
diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index 319a7bc9..950609d4 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -531,7 +531,6 @@ public Document XMIToDocument(JCas jCas, Corpus corpus, String filePath, String logger.info("Checking if that document was also post-processed yet..."); var existingDoc = db.getDocumentByCorpusAndDocumentId(corpus.getId(), document.getDocumentId()); - //importSentenceTopicsFromXmiIntoDb(existingDoc, filePath); appendNewEmotionsToExistingDocument(existingDoc,jCas); appendNewSentenceTopicsToExistingDocument(existingDoc, jCas); @@ -1948,73 +1947,6 @@ private void postProccessCorpus(Corpus corpus, CorpusConfig corpusConfig) { logger.info("Done with the corpus postprocessing."); } - /** - * Each topic annotation is matched to an existing sentence using - * its begin and end offsets. For every (label, score) pair found, - * a corresponding entry is inserted into the - * table, linking the topic classification to the sentence. - * The method only performs sentence-level imports and does not - * create unified or aggregated topic representations - */ - private void importSentenceTopicsFromXmiIntoDb(Document document, String xmiFilePath) { - try { - var jCas = JCasFactory.createJCas(); - - try (InputStream raw = Files.newInputStream(Paths.get(xmiFilePath)); - InputStream in = xmiFilePath.endsWith(".gz") ? 
new GZIPInputStream(raw) : raw) { - - CasIOUtils.load(in, jCas.getCas()); - } - - if (casView != null) { - jCas = jCas.getView(casView); - } - - var topicAnnos = JCasUtil.select(jCas, org.texttechnologylab.annotation.Topic.class); - if (topicAnnos.isEmpty()) { - logger.info("No Topic annotations found in XMI: {}", xmiFilePath); - return; - } - - int inserted = 0; - - for (var topicSpan : topicAnnos) { - int begin = topicSpan.getBegin(); - int end = topicSpan.getEnd(); - String model = "unknown"; - try { - if (topicSpan.getModel() != null && topicSpan.getModel().getModelName() != null) { - model = topicSpan.getModel().getModelName(); - } - } catch (Exception ignored) { } - - var topicsArr = topicSpan.getTopics(); - if (topicsArr == null || topicsArr.size() == 0) continue; - - for (int i = 0; i < topicsArr.size(); i++) { - var fs = topicsArr.get(i); - if (!(fs instanceof AnnotationComment comment)) continue; - - String label = comment.getKey(); - String valueStr = comment.getValue(); - if (label == null || label.isBlank() || valueStr == null || valueStr.isBlank()) continue; - - double score; - try { score = Double.parseDouble(valueStr); } - catch (NumberFormatException nfe) { continue; } - - inserted += db.insertSentenceTopicBySpan(document.getId(), begin, end, label, score, model); - } - } - - logger.info("Imported sentence topic annotations into sentencetopics: documentId={}, insertedRows={}", - document.getId(), inserted); - - } catch (Exception ex) { - logger.error("Error importing sentence topics from XMI into DB. 
xmi={}", xmiFilePath, ex); - } - } - /** * Here we apply any postprocessing of a document that isn't DUUI and needs the document to be stored once like @@ -2024,8 +1956,6 @@ private void postProccessDocument(Document document, Corpus corpus, String fileP logImportInfo("Postprocessing " + filePath, LogStatus.POST_PROCESSING, filePath, 0); var start = System.currentTimeMillis(); var corpusConfig = corpus.getViewModel().getCorpusConfig(); - // Import sentence-level topic annotations (News XMI: annotation2:Topic + AnnotationComment) - //importSentenceTopicsFromXmiIntoDb(document, filePath); // build unifiedtopic + link sentencetopics.unifiedtopic_id ExceptionUtils.tryCatchLog( From 388aa9fb8a030758237c0d0cdf85a340e50d37a7 Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Tue, 10 Mar 2026 22:09:21 +0100 Subject: [PATCH 31/47] fix: align Topic header layout to match Emotion panel style and make toggle buttons visible feat: dynamic sentiment menu with 'No models found' fallback matching Topic and Emotion logic --- .../templates/css/document-reader.css | 14 +++++- .../resources/templates/js/documentReader.js | 50 ++++++++++++------- .../templates/reader/documentReaderView.ftl | 6 +-- 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/uce.portal/resources/templates/css/document-reader.css b/uce.portal/resources/templates/css/document-reader.css index 930fcde2..ae60e0d2 100644 --- a/uce.portal/resources/templates/css/document-reader.css +++ b/uce.portal/resources/templates/css/document-reader.css @@ -779,7 +779,7 @@ body { outline: none; } -#vp-3, #vp-4, #vp-5, #vp-2, #vp-1, #vp-6 { +#vp-4, #vp-5, #vp-2, #vp-1, #vp-6 { display: flex; align-items: center; justify-content: center; @@ -787,8 +787,20 @@ body { overflow: hidden; position: relative; } +#vp-3 { + display: flex; + flex-direction: column; + height: 100%; +} +#vp-3 > div[id$="-body"] { + flex: 1; +} +#vp-3 > .d-flex { + width: 100%; + padding: 0; +} .key-topic-settings-panel { position: 
absolute; top: 160px; diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index 0e50573d..92d26bc9 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -897,7 +897,7 @@ document.querySelectorAll('.tab-btn').forEach(btn => { const docId = document.getElementsByClassName('reader-container')[0].getAttribute('data-id'); loadEmotionModels(docId); - + loadSentimentMenu(docId); loadTopicMenu(docId).then(function (topicState) { if (topicState.models && topicState.models.length > 0) { activateVisualizationPanel('#viz-panel-3', $('.viz-nav-group[data-category="topic"] .viz-nav-parent')); @@ -1256,7 +1256,7 @@ function renderTopicViz(containerId) { '' + '' + '' + - '
'; + '
'; if (selectedTopicVizType === 'timeline') { renderTopicTimeline(containerId + '-body'); @@ -1839,7 +1839,7 @@ function loadTopicMenu(docId) { } if (!hasModels) { - $menu.append('' + noDataLabel + ''); + $menu.append('No models found'); } return { @@ -1868,21 +1868,17 @@ function loadOthersMenu(docId) { const hasSemanticDensity = Array.isArray(topicPageData) && topicPageData.length > 0; const hasTopicEntity = Array.isArray(topicEntityData) && topicEntityData.length > 0; - if (hasSemanticDensity) { - $menu.append( - '' + - 'Semantic Density' + - '' - ); - } + $menu.append( + '' + + 'Semantic Density' + + '' + ); - if (hasTopicEntity) { - $menu.append( - '' + - 'Topic Entity' + - '' - ); - } + $menu.append( + '' + + 'Topic Entity' + + '' + ); $menu.append( '' + @@ -1908,6 +1904,26 @@ function loadOthersMenu(docId) { }; }); } +function loadSentimentMenu(docId) { + const $menu = $('#sentiment-menu'); + $menu.empty(); + + $.get('/api/document/page/sentiments', { documentId: docId }) + .then(function (data) { + if (Array.isArray(data) && data.length > 0) { + $menu.append( + '' + + 'Sentence Sentiment' + + '' + ); + } else { + $menu.append('No models found'); + } + }) + .catch(function () { + $menu.append('No models found'); + }); +} function renderTopicModelOverview(containerId) { const container = document.getElementById(containerId); if (!container) return; diff --git a/uce.portal/resources/templates/reader/documentReaderView.ftl b/uce.portal/resources/templates/reader/documentReaderView.ftl index beb6163d..3d46e779 100644 --- a/uce.portal/resources/templates/reader/documentReaderView.ftl +++ b/uce.portal/resources/templates/reader/documentReaderView.ftl @@ -343,11 +343,7 @@ Sentiment - +
From 46cb4d73b64ea421324ac5e16c0f6495d31fdb6b Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Tue, 10 Mar 2026 22:41:37 +0100 Subject: [PATCH 32/47] users can now delete a corpus and documents of a corpus, Added a missing 'document_id' check to the SQL query in createSentenceEmotions. This prevents sentences in newly imported corpora from accidentally linking to emotions of older documents with identical text offsets, which caused foreign key constraint violations during deletion of corporas with the same documents but different models. --- .../templates/corpus/components/documents.ftl | 43 +++++++++++- .../templates/corpus/corpusInspector.ftl | 6 +- .../resources/templates/landing-page.ftl | 28 ++++++++ .../uce/common/services/DataInterface.java | 5 ++ .../PostgresqlDataInterface_Impl.java | 70 ++++++++++++++++++- .../org/texttechnologylab/uce/web/App.java | 1 + .../uce/web/routes/DocumentApi.java | 16 ++++- 7 files changed, 165 insertions(+), 4 deletions(-) diff --git a/uce.portal/resources/templates/corpus/components/documents.ftl b/uce.portal/resources/templates/corpus/components/documents.ftl index 0f5afd33..11d9ce84 100644 --- a/uce.portal/resources/templates/corpus/components/documents.ftl +++ b/uce.portal/resources/templates/corpus/components/documents.ftl @@ -3,6 +3,47 @@
<#assign searchId = ""> <#include '*/search/components/documentCardContent.ftl' > + +
+ +
+
- \ No newline at end of file + + + \ No newline at end of file diff --git a/uce.portal/resources/templates/corpus/corpusInspector.ftl b/uce.portal/resources/templates/corpus/corpusInspector.ftl index 99357fad..e453b716 100644 --- a/uce.portal/resources/templates/corpus/corpusInspector.ftl +++ b/uce.portal/resources/templates/corpus/corpusInspector.ftl @@ -31,7 +31,11 @@ this.getAttribute('data-desc'), this.getAttribute('data-config') )"> - Upload files to this corpora + Upload documents + + + diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 471242ea..4b5891a0 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -61,6 +61,11 @@ onclick="$(this).closest('.corpus-card').find('.expanded-content').toggle(75)"> + + + @@ -535,4 +540,27 @@ resultDiv.innerHTML = '
Error: ' + err.message + '
'; }); } + + function deleteCorpus(corpusId){ + if (!confirm("Are you sure you want to delete this corpus?")){ + return; + } + + fetch('/api/corpus/delete?corpusId=' + corpusId, { + method: 'DELETE' + }) + .then(async response => { + if (response.ok){ + alert("Corpus successfully deleted"); + location.reload(); + }else{ + const msg = await response.text(); + alert("Error when trying to delete corpus " + msg); + } + }) + .catch(e => { + console.error(e); + alert("Unexpected Error " + e.message); + }) + } \ No newline at end of file diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java index 69a199ef..0ef5cf0a 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/DataInterface.java @@ -392,4 +392,9 @@ public DocumentSearchResult defaultSearchForDocuments(int skip, * Updates a corpusJsonConfig in the database */ public void updateCorpusJsonConfig(long corpusId,String jsonConfig) throws DatabaseOperationException; + + /** + * Deletes a corpus by id + */ + public void deleteCorpusById(long corpusId) throws DatabaseOperationException; } diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index aa1bb203..b106dacc 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -1205,6 +1205,24 @@ public void deleteDocumentById(long id) throws DatabaseOperationException { // NOTE this only cleans up everything directly
connected to the document // TODO also remove embeddings and other data executeOperationSafely((session) -> { + List queries = List.of( + "DELETE FROM sentenceemotions WHERE emotion_id IN (SELECT id FROM emotion WHERE document_id = :did)", + "DELETE FROM sentencetopics WHERE unifiedtopic_id IN (SELECT id FROM unifiedtopic WHERE document_id = :did)", + "DELETE FROM sentenceemotions WHERE document_id = :did", + "DELETE FROM sentencetopics WHERE document_id = :did", + "DELETE FROM documenttopicsraw WHERE document_id = :did", + "DELETE FROM documenttopicwords WHERE document_id = :did", + "DELETE FROM feeling WHERE emotion_id IN (SELECT id FROM emotion WHERE document_id = :did)", + "DELETE FROM documentchunkembeddings WHERE document_id = :did", + "DELETE FROM documentembeddings WHERE document_id = :did", + "DELETE FROM documentsentenceembeddings WHERE document_id = :did" + ); + for (String sql : queries) { + session.createNativeQuery(sql) + .setParameter("did", id) + .executeUpdate(); + } + var doc = session.get(Document.class, id); if (doc != null) { session.delete(doc); @@ -1212,6 +1230,56 @@ public void deleteDocumentById(long id) throws DatabaseOperationException { return null; }); } + + public void deleteCorpusById(long corpusId) throws DatabaseOperationException{ + executeOperationSafely((session) -> { + List queries = List.of( + "DELETE FROM sentenceemotions WHERE emotion_id IN (SELECT e.id FROM emotion e JOIN document d ON e.document_id = d.id WHERE d.corpusid = :cid)", + "DELETE FROM sentencetopics WHERE unifiedtopic_id IN (SELECT ut.id FROM unifiedtopic ut JOIN document d ON ut.document_id = d.id WHERE d.corpusid = :cid)", + "DELETE FROM sentenceemotions USING document WHERE sentenceemotions.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM sentencetopics USING document WHERE sentencetopics.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documenttopicsraw USING document WHERE documenttopicsraw.document_id = 
document.id AND document.corpusid = :cid", + "DELETE FROM documenttopicwords USING document WHERE documenttopicwords.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM corpustopicwords WHERE corpus_id = :cid", + "DELETE FROM feeling WHERE emotion_id IN (SELECT e.id FROM emotion e JOIN document d ON e.document_id = d.id WHERE d.corpusid = :cid)", + "DELETE FROM emotion USING document WHERE emotion.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM unifiedtopic USING document WHERE unifiedtopic.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM xscope USING document WHERE xscope.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM scope USING document WHERE scope.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM event USING document WHERE event.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM focus USING document WHERE focus.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM cue USING document WHERE cue.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM completenegation USING document WHERE completenegation.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentchunkembeddings USING document WHERE documentchunkembeddings.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentembeddings USING document WHERE documentembeddings.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentsentenceembeddings USING document WHERE documentsentenceembeddings.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM ucemetadata USING document WHERE ucemetadata.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM ucemetadatafilter WHERE ucemetadatafilter.corpusid = :cid", + "DELETE FROM biofidtaxon USING document WHERE biofidtaxon.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM lemma 
USING document WHERE lemma.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM namedentity USING document WHERE namedentity.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM geoname USING document WHERE geoname.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM time USING document WHERE time.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM srlink USING document WHERE srlink.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM gazetteertaxon USING document WHERE gazetteertaxon.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM gnfindertaxon USING document WHERE gnfindertaxon.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM sentence USING document WHERE sentence.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM documentlink WHERE documentlink.corpusid = :cid", + "DELETE FROM annotationlink WHERE annotationlink.corpusid = :cid", + "DELETE FROM documenttoannotationlink WHERE documenttoannotationlink.corpusid = :cid", + "DELETE FROM annotationtodocumentlink WHERE annotationtodocumentlink.corpusid = :cid", + "DELETE FROM page USING document WHERE page.document_id = document.id AND document.corpusid = :cid", + "DELETE FROM document WHERE corpusid = :cid", + "DELETE FROM corpus WHERE id = :cid" + ); + for (String sql : queries) { + session.createNativeQuery(sql) + .setParameter("cid", corpusId) + .executeUpdate(); + } + return null; + }); + } public List findDocumentIDsByTitle(String title, boolean like) throws DatabaseOperationException { return executeOperationSafely((session) -> { @@ -2440,7 +2508,7 @@ INSERT INTO sentenceemotions (sentence_id, emotion_id, model_id, document_id) SELECT s.id, e.id, e.model_id, s.document_id FROM emotion e JOIN sentence s - ON s.beginn = e.beginn AND s.endd = e.endd and s.document_id = :docId + ON s.beginn = e.beginn AND s.endd = e.endd and s.document_id = :docId AND 
e.document_id = :docId WHERE NOT EXISTS( SELECT 1 FROM sentenceemotions se WHERE se.sentence_id = s.id AND se.emotion_id = e.id diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java index 82d4adef..71a56409 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java @@ -464,6 +464,7 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi post("/linkedOccurrences", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrences(ctx)); post("/linkedOccurrenceClusters", (ctx) -> (registry.get(MapApi.class)).getLinkedOccurrenceClusters(ctx)); }); + delete("/delete",(ctx) -> (registry.get(DocumentApi.class)).deleteCorpus(ctx)); }); path("/search", () -> { diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java index 803f5fd5..5b65686e 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java @@ -274,6 +274,7 @@ public void deleteDocument(Context ctx) throws DatabaseOperationException { var id = ExceptionUtils.tryCatchLog(() -> ctx.queryParam("id"), (ex) -> logger.error("Error: document deletion requires an 'id' query parameter.
", ex)); if (id == null) { + ctx.status(400); ctx.render("defaultError.ftl"); return; } @@ -282,10 +283,24 @@ public void deleteDocument(Context ctx) throws DatabaseOperationException { Map result = new HashMap<>(); result.put("status", "success"); - result.put("message", "NOTE Document deletion is not fully implemented yet."); + result.put("message", "Document successfully deleted"); ctx.json(result); } + + public void deleteCorpus(Context ctx) throws DatabaseOperationException{ + var id = ExceptionUtils.tryCatchLog(() -> ctx.queryParam("corpusId"),ex -> logger.error("Error: corpus deletion required a corpusId parameter", ex)); + if (id == null){ + ctx.status(400); + ctx.render("defaultError.ftl"); + return; + } + db.deleteCorpusById(Long.parseLong(id)); + Map result = new HashMap<>(); + result.put("status","success"); + result.put("message","Corpus successfully deleted"); + ctx.json(result); + } public void getPagesListView(Context ctx) { From 5d2ac8c2774fc86c4cde6ae40a9c7eb71581be7e Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Wed, 11 Mar 2026 00:28:26 +0100 Subject: [PATCH 33/47] bug fix: reset checkbox states for a new corpus upload after opening the import modal in an already imported corpus --- uce.portal/resources/templates/landing-page.ftl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 4b5891a0..70cc2eda 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -427,6 +427,8 @@ function openUploadForNewCorpora(){ const form = document.getElementById('uploadCorpusForm'); form.reset(); + $('#advancedSettings input[type="checkbox"]').prop('checked',false).prop('disabled',false); + $('#uploadConfigFile').prop('disabled', false); $('#uploadAddToExisting').val('false'); $('#uploadModalTitle').html('Create new Corpora'); $('#uploadCorpusName').val('').prop('readonly',false); @@ -545,6 +547,8 @@ if (!confirm("Are 
you sure you want to delete this corpus?")){ return; } + + fetch('/api/corpus/delete?corpusId=' + corpusId, { method: 'DELETE' From 17507ab4ae1f4f5cf5d4f9396936f5be110164e3 Mon Sep 17 00:00:00 2001 From: Ph1l1ppGitHub Date: Wed, 11 Mar 2026 13:46:10 +0100 Subject: [PATCH 34/47] =?UTF-8?q?Topic=20vorausgew=C3=A4hlt=20machen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- uce.portal/resources/templates/landing-page.ftl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/uce.portal/resources/templates/landing-page.ftl b/uce.portal/resources/templates/landing-page.ftl index 70cc2eda..96c23d14 100644 --- a/uce.portal/resources/templates/landing-page.ftl +++ b/uce.portal/resources/templates/landing-page.ftl @@ -378,6 +378,7 @@ if (ann.namedEntity) $('#annoNE').prop('checked', true); if (ann.sentiment) $('#annoSentiment').prop('checked', true); if (ann.emotion) $('#annoEmotion').prop('checked', true); + if (ann.topic) $('#annoTopic').prop('checked', true); if (ann.time) $('#annoTime').prop('checked', true); if (ann.geoNames) $('#annoGeo').prop('checked', true); if (ann.wikipediaLink) $('#annoWiki').prop('checked', true); @@ -457,6 +458,7 @@ if (ann.namedEntity) $('#annoNE').prop('checked', true).prop('disabled', true); if (ann.sentiment) $('#annoSentiment').prop('checked', true).prop('disabled', true); if (ann.emotion) $('#annoEmotion').prop('checked', true).prop('disabled', true); + if (ann.topic) $('#annoTopic').prop('checked', true).prop('disabled', true); if (ann.time) $('#annoTime').prop('checked', true).prop('disabled', true); if (ann.geoNames) $('#annoGeo').prop('checked', true).prop('disabled', true); if (ann.wikipediaLink) $('#annoWiki').prop('checked', true).prop('disabled', true); From d293ed56ddf977d9705951082be9f49437f5dc01 Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Wed, 11 Mar 2026 22:55:19 +0100 Subject: [PATCH 35/47] importer topic bug fix --- 
.../org/texttechnologylab/uce/web/routes/ImportExportApi.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index f94da89d..0f270731 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -260,7 +260,7 @@ else if (config.getLanguage() == null) { ann.setSentence(ann.isSentence() || ctx.formParam("sentence") != null); ann.setLemma(ann.isLemma() || ctx.formParam("lemma") != null); ann.setNamedEntity(ann.isNamedEntity() || ctx.formParam("namedEntity") != null); - ann.setTopic(ann.isNamedEntity() || ctx.formParam("topic") != null); + ann.setTopic(ann.isTopic() || ctx.formParam("topic") != null); ann.setSentiment(ann.isSentiment() || ctx.formParam("sentiment") != null); ann.setEmotion(ann.isEmotion() || ctx.formParam("emotion") != null); ann.setTime(ann.isTime() || ctx.formParam("time") != null); From 081f3b91400ef7c735575a827338353e2f0c96ee Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Wed, 11 Mar 2026 23:04:34 +0100 Subject: [PATCH 36/47] fix: add error handling to renderTemporalExplorer requests to prevent Promise.all from failing when individual endpoints return no data --- uce.portal/resources/templates/js/documentReader.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index 92d26bc9..d4f54b3c 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -1577,12 +1577,12 @@ function renderTemporalExplorer(containerId) { const emotionReq = $.get('/api/document/page/emotions', { documentId: docId, 
modelId: selectedEmotionModelId - }); - const taxonReq = $.get('/api/document/page/taxon', { documentId: docId }); - const topicReq = $.get('/api/document/page/topics', { documentId: docId }); - const entityReq = $.get('/api/document/page/namedEntities', { documentId: docId }); - const lemmaReq = $.get('/api/document/page/lemma', { documentId: docId }); - const geonameReq = $.get('/api/document/page/geoname', { documentId: docId }); + }).then(d => d).catch(() => []); + const taxonReq = $.get('/api/document/page/taxon', { documentId: docId }).then(d => d).catch(() => []); + const topicReq = $.get('/api/document/page/topics', { documentId: docId }).then(d => d).catch(() => []); + const entityReq = $.get('/api/document/page/namedEntities', { documentId: docId }).then(d => d).catch(() => []); + const lemmaReq = $.get('/api/document/page/lemma', { documentId: docId }).then(d => d).catch(() => []); + const geonameReq = $.get('/api/document/page/geoname', { documentId: docId }).then(d => d).catch(() => []); Promise.all([taxonReq, topicReq, entityReq, lemmaReq, geonameReq, emotionReq]).then(([taxon, topics, entities, lemma, geoname, emotions]) => { $('.visualization-spinner').hide() From e9fb0d174531c991d017e1a370feb77e77de0410 Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Wed, 11 Mar 2026 23:31:07 +0100 Subject: [PATCH 37/47] fix: remove auto-render of Topic Entity on tab init to prevent rendered state conflict on first user click --- uce.portal/resources/templates/js/documentReader.js | 10 ++-------- .../common/services/PostgresqlDataInterface_Impl.java | 2 +- .../texttechnologylab/uce/web/routes/DocumentApi.java | 4 +++- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index d4f54b3c..564d9efe 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -913,19 +913,12 
@@ document.querySelectorAll('.tab-btn').forEach(btn => { setTimeout(() => renderTemporalExplorer('vp-1'), 500); return; } - - if (othersState.hasTopicEntity) { - activateVisualizationPanel('#viz-panel-2', $('.viz-nav-group[data-category="others"] .viz-nav-parent')); - $('#vp-2').removeClass('rendered'); - setTimeout(() => renderTopicEntityChordDiagram('vp-2'), 500); - return; - } }); } }); }); -$(document).on('click', '.viz-nav-item[data-target]', function (e) { +$(document).on('click', '.viz-nav-item[data-target]:not(.others-menu-item)', function (e) { e.preventDefault(); const target = $(this).data('target'); @@ -1327,6 +1320,7 @@ function renderTopicEntityChordDiagram(containerId) { $.get('/api/document/page/topicEntityRelation', { documentId: docId }) + .catch(() => []) .then(data => { $('.visualization-spinner').hide() if (!data || !Array.isArray(data) || data.length === 0) { diff --git a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java index b106dacc..d7a22966 100644 --- a/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java +++ b/uce.portal/uce.common/src/main/java/org/texttechnologylab/uce/common/services/PostgresqlDataInterface_Impl.java @@ -2134,7 +2134,7 @@ SELECT DISTINCT ON (st.document_id, st.sentence_id) var query = session.createNativeQuery(sql) .setParameter("documentId", documentId) - .setParameter("modelId", modelId); + .setParameter("modelId", modelId, org.hibernate.type.LongType.INSTANCE); return query.getResultList(); }); diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java index 5b65686e..7e0dc78f 100644 --- 
a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/DocumentApi.java @@ -355,7 +355,9 @@ public void getDocumentTopics(Context ctx) { } catch (Exception ex) { logger.error("Error getting document topics.", ex); ctx.status(500); - ctx.render("defaultError.ftl", Map.of("information", "Error retrieving document topics.")); + var errorModel = new HashMap(); + errorModel.put("information", "Error retrieving document topics."); + ctx.render("defaultError.ftl", errorModel); } } From 37950e83b90cefb8b8c9322002a0250503931c63 Mon Sep 17 00:00:00 2001 From: Francesco Da Silva Saporito Date: Wed, 11 Mar 2026 23:43:32 +0100 Subject: [PATCH 38/47] fix: remove auto-render of Semantic Density on tab init to prevent rendered state conflict on first user click --- uce.portal/resources/templates/js/documentReader.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/uce.portal/resources/templates/js/documentReader.js b/uce.portal/resources/templates/js/documentReader.js index 564d9efe..af043631 100644 --- a/uce.portal/resources/templates/js/documentReader.js +++ b/uce.portal/resources/templates/js/documentReader.js @@ -907,12 +907,6 @@ document.querySelectorAll('.tab-btn').forEach(btn => { }); loadOthersMenu(docId).then(function (othersState) { - if (othersState.hasSemanticDensity) { - activateVisualizationPanel('#viz-panel-1', $('.viz-nav-group[data-category="others"] .viz-nav-parent')); - $('#vp-1').removeClass('rendered'); - setTimeout(() => renderTemporalExplorer('vp-1'), 500); - return; - } }); } }); From dfaed6cba33dbcf755f997b724f8ffdef039a67a Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Thu, 12 Mar 2026 21:47:50 +0100 Subject: [PATCH 39/47] importer loading bar, not fully completed yet. 
--- uce.portal/resources/templates/css/site.css | 24 ++++ uce.portal/resources/templates/index.ftl | 116 ++++++++++++++++++ .../resources/templates/landing-page.ftl | 27 ++-- .../uce/corpusimporter/Importer.java | 24 +++- .../org/texttechnologylab/uce/web/App.java | 1 + .../uce/web/routes/ImportExportApi.java | 50 +++++++- 6 files changed, 227 insertions(+), 15 deletions(-) diff --git a/uce.portal/resources/templates/css/site.css b/uce.portal/resources/templates/css/site.css index 663f8e6a..1e189c9f 100644 --- a/uce.portal/resources/templates/css/site.css +++ b/uce.portal/resources/templates/css/site.css @@ -1247,3 +1247,27 @@ nav .selected-nav-btn.text::before { } /* UCE Map styles end */ + +/* Import Loading Bar Styles */ +#importProgressWrapper{ + display: none; + position: fixed; + bottom: 20px; + right: 20px; + width: 300px; + z-index: 1050; +} + +#importProgressWrapper:hover #allImportsList{ + display: block !important; +} + +#allImportsList{ + display: none; + position: absolute; + bottom: 100%; + right: 0; + width: 300px; + max-height: 400px; + overflow-y: auto; +} diff --git a/uce.portal/resources/templates/index.ftl b/uce.portal/resources/templates/index.ftl index 4523a6ac..e7025e89 100644 --- a/uce.portal/resources/templates/index.ftl +++ b/uce.portal/resources/templates/index.ftl @@ -221,6 +221,21 @@ +
+
+
+
+ Importing... +
+
+
+
+
+
+
+
+
+
+ +
+ + + If empty, the import ID will be auto generated +
@@ -528,14 +534,24 @@ body: formData }) .then(async response => { + const msg = await response.text(); if (response.ok) { resultDiv.innerHTML = '
Upload successful! Reloading...
'; - setTimeout(() => { - location.reload(); - }, 3000); + $('#uploadCorpusModal').modal('hide'); + const idMatch = msg.match(/ID:\s*(.+)/); + if(idMatch && idMatch[1]){ + const importId = idMatch[1].trim(); + let activeImports = JSON.parse(localStorage.getItem('activeUceImports') || '[]'); + if (!activeImports.includes(importId)) { + activeImports.push(importId); + localStorage.setItem('activeUceImports', JSON.stringify(activeImports)); + } + if(typeof startImportProgress === 'function') startImportProgress(); + }else{ + console.warn("No import Id extracted: ",msg); + } } else { - const msg = await response.text(); throw new Error(msg); } }) @@ -549,9 +565,6 @@ if (!confirm("Are you sure you want to delete this corpus?")){ return; } - - - fetch('/api/corpus/delete?corpusId=' + corpusId, { method: 'DELETE' }) diff --git a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java index 950609d4..3a4f039a 100644 --- a/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java +++ b/uce.portal/uce.corpus-importer/src/main/java/org/texttechnologylab/uce/corpusimporter/Importer.java @@ -68,10 +68,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.*; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CopyOnWriteArrayList; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.Executors; +import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -80,7 +77,8 @@ import java.util.zip.ZipInputStream; public class Importer { - + + public static final Map IMPORT_PROGRESS = new ConcurrentHashMap<>(); private static final Gson gson = new Gson(); private static final Logger logger = 
LogManager.getLogger(Importer.class); private static final int BATCH_SIZE = 2000; @@ -255,6 +253,8 @@ public void storeCorpusFromFolderAsync(String folderName, int numThreads) throws var docInBatch = new AtomicInteger(0); var lock = new Object(); var batchLatch = new AtomicReference<>(new CountDownLatch(0)); + + IMPORT_PROGRESS.put(this.importId,new AtomicInteger(0)); try (var fileStream = Files.walk(inputFolderName)) { fileStream.filter(Files::isRegularFile) @@ -287,6 +287,10 @@ public void storeCorpusFromFolderAsync(String folderName, int numThreads) throws () -> postProccessDocument(doc, corpus1, filePath.toString()), (ex) -> logImportError("Error postprocessing a saved document with id " + doc.getId(), ex, filePath.toString())); logImportInfo("Finished with import.", LogStatus.FINISHED, filePath.toString(), 0); + // Incrementing Counter for UI + if (Importer.IMPORT_PROGRESS != null && Importer.IMPORT_PROGRESS .containsKey(importId)){ + Importer.IMPORT_PROGRESS.get(importId).incrementAndGet(); + } } int local = docInBatch.incrementAndGet(); @@ -360,6 +364,16 @@ public void storeCorpusFromFolderAsync(String folderName, int numThreads) throws ExceptionUtils.tryCatchLog( () -> postProccessCorpus(corpus1, corpusConfigFinal), (ex) -> logger.error("Error in the final postprocessing of the current corpus with id " + corpus1.getId())); + + // Setting Import-Status to FINISHED + if (this.importerNumber == 1){ + ExceptionUtils.tryCatchLog(()->{ + var finalUceImport = db.getUceImportByImportId(this.importId); + finalUceImport.setStatus(ImportStatus.FINISHED); + db.saveOrUpdateUceImport(finalUceImport); + return null; + },ex -> logger.error("Error when trying to set import-status to FINISHED")); + } logger.info("\n\n=================================\n Done with the corpus import."); executor.shutdown(); diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java index 
71a56409..04f0586f 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/App.java @@ -442,6 +442,7 @@ private static void initSparkRoutes(ApplicationContext context, ApiRegistry regi get("/download/uima", (ctx) -> (registry.get(ImportExportApi.class)).downloadUIMA(ctx)); post("/import/path", (ctx) -> (registry.get(ImportExportApi.class)).importCorpusFromPath(ctx)); post("/import/upload",(ctx) -> (registry.get(ImportExportApi.class)).importCorpusFromUpload(ctx)); + get("import/status/{importId}",(ctx) -> (registry.get(ImportExportApi.class)).getImportStatus(ctx)); }); path("/wiki", () -> { diff --git a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java index 0f270731..0eac0fb8 100644 --- a/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java +++ b/uce.portal/uce.web/src/main/java/org/texttechnologylab/uce/web/routes/ImportExportApi.java @@ -193,7 +193,8 @@ public void importCorpusFromPath(Context ctx) { public void importCorpusFromUpload(Context ctx){ try{ - String importId = UUID.randomUUID().toString(); + String customImportId = ctx.formParam("importId"); + String importId = (customImportId != null && !customImportId.isBlank() ? 
customImportId : UUID.randomUUID().toString()); Path rootDir = java.nio.file.Paths.get(System.getProperty("java.io.tmpdir"), "uce_uploads", importId); Path inputDir = rootDir.resolve("input"); Files.createDirectories(inputDir); @@ -318,13 +319,26 @@ else if (config.getLanguage() == null) { CompletableFuture.runAsync(() -> { try{ importer.start(numThreads); + UCEImport finishedImport = db.getUceImportByImportId(importId); + if (finishedImport != null) { + finishedImport.setStatus(ImportStatus.FINISHED); + db.saveOrUpdateUceImport(finishedImport); + } } catch (DatabaseOperationException e) { logger.error("Error during asynchronous corpus uplaod import",e); + try { + UCEImport errImport = db.getUceImportByImportId(importId); + if (errImport != null) { + errImport.setStatus(ImportStatus.ERROR); + db.saveOrUpdateUceImport(errImport); + } + } catch (Exception ignored) {} + }finally { try { org.apache.commons.io.FileUtils.deleteDirectory(rootDir.toFile()); - } catch (IOException e) { - logger.warn("Could not delete temp upload dir: " + rootDir,e); + } catch (IOException ex) { + logger.warn("Could not delete temp upload dir: " + rootDir,ex); } } }); @@ -339,5 +353,35 @@ else if (config.getLanguage() == null) { ctx.status(500).result("Error during saving/updating database " + e.getMessage()); } } + + public void getImportStatus(Context ctx){ + String importId = ctx.pathParam("importId"); + + try{ + UCEImport uceImport = db.getUceImportByImportId(importId); + if (uceImport == null){ + ctx.status(400).result("Import %s not found".formatted(importId)); + return; + } + + Map statusData = new HashMap<>(); + statusData.put("status",uceImport.getStatus().name()); + statusData.put("total",uceImport.getTotalDocuments()); + int processed = 0; + + if(uceImport.getStatus() == ImportStatus.FINISHED || uceImport.getStatus() == ImportStatus.ERROR){ + processed = uceImport.getTotalDocuments(); + Importer.IMPORT_PROGRESS.remove(importId); + }else if 
(Importer.IMPORT_PROGRESS.containsKey(importId)){ + processed = Importer.IMPORT_PROGRESS.get(importId).get(); + } + + statusData.put("processed",processed); + ctx.json((statusData)); + } catch (DatabaseOperationException e) { + logger.error("Error when trying to get import-status"); + ctx.status(500).result("Error when trying to get import-status"); + } + } } From 39c10eb78e366731175070a8a422f9fefda0d541 Mon Sep 17 00:00:00 2001 From: Mark Ian Braun Date: Thu, 12 Mar 2026 23:16:44 +0100 Subject: [PATCH 40/47] importer now also works for documents with same id's. loading box now visible in both landing-page.ftl and corpus-inspector.ftl --- uce.portal/resources/templates/index.ftl | 34 ++++++++++--------- .../uce/corpusimporter/Importer.java | 8 ++--- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/uce.portal/resources/templates/index.ftl b/uce.portal/resources/templates/index.ftl index e7025e89..16369817 100644 --- a/uce.portal/resources/templates/index.ftl +++ b/uce.portal/resources/templates/index.ftl @@ -221,21 +221,7 @@
-
-
-
-
- Importing... -
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+ Importing... +
+
+
+
+
+
+
+
+
+
+
@@ -576,7 +578,7 @@