diff --git a/echo/server/dembrane/api/project.py b/echo/server/dembrane/api/project.py index 75521aa3..4e97f9f4 100644 --- a/echo/server/dembrane/api/project.py +++ b/echo/server/dembrane/api/project.py @@ -374,12 +374,10 @@ class CreateReportRequestBodySchema(BaseModel): @ProjectRouter.post("/{project_id}/create-report") -async def create_report( - project_id: str, db: DependencyInjectDatabase, body: CreateReportRequestBodySchema -) -> None: +async def create_report(project_id: str, body: CreateReportRequestBodySchema) -> None: language = body.language or "en" try: - report_content_response = await get_report_content_for_project(project_id, db, language) + report_content_response = await get_report_content_for_project(project_id, language) except ContextTooLongException: report = directus.create_item( "project_report", diff --git a/echo/server/dembrane/config.py b/echo/server/dembrane/config.py index 274ec992..d0229264 100644 --- a/echo/server/dembrane/config.py +++ b/echo/server/dembrane/config.py @@ -173,11 +173,59 @@ DISABLE_CORS = os.environ.get("DISABLE_CORS", "false").lower() in ["true", "1"] logger.debug(f"DISABLE_CORS: {DISABLE_CORS}") +SMALL_LITELLM_MODEL = os.environ.get("SMALL_LITELLM_MODEL") # 4o-mini +assert SMALL_LITELLM_MODEL, "SMALL_LITELLM_MODEL environment variable is not set" +logger.debug(f"SMALL_LITELLM_MODEL: {SMALL_LITELLM_MODEL}") + +SMALL_LITELLM_API_KEY = os.environ.get("SMALL_LITELLM_API_KEY") +assert SMALL_LITELLM_API_KEY, "SMALL_LITELLM_API_KEY environment variable is not set" +logger.debug("SMALL_LITELLM_API_KEY: set") + +SMALL_LITELLM_API_VERSION = os.environ.get("SMALL_LITELLM_API_VERSION") +assert SMALL_LITELLM_API_VERSION, "SMALL_LITELLM_API_VERSION environment variable is not set" +logger.debug(f"SMALL_LITELLM_API_VERSION: {SMALL_LITELLM_API_VERSION}") + +SMALL_LITELLM_API_BASE = os.environ.get("SMALL_LITELLM_API_BASE") +assert SMALL_LITELLM_API_BASE, "SMALL_LITELLM_API_BASE environment variable is not set" 
+logger.debug(f"SMALL_LITELLM_API_BASE: {SMALL_LITELLM_API_BASE}") + +MEDIUM_LITELLM_MODEL = os.environ.get("MEDIUM_LITELLM_MODEL") # 4.1 +assert MEDIUM_LITELLM_MODEL, "MEDIUM_LITELLM_MODEL environment variable is not set" +logger.debug(f"MEDIUM_LITELLM_MODEL: {MEDIUM_LITELLM_MODEL}") + +MEDIUM_LITELLM_API_KEY = os.environ.get("MEDIUM_LITELLM_API_KEY") +assert MEDIUM_LITELLM_API_KEY, "MEDIUM_LITELLM_API_KEY environment variable is not set" +logger.debug("MEDIUM_LITELLM_API_KEY: set") + +MEDIUM_LITELLM_API_VERSION = os.environ.get("MEDIUM_LITELLM_API_VERSION") +assert MEDIUM_LITELLM_API_VERSION, "MEDIUM_LITELLM_API_VERSION environment variable is not set" +logger.debug(f"MEDIUM_LITELLM_API_VERSION: {MEDIUM_LITELLM_API_VERSION}") + +MEDIUM_LITELLM_API_BASE = os.environ.get("MEDIUM_LITELLM_API_BASE") +assert MEDIUM_LITELLM_API_BASE, "MEDIUM_LITELLM_API_BASE environment variable is not set" +logger.debug(f"MEDIUM_LITELLM_API_BASE: {MEDIUM_LITELLM_API_BASE}") + +LARGE_LITELLM_MODEL = os.environ.get("LARGE_LITELLM_MODEL") # o4-mini +assert LARGE_LITELLM_MODEL, "LARGE_LITELLM_MODEL environment variable is not set" +logger.debug(f"LARGE_LITELLM_MODEL: {LARGE_LITELLM_MODEL}") + +LARGE_LITELLM_API_KEY = os.environ.get("LARGE_LITELLM_API_KEY") +assert LARGE_LITELLM_API_KEY, "LARGE_LITELLM_API_KEY environment variable is not set" +logger.debug("LARGE_LITELLM_API_KEY: set") + +LARGE_LITELLM_API_VERSION = os.environ.get("LARGE_LITELLM_API_VERSION") +assert LARGE_LITELLM_API_VERSION, "LARGE_LITELLM_API_VERSION environment variable is not set" +logger.debug(f"LARGE_LITELLM_API_VERSION: {LARGE_LITELLM_API_VERSION}") + +LARGE_LITELLM_API_BASE = os.environ.get("LARGE_LITELLM_API_BASE") +assert LARGE_LITELLM_API_BASE, "LARGE_LITELLM_API_BASE environment variable is not set" +logger.debug(f"LARGE_LITELLM_API_BASE: {LARGE_LITELLM_API_BASE}") + LITELLM_WHISPER_URL = os.environ.get("LITELLM_WHISPER_URL") -LITELLM_WHISPER_API_KEY = os.environ.get("LITELLM_WHISPER_API_KEY", OPENAI_API_KEY) 
+LITELLM_WHISPER_API_KEY = os.environ.get("LITELLM_WHISPER_API_KEY") LITELLM_WHISPER_API_VERSION = os.environ.get("LITELLM_WHISPER_API_VERSION", "2024-06-01") -LITELLM_WHISPER_MODEL = os.environ.get("LITELLM_WHISPER_MODEL", "whisper-1") +LITELLM_WHISPER_MODEL = os.environ.get("LITELLM_WHISPER_MODEL") assert LITELLM_WHISPER_API_KEY, "LITELLM_WHISPER_API_KEY environment variable is not set" logger.debug("LITELLM_WHISPER_API_KEY: set") assert LITELLM_WHISPER_API_VERSION, "LITELLM_WHISPER_API_VERSION environment variable is not set" diff --git a/echo/server/dembrane/embedding.py b/echo/server/dembrane/embedding.py index 00e2ac77..5c9a3510 100644 --- a/echo/server/dembrane/embedding.py +++ b/echo/server/dembrane/embedding.py @@ -2,10 +2,16 @@ from typing import List import backoff +import litellm -from dembrane.openai import client +from dembrane.config import ( + # FIXME: update to use dembrane embeddings + LIGHTRAG_LITELLM_EMBEDDING_API_KEY, + LIGHTRAG_LITELLM_EMBEDDING_API_BASE, + LIGHTRAG_LITELLM_EMBEDDING_API_VERSION, +) -EMBEDDING_DIM = 1536 +EMBEDDING_DIM = 3072 logger = logging.getLogger("embedding") logger.setLevel(logging.DEBUG) @@ -15,9 +21,14 @@ def embed_text(text: str) -> List[float]: text = text.replace("\n", " ").strip() try: - return ( - client.embeddings.create(input=[text], model="text-embedding-3-small").data[0].embedding + response = litellm.embedding( + api_key=str(LIGHTRAG_LITELLM_EMBEDDING_API_KEY), + api_base=str(LIGHTRAG_LITELLM_EMBEDDING_API_BASE), + api_version=str(LIGHTRAG_LITELLM_EMBEDDING_API_VERSION), + model="azure/text-embedding-3-large", + input=[text], ) + return response["data"][0]["embedding"] except Exception as exc: logger.debug("error:" + str(exc)) logger.debug("input text:" + text) diff --git a/echo/server/dembrane/quote_utils.py b/echo/server/dembrane/quote_utils.py index 304851e3..5989e901 100644 --- a/echo/server/dembrane/quote_utils.py +++ b/echo/server/dembrane/quote_utils.py @@ -7,17 +7,26 @@ import numpy as np import 
pandas as pd import tiktoken +from litellm import completion from pydantic import BaseModel from sqlalchemy import func, select, literal from sqlalchemy.orm import Session from sklearn.cluster import KMeans -from langchain_openai import OpenAIEmbeddings from pgvector.sqlalchemy import Vector -from langchain_experimental.text_splitter import SemanticChunker from dembrane.s3 import save_to_s3_from_url from dembrane.ner import anonymize_sentence from dembrane.utils import generate_uuid, get_utc_timestamp +from dembrane.config import ( + LARGE_LITELLM_MODEL, # o4-mini + SMALL_LITELLM_MODEL, # 4o-mini + LARGE_LITELLM_API_KEY, + SMALL_LITELLM_API_KEY, + LARGE_LITELLM_API_BASE, + SMALL_LITELLM_API_BASE, + LARGE_LITELLM_API_VERSION, + SMALL_LITELLM_API_VERSION, +) from dembrane.openai import client from dembrane.prompts import render_prompt from dembrane.database import ( @@ -38,9 +47,6 @@ np.random.seed(0) -lc_embedder = OpenAIEmbeddings(model="text-embedding-3-small") -semantic_chunker = SemanticChunker(lc_embedder) - SENTENCE_ENDING_PUNCTUATION = {".", "!", "?"} SENTENCE_ENDING_PUNTUATION_REGEX = r"(?<=[.!?]) +" @@ -73,9 +79,6 @@ def join_transcript_chunks(string_list: List[str]) -> str: return joined_string -# def generate_contextual_quote_and_embedding(db: Session, conversation_id: str, text: str) -> Tuple[QuoteModel, List[float]]: - - def llm_split_text(text: str) -> List[str]: logger = logging.getLogger("llm_split_text") logger.debug(f"splitting text: {text}") @@ -88,9 +91,12 @@ def llm_split_text(text: str) -> List[str]: } ] - response = client.chat.completions.create( - model="gpt-4o-mini", - messages=messages, # type: ignore + response = completion( + model=SMALL_LITELLM_MODEL, + messages=messages, + api_key=SMALL_LITELLM_API_KEY, + api_version=SMALL_LITELLM_API_VERSION, + api_base=SMALL_LITELLM_API_BASE, ) logger.debug(response) @@ -453,32 +459,38 @@ class AspectOutput(BaseModel): class JSONOutputSchema(BaseModel): aspect_list: list[AspectOutput] - response = 
client.beta.chat.completions.parse( - model="gpt-4o", - messages=messages, # type: ignore + response = completion( + model=LARGE_LITELLM_MODEL, + api_key=LARGE_LITELLM_API_KEY, + api_version=LARGE_LITELLM_API_VERSION, + api_base=LARGE_LITELLM_API_BASE, + messages=messages, response_format=JSONOutputSchema, ) response_message = response.choices[0].message + logger.debug(f"Response message: {response_message}") try: if response_message.refusal is not None: raise ValueError(response_message.refusal) # Access the parsed response content - parsed_response = response.choices[0].message.parsed + parsed_response = json.loads(response_message.content) + logger.debug(f"Parsed response: {parsed_response}") + logger.debug(f"Draft aspects: {parsed_response}") if parsed_response is None: - raise ValueError("No response from GPT-4o") + raise ValueError("No response from model.") - aspects_list = parsed_response.aspect_list + aspects_list = parsed_response["aspect_list"] except Exception as e: logger.error(f"Error generating draft aspects: {e}") raise e from e for aspect in aspects_list: - if aspect.name is None or aspect.description is None: + if aspect["name"] is None or aspect["description"] is None: logger.warning(f"Aspect missing name or description: {aspect}") continue @@ -486,8 +498,8 @@ class JSONOutputSchema(BaseModel): aspect = AspectModel( id=generate_uuid(), view_id=view.id, - name=aspect.name, - description=aspect.description, + name=aspect["name"], + description=aspect["description"], ) db.add(aspect) db.commit() @@ -589,9 +601,12 @@ def assign_aspect_centroid(db: Session, aspect_id: str, language: str) -> None: messages = [{"role": "user", "content": prompt}] - response = client.chat.completions.create( - model="gpt-4o", - messages=messages, # type: ignore + response = completion( + model=LARGE_LITELLM_MODEL, + messages=messages, + api_key=LARGE_LITELLM_API_KEY, + api_version=LARGE_LITELLM_API_VERSION, + api_base=LARGE_LITELLM_API_BASE, ) sample_quotes_json_string = 
response.choices[0].message.content @@ -731,9 +746,12 @@ def generate_aspect_summary(db: Session, aspect_id: str, language: str) -> None: ) messages = [{"role": "user", "content": prompt}] - response = client.chat.completions.create( - model="gpt-4o", - messages=messages, # type: ignore + response = completion( + model=SMALL_LITELLM_MODEL, + messages=messages, + api_key=SMALL_LITELLM_API_KEY, + api_version=SMALL_LITELLM_API_VERSION, + api_base=SMALL_LITELLM_API_BASE, ) short_summary = response.choices[0].message.content @@ -754,9 +772,12 @@ def generate_aspect_summary(db: Session, aspect_id: str, language: str) -> None: ) messages = [{"role": "user", "content": prompt}] - response = client.chat.completions.create( - model="gpt-4o", - messages=messages, # type: ignore + response = completion( + model=SMALL_LITELLM_MODEL, + messages=messages, + api_key=SMALL_LITELLM_API_KEY, + api_version=SMALL_LITELLM_API_VERSION, + api_base=SMALL_LITELLM_API_BASE, ) long_summary = response.choices[0].message.content @@ -910,9 +931,12 @@ def generate_view_extras(db: Session, view_id: str, language: str) -> ViewModel: messages = [{"role": "user", "content": prompt}] - response = client.chat.completions.create( - model="gpt-4o", - messages=messages, # type: ignore + response = completion( + model=SMALL_LITELLM_MODEL, + messages=messages, + api_key=SMALL_LITELLM_API_KEY, + api_version=SMALL_LITELLM_API_VERSION, + api_base=SMALL_LITELLM_API_BASE, ) view.summary = response.choices[0].message.content @@ -943,9 +967,12 @@ def generate_insight_extras(db: Session, insight_id: str, language: str) -> None title_messages = [{"role": "user", "content": title_prompt}] - title_response = client.chat.completions.create( - model="gpt-4o", - messages=title_messages, # type: ignore + title_response = completion( + model=SMALL_LITELLM_MODEL, + messages=title_messages, + api_key=SMALL_LITELLM_API_KEY, + api_version=SMALL_LITELLM_API_VERSION, + api_base=SMALL_LITELLM_API_BASE, ) if not 
title_response.choices: @@ -966,9 +993,12 @@ def generate_insight_extras(db: Session, insight_id: str, language: str) -> None summary_messages = [{"role": "user", "content": summary_prompt}] - summary_response = client.chat.completions.create( - model="gpt-4o", - messages=summary_messages, # type: ignore + summary_response = completion( + model=SMALL_LITELLM_MODEL, + messages=summary_messages, + api_key=SMALL_LITELLM_API_KEY, + api_version=SMALL_LITELLM_API_VERSION, + api_base=SMALL_LITELLM_API_BASE, ) summary = summary_response.choices[0].message.content @@ -1011,10 +1041,12 @@ def generate_conversation_summary(db: Session, conversation_id: str, language: s messages = [{"role": "user", "content": prompt}] - # FIXME: use litellm - response = client.chat.completions.create( - model="gpt-4o-mini", - messages=messages, # type: ignore + response = completion( + model=SMALL_LITELLM_MODEL, + messages=messages, + api_key=SMALL_LITELLM_API_KEY, + api_version=SMALL_LITELLM_API_VERSION, + api_base=SMALL_LITELLM_API_BASE, ) conversation.summary = response.choices[0].message.content diff --git a/echo/server/dembrane/reply_utils.py b/echo/server/dembrane/reply_utils.py index 006cd98a..1483117a 100644 --- a/echo/server/dembrane/reply_utils.py +++ b/echo/server/dembrane/reply_utils.py @@ -242,6 +242,7 @@ async def generate_reply_for_conversation( in_response_section = False # Stream the response + # FIXME: reply response = await litellm.acompletion( model="anthropic/claude-3-5-sonnet-20240620", messages=[ diff --git a/echo/server/dembrane/report_utils.py b/echo/server/dembrane/report_utils.py index 3888e8b0..049c69d0 100644 --- a/echo/server/dembrane/report_utils.py +++ b/echo/server/dembrane/report_utils.py @@ -1,14 +1,27 @@ +import re import logging -from sqlalchemy.orm import Session +from litellm import completion, token_counter +from dembrane.config import ( + MEDIUM_LITELLM_MODEL, + MEDIUM_LITELLM_API_KEY, + MEDIUM_LITELLM_API_BASE, + MEDIUM_LITELLM_API_VERSION, +) from 
dembrane.prompts import render_prompt -from dembrane.database import ConversationModel -from dembrane.anthropic import async_anthropic_client, count_tokens_anthropic +from dembrane.directus import directus from dembrane.api.conversation import get_conversation_transcript from dembrane.api.dependency_auth import DirectusSession -MAX_REPORT_CONTEXT_LENGTH = 128000 +logger = logging.getLogger("report_utils") + +if "4.1" in MEDIUM_LITELLM_MODEL: + logger.info("using 700k context length for report") + MAX_REPORT_CONTEXT_LENGTH = 700000 +else: + logger.info("using 128k context length for report") + MAX_REPORT_CONTEXT_LENGTH = 128000 class ContextTooLongException(Exception): @@ -17,45 +30,73 @@ class ContextTooLongException(Exception): pass -logger = logging.getLogger("report_utils") -async def get_report_content_for_project(project_id: str, db: Session, language: str) -> str: - conversations = ( - db.query(ConversationModel).filter(ConversationModel.project_id == project_id).all() +async def get_report_content_for_project(project_id: str, language: str) -> str: + conversations = directus.get_items( + "conversation", + { + "query": { + "filter": { + "project_id": project_id, + }, + "fields": ["id", "participant_name", "tags.project_tag_id.text", "summary"], + } + }, ) + logger.debug(f"Found {len(conversations)} conversations for project {project_id}") + logger.debug(f"Conversations: {conversations}") + token_count = 0 - conversation_data_list = [] + conversation_data_dict = {} + # first add all the summaries to the list for conversation in conversations: - transcript = get_conversation_transcript( - conversation.id, - # fake auth to get this fn call - DirectusSession(user_id="none", is_admin=True), - ) + logger.info(f"Adding conversation {conversation['id']} to report") + + if conversation["summary"] is None: + logger.info(f"Conversation {conversation['id']} has no summary") + continue + + token_count += token_counter(model=MEDIUM_LITELLM_MODEL, 
text=conversation["summary"]) + + tags_text = "" + for tag in conversation["tags"]: + tag_text = tag["project_tag_id"]["text"] + tags_text += tag_text + ", " - conversation_data_list.append( - { - "name": conversation.participant_name, - "tags": ", ".join([tag.text for tag in conversation.tags]), - "transcript": transcript, + if token_count < MAX_REPORT_CONTEXT_LENGTH: + conversation_data_dict[conversation["id"]] = { + "name": conversation["participant_name"], + "tags": tags_text, + "transcript": conversation["summary"], } - ) + else: + logger.info( + f"Context too long for report for project {project_id}, token count: {token_count}" + ) + break - try: - token_count += count_tokens_anthropic(transcript) + for conversation in conversations: + transcript = get_conversation_transcript( + conversation["id"], + DirectusSession(user_id="none", is_admin=True), + ) - except Exception as e: - logger.error(f"Error counting tokens for conversation {conversation.id}: {e}") - token_count += len(str(transcript).split(" ")) // 4 - continue + token_count += token_counter(model=MEDIUM_LITELLM_MODEL, text=transcript) - if token_count > MAX_REPORT_CONTEXT_LENGTH: + if token_count < MAX_REPORT_CONTEXT_LENGTH: + conversation_data_dict[conversation["id"]]["transcript"] = ( + conversation_data_dict[conversation["id"]]["transcript"] + transcript + ) + else: logger.info( - f"Context too long for report for project{project_id}, token count: {token_count}" + f"Context too long for report for project {project_id}, token count: {token_count}" ) - raise ContextTooLongException + break + + conversation_data_list = list(conversation_data_dict.values()) logger.debug(f"Getting report content for project {project_id}. 
Token count: {token_count}.") @@ -63,20 +104,32 @@ async def get_report_content_for_project(project_id: str, db: Session, language: "system_report", language, {"conversations": conversation_data_list} ) - response = await async_anthropic_client.messages.create( - model="claude-3-5-sonnet-20241022", - max_tokens=4096, + # Use litellm.completion instead of anthropic client + response = completion( + model=MEDIUM_LITELLM_MODEL, + api_key=MEDIUM_LITELLM_API_KEY, + api_version=MEDIUM_LITELLM_API_VERSION, + api_base=MEDIUM_LITELLM_API_BASE, + # max tokens needed for "anthropic" + # max_tokens=4096, messages=[ {"role": "user", "content": prompt_message}, - # prefill message - {"role": "assistant", "content": "
"}, + # prefill message only for "anthropic" + # {"role": "assistant", "content": "
"}, ], ) - response_content = response.content[0].text # type: ignore + response_content = response.choices[0].message.content + + # Extract content between
<article> tags + article_pattern = r"
<article>(.*?)</article>
" + match = re.search(article_pattern, response_content, re.DOTALL) - # remove
<article> and </article>
if found - response_content = response_content.replace("
", "").replace("
", "") + if match: + response_content = match.group(1) + else: + # If no
<article> tags found, keep original content but remove any existing tags + response_content = response_content.replace("<article>
", "").replace("
", "") logger.debug(f"Report content for project {project_id}: {response_content}") diff --git a/echo/server/dembrane/tasks.py b/echo/server/dembrane/tasks.py index eeaabd81..6923a94d 100644 --- a/echo/server/dembrane/tasks.py +++ b/echo/server/dembrane/tasks.py @@ -114,7 +114,7 @@ def task_transcribe_chunk(conversation_chunk_id: str) -> None: raise e from e -@dramatiq.actor(queue_name="network") +@dramatiq.actor(queue_name="network", priority=30) def task_summarize_conversation(conversation_id: str) -> None: """ Summarize a conversation. The results are not returned. You can find it in @@ -136,7 +136,7 @@ def task_summarize_conversation(conversation_id: str) -> None: raise e from e -@dramatiq.actor(store_results=True, queue_name="cpu") +@dramatiq.actor(store_results=True, queue_name="cpu", priority=30) def task_merge_conversation_chunks(conversation_id: str) -> None: """ Merge conversation chunks. @@ -161,7 +161,7 @@ def task_merge_conversation_chunks(conversation_id: str) -> None: raise e from e -@dramatiq.actor(queue_name="cpu") +@dramatiq.actor(queue_name="cpu", priority=30) def task_run_etl_pipeline(conversation_id: str) -> None: """ Run the AudioLightrag ETL pipeline. @@ -178,7 +178,7 @@ def task_run_etl_pipeline(conversation_id: str) -> None: raise e from e -@dramatiq.actor(queue_name="network") +@dramatiq.actor(queue_name="network", priority=30) def task_finish_conversation_hook(conversation_id: str) -> None: """ Finalize processing of a conversation and invoke follow-up tasks. 
@@ -331,7 +331,7 @@ def task_collect_and_finish_unfinished_conversations() -> None: # FIXME: move to quote_utils.py / remove -@dramatiq.actor(queue_name="cpu", priority=70) +@dramatiq.actor(queue_name="cpu", priority=50) def task_generate_quotes(project_analysis_run_id: str, conversation_id: str) -> None: logger = getLogger("dembrane.tasks.task_generate_quotes") @@ -399,9 +399,12 @@ def task_generate_quotes(project_analysis_run_id: str, conversation_id: str) -> return # conversation was updated since the last project analysis run so we need to generate new quotes - if latest_conversation_chunk.timestamp > comparison_project_analysis_run.created_at: + if ( + latest_conversation_chunk.timestamp + > comparison_project_analysis_run["created_at"] + ): logger.info( - f"Have to generate quotes for project analysis run ({latest_conversation_chunk.id[:6]} ({latest_conversation_chunk.timestamp.strftime('%Y-%m-%d %H:%M:%S')}) > {comparison_project_analysis_run.id[:6]} ({comparison_project_analysis_run.created_at.strftime('%Y-%m-%d %H:%M:%S')}))" + f"Have to generate quotes for project analysis run ({latest_conversation_chunk.id[:6]} ({latest_conversation_chunk.timestamp.strftime('%Y-%m-%d %H:%M:%S')}) > {comparison_project_analysis_run['id'][:6]} ({comparison_project_analysis_run['created_at'].strftime('%Y-%m-%d %H:%M:%S')}))" ) generate_quotes(db, project_analysis_run_id, conversation_id) else: @@ -409,7 +412,7 @@ def task_generate_quotes(project_analysis_run_id: str, conversation_id: str) -> # for all quotes (comparision run, conversation id) update with the latest project run id # we need to update the quote with the latest conversation chunk logger.info( - f"Reusing quotes for project analysis run from {comparison_project_analysis_run.id[:6]} ({comparison_project_analysis_run.created_at.strftime('%Y-%m-%d %H:%M:%S')})" + f"Reusing quotes for project analysis run from {comparison_project_analysis_run['id'][:6]} 
({comparison_project_analysis_run['created_at'].strftime('%Y-%m-%d %H:%M:%S')})" ) latest_project_analysis_run = previous_project_analysis_runs[0] @@ -417,7 +420,7 @@ def task_generate_quotes(project_analysis_run_id: str, conversation_id: str) -> db.query(QuoteModel) .filter( QuoteModel.project_analysis_run_id - == comparison_project_analysis_run.id, + == comparison_project_analysis_run["id"], QuoteModel.conversation_id == conversation_id, ) .update( @@ -440,7 +443,7 @@ def task_generate_quotes(project_analysis_run_id: str, conversation_id: str) -> raise e from e -@dramatiq.actor(queue_name="network") +@dramatiq.actor(queue_name="network", priority=50) def task_generate_aspect_extras(aspect_id: str, language: str = "en") -> None: logger = getLogger("dembrane.tasks.task_generate_aspect_extras") with DatabaseSession() as db: @@ -457,7 +460,7 @@ def task_generate_aspect_extras(aspect_id: str, language: str = "en") -> None: raise e from e -@dramatiq.actor(queue_name="network") +@dramatiq.actor(queue_name="network", priority=50) def task_generate_view_extras(view_id: str, language: str) -> None: logger = getLogger("dembrane.tasks.task_generate_view_extras") with DatabaseSession() as db: @@ -484,7 +487,7 @@ def task_generate_view_extras(view_id: str, language: str) -> None: raise e from e -@dramatiq.actor(queue_name="cpu", priority=70) +@dramatiq.actor(queue_name="cpu", priority=50) def task_assign_aspect_centroid(aspect_id: str, language: str = "en") -> None: logger = getLogger("dembrane.tasks.task_assign_aspect_centroid") with DatabaseSession() as db: @@ -501,7 +504,7 @@ def task_assign_aspect_centroid(aspect_id: str, language: str = "en") -> None: raise e from e -@dramatiq.actor(queue_name="cpu", priority=70) +@dramatiq.actor(queue_name="cpu", priority=50) def task_cluster_quotes_using_aspect_centroids(view_id: str) -> None: logger = getLogger("dembrane.tasks.task_cluster_quotes_using_aspect_centroids") with DatabaseSession() as db: @@ -518,7 +521,7 @@ def 
task_cluster_quotes_using_aspect_centroids(view_id: str) -> None: raise e from e -@dramatiq.actor(queue_name="network", priority=70) +@dramatiq.actor(queue_name="network", priority=50) def task_create_view( project_analysis_run_id: str, user_query: str, @@ -577,7 +580,7 @@ def task_create_view( raise e from e -@dramatiq.actor(queue_name="network", priority=70) +@dramatiq.actor(queue_name="network", priority=50) def task_finalize_project_library(project_analysis_run_id: str) -> None: logger = getLogger("dembrane.tasks.task_finalize_project_library") with DatabaseSession() as db: @@ -645,7 +648,7 @@ def task_finalize_project_library(project_analysis_run_id: str) -> None: } -@dramatiq.actor(queue_name="network", priority=70) +@dramatiq.actor(queue_name="cpu", priority=50) def task_create_project_library(project_id: str, language: str) -> None: logger = getLogger("dembrane.tasks.task_create_project_library") diff --git a/echo/server/prompt_templates/system_report.de.jinja b/echo/server/prompt_templates/system_report.de.jinja index 82c99680..9b13c212 100644 --- a/echo/server/prompt_templates/system_report.de.jinja +++ b/echo/server/prompt_templates/system_report.de.jinja @@ -48,4 +48,6 @@ Formatieren Sie Ihre endgültige Ausgabe wie folgt: Behalten Sie während des gesamten Artikels einen professionellen, journalistischen Ton bei. Die Sprache sollte klar, präzise und für ein allgemeines Publikum leicht verständlich sein. Vermeiden Sie umgangssprachliche Ausdrücke, Fachjargon oder übermäßig technische Begriffe, es sei denn, sie sind wesentlich und werden erklärt. +Fügen Sie keine
<br> Tags in die Ausgabe ein. Verwenden Sie stattdessen Zeilenumbrüche. + Präsentieren Sie Ihren fertigen Artikel innerhalb von <article>
Tags mit markdown-formatiertem Text. \ No newline at end of file diff --git a/echo/server/prompt_templates/system_report.en.jinja b/echo/server/prompt_templates/system_report.en.jinja index c5c6c1d8..408323c7 100644 --- a/echo/server/prompt_templates/system_report.en.jinja +++ b/echo/server/prompt_templates/system_report.en.jinja @@ -49,4 +49,6 @@ Format your final output as follows: Remember to maintain a professional, journalistic tone throughout the article. The language should be clear, concise, and easily understandable to a general audience. Avoid casual language, jargon, or overly technical terms unless they are essential and explained. +Don't insert any
<br> tags in the output. Use new lines instead. + Present your final article within <article>
tags with markdown formatted text. \ No newline at end of file diff --git a/echo/server/prompt_templates/system_report.es.jinja b/echo/server/prompt_templates/system_report.es.jinja index 5dbf741d..49fd56c2 100644 --- a/echo/server/prompt_templates/system_report.es.jinja +++ b/echo/server/prompt_templates/system_report.es.jinja @@ -48,4 +48,6 @@ Formatee su resultado final de la siguiente manera: Mantenga un tono profesional y periodístico a lo largo del artículo. El lenguaje debe ser claro, conciso y fácilmente comprensible para una audiencia general. Evite el lenguaje coloquial, la jerga o términos excesivamente técnicos a menos que sean esenciales y estén explicados. +No inserte ningún tag
<br> en la salida. Use saltos de línea en su lugar. + Presente su artículo final dentro de etiquetas <article>
con texto formateado en markdown. \ No newline at end of file diff --git a/echo/server/prompt_templates/system_report.fr.jinja b/echo/server/prompt_templates/system_report.fr.jinja index d3801364..0076338d 100644 --- a/echo/server/prompt_templates/system_report.fr.jinja +++ b/echo/server/prompt_templates/system_report.fr.jinja @@ -48,4 +48,6 @@ Formatez votre sortie finale comme suit : Maintenez un ton professionnel et journalistique tout au long de l'article. Le langage doit être clair, concis et facilement compréhensible pour un public général. Évitez le langage familier, le jargon ou les termes trop techniques sauf s'ils sont essentiels et expliqués. +Ne pas insérer de balises
<br> dans la sortie. Utilisez des sauts de ligne à la place. + Présentez votre article final entre les balises <article>
avec du texte formaté en markdown. \ No newline at end of file diff --git a/echo/server/prompt_templates/system_report.nl.jinja b/echo/server/prompt_templates/system_report.nl.jinja index 55c7deda..fc3c24c1 100644 --- a/echo/server/prompt_templates/system_report.nl.jinja +++ b/echo/server/prompt_templates/system_report.nl.jinja @@ -48,4 +48,6 @@ Formatteer uw eindresultaat als volgt: Behoud gedurende het hele artikel een professionele, journalistieke toon. De taal moet helder, beknopt en gemakkelijk te begrijpen zijn voor een algemeen publiek. Vermijd informeel taalgebruik, jargon of overmatig technische termen tenzij deze essentieel zijn en worden uitgelegd. +Gebruik geen
<br> tags in de uitvoer. Gebruik "new lines" in plaats daarvan. + Presenteer uw definitieve artikel binnen <article>
tags met markdown geformatteerde tekst. \ No newline at end of file diff --git a/echo/server/tests/test_quote_utils.py b/echo/server/tests/test_quote_utils.py index daf18e3c..b72f8a14 100644 --- a/echo/server/tests/test_quote_utils.py +++ b/echo/server/tests/test_quote_utils.py @@ -7,12 +7,14 @@ import numpy as np from sqlalchemy.orm import Session +from dembrane.utils import generate_uuid from dembrane.database import ( QuoteModel, ProcessingStatusEnum, ProjectAnalysisRunModel, get_db, ) +from dembrane.embedding import EMBEDDING_DIM from dembrane.quote_utils import get_random_sample_quotes from .common import (