Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions echo/server/dembrane/api/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,10 @@ class CreateReportRequestBodySchema(BaseModel):


@ProjectRouter.post("/{project_id}/create-report")
async def create_report(
project_id: str, db: DependencyInjectDatabase, body: CreateReportRequestBodySchema
) -> None:
async def create_report(project_id: str, body: CreateReportRequestBodySchema) -> None:
language = body.language or "en"
try:
report_content_response = await get_report_content_for_project(project_id, db, language)
report_content_response = await get_report_content_for_project(project_id, language)
except ContextTooLongException:
report = directus.create_item(
"project_report",
Expand Down
52 changes: 50 additions & 2 deletions echo/server/dembrane/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,59 @@
DISABLE_CORS = os.environ.get("DISABLE_CORS", "false").lower() in ["true", "1"]
logger.debug(f"DISABLE_CORS: {DISABLE_CORS}")

SMALL_LITELLM_MODEL = os.environ.get("SMALL_LITELLM_MODEL") # 4o-mini
assert SMALL_LITELLM_MODEL, "SMALL_LITELLM_MODEL environment variable is not set"
logger.debug(f"SMALL_LITELLM_MODEL: {SMALL_LITELLM_MODEL}")

SMALL_LITELLM_API_KEY = os.environ.get("SMALL_LITELLM_API_KEY")
assert SMALL_LITELLM_API_KEY, "SMALL_LITELLM_API_KEY environment variable is not set"
logger.debug("SMALL_LITELLM_API_KEY: set")

SMALL_LITELLM_API_VERSION = os.environ.get("SMALL_LITELLM_API_VERSION")
assert SMALL_LITELLM_API_VERSION, "SMALL_LITELLM_API_VERSION environment variable is not set"
logger.debug(f"SMALL_LITELLM_API_VERSION: {SMALL_LITELLM_API_VERSION}")

SMALL_LITELLM_API_BASE = os.environ.get("SMALL_LITELLM_API_BASE")
assert SMALL_LITELLM_API_BASE, "SMALL_LITELLM_API_BASE environment variable is not set"
logger.debug(f"SMALL_LITELLM_API_BASE: {SMALL_LITELLM_API_BASE}")

MEDIUM_LITELLM_MODEL = os.environ.get("MEDIUM_LITELLM_MODEL") # 4.1
assert MEDIUM_LITELLM_MODEL, "MEDIUM_LITELLM_MODEL environment variable is not set"
logger.debug(f"MEDIUM_LITELLM_MODEL: {MEDIUM_LITELLM_MODEL}")

MEDIUM_LITELLM_API_KEY = os.environ.get("MEDIUM_LITELLM_API_KEY")
assert MEDIUM_LITELLM_API_KEY, "MEDIUM_LITELLM_API_KEY environment variable is not set"
logger.debug("MEDIUM_LITELLM_API_KEY: set")

MEDIUM_LITELLM_API_VERSION = os.environ.get("MEDIUM_LITELLM_API_VERSION")
assert MEDIUM_LITELLM_API_VERSION, "MEDIUM_LITELLM_API_VERSION environment variable is not set"
logger.debug(f"MEDIUM_LITELLM_API_VERSION: {MEDIUM_LITELLM_API_VERSION}")

MEDIUM_LITELLM_API_BASE = os.environ.get("MEDIUM_LITELLM_API_BASE")
assert MEDIUM_LITELLM_API_BASE, "MEDIUM_LITELLM_API_BASE environment variable is not set"
logger.debug(f"MEDIUM_LITELLM_API_BASE: {MEDIUM_LITELLM_API_BASE}")

LARGE_LITELLM_MODEL = os.environ.get("LARGE_LITELLM_MODEL") # o4-mini
assert LARGE_LITELLM_MODEL, "LARGE_LITELLM_MODEL environment variable is not set"
logger.debug(f"LARGE_LITELLM_MODEL: {LARGE_LITELLM_MODEL}")

LARGE_LITELLM_API_KEY = os.environ.get("LARGE_LITELLM_API_KEY")
assert LARGE_LITELLM_API_KEY, "LARGE_LITELLM_API_KEY environment variable is not set"
logger.debug("LARGE_LITELLM_API_KEY: set")

LARGE_LITELLM_API_VERSION = os.environ.get("LARGE_LITELLM_API_VERSION")
assert LARGE_LITELLM_API_VERSION, "LARGE_LITELLM_API_VERSION environment variable is not set"
logger.debug(f"LARGE_LITELLM_API_VERSION: {LARGE_LITELLM_API_VERSION}")

LARGE_LITELLM_API_BASE = os.environ.get("LARGE_LITELLM_API_BASE")
assert LARGE_LITELLM_API_BASE, "LARGE_LITELLM_API_BASE environment variable is not set"
logger.debug(f"LARGE_LITELLM_API_BASE: {LARGE_LITELLM_API_BASE}")


LITELLM_WHISPER_URL = os.environ.get("LITELLM_WHISPER_URL")
LITELLM_WHISPER_API_KEY = os.environ.get("LITELLM_WHISPER_API_KEY", OPENAI_API_KEY)
LITELLM_WHISPER_API_KEY = os.environ.get("LITELLM_WHISPER_API_KEY")
LITELLM_WHISPER_API_VERSION = os.environ.get("LITELLM_WHISPER_API_VERSION", "2024-06-01")
LITELLM_WHISPER_MODEL = os.environ.get("LITELLM_WHISPER_MODEL", "whisper-1")
LITELLM_WHISPER_MODEL = os.environ.get("LITELLM_WHISPER_MODEL")
assert LITELLM_WHISPER_API_KEY, "LITELLM_WHISPER_API_KEY environment variable is not set"
logger.debug("LITELLM_WHISPER_API_KEY: set")
assert LITELLM_WHISPER_API_VERSION, "LITELLM_WHISPER_API_VERSION environment variable is not set"
Expand Down
19 changes: 15 additions & 4 deletions echo/server/dembrane/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,16 @@
from typing import List

import backoff
import litellm

from dembrane.openai import client
from dembrane.config import (
# FIXME: update to use dembrane embeddings
LIGHTRAG_LITELLM_EMBEDDING_API_KEY,
LIGHTRAG_LITELLM_EMBEDDING_API_BASE,
LIGHTRAG_LITELLM_EMBEDDING_API_VERSION,
)
Comment on lines 4 to +12
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Inconsistent embedding configuration.

Using LIGHTRAG_LITELLM_EMBEDDING_* variables while the comment indicates these should be updated to use "dembrane embeddings". This appears to be a temporary implementation with a TODO marker.


🏁 Script executed:

#!/bin/bash
# Check if SMALL_LITELLM_* variables are used elsewhere for embeddings
rg "SMALL_LITELLM_" --type py | grep -v config.py

Length of output: 2457


🏁 Script executed:

#!/bin/bash
# List all embedding-related config variables to verify available dembrane-specific keys
rg "EMBEDDING" -n echo/server/dembrane/config.py

Length of output: 1402


Embeddings pipeline: wire up dembrane embedding config – LGTM pending fix

We’re still importing and using LIGHTRAG_LITELLM_EMBEDDING_* keys in
echo/server/dembrane/embedding.py (lines 4–12), but config.py only defines those. To complete the “FIXME: update to use dembrane embeddings”:

• In echo/server/dembrane/config.py (lines 329–353):
– Define new DEMBRANE_EMBEDDING_* variables alongside the existing LIGHTRAG_* ones, e.g.:
```python
DEMBRANE_EMBEDDING_MODEL = os.environ.get("DEMBRANE_EMBEDDING_MODEL")
DEMBRANE_EMBEDDING_API_BASE = os.environ.get("DEMBRANE_EMBEDDING_API_BASE")
DEMBRANE_EMBEDDING_API_KEY = os.environ.get("DEMBRANE_EMBEDDING_API_KEY")
DEMBRANE_EMBEDDING_API_VERSION = os.environ.get("DEMBRANE_EMBEDDING_API_VERSION")
```
• In echo/server/dembrane/embedding.py (lines 4–12):
– Replace the LIGHTRAG_LITELLM_* imports with the new DEMBRANE_EMBEDDING_* keys and drop the # FIXME:
```diff
- from dembrane.config import (
-     # FIXME: update to use dembrane embeddings
-     LIGHTRAG_LITELLM_EMBEDDING_API_KEY,
-     LIGHTRAG_LITELLM_EMBEDDING_API_BASE,
-     LIGHTRAG_LITELLM_EMBEDDING_API_VERSION,
- )
+ from dembrane.config import (
+     DEMBRANE_EMBEDDING_API_KEY,
+     DEMBRANE_EMBEDDING_API_BASE,
+     DEMBRANE_EMBEDDING_API_VERSION,
+ )
```

Once those variables are in place and referenced, the embedding module will be fully aligned with the dembrane standard.

🤖 Prompt for AI Agents
In echo/server/dembrane/embedding.py lines 4 to 12, replace the imported
LIGHTRAG_LITELLM_EMBEDDING_API_KEY, LIGHTRAG_LITELLM_EMBEDDING_API_BASE, and
LIGHTRAG_LITELLM_EMBEDDING_API_VERSION with the new DEMBRANE_EMBEDDING_API_KEY,
DEMBRANE_EMBEDDING_API_BASE, and DEMBRANE_EMBEDDING_API_VERSION variables
respectively, and remove the FIXME comment. Also, ensure these
DEMBRANE_EMBEDDING_* variables are defined in echo/server/dembrane/config.py
around lines 329 to 353 by adding environment variable retrievals for
DEMBRANE_EMBEDDING_MODEL, DEMBRANE_EMBEDDING_API_BASE,
DEMBRANE_EMBEDDING_API_KEY, and DEMBRANE_EMBEDDING_API_VERSION.


EMBEDDING_DIM = 1536
EMBEDDING_DIM = 3072

logger = logging.getLogger("embedding")
logger.setLevel(logging.DEBUG)
Expand All @@ -15,9 +21,14 @@
def embed_text(text: str) -> List[float]:
text = text.replace("\n", " ").strip()
try:
return (
client.embeddings.create(input=[text], model="text-embedding-3-small").data[0].embedding
response = litellm.embedding(
api_key=str(LIGHTRAG_LITELLM_EMBEDDING_API_KEY),
api_base=str(LIGHTRAG_LITELLM_EMBEDDING_API_BASE),
api_version=str(LIGHTRAG_LITELLM_EMBEDDING_API_VERSION),
model="azure/text-embedding-3-large",
input=[text],
)
return response["data"][0]["embedding"]
except Exception as exc:
logger.debug("error:" + str(exc))
logger.debug("input text:" + text)
Expand Down
116 changes: 74 additions & 42 deletions echo/server/dembrane/quote_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,26 @@
import numpy as np
import pandas as pd
import tiktoken
from litellm import completion
from pydantic import BaseModel
from sqlalchemy import func, select, literal
from sqlalchemy.orm import Session
from sklearn.cluster import KMeans
from langchain_openai import OpenAIEmbeddings
from pgvector.sqlalchemy import Vector
from langchain_experimental.text_splitter import SemanticChunker

from dembrane.s3 import save_to_s3_from_url
from dembrane.ner import anonymize_sentence
from dembrane.utils import generate_uuid, get_utc_timestamp
from dembrane.config import (
LARGE_LITELLM_MODEL, # o4-mini
SMALL_LITELLM_MODEL, # 4o-mini
LARGE_LITELLM_API_KEY,
SMALL_LITELLM_API_KEY,
LARGE_LITELLM_API_BASE,
SMALL_LITELLM_API_BASE,
LARGE_LITELLM_API_VERSION,
SMALL_LITELLM_API_VERSION,
)
from dembrane.openai import client
from dembrane.prompts import render_prompt
from dembrane.database import (
Expand All @@ -38,9 +47,6 @@
np.random.seed(0)


lc_embedder = OpenAIEmbeddings(model="text-embedding-3-small")
semantic_chunker = SemanticChunker(lc_embedder)

SENTENCE_ENDING_PUNCTUATION = {".", "!", "?"}
SENTENCE_ENDING_PUNTUATION_REGEX = r"(?<=[.!?]) +"

Expand Down Expand Up @@ -73,9 +79,6 @@ def join_transcript_chunks(string_list: List[str]) -> str:
return joined_string


# def generate_contextual_quote_and_embedding(db: Session, conversation_id: str, text: str) -> Tuple[QuoteModel, List[float]]:


def llm_split_text(text: str) -> List[str]:
logger = logging.getLogger("llm_split_text")
logger.debug(f"splitting text: {text}")
Expand All @@ -88,9 +91,12 @@ def llm_split_text(text: str) -> List[str]:
}
]

response = client.chat.completions.create(
model="gpt-4o-mini",
messages=messages, # type: ignore
response = completion(
model=SMALL_LITELLM_MODEL,
messages=messages,
api_key=SMALL_LITELLM_API_KEY,
api_version=SMALL_LITELLM_API_VERSION,
api_base=SMALL_LITELLM_API_BASE,
)
logger.debug(response)

Expand Down Expand Up @@ -453,41 +459,47 @@ class AspectOutput(BaseModel):
class JSONOutputSchema(BaseModel):
aspect_list: list[AspectOutput]

response = client.beta.chat.completions.parse(
model="gpt-4o",
messages=messages, # type: ignore
response = completion(
model=LARGE_LITELLM_MODEL,
api_key=LARGE_LITELLM_API_KEY,
api_version=LARGE_LITELLM_API_VERSION,
api_base=LARGE_LITELLM_API_BASE,
messages=messages,
response_format=JSONOutputSchema,
)

response_message = response.choices[0].message

logger.debug(f"Response message: {response_message}")
try:
if response_message.refusal is not None:
raise ValueError(response_message.refusal)

# Access the parsed response content
parsed_response = response.choices[0].message.parsed
parsed_response = json.loads(response_message.content)
logger.debug(f"Parsed response: {parsed_response}")

logger.debug(f"Draft aspects: {parsed_response}")

if parsed_response is None:
raise ValueError("No response from GPT-4o")
raise ValueError("No response from model.")

aspects_list = parsed_response.aspect_list
aspects_list = parsed_response["aspect_list"]
except Exception as e:
logger.error(f"Error generating draft aspects: {e}")
raise e from e

for aspect in aspects_list:
if aspect.name is None or aspect.description is None:
if aspect["name"] is None or aspect["description"] is None:
logger.warning(f"Aspect missing name or description: {aspect}")
continue

else:
aspect = AspectModel(
id=generate_uuid(),
view_id=view.id,
name=aspect.name,
description=aspect.description,
name=aspect["name"],
description=aspect["description"],
)
db.add(aspect)
db.commit()
Expand Down Expand Up @@ -589,9 +601,12 @@ def assign_aspect_centroid(db: Session, aspect_id: str, language: str) -> None:

messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(
model="gpt-4o",
messages=messages, # type: ignore
response = completion(
model=LARGE_LITELLM_MODEL,
messages=messages,
api_key=LARGE_LITELLM_API_KEY,
api_version=LARGE_LITELLM_API_VERSION,
api_base=LARGE_LITELLM_API_BASE,
)

sample_quotes_json_string = response.choices[0].message.content
Expand Down Expand Up @@ -731,9 +746,12 @@ def generate_aspect_summary(db: Session, aspect_id: str, language: str) -> None:
)

messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(
model="gpt-4o",
messages=messages, # type: ignore
response = completion(
model=SMALL_LITELLM_MODEL,
messages=messages,
api_key=SMALL_LITELLM_API_KEY,
api_version=SMALL_LITELLM_API_VERSION,
api_base=SMALL_LITELLM_API_BASE,
)

short_summary = response.choices[0].message.content
Expand All @@ -754,9 +772,12 @@ def generate_aspect_summary(db: Session, aspect_id: str, language: str) -> None:
)

messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(
model="gpt-4o",
messages=messages, # type: ignore
response = completion(
model=SMALL_LITELLM_MODEL,
messages=messages,
api_key=SMALL_LITELLM_API_KEY,
api_version=SMALL_LITELLM_API_VERSION,
api_base=SMALL_LITELLM_API_BASE,
)

long_summary = response.choices[0].message.content
Expand Down Expand Up @@ -910,9 +931,12 @@ def generate_view_extras(db: Session, view_id: str, language: str) -> ViewModel:

messages = [{"role": "user", "content": prompt}]

response = client.chat.completions.create(
model="gpt-4o",
messages=messages, # type: ignore
response = completion(
model=SMALL_LITELLM_MODEL,
messages=messages,
api_key=SMALL_LITELLM_API_KEY,
api_version=SMALL_LITELLM_API_VERSION,
api_base=SMALL_LITELLM_API_BASE,
)

view.summary = response.choices[0].message.content
Expand Down Expand Up @@ -943,9 +967,12 @@ def generate_insight_extras(db: Session, insight_id: str, language: str) -> None

title_messages = [{"role": "user", "content": title_prompt}]

title_response = client.chat.completions.create(
model="gpt-4o",
messages=title_messages, # type: ignore
title_response = completion(
model=SMALL_LITELLM_MODEL,
messages=title_messages,
api_key=SMALL_LITELLM_API_KEY,
api_version=SMALL_LITELLM_API_VERSION,
api_base=SMALL_LITELLM_API_BASE,
)

if not title_response.choices:
Expand All @@ -966,9 +993,12 @@ def generate_insight_extras(db: Session, insight_id: str, language: str) -> None

summary_messages = [{"role": "user", "content": summary_prompt}]

summary_response = client.chat.completions.create(
model="gpt-4o",
messages=summary_messages, # type: ignore
summary_response = completion(
model=SMALL_LITELLM_MODEL,
messages=summary_messages,
api_key=SMALL_LITELLM_API_KEY,
api_version=SMALL_LITELLM_API_VERSION,
api_base=SMALL_LITELLM_API_BASE,
)

summary = summary_response.choices[0].message.content
Expand Down Expand Up @@ -1011,10 +1041,12 @@ def generate_conversation_summary(db: Session, conversation_id: str, language: s

messages = [{"role": "user", "content": prompt}]

# FIXME: use litellm
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=messages, # type: ignore
response = completion(
model=SMALL_LITELLM_MODEL,
messages=messages,
api_key=SMALL_LITELLM_API_KEY,
api_version=SMALL_LITELLM_API_VERSION,
api_base=SMALL_LITELLM_API_BASE,
)

conversation.summary = response.choices[0].message.content
Expand Down
1 change: 1 addition & 0 deletions echo/server/dembrane/reply_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ async def generate_reply_for_conversation(
in_response_section = False

# Stream the response
# FIXME: reply
response = await litellm.acompletion(
model="anthropic/claude-3-5-sonnet-20240620",
messages=[
Expand Down
Loading