3 - PB/MVP/src/backend/adapter/out/ask_chatbot/ask_chatbot_langchain.py
@@ -1,3 +1,6 @@
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.base import Chain

from domain.chat.message_response import MessageResponse
from domain.chat.message import Message
from domain.chat.chat_id import ChatId
@@ -8,10 +11,25 @@

from datetime import datetime, timezone

from adapter.out.persistence.postgres.chat_history_manager import ChatHistoryManager


class AskChatbotLangchain(AskChatbotPort):
def __init__(self, chain: Chain, chatHistoryManager: ChatHistoryManager):
self.chain = chain
self.chatHistoryManager = chatHistoryManager

    def askChatbot(self, message: Message, chatId: ChatId) -> MessageResponse:
        inputs = {"question": message.content}
        if chatId is not None:
            self.chain.memory = self.chatHistoryManager.getChatHistory(chatId)
        else:
            # Without memory the chain still expects a chat_history input.
            inputs["chat_history"] = []
        # return_source_documents=True gives the chain two output keys, so
        # chain.run() would raise; invoke the chain directly instead.
        result = self.chain(inputs)
        answer = result["answer"]
        return MessageResponse(
            True,
            Message(content=answer,
                    timestamp=datetime.now(timezone.utc),
                    relevantDocuments=[DocumentId("DocumentoRilevante.pdf")],
                    sender=MessageSender.CHATBOT),
            chatId
        )
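Reviewer note: because the chain is built with `return_source_documents=True` (see `configuration_manager.py` below), it returns a dict with two output keys rather than a single string. A minimal, runnable illustration of that output shape, using a stand-in chain (nothing below is the real LangChain API):

```python
# Stand-in chain illustrating the two-output-key shape that
# ConversationalRetrievalChain produces when return_source_documents=True.
def fake_chain(inputs: dict) -> dict:
    return {"answer": "stub reply", "source_documents": []}

result = fake_chain({"question": "What does the contract say about renewals?"})
print(result["answer"])            # the LLM's reply
print(result["source_documents"])  # Documents retrieved as context
```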
38 changes: 36 additions & 2 deletions 3 - PB/MVP/src/backend/adapter/out/configuration_manager.py
@@ -1,5 +1,9 @@
import os

from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import HuggingFaceEndpoint
from langchain_openai import OpenAI

from application.port.out.documents_uploader_port import DocumentsUploaderPort
from application.port.out.embeddings_uploader_port import EmbeddingsUploaderPort
from application.port.out.delete_documents_port import DeleteDocumentsPort
@@ -32,6 +36,7 @@
from adapter.out.upload_documents.documents_uploader_AWSS3 import DocumentsUploaderAWSS3
from adapter.out.get_documents.get_documents_content_awss3 import GetDocumentsContentAWSS3
from adapter.out.ask_chatbot.ask_chatbot_langchain import AskChatbotLangchain
from adapter.out.persistence.postgres.chat_history_manager import ChatHistoryManager


class ConfigurationException(Exception):
@@ -153,5 +158,34 @@ def getGetDocumentsContentPort(self) -> GetDocumentsContentPort:
return configuredDocumentStore

def getAskChatbotPort(self) -> AskChatbotPort:
        configuration = self.postgresConfigurationORM.getConfigurationChoices(os.environ.get('USER_ID'))

        if configuration.vectorStore == PostgresVectorStoreType.PINECONE:
            configuredVectorStore = VectorStorePineconeManager()
        elif configuration.vectorStore == PostgresVectorStoreType.CHROMA_DB:
            configuredVectorStore = VectorStoreChromaDBManager()
        else:
            raise ConfigurationException('Vector store not configured.')

        if configuration.embeddingModel == PostgresEmbeddingModelType.HUGGINGFACE:
            configuredEmbeddingModel = HuggingFaceEmbeddingModel()
        elif configuration.embeddingModel == PostgresEmbeddingModelType.OPENAI:
            configuredEmbeddingModel = OpenAIEmbeddingModel()
        else:
            raise ConfigurationException('Embedding model not configured.')

        # Each branch reads the Docker secret matching the selected provider;
        # .strip() drops the trailing newline that would otherwise corrupt the key.
        if configuration.LLMModel == PostgresLLMModelType.OPENAI:
            with open('/run/secrets/openai_key', 'r') as file:
                openai_key = file.read().strip()
            configuredLLMModel = OpenAI(openai_api_key=openai_key, model_name="gpt-3.5-turbo-instruct", temperature=0.3)
        elif configuration.LLMModel == PostgresLLMModelType.HUGGINGFACE:
            with open('/run/secrets/huggingface_key', 'r') as file:
                huggingface_key = file.read().strip()
            configuredLLMModel = HuggingFaceEndpoint(repo_id="google/flan-t5-large", temperature=0.3, huggingfacehub_api_token=huggingface_key)
        else:
            raise ConfigurationException('LLM model not configured.')

        chain = ConversationalRetrievalChain.from_llm(
            llm=configuredLLMModel,
            retriever=configuredVectorStore.getRetriever(configuredEmbeddingModel),
            return_source_documents=True
        )
        return AskChatbotLangchain(chain=chain, chatHistoryManager=ChatHistoryManager())
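Reviewer note: the secret-reading pattern above recurs in several adapters (openai_key, huggingface_key, chromadb_collection). A small helper would make the `.strip()` guard uniform; the name `read_secret` is mine, not part of the PR:

```python
# Hypothetical helper, not in the PR: reads a Docker secret file and strips
# the trailing newline that would otherwise corrupt an API key.
def read_secret(path: str) -> str:
    with open(path, 'r') as file:
        return file.read().strip()

# openai_key = read_secret('/run/secrets/openai_key')
```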
3 - PB/MVP/src/backend/adapter/out/persistence/postgres/chat_history_manager.py
@@ -0,0 +1,14 @@
import os

from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import PostgresChatMessageHistory

from domain.chat.chat_id import ChatId


class ChatHistoryManager:
    def getChatHistory(self, chatId: ChatId) -> ConversationBufferMemory:
        history = PostgresChatMessageHistory(session_id=str(chatId.id),
                                             connection_string=os.environ.get('DATABASE_URL'))
        # chain.memory expects a BaseMemory, not a raw message history, so the
        # Postgres-backed history is wrapped in a ConversationBufferMemory.
        return ConversationBufferMemory(chat_memory=history,
                                        memory_key="chat_history",
                                        return_messages=True,
                                        output_key="answer")
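Reviewer note: a quick round-trip sketch of the Postgres-backed history (session id and connection string are illustrative, not from the PR):

```python
from langchain_community.chat_message_histories import PostgresChatMessageHistory

# Each add_* call writes a row, so the conversation survives process restarts.
history = PostgresChatMessageHistory(
    session_id="42",
    connection_string="postgresql://user:pass@localhost:5432/db",
)
history.add_user_message("What does clause 4 cover?")
history.add_ai_message("Clause 4 covers termination notice periods.")
print(history.messages)  # [HumanMessage(...), AIMessage(...)]
```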
3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/ (VectorStoreChromaDBManager)
@@ -1,17 +1,22 @@
import os
from typing import List
import chromadb
from langchain_core.retrievers import BaseRetriever

from adapter.out.persistence.vector_store.vector_store_manager import VectorStoreManager
from adapter.out.persistence.vector_store.vector_store_document_operation_response import VectorStoreDocumentOperationResponse
from adapter.out.persistence.vector_store.vector_store_document_status_response import VectorStoreDocumentStatusResponse
from langchain_core.documents.base import Document as LangchainCoreDocument
from langchain_community.vectorstores import Chroma
from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel


class VectorStoreChromaDBManager(VectorStoreManager):
    def __init__(self):
        self.chromadb = chromadb.PersistentClient(path=os.environ.get("CHROMA_DB_PATH"))
        with open('/run/secrets/chromadb_collection', 'r') as file:
            chromadbCollection = file.read().strip()
        self.collection = self.chromadb.get_or_create_collection(chromadbCollection)

def getDocumentsStatus(self, documentsIds: List[str]) -> List[VectorStoreDocumentStatusResponse]:
vectorStoreDocumentStatusResponses = []
@@ -106,4 +111,7 @@ def uploadEmbeddings(self, documentsIds: List[str], documentsChunks: List[List[L
vectorStoreDocumentOperationResponses.append(VectorStoreDocumentOperationResponse(documentId, False, "Error while uploading embeddings."))
continue
        return vectorStoreDocumentOperationResponses

    def getRetriever(self, embeddingModel: LangchainEmbeddingModel) -> BaseRetriever:
        # Note: Chroma's embedding_function parameter is typed as an Embeddings
        # object; passing the bare embed_query callable relies on legacy handling.
        return Chroma(client=self.chromadb,
                      collection_name=self.collection.name,
                      embedding_function=embeddingModel.getEmbedQueryFunction()).as_retriever()
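Reviewer note: hypothetical usage of the new retriever hook, assuming an already-configured manager and embedding model (the query text is illustrative):

```python
retriever = VectorStoreChromaDBManager().getRetriever(HuggingFaceEmbeddingModel())
docs = retriever.get_relevant_documents("termination clause")  # top-k Documents
for doc in docs:
    print(doc.page_content[:80])
```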
3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_manager.py
@@ -1,10 +1,15 @@
from typing import List

from langchain_core.retrievers import BaseRetriever

from adapter.out.persistence.vector_store.vector_store_document_operation_response import VectorStoreDocumentOperationResponse
from adapter.out.persistence.vector_store.vector_store_document_status_response import VectorStoreDocumentStatusResponse

from langchain_core.documents.base import Document as LangchainCoreDocument

from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel


class VectorStoreManager:
def getDocumentsStatus(self, documentsIds: List[str]) -> List[VectorStoreDocumentStatusResponse]:
pass
@@ -19,4 +24,7 @@ def enableDocuments(self, documentsIds: List[str]) -> List[VectorStoreDocumentOp
pass

def uploadEmbeddings(self, documentsIds: List[str], documentsChunks: List[List[LangchainCoreDocument]], documentsEmbeddings: List[List[List[float]]]) -> List[VectorStoreDocumentOperationResponse]:
pass

    def getRetriever(self, embeddingModel: LangchainEmbeddingModel) -> BaseRetriever:
        pass
3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/ (VectorStorePineconeManager)
@@ -1,10 +1,15 @@
from typing import List

from langchain_core.retrievers import BaseRetriever
from pinecone import Pinecone
from pinecone import PineconeApiException
from adapter.out.persistence.vector_store.vector_store_manager import VectorStoreManager
from adapter.out.persistence.vector_store.vector_store_document_operation_response import VectorStoreDocumentOperationResponse
from adapter.out.persistence.vector_store.vector_store_document_status_response import VectorStoreDocumentStatusResponse
from langchain_core.documents.base import Document as LangchainCoreDocument
from langchain_community.vectorstores import Pinecone as PineconeLangchain
from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel


class VectorStorePineconeManager(VectorStoreManager):
def __init__(self):
@@ -180,4 +185,6 @@ def uploadEmbeddings(self, documentsId: List[str], documentsChunks: List[List[La
vectorStoreDocumentOperationResponses.append(VectorStoreDocumentOperationResponse(documentId, False, f"Error while uploading embeddings: {e}"))

        return vectorStoreDocumentOperationResponses

    def getRetriever(self, embeddingModel: LangchainEmbeddingModel) -> BaseRetriever:
        # The langchain_community Pinecone wrapper accepts (index, embedding, text_key);
        # the bare embed_query callable is passed here as the embedding argument.
        return PineconeLangchain(self.index, embeddingModel.getEmbedQueryFunction(), "text").as_retriever()
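Reviewer note: neither getRetriever exposes retrieval parameters; if tuning is wanted later, as_retriever accepts search kwargs. A sketch, where `index` and `embed_query_fn` are placeholders for the manager's configured values:

```python
# k controls how many chunks the chain receives as context (assumed value).
retriever = PineconeLangchain(index, embed_query_fn, "text").as_retriever(
    search_kwargs={"k": 4}
)
```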
3 - PB/MVP/src/backend/adapter/out/upload_documents/ (HuggingFaceEmbeddingModel)
@@ -16,4 +16,6 @@ def embedDocument(self, documentChunks: List[str]) -> List[List[float]]:
try:
return self.model.embed_documents(documentChunks)
except Exception as e:
            return []

    def getEmbedQueryFunction(self):
        return self.model.embed_query
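Reviewer note: the new accessor hands the chain a query-embedding function. A sketch of the distinction, assuming `model` is an already-configured embedding model instance:

```python
# embed_query maps one string to a single vector, while embed_documents maps
# a list of chunks to a list of vectors.
embed = model.getEmbedQueryFunction()
vector = embed("example query")  # List[float]
print(len(vector))               # embedding dimensionality
```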
3 - PB/MVP/src/backend/adapter/out/upload_documents/langchain_embedding_model.py
@@ -2,4 +2,6 @@

class LangchainEmbeddingModel:
    def embedDocument(self, documentChunks: List[str]) -> List[List[float]]:
        pass

    def getEmbedQueryFunction(self):
        pass
3 - PB/MVP/src/backend/adapter/out/upload_documents/ (OpenAIEmbeddingModel)
@@ -1,6 +1,6 @@
from typing import List

from langchain_openai import OpenAIEmbeddings
from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel


@@ -17,3 +17,5 @@ def embedDocument(self, documentChunks: List[str]) -> List[List[float]]:
return self.model.embed_documents(documentChunks)
except Exception as e:
            return []

    def getEmbedQueryFunction(self):
        return self.model.embed_query
6 changes: 3 additions & 3 deletions 3 - PB/MVP/src/backend/domain/chat/chat_filter.py
@@ -1,4 +1,4 @@

from dataclasses import dataclass


@dataclass
class ChatFilter:
    searchFilter: str
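Reviewer note: the `@dataclass` rewrite generates the same `__init__` the class previously defined by hand:

```python
from domain.chat.chat_filter import ChatFilter

f = ChatFilter(searchFilter="contract")
assert f.searchFilter == "contract"
```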
6 changes: 5 additions & 1 deletion 3 - PB/MVP/src/backend/requirements.txt
@@ -9,13 +9,17 @@ InstructorEmbedding
langchain
langchain_community
langchain_core
langchain-openai
openai
pypdf
PyPDF2
pytest
python-dotenv
pinecone-client
#psycopg2-binary
#psycopg
#psycopg-c
#pytest-mock
# sentence-transformers
tiktoken
# torch