diff --git a/3 - PB/MVP/src/backend/adapter/out/ask_chatbot/ask_chatbot_langchain.py b/3 - PB/MVP/src/backend/adapter/out/ask_chatbot/ask_chatbot_langchain.py index b3021858..e24868c0 100644 --- a/3 - PB/MVP/src/backend/adapter/out/ask_chatbot/ask_chatbot_langchain.py +++ b/3 - PB/MVP/src/backend/adapter/out/ask_chatbot/ask_chatbot_langchain.py @@ -1,3 +1,6 @@ +from langchain.chains import ConversationalRetrievalChain +from langchain.chains.base import Chain + from domain.chat.message_response import MessageResponse from domain.chat.message import Message from domain.chat.chat_id import ChatId @@ -8,10 +11,25 @@ from datetime import datetime, timezone +from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel +from adapter.out.persistence.postgres.chat_history_manager import ChatHistoryManager + + class AskChatbotLangchain(AskChatbotPort): + def __init__(self, chain: Chain, chatHistoryManager: ChatHistoryManager): + self.chain = chain + self.chatHistoryManager = chatHistoryManager def askChatbot(self, message: Message, chatId: ChatId) -> MessageResponse: + embeddingModel = LangchainEmbeddingModel() + if chatId is not None: + self.chain.memory = self.chatHistoryManager.getChatHistory(chatId) + answer = self.chain.run(message.content) + print(answer, flush=True) return MessageResponse( True, - Message(content="I'm a chatbot, this is my response.", timestamp=datetime.now(timezone.utc), relevantDocuments=[DocumentId("DocumentoRilevante.pdf")], sender=MessageSender.CHATBOT), + Message(content=answer, + timestamp=datetime.now(timezone.utc), + relevantDocuments=[DocumentId("DocumentoRilevante.pdf")], + sender=MessageSender.CHATBOT), chatId ) \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/adapter/out/configuration_manager.py b/3 - PB/MVP/src/backend/adapter/out/configuration_manager.py index 9e2518f5..cae7c0cc 100644 --- a/3 - PB/MVP/src/backend/adapter/out/configuration_manager.py +++ b/3 - 
PB/MVP/src/backend/adapter/out/configuration_manager.py @@ -1,5 +1,9 @@ import os +from langchain.chains import ConversationalRetrievalChain +from langchain_community.llms import HuggingFaceEndpoint +from langchain_openai import OpenAI + from application.port.out.documents_uploader_port import DocumentsUploaderPort from application.port.out.embeddings_uploader_port import EmbeddingsUploaderPort from application.port.out.delete_documents_port import DeleteDocumentsPort @@ -32,6 +36,7 @@ from adapter.out.upload_documents.documents_uploader_AWSS3 import DocumentsUploaderAWSS3 from adapter.out.get_documents.get_documents_content_awss3 import GetDocumentsContentAWSS3 from adapter.out.ask_chatbot.ask_chatbot_langchain import AskChatbotLangchain +from adapter.out.persistence.postgres.chat_history_manager import ChatHistoryManager class ConfigurationException(Exception): @@ -153,5 +158,34 @@ def getGetDocumentsContentPort(self) -> GetDocumentsContentPort: return configuredDocumentStore def getAskChatbotPort(self) -> AskChatbotPort: - #TODO - return AskChatbotLangchain() \ No newline at end of file + configuration = self.postgresConfigurationORM.getConfigurationChoices(os.environ.get('USER_ID')) + if configuration.vectorStore == PostgresVectorStoreType.PINECONE: + configuredVectorStore = VectorStorePineconeManager() + elif configuration.vectorStore == PostgresVectorStoreType.CHROMA_DB: + configuredVectorStore = VectorStoreChromaDBManager() + else: + raise ConfigurationException('Vector store non configurato.') + if configuration.embeddingModel == PostgresEmbeddingModelType.HUGGINGFACE: + configuredEmbeddingModel = HuggingFaceEmbeddingModel() + elif configuration.embeddingModel == PostgresEmbeddingModelType.OPENAI: + configuredEmbeddingModel = OpenAIEmbeddingModel() + else: + raise ConfigurationException('Embeddings model non configurato.') + if configuration.LLMModel == PostgresLLMModelType.HUGGINGFACE: + with open('/run/secrets/huggingface_key', 'r') as file: + hugging_face = 
file.read() + configuredLLMModel = HuggingFaceEndpoint(repo_id="google/flan-t5-large", temperature=0.3, token=hugging_face) + elif configuration.LLMModel == PostgresLLMModelType.OPENAI: + with open('/run/secrets/openai_key', 'r') as file: + openai_key = file.read() + configuredLLMModel = OpenAI(openai_api_key= openai_key, model_name="gpt-3.5-turbo-instruct", temperature=0.3) + else: + raise ConfigurationException('LLM model non configurato.') + + + chain = ConversationalRetrievalChain.from_llm( + llm=configuredLLMModel, + retriever=configuredVectorStore.getRetriever(configuredEmbeddingModel), + return_source_documents=True + ) + return AskChatbotLangchain(chain=chain, chatHistoryManager=ChatHistoryManager()) diff --git a/3 - PB/MVP/src/backend/adapter/out/persistence/postgres/chat_history_manager.py b/3 - PB/MVP/src/backend/adapter/out/persistence/postgres/chat_history_manager.py new file mode 100644 index 00000000..c6f8c891 --- /dev/null +++ b/3 - PB/MVP/src/backend/adapter/out/persistence/postgres/chat_history_manager.py @@ -0,0 +1,14 @@ +from typing import List + +from langchain_community.chat_message_histories import PostgresChatMessageHistory +from langchain_core.messages import BaseMessage +import os + +from domain.chat.chat_id import ChatId +from langchain.memory import ConversationBufferMemory + +class ChatHistoryManager: + def getChatHistory(self, chatId:ChatId)-> PostgresChatMessageHistory: + history = PostgresChatMessageHistory(session_id=str(chatId.id), + connection_string=os.environ.get('DATABASE_URL')) + return history diff --git a/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_chromaDB_manager.py b/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_chromaDB_manager.py index d575e2e3..d08b1da9 100644 --- a/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_chromaDB_manager.py +++ b/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_chromaDB_manager.py @@ 
-1,17 +1,22 @@ import os from typing import List import chromadb +from langchain_core.retrievers import BaseRetriever + from adapter.out.persistence.vector_store.vector_store_manager import VectorStoreManager from adapter.out.persistence.vector_store.vector_store_document_operation_response import VectorStoreDocumentOperationResponse from adapter.out.persistence.vector_store.vector_store_document_status_response import VectorStoreDocumentStatusResponse from langchain_core.documents.base import Document as LangchainCoreDocument +from langchain_community.vectorstores import Chroma +from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel + class VectorStoreChromaDBManager(VectorStoreManager): def __init__(self): - cromadb = chromadb.PersistentClient(path=os.environ.get("CHROMA_DB_PATH")) + self.chromadb = chromadb.PersistentClient(path=os.environ.get("CHROMA_DB_PATH")) with open('/run/secrets/chromadb_collection', 'r') as file: chromadbCollection = file.read() - self.collection = cromadb.get_or_create_collection(chromadbCollection) + self.collection = self.chromadb.get_or_create_collection(chromadbCollection) def getDocumentsStatus(self, documentsIds: List[str]) -> List[VectorStoreDocumentStatusResponse]: vectorStoreDocumentStatusResponses = [] @@ -106,4 +111,7 @@ def uploadEmbeddings(self, documentsIds: List[str], documentsChunks: List[List[L vectorStoreDocumentOperationResponses.append(VectorStoreDocumentOperationResponse(documentId, False, "Errore nel caricamento degli embeddings.")) continue return vectorStoreDocumentOperationResponses - \ No newline at end of file + + + def getRetriever(self, embeddingModel : LangchainEmbeddingModel) -> BaseRetriever: + return Chroma(client=self.chromadb, collection_name = self.collection.name, embedding_function=embeddingModel.getEmbedQueryFunction()).as_retriever() \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_manager.py b/3 - 
PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_manager.py index 536823cf..e5c140f1 100644 --- a/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_manager.py +++ b/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_manager.py @@ -1,10 +1,15 @@ from typing import List +from langchain_core.retrievers import BaseRetriever + from adapter.out.persistence.vector_store.vector_store_document_operation_response import VectorStoreDocumentOperationResponse from adapter.out.persistence.vector_store.vector_store_document_status_response import VectorStoreDocumentStatusResponse from langchain_core.documents.base import Document as LangchainCoreDocument +from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel + + class VectorStoreManager: def getDocumentsStatus(self, documentsIds: List[str]) -> List[VectorStoreDocumentStatusResponse]: pass @@ -19,4 +24,7 @@ def enableDocuments(self, documentsIds: List[str]) -> List[VectorStoreDocumentOp pass def uploadEmbeddings(self, documentsIds: List[str], documentsChunks: List[List[LangchainCoreDocument]], documentsEmbeddings: List[List[List[float]]]) -> List[VectorStoreDocumentOperationResponse]: + pass + + def getRetriever(self, embeddingModel : LangchainEmbeddingModel) -> BaseRetriever: pass \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_pinecone_manager.py b/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_pinecone_manager.py index ab32dd66..fb92290f 100644 --- a/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_pinecone_manager.py +++ b/3 - PB/MVP/src/backend/adapter/out/persistence/vector_store/vector_store_pinecone_manager.py @@ -1,10 +1,15 @@ from typing import List + +from langchain_core.retrievers import BaseRetriever from pinecone import Pinecone from pinecone import PineconeApiException from 
adapter.out.persistence.vector_store.vector_store_manager import VectorStoreManager from adapter.out.persistence.vector_store.vector_store_document_operation_response import VectorStoreDocumentOperationResponse from adapter.out.persistence.vector_store.vector_store_document_status_response import VectorStoreDocumentStatusResponse from langchain_core.documents.base import Document as LangchainCoreDocument +from langchain_community.vectorstores import Pinecone as PineconeLangchain +from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel + class VectorStorePineconeManager(VectorStoreManager): def __init__(self): @@ -180,4 +185,6 @@ def uploadEmbeddings(self, documentsId: List[str], documentsChunks: List[List[La vectorStoreDocumentOperationResponses.append(VectorStoreDocumentOperationResponse(documentId, False, f"Errore nel caricamento degli embeddings: {e}")) return vectorStoreDocumentOperationResponses - \ No newline at end of file + + def getRetriever(self, embeddingModel : LangchainEmbeddingModel) -> BaseRetriever: + return PineconeLangchain(self.index, embeddingModel.getEmbedQueryFunction(), "text").as_retriever() \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/adapter/out/upload_documents/huggingface_embedding_model.py b/3 - PB/MVP/src/backend/adapter/out/upload_documents/huggingface_embedding_model.py index 0f73fa63..1b0722c1 100644 --- a/3 - PB/MVP/src/backend/adapter/out/upload_documents/huggingface_embedding_model.py +++ b/3 - PB/MVP/src/backend/adapter/out/upload_documents/huggingface_embedding_model.py @@ -16,4 +16,6 @@ def embedDocument(self, documentChunks: List[str]) -> List[List[float]]: try: return self.model.embed_documents(documentChunks) except Exception as e: - return [] \ No newline at end of file + return [] + def getEmbedQueryFunction(self): + return self.model.embed_query \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/adapter/out/upload_documents/langchain_embedding_model.py 
b/3 - PB/MVP/src/backend/adapter/out/upload_documents/langchain_embedding_model.py index b003704c..6ad21bea 100644 --- a/3 - PB/MVP/src/backend/adapter/out/upload_documents/langchain_embedding_model.py +++ b/3 - PB/MVP/src/backend/adapter/out/upload_documents/langchain_embedding_model.py @@ -2,4 +2,6 @@ class LangchainEmbeddingModel: def embedDocument(self, documentChunks: List[str]) -> List[List[float]]: + pass + def getEmbedQueryFunction(self): pass \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/adapter/out/upload_documents/openai_embedding_model.py b/3 - PB/MVP/src/backend/adapter/out/upload_documents/openai_embedding_model.py index c8807d1f..5a00d526 100644 --- a/3 - PB/MVP/src/backend/adapter/out/upload_documents/openai_embedding_model.py +++ b/3 - PB/MVP/src/backend/adapter/out/upload_documents/openai_embedding_model.py @@ -1,6 +1,6 @@ from typing import List -from langchain_community.embeddings import OpenAIEmbeddings +from langchain_openai import OpenAIEmbeddings from adapter.out.upload_documents.langchain_embedding_model import LangchainEmbeddingModel @@ -17,3 +17,5 @@ def embedDocument(self, documentChunks: List[str]) -> List[List[float]]: return self.model.embed_documents(documentChunks) except Exception as e: return [] + def getEmbedQueryFunction(self): + return self.model.embed_query \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/domain/chat/chat_filter.py b/3 - PB/MVP/src/backend/domain/chat/chat_filter.py index 818d75d7..2fa19bd4 100644 --- a/3 - PB/MVP/src/backend/domain/chat/chat_filter.py +++ b/3 - PB/MVP/src/backend/domain/chat/chat_filter.py @@ -1,4 +1,4 @@ - +from dataclasses import dataclass +@dataclass class ChatFilter: - def __init__(self, searchFilter:str): - self.searchFilter = searchFilter \ No newline at end of file + searchFilter:str \ No newline at end of file diff --git a/3 - PB/MVP/src/backend/requirements.txt b/3 - PB/MVP/src/backend/requirements.txt index c1654090..e084f7af 100644 --- a/3 - 
PB/MVP/src/backend/requirements.txt @@ -9,13 +9,17 @@ InstructorEmbedding langchain langchain_community langchain_core +langchain-openai openai pypdf PyPDF2 pytest python-dotenv pinecone-client -psycopg2-binary +#psycopg2-binary +psycopg +#psycopg-c +#pytest-mock # sentence-transformers tiktoken # torch \ No newline at end of file