From d04c4076e8fcfe05057290b4e070db534b178b31 Mon Sep 17 00:00:00 2001
From: Eli4479
Date: Mon, 2 Jun 2025 13:47:35 +0530
Subject: [PATCH 1/5] Add Weaviate schema creation and population scripts

- Implemented `create_schemas.py` to define schemas for user profiles, code chunks, and interactions in Weaviate.
- Added `populate_db.py` to insert sample data into the Weaviate collections.
- Created unit tests in `test_supabase.py` for user, interaction, code chunk, and repository models, including CRUD operations.
---
 backend/app/db/supabase/auth.py               |   24 +
 backend/app/db/supabase/supabase_client.py    |   17 +
 backend/app/db/weaviate/weaviate_client.py    |   20 +
 backend/app/model/__init__.py                 |    0
 backend/app/model/supabase/__init__.py        |    0
 backend/app/model/supabase/models.py          |  183 +++
 backend/app/model/weaviate/models.py          |   35 +
 backend/app/scripts/supabase/populate_db.sql  | 1057 +++++++++++++++++
 .../app/scripts/weaviate/create_schemas.py    |   56 +
 backend/app/scripts/weaviate/populate_db.py   |  230 ++++
 backend/app/tests/test_supabase.py            |  289 +++++
 11 files changed, 1911 insertions(+)
 create mode 100644 backend/app/db/supabase/auth.py
 create mode 100644 backend/app/db/supabase/supabase_client.py
 create mode 100644 backend/app/db/weaviate/weaviate_client.py
 create mode 100644 backend/app/model/__init__.py
 create mode 100644 backend/app/model/supabase/__init__.py
 create mode 100644 backend/app/model/supabase/models.py
 create mode 100644 backend/app/model/weaviate/models.py
 create mode 100644 backend/app/scripts/supabase/populate_db.sql
 create mode 100644 backend/app/scripts/weaviate/create_schemas.py
 create mode 100644 backend/app/scripts/weaviate/populate_db.py
 create mode 100644 backend/app/tests/test_supabase.py

diff --git a/backend/app/db/supabase/auth.py b/backend/app/db/supabase/auth.py
new file mode 100644
index 00000000..3ef7bd58
--- /dev/null
+++ b/backend/app/db/supabase/auth.py
@@ -0,0 +1,24 @@
+from app.db.supabase.supabase_client import supabase_client
+
+async def login_with_github():  # Starts GitHub OAuth; returns {"url": ...} taken from the sign-in result
+    result = supabase_client.auth.sign_in_with_oauth({
+        "provider": "github",
+        "options": {
+            "redirect_to": "http://localhost:3000/home"  # NOTE(review): hard-coded local URL - confirm before deploying elsewhere
+        }
+    })
+    return {"url": result.url}
+
+async def login_with_discord():  # Same flow as login_with_github, with provider "discord"
+    result = supabase_client.auth.sign_in_with_oauth({
+        "provider": "discord",
+        "options": {
+            "redirect_to": "http://localhost:3000/home"  # NOTE(review): hard-coded local URL - confirm before deploying elsewhere
+        }
+    })
+    return {"url": result.url}
+
+async def logout(access_token: str):  # Signs out the session identified by access_token
+    supabase_client.auth.set_session(access_token, refresh_token="")  # Sets the session on the module-level client imported above; NOTE(review): confirm this is safe with concurrent requests
+    supabase_client.auth.sign_out()
+    return {"message": "User logged out successfully"}
diff --git a/backend/app/db/supabase/supabase_client.py b/backend/app/db/supabase/supabase_client.py
new file mode 100644
index 00000000..4eb72d1d
--- /dev/null
+++ b/backend/app/db/supabase/supabase_client.py
@@ -0,0 +1,17 @@
+import os
+from dotenv import load_dotenv
+from supabase import create_client
+
+load_dotenv()
+
+SUPABASE_URL = os.getenv("SUPABASE_URL")
+SUPABASE_KEY = os.getenv("SUPABASE_KEY")
+
+if SUPABASE_URL is None or SUPABASE_KEY is None:  # Fail at import time when either setting is missing
+    raise ValueError("SUPABASE_URL and SUPABASE_KEY must be set in environment variables.")
+
+supabase_client = create_client(SUPABASE_URL, SUPABASE_KEY)  # Created once, when this module is imported
+
+
+def get_supabase_client():  # Returns the module-level client created above (no new client per call)
+    return supabase_client
diff --git a/backend/app/db/weaviate/weaviate_client.py b/backend/app/db/weaviate/weaviate_client.py
new file mode 100644
index 00000000..ff05cc2d
--- /dev/null
+++ b/backend/app/db/weaviate/weaviate_client.py
@@ -0,0 +1,20 @@
+import os
+import weaviate
+from weaviate.classes.init import Auth
+
+# Best practice: store your credentials in environment variables
+weaviate_url = os.environ["WEAVIATE_URL"]  # Required: raises KeyError at import time if missing
+weaviate_api_key = os.environ["WEAVIATE_API_KEY"]  # Required: raises KeyError at import time if missing
+weaviate_grpc_host = os.environ.get("WEAVIATE_GRPC_HOST")  # Optional: None if not set (no localhost default is applied)
+weaviate_grpc_port = os.environ.get("WEAVIATE_GRPC_PORT")  # Optional: None if not set; neither gRPC value is passed to the connection below
+
+# Connect to Weaviate Cloud
+client = weaviate.connect_to_weaviate_cloud(
+    cluster_url=weaviate_url,
+    auth_credentials=Auth.api_key(weaviate_api_key),
+    skip_init_checks=True,
+)
+
+
+def get_client():  # Returns the module-level client created above
+    return client
diff --git a/backend/app/model/__init__.py b/backend/app/model/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/app/model/supabase/__init__.py b/backend/app/model/supabase/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/app/model/supabase/models.py b/backend/app/model/supabase/models.py
new file mode 100644
index 00000000..e5f37893
--- /dev/null
+++ b/backend/app/model/supabase/models.py
@@ -0,0 +1,183 @@
+from pydantic import BaseModel
+from uuid import UUID
+from typing import Optional, List
+from datetime import datetime
+
+
+class User(BaseModel):
+    """
+    Represents a user profile with various platform integrations and metadata.
+
+    Attributes:
+        id (UUID): Unique identifier for the user.
+        created_at (datetime): Timestamp when the user was created.
+        updated_at (datetime): Timestamp when the user was last updated.
+        discord_id (Optional[str]): Discord user ID, if linked.
+        discord_username (Optional[str]): Discord username, if linked.
+        github_id (Optional[str]): GitHub user ID, if linked.
+        github_username (Optional[str]): GitHub username, if linked.
+        slack_id (Optional[str]): Slack user ID, if linked.
+        slack_username (Optional[str]): Slack username, if linked.
+        display_name (str): Display name of the user.
+        email (str): Email address of the user.
+        avatar_url (Optional[str]): URL to the user's avatar image.
+        bio (Optional[str]): Short biography or description of the user.
+        location (Optional[str]): User's location.
+        is_verified (bool): Indicates if the user is verified.
+        verification_token (Optional[str]): Token used for verifying the user.
+        verified_at (Optional[datetime]): Timestamp when the user was verified.
+        skills (Optional[List[str]]): List of user's skills.
+        github_stats (Optional[dict]): GitHub statistics for the user.
+        last_active_discord (Optional[datetime]): Last active time on Discord.
+        last_active_github (Optional[datetime]): Last active time on GitHub.
+        last_active_slack (Optional[datetime]): Last active time on Slack.
+        total_interactions_count (int): Total number of user interactions.
+        preferred_languages (List[str]): List of user's preferred programming languages.
+        weaviate_user_id (Optional[str]): Associated Weaviate user ID, if any.
+    """
+    id: UUID
+    created_at: datetime
+    updated_at: datetime
+    discord_id: Optional[str] = None
+    discord_username: Optional[str] = None
+    github_id: Optional[str] = None
+    github_username: Optional[str] = None
+    slack_id: Optional[str] = None
+    slack_username: Optional[str] = None
+    display_name: str
+    email: str
+    avatar_url: Optional[str] = None
+    bio: Optional[str] = None
+    location: Optional[str] = None
+    is_verified: bool = False
+    verification_token: Optional[str] = None
+    verified_at: Optional[datetime] = None
+    skills: Optional[List[str]] = []  # NOTE(review): populate_db.sql seeds users.skills as a JSON object {"skills": [...]}, not a list - confirm the intended shape
+    github_stats: Optional[dict] = {}
+    last_active_discord: Optional[datetime] = None
+    last_active_github: Optional[datetime] = None
+    last_active_slack: Optional[datetime] = None
+    total_interactions_count: int = 0
+    preferred_languages: List[str] = []
+    weaviate_user_id: Optional[str] = None
+
+class Repository(BaseModel):
+    """
+    Represents a GitHub repository with metadata and indexing status.
+
+    Attributes:
+        id (UUID): Unique identifier for the repository.
+        created_at (datetime): Timestamp when the repository record was created.
+        updated_at (datetime): Timestamp when the repository record was last updated.
+        github_id (Optional[int]): GitHub's unique identifier for the repository.
+        full_name (str): Full name of the repository (e.g., "owner/name").
+        name (str): Name of the repository.
+        owner (str): Owner of the repository.
+        description (Optional[str]): Description of the repository.
+        stars_count (int): Number of stars the repository has received.
+        forks_count (int): Number of times the repository has been forked.
+        open_issues_count (int): Number of open issues in the repository.
+        language (Optional[str]): Primary programming language used in the repository.
+        topics (List[str]): List of topics/tags associated with the repository.
+        is_indexed (bool): Indicates if the repository has been indexed.
+        indexed_at (Optional[datetime]): Timestamp when the repository was indexed.
+        indexing_status (Optional[str]): Current status of the indexing process.
+        total_chunks_count (int): Total number of chunks generated during indexing.
+        last_commit_hash (Optional[str]): Hash of the last commit indexed.
+        indexing_progress (Optional[dict]): Progress details of the indexing process.
+        weaviate_repo_id (Optional[str]): Identifier for the repository in Weaviate.
+    """
+    id: UUID
+    created_at: datetime
+    updated_at: datetime
+    github_id: Optional[int] = None
+    full_name: str
+    name: str
+    owner: str
+    description: Optional[str] = None
+    stars_count: int = 0
+    forks_count: int = 0
+    open_issues_count: int = 0
+    language: Optional[str] = None
+    topics: List[str] = []
+    is_indexed: bool = False
+    indexed_at: Optional[datetime] = None
+    indexing_status: Optional[str] = None
+    total_chunks_count: int = 0
+    last_commit_hash: Optional[str] = None
+    indexing_progress: Optional[dict] = None
+    weaviate_repo_id: Optional[str] = None
+
+class CodeChunk(BaseModel):
+    """
+    Represents a chunk of code extracted from a file within a repository.
+
+    Attributes:
+        id (UUID): Unique identifier for the code chunk.
+        repository_id (UUID): Identifier of the repository this chunk belongs to.
+        created_at (datetime): Timestamp when the chunk was created.
+        file_path (str): Path to the file containing the code chunk.
+        file_name (str): Name of the file containing the code chunk.
+        file_extension (Optional[str]): Extension of the file (e.g., '.py', '.js').
+        chunk_index (int): Index of the chunk within the file.
+        content (str): The actual code content of the chunk.
+        chunk_type (Optional[str]): Type of the chunk (e.g., 'function', 'class', 'block').
+        language (Optional[str]): Programming language of the code chunk.
+        lines_start (Optional[int]): Starting line number of the chunk in the file.
+        lines_end (Optional[int]): Ending line number of the chunk in the file.
+        code_metadata (Optional[dict]): Additional metadata related to the code chunk.
+        weaviate_chunk_id (Optional[str]): Identifier for the chunk in Weaviate vector database.
+    """
+    id: UUID
+    repository_id: UUID
+    created_at: datetime
+    file_path: str
+    file_name: str
+    file_extension: Optional[str] = None
+    chunk_index: int
+    content: str
+    chunk_type: Optional[str] = None
+    language: Optional[str] = None
+    lines_start: Optional[int] = None
+    lines_end: Optional[int] = None
+    code_metadata: Optional[dict] = None
+    weaviate_chunk_id: Optional[str] = None
+
+class Interaction(BaseModel):
+    """
+    Represents an interaction within a repository platform, such as a message, comment, or post.
+
+    Attributes:
+        id (UUID): Unique identifier for the interaction.
+        created_at (datetime): Timestamp when the interaction was created.
+        updated_at (datetime): Timestamp when the interaction was last updated.
+        user_id (UUID): Unique identifier of the user who performed the interaction.
+        repository_id (Optional[UUID]): Identifier of the associated repository, or None when there is none (the interactions.repository_id column is nullable and is cleared by ON DELETE SET NULL).
+        platform (str): Name of the platform where the interaction occurred (e.g., GitHub, Slack).
+        platform_specific_id (str): Platform-specific identifier for the interaction.
+        channel_id (Optional[str]): Identifier for the channel where the interaction took place, if applicable.
+        thread_id (Optional[str]): Identifier for the thread within the channel, if applicable.
+        content (str): The textual content of the interaction.
+        interaction_type (str): Type of interaction (e.g., message, comment, issue).
+        sentiment_score (Optional[float]): Sentiment analysis score of the interaction content.
+        intent_classification (Optional[str]): Classification of the user's intent in the interaction.
+        topics_discussed (List[str]): List of topics discussed in the interaction.
+        metadata (Optional[dict]): Additional metadata related to the interaction.
+        weaviate_interaction_id (Optional[str]): Identifier for the interaction in the Weaviate vector database.
+    """
+    id: UUID
+    created_at: datetime
+    updated_at: datetime
+    user_id: UUID
+    repository_id: Optional[UUID] = None  # Optional so rows whose repository was deleted (column set to NULL) still validate
+    platform: str
+    platform_specific_id: str
+    channel_id: Optional[str] = None
+    thread_id: Optional[str] = None
+    content: str
+    interaction_type: str
+    sentiment_score: Optional[float] = None
+    intent_classification: Optional[str] = None
+    topics_discussed: List[str] = []
+    metadata: Optional[dict] = None
+    weaviate_interaction_id: Optional[str] = None
diff --git a/backend/app/model/weaviate/models.py b/backend/app/model/weaviate/models.py
new file mode 100644
index 00000000..976ff84d
--- /dev/null
+++ b/backend/app/model/weaviate/models.py
@@ -0,0 +1,35 @@
+from pydantic import BaseModel, Field
+from typing import List
+
+
+class WeaviateUserProfile(BaseModel):
+    """
+    Represents a vectorized user profile for semantic search in Weaviate.
+    """
+    supabase_user_id: str = Field(..., alias="supabaseUserId")
+    profile_summary: str = Field(..., alias="profileSummary")
+    primary_languages: List[str] = Field(..., alias="primaryLanguages")
+    expertise_areas: List[str] = Field(..., alias="expertiseAreas")
+    embedding: List[float] = Field(..., description="384-dimensional vector")  # The length is stated in the description only; no length constraint is declared
+
+
+class WeaviateCodeChunk(BaseModel):
+    """
+    Vectorized representation of code chunks stored in Weaviate.
+    """
+    supabase_chunk_id: str = Field(..., alias="supabaseChunkId")
+    code_content: str = Field(..., alias="codeContent")
+    language: str
+    function_names: List[str] = Field(..., alias="functionNames")
+    embedding: List[float] = Field(..., description="384-dimensional vector")  # The length is stated in the description only; no length constraint is declared
+
+
+class WeaviateInteraction(BaseModel):
+    """
+    Vectorized interaction representation stored in Weaviate.
+    """
+    supabase_interaction_id: str = Field(..., alias="supabaseInteractionId")
+    conversation_summary: str = Field(..., alias="conversationSummary")
+    platform: str
+    topics: List[str]
+    embedding: List[float] = Field(..., description="384-dimensional vector")  # The length is stated in the description only; no length constraint is declared
diff --git a/backend/app/scripts/supabase/populate_db.sql b/backend/app/scripts/supabase/populate_db.sql
new file mode 100644
index 00000000..f627795b
--- /dev/null
+++ b/backend/app/scripts/supabase/populate_db.sql
@@ -0,0 +1,1057 @@
+-- Table: users
+-- Stores user profile and authentication information, including social platform identities and activity metadata.
+CREATE TABLE users (
+    id UUID PRIMARY KEY NOT NULL, -- No DEFAULT: every INSERT must supply the id
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+
+    discord_id TEXT UNIQUE, -- Nullable; unique when present
+    discord_username TEXT,
+
+    github_id TEXT UNIQUE, -- Nullable; unique when present
+    github_username TEXT,
+
+    slack_id TEXT UNIQUE, -- Nullable; unique when present
+    slack_username TEXT,
+
+    display_name TEXT NOT NULL,
+    email TEXT NOT NULL, -- Required, but not declared UNIQUE
+    avatar_url TEXT,
+    bio TEXT,
+    location TEXT,
+
+    is_verified BOOLEAN NOT NULL DEFAULT false,
+    verification_token TEXT,
+    verified_at TIMESTAMPTZ,
+
+    skills JSONB, -- Array or object of user skills (the seed rows in this script store an object: {"skills": [...]})
+    github_stats JSONB, -- GitHub statistics (e.g., contributions)
+
+    last_active_discord TIMESTAMPTZ,
+    last_active_github TIMESTAMPTZ,
+    last_active_slack TIMESTAMPTZ,
+
+    total_interactions_count INTEGER NOT NULL DEFAULT 0,
+    preferred_languages TEXT[], -- List of programming languages (nullable)
+
+    weaviate_user_id TEXT UNIQUE -- External vector DB reference
+);
+
+-- Table: repositories
+-- Stores metadata for code repositories, including indexing and statistics.
+CREATE TABLE repositories (
+    id UUID PRIMARY KEY NOT NULL, -- No DEFAULT: every INSERT must supply the id
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+
+    github_id BIGINT UNIQUE, -- Unique GitHub repository identifier
+    full_name TEXT NOT NULL, -- Format: owner/repo
+    name TEXT NOT NULL, -- Repository name
+    owner TEXT NOT NULL, -- Repository owner
+    description TEXT,
+
+    stars_count INTEGER NOT NULL DEFAULT 0,
+    forks_count INTEGER NOT NULL DEFAULT 0,
+    open_issues_count INTEGER NOT NULL DEFAULT 0,
+
+    language TEXT, -- Primary language
+    topics TEXT[], -- List of repository topics
+
+    is_indexed BOOLEAN NOT NULL DEFAULT false,
+    indexed_at TIMESTAMPTZ, -- When repository was indexed
+
+    indexing_status TEXT, -- Status: pending, processing, completed, failed (free text; not enforced by a constraint)
+    total_chunks_count INTEGER NOT NULL DEFAULT 0,
+
+    last_commit_hash TEXT, -- Last commit hash
+    indexing_progress JSONB, -- Progress details
+
+    weaviate_repo_id TEXT UNIQUE -- External vector DB reference
+);
+
+-- Table: code_chunks
+-- Stores segmented code blocks from repositories for analysis and retrieval.
+CREATE TABLE code_chunks (
+    id UUID PRIMARY KEY NOT NULL, -- No DEFAULT: every INSERT must supply the id
+    repository_id UUID NOT NULL REFERENCES repositories(id) ON DELETE CASCADE, -- Chunks are deleted together with their repository
+
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+
+    file_path TEXT NOT NULL, -- Full path to the file
+    file_name TEXT NOT NULL, -- File name
+    file_extension TEXT, -- File extension (e.g., .py, .js)
+
+    chunk_index INTEGER NOT NULL, -- Order of chunk in file
+    content TEXT, -- Code content (nullable)
+
+    chunk_type TEXT, -- Type: function, class, module, comment, import
+    language TEXT, -- Programming language
+
+    lines_start INTEGER, -- Start line number
+    lines_end INTEGER, -- End line number
+
+    code_metadata JSONB, -- Additional analysis data
+
+    weaviate_chunk_id TEXT UNIQUE -- External vector DB reference
+);
+
+-- Table: interactions
+-- Stores user interactions across platforms, including messages, issues, and comments.
+CREATE TABLE interactions (
+    id UUID PRIMARY KEY NOT NULL, -- No DEFAULT: every INSERT must supply the id
+    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+
+    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE, -- Interactions are deleted together with their user
+    repository_id UUID REFERENCES repositories(id) ON DELETE SET NULL, -- Nullable: set to NULL when the repository is deleted
+
+    platform TEXT NOT NULL, -- Platform: discord, github, slack
+    platform_specific_id TEXT NOT NULL, -- Platform-specific identifier
+    channel_id TEXT, -- Channel or repository reference
+    thread_id TEXT, -- Thread or conversation reference
+
+    content TEXT, -- Content of the interaction
+    interaction_type TEXT, -- Type: message, issue, pr, comment, reaction
+
+    sentiment_score FLOAT, -- Sentiment score (-1 to 1); the range is not enforced by a CHECK constraint
+    intent_classification TEXT, -- Classified intent (e.g., help_request)
+
+    topics_discussed TEXT[], -- Topics extracted from content
+    metadata JSONB, -- Additional platform-specific data
+
+    weaviate_interaction_id TEXT UNIQUE -- External vector DB reference
+);
+
+insert into
+  users (
+    id,
+    created_at,
+    updated_at,
+    discord_id,
+    discord_username,
+    github_id,
+    github_username,
+    slack_id,
+    slack_username,
+    display_name,
+    email,
+    avatar_url,
+    bio,
+    location,
+    is_verified,
+    verification_token,
+    verified_at,
+    skills,
+    github_stats,
+    last_active_discord,
+    last_active_github,
+    last_active_slack,
+    total_interactions_count,
+    preferred_languages,
+    weaviate_user_id
+  )
+values
+  (
+    '6afc59e3-18b7-4182-b42c-8210d1152b07',
+    '2025-05-05 03:56:41',
+    '2025-01-22 14:50:25',
+    '3eb13b90-4668-4257-bdd6-40fb06671ad1',
+    'donaldgarcia',
+    '16419f82-8b9d-4434-a465-e150bd9c66b3',
+    'fjohnson',
+    '9a1de644-815e-46d1-bb8f-aa1837f8a88b',
+    'hoffmanjennifer',
+    'Jennifer Cole',
+    'blakeerik@yahoo.com',
+    'https://dummyimage.com/696x569',
+    'Bill here grow gas enough analysis.
Movie win her need stop peace technology.', + 'East Steven', + true, + 'a3d70628-ece6-4fa2-bd51-66e6451b4cf3', + '2025-05-14 15:04:01', + '{"skills": ["Python", "C++", "Java"]}'::jsonb, + '{"commits": 300}'::jsonb, + '2025-04-19 03:34:26', + '2025-02-12 15:28:51', + '2025-05-13 22:32:01', + 28, + array['JavaScript', 'C++'], + 'c6a7ee39-c4b0-42cc-97c5-24a55304317f' + ), + ( + '6f990423-0d57-4c64-b191-17e53f39c799', + '2025-01-11 20:41:23', + '2025-02-14 11:26:28', + '50c187fc-ce17-4b4e-8837-b8a3d261a7ab', + 'nadams', + 'e059a0ee-9132-463e-b162-87e4e9c349e0', + 'jason76', + '757750a9-a491-40b2-aa1f-ca65e27a984d', + 'josephwright', + 'Deborah Richards', + 'jeffrey28@yahoo.com', + 'https://www.lorempixel.com/186/96', + 'Civil quite others his other life edge network. Quite boy those.', + 'Kathrynside', + true, + '87c5421e-ec24-43c5-8754-108ff4188f3f', + '2025-01-01 02:39:54', + '{"skills": ["C++", "TypeScript", "Rust"]}'::jsonb, + '{"commits": 139}'::jsonb, + '2025-04-27 07:17:02', + '2025-03-04 22:40:36', + '2025-04-05 21:04:03', + 75, + array['Go', 'Python'], + '5cec4eb5-edd9-4831-9ca3-5cfb04fc6d82' + ), + ( + '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + '2025-03-01 17:07:10', + '2025-02-16 11:55:43', + '913e4de2-e0c5-4cb8-bda9-c2a90ed42f1a', + 'millertodd', + '885f6e66-c2b6-42c5-ba5d-310011b7e948', + 'davidalvarez', + '8715a103-43da-4043-aa45-c2ab8cbfedb0', + 'ibrandt', + 'Melissa Marquez', + 'samuel87@gmail.com', + 'https://www.lorempixel.com/507/460', + 'Open discover detail. Remain arrive attack all. 
Audience draw protect Democrat car very.', + 'Stevenland', + true, + 'db20a56e-dc81-4fe7-8eda-8bbb71710434', + '2025-04-17 20:42:06', + '{"skills": ["Python", "JavaScript", "C++"]}'::jsonb, + '{"commits": 567}'::jsonb, + '2025-01-20 00:17:15', + '2025-01-10 19:45:31', + '2025-05-07 15:12:55', + 77, + array['Python', 'Rust'], + '03c72ba8-d605-4770-8a63-f881ffd0f9d5' + ), + ( + '9b56cac8-504a-4dd8-b7ba-0a5bfce7abf7', + '2025-01-19 02:38:09', + '2025-05-27 07:52:11', + '680ac07a-2a93-4d62-bc83-5dc0d9441fa5', + 'zolson', + '610461e3-2a25-4888-8f02-bad0e7067ef4', + 'gallowayjoseph', + '490617f2-747b-4dba-88fe-3ccdc8b8d9c6', + 'uhorton', + 'Kristi Higgins MD', + 'tanyariley@hotmail.com', + 'https://www.lorempixel.com/124/642', + 'Live try most arm meet surface attention attack.', + 'Stewartland', + true, + 'ff9ab5c2-9f04-4aed-b552-332702627f73', + '2025-04-20 07:44:25', + '{"skills": ["C++", "TypeScript", "Rust"]}'::jsonb, + '{"commits": 479}'::jsonb, + '2025-01-20 09:56:47', + '2025-01-15 05:25:10', + '2025-04-15 09:33:28', + 28, + array['Go', 'Rust'], + '21e8ac68-43e4-4caf-8181-a8cc369147eb' + ), + ( + '5a64824b-afcd-4586-9a25-16af29e673a3', + '2025-03-09 18:29:56', + '2025-01-14 08:35:26', + '48f4ef12-5e99-43d2-be89-6c64e117dac3', + 'samueldaniels', + 'fcbb4e59-fbdd-4f7c-9c96-e9ec4d71c366', + 'contrerasangela', + 'f05db76e-1a84-451a-a9d3-d7c7ee87905e', + 'josephpreston', + 'Brittney Campbell', + 'james48@king-odonnell.com', + 'https://placekitten.com/189/867', + 'Four capital woman. +Necessary into act away third tough. 
Along hard need involve among half value.', + 'East Michelle', + false, + '2d534dd0-cf8e-4c5a-8cc5-6569f9e8a369', + '2025-03-22 01:25:52', + '{"skills": ["TypeScript", "Python", "JavaScript"]}'::jsonb, + '{"commits": 764}'::jsonb, + '2025-01-05 19:30:46', + '2025-02-04 19:52:52', + '2025-05-24 00:38:21', + 54, + array['Java', 'TypeScript'], + 'ee49f329-c84a-4b28-950a-1b46ecab3301' + ), + ( + '26283e71-c735-4a11-9831-afd279af4a4f', + '2025-03-21 22:43:20', + '2025-05-11 01:59:36', + '3f87e362-cf8d-446a-bc2c-bb0ddd334cc7', + 'erik16', + '787f2425-dbcc-4477-89e9-db0adf465290', + 'jamessellers', + 'cb9bc326-d20e-4c17-8e20-fd1a598336e3', + 'jeffreykeller', + 'Donald Jones', + 'darlene68@yahoo.com', + 'https://placekitten.com/56/236', + 'While enter board its rock finish paper memory. Tonight couple and job mind southern.', + 'South Elizabeth', + true, + 'e71e43a6-bf85-4f0e-ad64-b56c610faa3f', + '2025-02-03 19:01:34', + '{"skills": ["JavaScript", "Java", "Python"]}'::jsonb, + '{"commits": 144}'::jsonb, + '2025-04-30 12:30:26', + '2025-04-21 12:13:21', + '2025-02-28 10:31:41', + 48, + array['Python', 'Java'], + '001a9a8b-d56f-4350-8c45-9ce267f48ad5' + ), + ( + '010d518f-362b-435b-a4de-148914bcbdb9', + '2025-03-01 00:09:35', + '2025-02-25 17:06:53', + '9479e1e6-c927-4d9b-ae0d-264835ce8841', + 'steven73', + 'e6b3c944-cb32-4e35-b922-bac282dc4c8e', + 'donnacampbell', + '55cee5db-9e87-404c-a208-6977a9f25336', + 'cardenaskaren', + 'Courtney Gonzalez', + 'sanchezthomas@gibson.org', + 'https://placeimg.com/398/786/any', + 'Imagine my indeed deal information toward. 
Watch affect thing offer local wall fear hope.', + 'West Nicholasborough', + false, + 'c2dff335-5666-4f9f-93ac-2ab974672cd9', + '2025-03-15 14:51:22', + '{"skills": ["Rust", "Java", "Python"]}'::jsonb, + '{"commits": 797}'::jsonb, + '2025-02-24 00:46:11', + '2025-05-27 00:37:11', + '2025-03-23 20:38:04', + 58, + array['Rust', 'Python'], + '78660765-14f7-4e8d-95bc-b8d04094dded' + ), + ( + 'c28e727c-f6f1-4fb1-98c4-d7230cd1c855', + '2025-01-04 18:19:57', + '2025-05-26 10:42:21', + 'f3b1025b-fff9-4585-8d55-7b618a175dfe', + 'jeffrey10', + '3f4df561-f319-4125-87f1-94f9c1156d6d', + 'maryhowell', + 'ab61a7b1-793b-4c32-a050-04943d114802', + 'elliottjeffery', + 'Pamela Jackson', + 'tamirodriguez@hickman.biz', + 'https://www.lorempixel.com/812/406', + 'Right with modern executive beyond. Fast guess few remain call. Window network recently.', + 'Christopherbury', + false, + '6a8a616f-c3b2-40d0-8edd-dfcd1e52d770', + '2025-04-28 17:38:42', + '{"skills": ["Python", "Rust", "Java"]}'::jsonb, + '{"commits": 899}'::jsonb, + '2025-04-26 18:09:08', + '2025-05-01 02:02:18', + '2025-02-13 19:46:41', + 80, + array['Rust', 'Java'], + '7354ea6f-6160-4459-85c7-504bc693da11' + ), + ( + '7771182e-ed52-4f4e-a376-0750b9854324', + '2025-03-27 23:13:45', + '2025-02-27 17:45:57', + '6dc7cac7-fd72-4050-96a9-954fdc33e1f9', + 'ashley09', + 'f295456e-1967-4f06-bd76-7e35f5c9b047', + 'colleenbaker', + '2c7f0b79-3d67-4de9-a834-e4c014c8b3b4', + 'moorericky', + 'Julie Johnson', + 'jacqueline71@hotmail.com', + 'https://dummyimage.com/478x541', + 'Name positive training step. Arrive society organization station. 
Keep light fight I evening.', + 'Rickymouth', + true, + '292bd156-db94-4570-9ac7-0ec0ab8ddeb4', + '2025-03-06 00:59:47', + '{"skills": ["C++", "Python", "TypeScript"]}'::jsonb, + '{"commits": 727}'::jsonb, + '2025-03-21 22:24:18', + '2025-05-15 16:45:13', + '2025-04-07 05:03:10', + 29, + array['TypeScript', 'Java'], + '668409e3-f1f8-443e-a99f-131849c8a43f' + ), + ( + 'e4ca4ab9-de13-42ea-a394-db08a247abb7', + '2025-04-17 19:38:09', + '2025-01-08 03:01:15', + '409d3602-5084-4242-968b-1625746f7891', + 'darrell68', + 'a85c6e4a-004b-4fab-bcf5-6188d32e6dcd', + 'mgutierrez', + 'c1a6423b-9f64-4eed-9c9d-927d84b871bb', + 'wgarrett', + 'Derek Anderson', + 'fgilmore@gmail.com', + 'https://dummyimage.com/59x490', + 'Add impact different success box water positive. Marriage respond meeting event.', + 'Grimesmouth', + true, + '551ac8ea-585a-4afa-bbfd-cc1289e06ab3', + '2025-04-18 04:07:17', + '{"skills": ["TypeScript", "JavaScript", "Python"]}'::jsonb, + '{"commits": 439}'::jsonb, + '2025-04-16 12:34:03', + '2025-03-15 05:55:34', + '2025-03-30 10:03:34', + 35, + array['Go', 'C++'], + '304b8590-de9e-4757-9260-001eeecf67d2' + ); + +insert into + repositories ( + id, + created_at, + updated_at, + github_id, + full_name, + name, + owner, + description, + stars_count, + forks_count, + open_issues_count, + language, + topics, + is_indexed, + indexed_at, + indexing_status, + total_chunks_count, + last_commit_hash, + indexing_progress, + weaviate_repo_id + ) +values + ( + 'f6b0bff9-074d-4062-86f5-0a853e521334', + '2025-05-16 10:34:41', + '2025-02-16 08:54:52', + 3728882, + 'jamessellers/repo_0', + 'repo_0', + 'jamessellers', + 'Him task improve fish list tree high.', + 3032, + 363, + 26, + 'C++', + array['Java', 'C++'], + true, + '2025-05-09 21:00:50', + 'processing', + 18, + 'e270dbf424cff6864cc592f6611d8df90c895ec5', + '{"progress": 93}'::jsonb, + '7ecddbaf-26f0-4fcf-bb16-e5dba6eab79e' + ), + ( + '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', + '2025-01-08 04:31:26', + '2025-01-25 12:21:00', + 
3741438, + 'gallowayjoseph/repo_1', + 'repo_1', + 'gallowayjoseph', + 'Whole forward beyond suddenly between treat address.', + 3786, + 388, + 34, + 'C++', + array['C++', 'Rust'], + true, + '2025-01-28 23:48:46', + 'completed', + 2, + 'c9f97db5d2fc4b809df59bc23dd7345dbe6d14d5', + '{"progress": 29}'::jsonb, + '1327f1bc-2784-478f-b84f-16b3a79fbfaf' + ), + ( + '08946f22-0d74-4499-b40d-0f60218d5152', + '2025-04-02 03:59:05', + '2025-02-21 11:05:44', + 6292423, + 'fjohnson/repo_2', + 'repo_2', + 'fjohnson', + 'Perhaps however bag forget purpose move.', + 3286, + 274, + 8, + 'JavaScript', + array['Rust', 'C++'], + false, + '2025-03-03 11:44:52', + 'processing', + 16, + '5e3af4aafc18e025cea707fa7707a1d945e0ffef', + '{"progress": 50}'::jsonb, + 'df547e50-7cea-4045-8268-283ee32f2e63' + ), + ( + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-04-16 01:19:02', + '2025-01-07 23:18:06', + 3396987, + 'maryhowell/repo_3', + 'repo_3', + 'maryhowell', + 'Attention piece TV young section its better plant.', + 2169, + 142, + 31, + 'C++', + array['Rust', 'TypeScript'], + false, + '2025-01-20 12:23:51', + 'failed', + 19, + '22a9658e1dcda6fa5df48102f5882b204e39bc17', + '{"progress": 51}'::jsonb, + '0769165f-e746-4cb9-8ca9-cf07b1aa0f6a' + ), + ( + '7a9eab06-0656-43dd-8b8d-b17b9e5f396c', + '2025-01-09 22:20:00', + '2025-03-04 23:41:43', + 4679591, + 'jamessellers/repo_4', + 'repo_4', + 'jamessellers', + 'Fall hear certainly most.', + 1133, + 521, + 63, + 'Python', + array['TypeScript', 'Python'], + true, + '2025-04-20 21:24:57', + 'processing', + 6, + 'effd50723baca7c5da884a171f8b5bbed8320a23', + '{"progress": 87}'::jsonb, + '14374509-2cd1-486a-ab84-0c672e183554' + ), + ( + 'ea879f36-d060-4f65-bf5d-9138a542f74a', + '2025-04-29 08:26:15', + '2025-03-16 06:47:08', + 2065818, + 'donnacampbell/repo_5', + 'repo_5', + 'donnacampbell', + 'Raise marriage on discussion point least project together.', + 3152, + 390, + 76, + 'Go', + array['Rust', 'Java'], + true, + '2025-04-01 08:14:14', + 
'pending', + 18, + 'a76a3a0bef5688fbee63da697c29fa6d719b37d9', + '{"progress": 96}'::jsonb, + '12e89d10-2871-4733-8bed-db12ad77e82f' + ), + ( + '07921dba-1ae8-422b-9f29-9c908080aa1b', + '2025-03-27 18:36:35', + '2025-03-09 02:27:09', + 6707197, + 'contrerasangela/repo_6', + 'repo_6', + 'contrerasangela', + 'Federal while real lead few yourself table blood.', + 913, + 300, + 55, + 'JavaScript', + array['Go', 'Python'], + false, + '2025-03-16 01:17:11', + 'processing', + 17, + '3a65c2c24c52e4b1907c677fc07132e89ec719bc', + '{"progress": 13}'::jsonb, + 'b303f438-fe21-40d0-8bbe-4aff9326dffd' + ), + ( + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + '2025-02-27 07:52:30', + '2025-01-05 06:14:45', + 9517169, + 'contrerasangela/repo_7', + 'repo_7', + 'contrerasangela', + 'Ever not rate seat any paper.', + 4988, + 203, + 19, + 'Java', + array['TypeScript', 'JavaScript'], + true, + '2025-04-07 10:55:00', + 'completed', + 16, + '4b0c490400e9caf8027725c024538d6df508bd11', + '{"progress": 2}'::jsonb, + 'cc8218da-c696-45e6-8944-051be726be23' + ), + ( + '4882ce56-489d-4abc-bc29-bf3ad5c48930', + '2025-02-14 16:25:45', + '2025-04-28 21:07:40', + 7089806, + 'jason76/repo_8', + 'repo_8', + 'jason76', + 'Despite couple economy sense should race.', + 2519, + 245, + 7, + 'JavaScript', + array['Rust', 'Python'], + true, + '2025-01-27 13:26:14', + 'failed', + 3, + '6ab21b990f1416846b362fdb26b90d80cbf249a9', + '{"progress": 97}'::jsonb, + '0a17991e-a576-4411-a0a1-1839e7457704' + ), + ( + '46d4cf41-4cd4-4043-a835-625a0bf349f2', + '2025-03-01 23:43:53', + '2025-03-27 14:23:05', + 3109911, + 'colleenbaker/repo_9', + 'repo_9', + 'colleenbaker', + 'Often run bed.', + 1051, + 675, + 60, + 'Rust', + array['JavaScript', 'Java'], + false, + '2025-02-27 07:18:51', + 'processing', + 18, + '1f93145c08645e58c60f86341a4f5e572e111863', + '{"progress": 96}'::jsonb, + '21d53971-3367-49b5-acf6-bf756a5e6920' + ); + +insert into + code_chunks ( + id, + repository_id, + created_at, + file_path, + file_name, + 
file_extension, + chunk_index, + content, + chunk_type, + language, + lines_start, + lines_end, + code_metadata, + weaviate_chunk_id + ) +values + ( + '095a5ff0-545a-48ff-83ad-2ea3566f5674', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-04-15 17:49:20', + '/src/file_0.py', + 'file_0.py', + '.py', + 0, + 'Maybe evening clearly trial want whose far. Sound life away senior difficult put. Whose source hand so add Mr.', + 'comment', + 'C++', + 92, + 106, + '{"length": 14}'::jsonb, + 'f23e323d-0b9b-4934-a3c8-6d301dde7969' + ), + ( + 'b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed', + 'ea879f36-d060-4f65-bf5d-9138a542f74a', + '2025-01-08 05:25:15', + '/src/file_1.py', + 'file_1.py', + '.py', + 1, + 'Break doctor Mr home he we recent. Industry score choice increase between majority impact. +Real describe know. Talk between rate name within.', + 'function', + 'Go', + 57, + 76, + '{"length": 19}'::jsonb, + '00b9d4a3-9892-40ac-a689-33a9c9e48e8c' + ), + ( + '1f787967-316c-4232-b251-64bcf8e3251b', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-02-23 20:11:39', + '/src/file_2.py', + 'file_2.py', + '.py', + 2, + 'Music sometimes body term. Address so draw food. +Appear score moment second live. Message board mean war analysis situation.', + 'module', + 'C++', + 29, + 36, + '{"length": 7}'::jsonb, + '1963c26d-6e21-4b09-9afd-4015816bcb9f' + ), + ( + '233530b2-d89f-416d-a73c-40b4ebb33c50', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + '2025-05-17 06:31:44', + '/src/file_3.py', + 'file_3.py', + '.py', + 3, + 'Result Democrat later direction fund law indeed. Fine fine effort well. +Before be it season. Speech news only no form business. Them wait institution trouble anything explain.', + 'import', + 'C++', + 76, + 88, + '{"length": 12}'::jsonb, + '8e867f3c-a487-4eab-accb-461a9d132363' + ), + ( + 'b3103899-d683-422a-9072-2ad26050d8f5', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-01-06 02:21:06', + '/src/file_4.py', + 'file_4.py', + '.py', + 4, + 'Ahead event several TV go. 
Thank not husband center. Begin most heavy. Game have return since nothing be apply.', + 'function', + 'C++', + 1, + 8, + '{"length": 7}'::jsonb, + '0e0630cd-996d-4c50-bc04-a168652ffb49' + ), + ( + '28ea68b7-1f26-472c-b568-319e1d41732b', + 'dda18eb8-8354-4897-8c7f-2c66afbc73e6', + '2025-01-02 11:49:27', + '/src/file_5.py', + 'file_5.py', + '.py', + 5, + 'War should share face build. Section compare herself region matter street south. +Technology amount affect TV television office. Identify policy face if whom commercial way.', + 'module', + 'C++', + 9, + 15, + '{"length": 6}'::jsonb, + '2c6a6e9a-3280-47a1-8187-222b257d5e52' + ), + ( + '1cb8ccc0-db27-49c5-8dff-8d535d5a37d3', + '0f08ecdb-53dd-4352-bb50-b1cfbf09da8b', + '2025-04-27 23:22:57', + '/src/file_6.py', + 'file_6.py', + '.py', + 6, + 'Concern significant management senior. Large under north play person ten physical character. +Kind field ever argue medical financial later. Hard expert popular within.', + 'module', + 'C++', + 66, + 78, + '{"length": 12}'::jsonb, + 'fb7d9f1c-57eb-49b1-965e-59dde62d2d06' + ), + ( + '9edaae8a-3d6c-47c1-8777-ff0b0002b85a', + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + '2025-05-19 16:57:06', + '/src/file_7.py', + 'file_7.py', + '.py', + 7, + 'Position always remain yard model particular hair. Hold simple quickly appear piece.', + 'import', + 'Java', + 28, + 37, + '{"length": 9}'::jsonb, + '86c1b6cb-e996-40f7-af77-520eff4625af' + ), + ( + 'd1927881-d0e7-4df3-a97a-18521db08ff4', + '46d4cf41-4cd4-4043-a835-625a0bf349f2', + '2025-01-19 03:31:20', + '/src/file_8.py', + 'file_8.py', + '.py', + 8, + 'Gun guy Congress degree way main difficult. Choice fast small medical. Strong this also from short capital heavy. +Story side speak close. 
Analysis hair rest wide particular sell.', + 'comment', + 'Rust', + 61, + 73, + '{"length": 12}'::jsonb, + 'ef2ddcc4-8df6-41da-9f07-c1a5dfc620ce' + ), + ( + 'fdda052a-ca4f-40b5-ae99-a711e2161d85', + '07921dba-1ae8-422b-9f29-9c908080aa1b', + '2025-01-20 22:06:10', + '/src/file_9.py', + 'file_9.py', + '.py', + 9, + 'Expect several evening town. Store begin treat stage. Us increase how hear history bank. +Five between research. Social case expert stop receive catch.', + 'function', + 'JavaScript', + 25, + 33, + '{"length": 8}'::jsonb, + '9d642932-0066-453d-ade2-99a14a90cd0c' + ); + +insert into + interactions ( + id, + created_at, + updated_at, + user_id, + repository_id, + platform, + platform_specific_id, + channel_id, + thread_id, + content, + interaction_type, + sentiment_score, + intent_classification, + topics_discussed, + metadata, + weaviate_interaction_id + ) +values + ( + '7c59fe66-53b6-44b5-8ae1-ddc29b071097', + '2025-03-10 12:14:30', + '2025-02-16 17:06:38', + '010d518f-362b-435b-a4de-148914bcbdb9', + 'ea879f36-d060-4f65-bf5d-9138a542f74a', + 'github', + 'aa143cd8-2ff3-4de4-aaa4-2c9f92170475', + 'f982f4e0-8603-456a-95ea-cbcfab1021ce', + '86abd4e7-f412-4360-9153-6e995c508720', + 'Skill medical after them analysis hit health. Ground attack drop. Billion old series card good full poor store.', + 'comment', + -0.07, + 'help_request', + array['C++', 'TypeScript'], + '{"info": "capital"}'::jsonb, + 'e3b56360-6fdc-4bad-9e36-8127cca1b45c' + ), + ( + 'f0c80815-fde1-4644-94ca-cd8915f11e46', + '2025-03-19 16:14:11', + '2025-05-25 08:03:53', + '6f990423-0d57-4c64-b191-17e53f39c799', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'github', + '62fb26d7-f4db-4a07-a506-f6707092947d', + '7f072cb9-2fd3-40c0-b945-f2fd56cb1ab0', + 'ec9f9c54-5e0a-42ab-bf5d-b163b12b6680', + 'Song risk bad own state. Family bill foreign fast knowledge response coach. 
Goal amount thank good your ever.', + 'pr', + 0.6, + 'help_request', + array['JavaScript', 'TypeScript'], + '{"info": "already"}'::jsonb, + 'c74cc890-3c6a-4174-9136-34a520509c62' + ), + ( + 'ef139daa-fa4c-445a-8bf7-fdd725bdb82c', + '2025-05-06 06:40:36', + '2025-03-13 03:12:51', + '9b56cac8-504a-4dd8-b7ba-0a5bfce7abf7', + '4882ce56-489d-4abc-bc29-bf3ad5c48930', + 'github', + '9136f1f8-f310-46dc-a202-bee65cb5e69c', + 'add702c9-2747-493c-9ae7-7eab084a6780', + '5f3c44dc-5ef7-47b8-b2e6-195f732e2016', + 'Off morning huge power. Whether ago control military trial. Energy employee land you.', + 'issue', + -0.16, + 'feature_request', + array['Go', 'JavaScript'], + '{"info": "security"}'::jsonb, + '2c913a7c-a340-4f08-b341-91b8ed6522b4' + ), + ( + 'd5c02f3d-6d9a-49b9-8d71-33ba08c610a2', + '2025-03-12 04:40:32', + '2025-04-09 18:58:25', + '6f990423-0d57-4c64-b191-17e53f39c799', + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + 'slack', + '3373730e-fc31-4597-9f11-9c0f3967e60a', + 'ca55e38b-3c9a-4e10-a38d-6f44cac4d0eb', + '8dd4595b-5c63-46f3-86ba-8cd37e7838c9', + 'Level work candidate this assume huge. Moment shoulder statement available win politics last. General there sister policy consider whom item.', + 'message', + -0.9, + 'help_request', + array['Python', 'JavaScript'], + '{"info": "prove"}'::jsonb, + '200b2903-4642-4c45-8d03-f17af4d375c1' + ), + ( + '5696eff8-bba4-41a4-953f-f70eece14b2d', + '2025-05-02 08:48:55', + '2025-02-28 19:15:53', + '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + '07921dba-1ae8-422b-9f29-9c908080aa1b', + 'github', + '80a8a23d-17ea-4c83-8892-042f9d4b2bf9', + 'f10d27c8-9780-4215-ab2b-b8e9a417c093', + 'a8a2b7ad-2bd3-4dcd-a779-468594a53fde', + 'Wish candidate have no five letter. Last cell anything war ten. 
Eat tend civil force officer fine.', + 'comment', + -0.57, + 'general_discussion', + array['Python', 'JavaScript'], + '{"info": "ready"}'::jsonb, + '896490ab-4926-4e5f-b878-6140ac2a4f71' + ), + ( + '2ea1d897-a515-40cd-a92a-01eada9542d8', + '2025-01-02 14:06:34', + '2025-01-21 20:58:21', + '010d518f-362b-435b-a4de-148914bcbdb9', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'github', + '43b47ee5-e1e8-4e7e-a249-8f666e51484d', + '673ba8bd-c38c-4dec-9da3-9a73ba3df7ff', + '0d18ab95-668c-4477-8b95-017c5dae1201', + 'Foreign party class wrong. Order medical meeting majority none. Staff happy purpose woman on someone rise.', + 'pr', + 0.85, + 'general_discussion', + array['Java', 'Go'], + '{"info": "market"}'::jsonb, + 'ba370623-bc5f-44dd-992f-3c1edf70fb2a' + ), + ( + 'dc1ad7fb-edca-4c34-b07d-7b51f7a92974', + '2025-01-13 02:15:06', + '2025-05-18 01:09:03', + '7771182e-ed52-4f4e-a376-0750b9854324', + 'd96ff094-7f9c-42d8-bb60-f83f19b07f77', + 'discord', + '8f964685-3514-4890-84c8-6c4623595fa4', + '7a768555-a987-4218-bf84-faef5336723b', + 'f4e95734-5052-4700-a077-96a38685abaa', + 'Treatment garden great sign return poor really. Machine whatever everything fear walk word side relate.', + 'issue', + -0.41, + 'help_request', + array['Rust', 'C++'], + '{"info": "defense"}'::jsonb, + 'eedaa802-4568-4426-89e0-3e22d3f4a49b' + ), + ( + '30634a69-7c7b-4f11-8ec5-b83299015938', + '2025-04-20 03:11:37', + '2025-05-16 04:23:36', + '7771182e-ed52-4f4e-a376-0750b9854324', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'slack', + '9d9d028e-1bf6-45f6-a324-52114588fc1b', + '3e11bafe-3d41-46fe-963a-8617bdab07e7', + '87f255d6-e7ba-46ac-ab7a-3d1c0cfef683', + 'Appear including response beyond side. 
Who within citizen.', + 'pr', + -0.89, + 'general_discussion', + array['Rust', 'TypeScript'], + '{"info": "cultural"}'::jsonb, + 'df4e713e-f64e-4dfc-bfbe-ac7aefc59738' + ), + ( + '87916dfb-a7ce-4315-80f3-a72be814f08c', + '2025-02-26 05:00:49', + '2025-01-08 06:36:18', + '2aefee92-c7da-4d6e-90c1-d6c3bb82c0e1', + 'f6b0bff9-074d-4062-86f5-0a853e521334', + 'slack', + '9a4ffc0c-9165-42ed-8c63-6e95025f5543', + '55c551fc-fba5-4cc8-adaf-37661b780ede', + 'a42d0cd7-fd35-4f6a-b450-388748d90846', + 'According himself land environment form. Reveal activity president realize artist brother fill if. Type thousand show real police wait happen.', + 'message', + 0.7, + 'help_request', + array['Rust', 'Python'], + '{"info": "store"}'::jsonb, + '87365a84-725e-434d-8687-9aa914f573d0' + ), + ( + 'c29c38dc-10be-4da2-81b7-6f82b746a359', + '2025-05-17 15:47:16', + '2025-03-12 06:02:22', + '9b56cac8-504a-4dd8-b7ba-0a5bfce7abf7', + '07921dba-1ae8-422b-9f29-9c908080aa1b', + 'discord', + '1cb9b73a-906d-4c8c-aad0-8c8913fe8a29', + 'e723ada3-8c32-4db2-942a-895e0fcf601f', + '89628f6e-929c-43b3-b3c0-8bf18167999f', + 'Foreign minute break day. Major together knowledge argue car indeed nor next. +How staff second. 
Authority interest red must art thus worry line.', + 'reaction', + -0.51, + 'help_request', + array['Rust', 'Python'], + '{"info": "cell"}'::jsonb, + '1e5ebe54-5907-4299-9c3b-bdd8a74e02a9' + ); diff --git a/backend/app/scripts/weaviate/create_schemas.py b/backend/app/scripts/weaviate/create_schemas.py new file mode 100644 index 00000000..519f3396 --- /dev/null +++ b/backend/app/scripts/weaviate/create_schemas.py @@ -0,0 +1,56 @@ +from app.db.weaviate.weaviate_client import get_client +import weaviate.classes.config as wc + +def create_user_profile_schema(client): + client.collections.create( + name="weaviate_user_profile", + properties=[ + wc.Property(name="supabaseUserId", data_type=wc.DataType.TEXT), + wc.Property(name="profileSummary", data_type=wc.DataType.TEXT), + wc.Property(name="primaryLanguages", data_type=wc.DataType.TEXT_ARRAY), + wc.Property(name="expertiseAreas", data_type=wc.DataType.TEXT_ARRAY), + ], + vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), + generative_config=wc.Configure.Generative.openai() + ) + print("Created: weaviate_user_profile") + + +def create_code_chunk_schema(client): + client.collections.create( + name="weaviate_code_chunk", + properties=[ + wc.Property(name="supabaseChunkId", data_type=wc.DataType.TEXT), + wc.Property(name="codeContent", data_type=wc.DataType.TEXT), + wc.Property(name="language", data_type=wc.DataType.TEXT), + wc.Property(name="functionNames", data_type=wc.DataType.TEXT_ARRAY), + ], + vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), + generative_config=wc.Configure.Generative.openai() + ) + print("Created: weaviate_code_chunk") + +def create_interaction_schema(client): + client.collections.create( + name="weaviate_interaction", + properties=[ + wc.Property(name="supabaseInteractionId", data_type=wc.DataType.TEXT), + wc.Property(name="conversationSummary", data_type=wc.DataType.TEXT), + wc.Property(name="platform", data_type=wc.DataType.TEXT), + wc.Property(name="topics", 
data_type=wc.DataType.TEXT_ARRAY), + ], + vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), + generative_config=wc.Configure.Generative.openai() + ) + print("Created: weaviate_interaction") + +def create_all_schemas(): + client = get_client() + existing_collections = client.collections.list_all() + if "Weaviate_code_chunk" not in existing_collections: + create_code_chunk_schema(client) + if "Weaviate_interaction" not in existing_collections: + create_interaction_schema(client) + if "Weaviate_user_profile" not in existing_collections: + create_user_profile_schema(client) + print("✅ All schemas ensured.") diff --git a/backend/app/scripts/weaviate/populate_db.py b/backend/app/scripts/weaviate/populate_db.py new file mode 100644 index 00000000..a74045f8 --- /dev/null +++ b/backend/app/scripts/weaviate/populate_db.py @@ -0,0 +1,230 @@ +from app.db.weaviate.weaviate_client import get_client + + +async def populate_Weaviate_code_chunk(client): + weaviate_code_chunk = client.collections.get("weaviate_code_chunk") + code_chunks = [ + { + "supabaseChunkId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", + "codeContent": ( + "Maybe evening clearly trial want whose far. Sound life away senior difficult put. " + "Whose source hand so add Mr." + ), + "language": "C++", + "functionNames": ["comment"] + }, + { + "supabaseChunkId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", + "codeContent": ( + "Break doctor Mr home he we recent. Industry score choice increase between majority impact.\n" + "Real describe know. Talk between rate name within." + ), + "language": "Go", + "functionNames": ["function"] + }, + { + "supabaseChunkId": "1f787967-316c-4232-b251-64bcf8e3251b", + "codeContent": ( + "Music sometimes body term. Address so draw food.\n" + "Appear score moment second live. Message board mean war analysis situation." 
+ ), + "language": "C++", + "functionNames": ["module"] + }, + { + "supabaseChunkId": "233530b2-d89f-416d-a73c-40b4ebb33c50", + "codeContent": ( + "Result Democrat later direction fund law indeed. Fine fine effort well.\n" + "Before be it season. Speech news only form business. Them wait institution trouble anything explain." + ), + "language": "C++", + "functionNames": ["import"] + }, + { + "supabaseChunkId": "b3103899-d683-422a-9072-2ad26050d8f5", + "codeContent": ( + "Ahead event several TV go. Thank not husband center. Begin most heavy. " + "Game have return since nothing be apply." + ), + "language": "C++", + "functionNames": ["function"] + }, + { + "supabaseChunkId": "28ea68b7-1f26-472c-b568-319e1d41732b", + "codeContent": ( + "War should share face build. Section compare herself region matter street south.\n" + "Technology amount affect TV television office. Identify policy face if whom commercial way." + ), + "language": "C++", + "functionNames": ["module"] + }, + { + "supabaseChunkId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", + "codeContent": ( + "Concern significant management senior. Large under north play person ten physical character.\n" + "Kind field ever argue medical financial later. Hard expert popular within." + ), + "language": "C++", + "functionNames": ["module"] + }, + { + "supabaseChunkId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", + "codeContent": ( + "Position always remain yard model particular hair. Hold simple quickly appear piece." + ), + "language": "Java", + "functionNames": ["import"] + }, + { + "supabaseChunkId": "d1927881-d0e7-4df3-a97a-18521db08ff4", + "codeContent": ( + "Gun guy Congress degree way main difficult. Choice fast small medical. Strong this also from short.\n" + "Story side speak close. Analysis hair rest wide particular sell." + ), + "language": "Rust", + "functionNames": ["comment"] + }, + { + "supabaseChunkId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", + "codeContent": ( + "Expect several evening town. 
Store begin treat stage. Us increase how hear history bank.\n" + "Five between research. Social case expert stop receive catch." + ), + "language": "JavaScript", + "functionNames": ["function"] + } + ] + response = await weaviate_code_chunk.data.insert_many(code_chunks) + print(response) + print("Populated: weaviate_code_chunk with sample data.") +async def populate_Weaviate_interaction(client): + weaviate_interaction = client.collections.get("weaviate_interaction") + interactions = [ + { + "userId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", + "message": "Hi, can you explain the code chunk with ID 095a5ff0-545a-48ff-83ad-2ea3566f5674?", + "timestamp": "2023-01-01T12:00:00Z" + }, + { + "userId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", + "message": "What does the function in Go do?", + "timestamp": "2023-01-01T12:01:00Z" + }, + { + "userId": "1f787967-316c-4232-b251-64bcf8e3251b", + "message": "Can you summarize the module in C++?", + "timestamp": "2023-01-01T12:02:00Z" + }, + { + "userId": "233530b2-d89f-416d-a73c-40b4ebb33c50", + "message": "What is the purpose of the import in this C++ chunk?", + "timestamp": "2023-01-01T12:03:00Z" + }, + { + "userId": "b3103899-d683-422a-9072-2ad26050d8f5", + "message": "Is this function in C++ recursive?", + "timestamp": "2023-01-01T12:04:00Z" + }, + { + "userId": "28ea68b7-1f26-472c-b568-319e1d41732b", + "message": "What does this module handle in the codebase?", + "timestamp": "2023-01-01T12:05:00Z" + }, + { + "userId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", + "message": "Can you explain the logic in this C++ module?", + "timestamp": "2023-01-01T12:06:00Z" + }, + { + "userId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", + "message": "What does the import statement in Java do?", + "timestamp": "2023-01-01T12:07:00Z" + }, + { + "userId": "d1927881-d0e7-4df3-a97a-18521db08ff4", + "message": "Is this a comment or code in Rust?", + "timestamp": "2023-01-01T12:08:00Z" + }, + { + "userId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", + 
"message": "What is the output of this JavaScript function?", + "timestamp": "2023-01-01T12:09:00Z" + } + ] + response = await weaviate_interaction.data.insert_many(interactions) + print(response) + print("Populated: weaviate_interaction with sample data.") +async def populate_Weaviate_user_profile(client): + weaviate_user_profile = client.collections.get("weaviate_user_profile") + user_profiles = [ + { + "supabaseUserId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", + "profileSummary": "Experienced C++ developer with a focus on performance optimization.", + "primaryLanguages": ["C++", "Python"], + "expertiseAreas": ["Performance Tuning", "Concurrency"] + }, + { + "supabaseUserId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", + "profileSummary": "Go developer with a passion for building scalable systems.", + "primaryLanguages": ["Go", "JavaScript"], + "expertiseAreas": ["Microservices", "Cloud Computing"] + }, + { + "supabaseUserId": "1f787967-316c-4232-b251-64bcf8e3251b", + "profileSummary": "C++ module developer with experience in embedded systems.", + "primaryLanguages": ["C++", "Rust"], + "expertiseAreas": ["Embedded Systems", "Real-time Processing"] + }, + { + "supabaseUserId": "233530b2-d89f-416d-a73c-40b4ebb33c50", + "profileSummary": "C++ developer with a knack for clean imports and modular code.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["Code Organization", "Modularity"] + }, + { + "supabaseUserId": "b3103899-d683-422a-9072-2ad26050d8f5", + "profileSummary": "C++ enthusiast focusing on algorithmic challenges.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["Algorithms", "Problem Solving"] + }, + { + "supabaseUserId": "28ea68b7-1f26-472c-b568-319e1d41732b", + "profileSummary": "C++ developer with experience in system architecture.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["System Design", "Architecture"] + }, + { + "supabaseUserId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", + "profileSummary": "C++ developer passionate about medical 
technology.", + "primaryLanguages": ["C++"], + "expertiseAreas": ["Medical Tech", "Data Analysis"] + }, + { + "supabaseUserId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", + "profileSummary": "Java developer with a focus on enterprise solutions.", + "primaryLanguages": ["Java"], + "expertiseAreas": ["Enterprise Software", "APIs"] + }, + { + "supabaseUserId": "d1927881-d0e7-4df3-a97a-18521db08ff4", + "profileSummary": "Rustacean interested in safe and fast code.", + "primaryLanguages": ["Rust"], + "expertiseAreas": ["Memory Safety", "Performance"] + }, + { + "supabaseUserId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", + "profileSummary": "JavaScript developer with a love for UI/UX.", + "primaryLanguages": ["JavaScript"], + "expertiseAreas": ["Frontend", "User Experience"] + } + ] + response = await weaviate_user_profile.data.insert_many(user_profiles) + print(response) + print("Populated: weaviate_user_profile with sample data.") +async def populate_all_collections(): + client = get_client() + print("Populating Weaviate collections with sample data...") + await populate_Weaviate_code_chunk(client) + await populate_Weaviate_interaction(client) + await populate_Weaviate_user_profile(client) + print("✅ All collections populated with sample data.") diff --git a/backend/app/tests/test_supabase.py b/backend/app/tests/test_supabase.py new file mode 100644 index 00000000..d02ea910 --- /dev/null +++ b/backend/app/tests/test_supabase.py @@ -0,0 +1,289 @@ +from app.model.supabase.models import User, Interaction, CodeChunk, Repository +from uuid import uuid4 +from app.db.weaviate.weaviate_client import get_client +from datetime import datetime # Your User model import + +client = get_client() + +client = get_client() + +def insert_user_into_supabase(user: User): + # Convert Pydantic User model to dict to send to Supabase + user_dict = user.dict() + + # Supabase expects datetime fields as ISO 8601 strings + # Convert datetime fields to ISO strings + for key in ['created_at', 
'updated_at', 'verified_at', 'last_active_discord', 'last_active_github', 'last_active_slack']: + if user_dict.get(key): + user_dict[key] = user_dict[key].isoformat() + + response = client.table("users").insert(user_dict).execute() # type: ignore + + if response.status_code != 201: + raise Exception(f"Failed to insert user: {response}") + return response.data[0] + +def test_create_and_save_user(): + user = User( + id=uuid4(), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + discord_id="1234567890", + discord_username="discordUser#1234", + github_id="987654321", + github_username="githubUser", + slack_id="U12345678", + slack_username="slackUser", + display_name="John Doe", + email="john.doe@example.com", + avatar_url="https://example.com/avatar.jpg", + bio="Software developer and open source enthusiast.", + location="San Francisco, CA", + is_verified=True, + verification_token="verif_token_abc123", + verified_at=datetime.utcnow(), + skills=["Python", "Go", "Docker"], + github_stats={"repos": 42, "followers": 100}, + last_active_discord=datetime.utcnow(), + last_active_github=datetime.utcnow(), + last_active_slack=datetime.utcnow(), + total_interactions_count=256, + preferred_languages=["Python", "JavaScript", "Rust"], + weaviate_user_id="weaviate-uuid-1234" + ) + + saved_user = insert_user_into_supabase(user) + print("User saved:", saved_user) + +def get_user_by_id(user_id: str): + response = client.table("users").select("*").eq("id", user_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch user: {response}") + if not response.data: + raise ValueError(f"No user found with ID: {user_id}") + return response.data[0] + +def update_user(user_id: str, updates: dict): + response = client.table("users").update(updates).eq("id", user_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update user: {response}") + return response.data[0] + +def delete_user(user_id: str): + response = 
client.table("users").delete().eq("id", user_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete user: {response}") + return response.data[0] + +# Test the user creation and saving functionality +def test_user(): + user = User( + id=uuid4(), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + discord_id="1234567890", + discord_username="discordUser#1234", + github_id="987654321", + github_username="githubUser", + slack_id="U12345678", + slack_username="slackUser", + display_name="John Doe", + email="john.doe@example.com", + avatar_url="https://example.com/avatar.jpg", + bio="Software developer and open source enthusiast.", + location="San Francisco, CA", + is_verified=True, + verification_token="verif_token_abc123", + verified_at=datetime.utcnow(), + skills=["Python", "Go", "Docker"], + github_stats={"repos": 42, "followers": 100}, + last_active_discord=datetime.utcnow(), + last_active_github=datetime.utcnow(), + last_active_slack=datetime.utcnow(), + total_interactions_count=256, + preferred_languages=["Python", "JavaScript", "Rust"], + weaviate_user_id="weaviate-uuid-1234" + ) + inserted_user = insert_user_into_supabase(user) + print(f"Inserted User: {inserted_user}") + get_user = get_user_by_id(inserted_user['id']) + print(f"Fetched User: {get_user}") + updated_user = update_user(inserted_user['id'], {"display_name": "John Updated"}) + print(f"Updated User: {updated_user}") + deleted_user = delete_user(inserted_user['id']) + print(f"Deleted User: {deleted_user}") + + +def insert_Intercation(interaction: Interaction): + interaction_dict = interaction.dict() + for key in ['created_at', 'updated_at']: + if interaction_dict.get(key): + interaction_dict[key] = interaction_dict[key].isoformat() + + response = client.table("interactions").insert(interaction_dict).execute() + + if response.status_code != 201: + raise Exception(f"Failed to insert interaction: {response}") + return response.data[0] + +def 
read_interaction_by_id(interaction_id: str): + response = client.table("interactions").select("*").eq("id", interaction_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch interaction: {response}") + if not response.data: + raise ValueError(f"No interaction found with ID: {interaction_id}") + return response.data[0] + +def update_interaction(interaction_id: str, updates: dict): + response = client.table("interactions").update(updates).eq("id", interaction_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update interaction: {response}") + return response.data[0] +def delete_interaction(interaction_id: str): + response = client.table("interactions").delete().eq("id", interaction_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete interaction: {response}") + return response.data[0] + +def test_interaction(): + interaction = Interaction( + id=uuid4(), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + user_id=uuid4(), + repository_id=uuid4(), + interaction_type="comment", + content="Hello, this is a test interaction.", + metadata={"source": "test_script"}, + platform="github", + platform_specific_id="gh-interaction-5678", + weaviate_interaction_id="weaviate-interaction-1234" + ) + inserted_interaction = insert_Intercation(interaction) + print(f"Inserted Interaction: {inserted_interaction}") + get_interaction = read_interaction_by_id(inserted_interaction['id']) + print(f"Fetched Interaction: {get_interaction}") + updated_interaction = update_interaction(inserted_interaction['id'], {"content": "Updated interaction content."}) + print(f"Updated Interaction: {updated_interaction}") + deleted_interaction = delete_interaction(inserted_interaction['id']) + print(f"Deleted Interaction: {deleted_interaction}") + +def insert_code_chunk(code_chunk: CodeChunk): + code_chunk_dict = code_chunk.dict() + for key in ['created_at', 'lines_start', 'lines_end']: + if 
code_chunk_dict.get(key): + code_chunk_dict[key] = code_chunk_dict[key].isoformat() + + response = client.table("code_chunks").insert(code_chunk_dict).execute() + + if response.status_code != 201: + raise Exception(f"Failed to insert code chunk: {response}") + return response.data[0] +def read_code_chunk_by_id(code_chunk_id: str): + response = client.table("code_chunks").select("*").eq("id", code_chunk_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch code chunk: {response}") + if not response.data: + raise ValueError(f"No code chunk found with ID: {code_chunk_id}") + return response.data[0] +def update_code_chunk(code_chunk_id: str, updates: dict): + response = client.table("code_chunks").update(updates).eq("id", code_chunk_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update code chunk: {response}") + return response.data[0] +def delete_code_chunk(code_chunk_id: str): + response = client.table("code_chunks").delete().eq("id", code_chunk_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete code chunk: {response}") + return response.data[0] +def test_code_chunk(): + code_chunk = CodeChunk( + id=uuid4(), + repository_id=uuid4(), + created_at=datetime.utcnow(), + file_path="/path/to/file.py", + file_name="file.py", + file_extension=".py", + chunk_index=1, + content="def hello_world():\n print('Hello, world!')", + chunk_type="function", + language="Python", + lines_start=1, + lines_end=3, + code_metadata={"complexity": "low"}, + weaviate_chunk_id="weaviate-chunk-1234" + ) + inserted_code_chunk = insert_code_chunk(code_chunk) + print(f"Inserted Code Chunk: {inserted_code_chunk}") + get_code_chunk = read_code_chunk_by_id(inserted_code_chunk['id']) + print(f"Fetched Code Chunk: {get_code_chunk}") + updated_code_chunk = update_code_chunk(inserted_code_chunk['id'], { + "content": "def hello_world():\n print('Updated content!')"}) + print(f"Updated Code Chunk: 
{updated_code_chunk}") + deleted_code_chunk = delete_code_chunk(inserted_code_chunk['id']) + print(f"Deleted Code Chunk: {deleted_code_chunk}") +def insert_repository(repository: Repository): + repository_dict = repository.dict() + for key in ['created_at', 'updated_at', 'indexed_at']: + if repository_dict.get(key): + repository_dict[key] = repository_dict[key].isoformat() + + response = client.table("repositories").insert(repository_dict).execute() + + if response.status_code != 201: + raise Exception(f"Failed to insert repository: {response}") + return response.data[0] +def read_repository_by_id(repository_id: str): + response = client.table("repositories").select("*").eq("id", repository_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to fetch repository: {response}") + if not response.data: + raise ValueError(f"No repository found with ID: {repository_id}") + return response.data[0] +def update_repository(repository_id: str, updates: dict): + response = client.table("repositories").update(updates).eq("id", repository_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to update repository: {response}") + return response.data[0] +def delete_repository(repository_id: str): + response = client.table("repositories").delete().eq("id", repository_id).execute() + if response.status_code != 200: + raise Exception(f"Failed to delete repository: {response}") + return response.data[0] +def test_repository(): + repository = Repository( + id=uuid4(), + created_at=datetime.utcnow(), + updated_at=datetime.utcnow(), + github_id=123456789, + full_name="example/repo", + name="repo", + owner="example", + description="An example repository for testing.", + stars_count=100, + forks_count=10, + open_issues_count=5, + language="Python", + topics=["example", "test"], + is_indexed=True, + indexed_at=datetime.utcnow(), + indexing_status="completed", + total_chunks_count=50, + last_commit_hash="abc123def456", + 
indexing_progress={"current": 50, "total": 100}, + weaviate_repo_id="weaviate-repo-1234" + ) + inserted_repository = insert_repository(repository) + print(f"Inserted Repository: {inserted_repository}") + get_repository = read_repository_by_id(inserted_repository['id']) + print(f"Fetched Repository: {get_repository}") + updated_repository = update_repository(inserted_repository['id'], {"description": "Updated description."}) + print(f"Updated Repository: {updated_repository}") + deleted_repository = delete_repository(inserted_repository['id']) + print(f"Deleted Repository: {deleted_repository}") + +def all_tests(): + test_user() + test_interaction() + test_code_chunk() + test_repository() From 9d098e8307a594cda1667f7e0c816c49e94051c4 Mon Sep 17 00:00:00 2001 From: Eli4479 Date: Mon, 9 Jun 2025 21:02:01 +0530 Subject: [PATCH 2/5] [feat]: implement Weaviate client connection , data population methods and tests added --- backend/app/db/weaviate/weaviate_client.py | 13 +- backend/app/scripts/weaviate/populate_db.py | 42 ++-- backend/app/tests/test_supabase.py | 6 +- backend/app/tests/test_weaviate.py | 227 ++++++++++++++++++++ backend/docker-compose.yml | 28 +++ backend/main.py | 4 +- 6 files changed, 291 insertions(+), 29 deletions(-) create mode 100644 backend/app/tests/test_weaviate.py create mode 100644 backend/docker-compose.yml diff --git a/backend/app/db/weaviate/weaviate_client.py b/backend/app/db/weaviate/weaviate_client.py index ff05cc2d..b1bc7e0f 100644 --- a/backend/app/db/weaviate/weaviate_client.py +++ b/backend/app/db/weaviate/weaviate_client.py @@ -1,19 +1,8 @@ import os import weaviate -from weaviate.classes.init import Auth - -# Best practice: store your credentials in environment variables -weaviate_url = os.environ["WEAVIATE_URL"] -weaviate_api_key = os.environ["WEAVIATE_API_KEY"] -weaviate_grpc_host = os.environ.get("WEAVIATE_GRPC_HOST") # Default to localhost if not set -weaviate_grpc_port = os.environ.get("WEAVIATE_GRPC_PORT") # Fallback to 
localhost if not set # Connect to Weaviate Cloud -client = weaviate.connect_to_weaviate_cloud( - cluster_url=weaviate_url, - auth_credentials=Auth.api_key(weaviate_api_key), - skip_init_checks=True, -) +client = weaviate.connect_to_local() def get_client(): diff --git a/backend/app/scripts/weaviate/populate_db.py b/backend/app/scripts/weaviate/populate_db.py index a74045f8..9e89d9ab 100644 --- a/backend/app/scripts/weaviate/populate_db.py +++ b/backend/app/scripts/weaviate/populate_db.py @@ -2,7 +2,6 @@ async def populate_Weaviate_code_chunk(client): - weaviate_code_chunk = client.collections.get("weaviate_code_chunk") code_chunks = [ { "supabaseChunkId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", @@ -94,11 +93,17 @@ async def populate_Weaviate_code_chunk(client): "functionNames": ["function"] } ] - response = await weaviate_code_chunk.data.insert_many(code_chunks) - print(response) - print("Populated: weaviate_code_chunk with sample data.") + try: + with client.batch.dynamic() as batch: + for chunk in code_chunks: + batch.add_object( + collection="weaviate_code_chunk", + properties=chunk + ) + print("Populated: weaviate_code_chunk with sample data.") + except Exception as e: + print(f"Error populating weaviate_code_chunk: {e}") async def populate_Weaviate_interaction(client): - weaviate_interaction = client.collections.get("weaviate_interaction") interactions = [ { "userId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", @@ -151,11 +156,17 @@ async def populate_Weaviate_interaction(client): "timestamp": "2023-01-01T12:09:00Z" } ] - response = await weaviate_interaction.data.insert_many(interactions) - print(response) - print("Populated: weaviate_interaction with sample data.") + try: + with client.batch.dynamic() as batch: + for interaction in interactions: + batch.add_object( + collection="weaviate_interaction", + properties=interaction + ) + print("Populated: weaviate_interaction with sample data.") + except Exception as e: + print(f"Error populating weaviate_interaction: 
{e}") async def populate_Weaviate_user_profile(client): - weaviate_user_profile = client.collections.get("weaviate_user_profile") user_profiles = [ { "supabaseUserId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", @@ -218,9 +229,16 @@ async def populate_Weaviate_user_profile(client): "expertiseAreas": ["Frontend", "User Experience"] } ] - response = await weaviate_user_profile.data.insert_many(user_profiles) - print(response) - print("Populated: weaviate_user_profile with sample data.") + try: + with client.batch.dynamic() as batch: + for profile in user_profiles: + batch.add_object( + collection="weaviate_user_profile", + properties=profile + ) + print("Populated: weaviate_user_profile with sample data.") + except Exception as e: + print(f"Error populating weaviate_user_profile: {e}") async def populate_all_collections(): client = get_client() print("Populating Weaviate collections with sample data...") diff --git a/backend/app/tests/test_supabase.py b/backend/app/tests/test_supabase.py index d02ea910..74817204 100644 --- a/backend/app/tests/test_supabase.py +++ b/backend/app/tests/test_supabase.py @@ -1,11 +1,9 @@ from app.model.supabase.models import User, Interaction, CodeChunk, Repository from uuid import uuid4 -from app.db.weaviate.weaviate_client import get_client +from app.db.supabase.supabase_client import get_supabase_client from datetime import datetime # Your User model import -client = get_client() - -client = get_client() +client = get_supabase_client() def insert_user_into_supabase(user: User): # Convert Pydantic User model to dict to send to Supabase diff --git a/backend/app/tests/test_weaviate.py b/backend/app/tests/test_weaviate.py new file mode 100644 index 00000000..784f3114 --- /dev/null +++ b/backend/app/tests/test_weaviate.py @@ -0,0 +1,227 @@ +from app.db.weaviate.weaviate_client import get_client +from datetime import datetime +from uuid import uuid4 +from app.model.weaviate.models import ( + WeaviateUserProfile, + WeaviateCodeChunk, + 
WeaviateInteraction +) + + +def test_weaviate_client(): + client = get_client() + assert client is not None, "Weaviate client should not be None" + try: + ready = client.is_ready() + assert ready, "Weaviate client is not ready" + except Exception as e: + assert False, f"Weaviate client connection failed: {e}" + +def insert_user_profile(): + user_profile = WeaviateUserProfile( + supabase_user_id=str(uuid4()), + profile_summary="Test user profile summary", + primary_languages=["Python", "JavaScript"], + expertise_areas=["Web Development", "Data Science"], + embedding=[0.1] * 384 # Example embedding + ) + client = get_client() + try: + client.data_object.create( + data_object=user_profile.dict(by_alias=True), + class_name="Weaviate_user_profile" + ) + print("User profile inserted successfully.") + except Exception as e: + print(f"Error inserting user profile: {e}") + + +def get_user_profile_by_id(user_id: str): + client = get_client() + try: + questions = client.collections.get("Weaviate_user_profile") + response = questions.query.bm25( + query=user_id, + properties=["supabaseUserId", "profileSummary", "primaryLanguages", "expertiseAreas"] + ) + if response and len(response) > 0: + user_profile_data = response[0] + return WeaviateUserProfile(**user_profile_data) + except Exception as e: + print(f"Error retrieving user profile: {e}") + return None + +def update_user_profile(user_id: str): + questions = get_client().collections.get("Weaviate_user_profile") + try: + user_profile = questions.query.bm25( + query=user_id, + properties=["supabaseUserId", "profileSummary", "primaryLanguages", "expertiseAreas"] + ) + if user_profile: + user_profile[0]["profileSummary"] = "Updated profile summary" + questions.update(user_profile[0]) + print("User profile updated successfully.") + else: + print("User profile not found.") + except Exception as e: + print(f"Error updating user profile: {e}") + +def delete_user_profile(user_id: str): + questions = 
get_client().collections.get("Weaviate_user_profile") + try: + deleted = questions.data.delete_by_id(user_id) + if deleted: + print("User profile deleted successfully.") + else: + print("User profile not found.") + except Exception as e: + print(f"Error deleting user profile: {e}") + +def test_user_profile(): + inserted_user = insert_user_profile() + assert inserted_user is not None, "User profile insertion failed" + get_user_profile_by_id(inserted_user.supabase_user_id) + update_user_profile(inserted_user.supabase_user_id) + delete_user_profile(inserted_user.supabase_user_id) + + +def insert_code_chunk(): + client = get_client() + code_chunk = WeaviateCodeChunk( + supabase_chunk_id=str(uuid4()), + code_content="def hello_world():\n print('Hello, world!')", + language="Python", + function_names=["hello_world"], + embedding=[0.1] * 384 # Example embedding + ) + try: + client.data_object.create( + data_object=code_chunk.dict(by_alias=True), + class_name="Weaviate_code_chunk" + ) + print("Code chunk inserted successfully.") + except Exception as e: + print(f"Error inserting code chunk: {e}") +def get_code_chunk_by_id(code_chunk_id: str): + client = get_client() + try: + code_chunk = client.data_object.get( + id=code_chunk_id, + class_name="Weaviate_code_chunk" + ) + if code_chunk: + return WeaviateCodeChunk(**code_chunk) + except Exception as e: + print(f"Error retrieving code chunk: {e}") + return None +def update_code_chunk(code_chunk_id: str): + client = get_client() + try: + code_chunk = client.data_object.get( + id=code_chunk_id, + class_name="Weaviate_code_chunk" + ) + if code_chunk: + code_chunk["codeContent"] = "Updated code content" + client.data_object.update( + data_object=code_chunk, + class_name="Weaviate_code_chunk" + ) + print("Code chunk updated successfully.") + else: + print("Code chunk not found.") + except Exception as e: + print(f"Error updating code chunk: {e}") +def delete_code_chunk(code_chunk_id: str): + client = get_client() + try: + deleted 
= client.data_object.delete( + id=code_chunk_id, + class_name="Weaviate_code_chunk" + ) + if deleted: + print("Code chunk deleted successfully.") + else: + print("Code chunk not found.") + except Exception as e: + print(f"Error deleting code chunk: {e}") +def test_code_chunk(): + inserted_chunk = insert_code_chunk() + assert inserted_chunk is not None, "Code chunk insertion failed" + get_code_chunk_by_id(inserted_chunk.supabase_chunk_id) + update_code_chunk(inserted_chunk.supabase_chunk_id) + delete_code_chunk(inserted_chunk.supabase_chunk_id) +def insert_interaction(): + client = get_client() + interaction = WeaviateInteraction( + supabase_interaction_id=str(uuid4()), + conversation_summary="Test interaction summary", + platform="Web", + topics=["AI", "Machine Learning"], + embedding=[0.1] * 384 # Example embedding + ) + try: + client.data_object.create( + data_object=interaction.dict(by_alias=True), + class_name="Weaviate_interaction" + ) + print("Interaction inserted successfully.") + except Exception as e: + print(f"Error inserting interaction: {e}") +def get_interaction_by_id(interaction_id: str): + client = get_client() + try: + interaction = client.data_object.get( + id=interaction_id, + class_name="Weaviate_interaction" + ) + if interaction: + return WeaviateInteraction(**interaction) + except Exception as e: + print(f"Error retrieving interaction: {e}") + return None +def update_interaction(interaction_id: str): + client = get_client() + try: + interaction = client.data_object.get( + id=interaction_id, + class_name="Weaviate_interaction" + ) + if interaction: + interaction["conversationSummary"] = "Updated interaction summary" + client.data_object.update( + data_object=interaction, + class_name="Weaviate_interaction" + ) + print("Interaction updated successfully.") + else: + print("Interaction not found.") + except Exception as e: + print(f"Error updating interaction: {e}") +def delete_interaction(interaction_id: str): + client = get_client() + try: + 
deleted = client.data_object.delete( + id=interaction_id, + class_name="Weaviate_interaction" + ) + if deleted: + print("Interaction deleted successfully.") + else: + print("Interaction not found.") + except Exception as e: + print(f"Error deleting interaction: {e}") + +def test_interaction(): + inserted_interaction = insert_interaction() + assert inserted_interaction is not None, "Interaction insertion failed" + get_interaction_by_id(inserted_interaction.supabase_interaction_id) + update_interaction(inserted_interaction.supabase_interaction_id) + delete_interaction(inserted_interaction.supabase_interaction_id) + +def all_tests(): + test_weaviate_client() + test_user_profile() + test_code_chunk() + test_interaction() diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml new file mode 100644 index 00000000..fa53ce8b --- /dev/null +++ b/backend/docker-compose.yml @@ -0,0 +1,28 @@ +--- +services: + weaviate: + command: + - --host + - 0.0.0.0 + - --port + - '8080' + - --scheme + - http + image: cr.weaviate.io/semitechnologies/weaviate:1.31.0 + ports: + - 8080:8080 + - 50051:50051 + volumes: + - weaviate_data:/var/lib/weaviate + restart: on-failure:0 + environment: + QUERY_DEFAULTS_LIMIT: 25 + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + ENABLE_API_BASED_MODULES: 'true' + ENABLE_MODULES: 'text2vec-ollama,generative-ollama' + CLUSTER_HOSTNAME: 'node1' +volumes: + weaviate_data: +... 
+ diff --git a/backend/main.py b/backend/main.py index 9608cdfa..72db8b8a 100644 --- a/backend/main.py +++ b/backend/main.py @@ -7,6 +7,7 @@ from app.core.orchestration.queue_manager import AsyncQueueManager from app.core.orchestration.agent_coordinator import AgentCoordinator from bots.discord.discord_bot import DiscordBot +from app.db.weaviate.weaviate_client import get_client # Configure logging logging.basicConfig( @@ -17,6 +18,8 @@ class DevRAIApplication: """Main application class""" + weaviate_client = get_client() + print(f"Weaviate client initialized: {weaviate_client.is_ready()}") def __init__(self): self.queue_manager = AsyncQueueManager() @@ -71,7 +74,6 @@ async def stop(self): async def main(): """Main entry point""" - # Setup signal handlers for graceful shutdown loop = asyncio.get_running_loop() From f04755a1906cbe86f68e6795609fde0374d92bb1 Mon Sep 17 00:00:00 2001 From: Eli4479 Date: Tue, 10 Jun 2025 10:41:44 +0530 Subject: [PATCH 3/5] refactor: implemented changes based on feedback from Gemini and CodeRabbit --- backend/app/db/supabase/auth.py | 38 +++++---- backend/app/db/weaviate/weaviate_client.py | 3 +- backend/app/model/supabase/models.py | 12 +-- backend/app/scripts/weaviate/__init__.py | 0 .../app/scripts/weaviate/create_schemas.py | 66 +++++++-------- backend/app/scripts/weaviate/populate_db.py | 80 ++++++++++++------- backend/app/tests/test_supabase.py | 6 +- backend/app/tests/test_weaviate.py | 25 ++++++ backend/main.py | 22 +++-- 9 files changed, 151 insertions(+), 101 deletions(-) create mode 100644 backend/app/scripts/weaviate/__init__.py diff --git a/backend/app/db/supabase/auth.py b/backend/app/db/supabase/auth.py index 3ef7bd58..aa3f54e3 100644 --- a/backend/app/db/supabase/auth.py +++ b/backend/app/db/supabase/auth.py @@ -1,24 +1,28 @@ from app.db.supabase.supabase_client import supabase_client +import os +async def login_with_oauth(provider: str): + try: + result = supabase_client.auth.sign_in_with_oauth({ + "provider": provider, 
+ "options": { + "redirect_to": os.getenv("SUPABASE_REDIRECT_URL") + } + }) + return {"url": result.url} + except Exception as e: + raise Exception(f"OAuth login failed for {provider}: {str(e)}") + async def login_with_github(): - result = supabase_client.auth.sign_in_with_oauth({ - "provider": "github", - "options": { - "redirect_to": "http://localhost:3000/home" - } - }) - return {"url": result.url} + return await login_with_oauth("github") async def login_with_discord(): - result = supabase_client.auth.sign_in_with_oauth({ - "provider": "discord", - "options": { - "redirect_to": "http://localhost:3000/home" - } - }) - return {"url": result.url} + return await login_with_oauth("discord") async def logout(access_token: str): - supabase_client.auth.set_session(access_token, refresh_token="") - supabase_client.auth.sign_out() - return {"message": "User logged out successfully"} + try: + supabase_client.auth.set_session(access_token, refresh_token="") + supabase_client.auth.sign_out() + return {"message": "User logged out successfully"} + except Exception as e: + raise Exception(f"Logout failed: {str(e)}") diff --git a/backend/app/db/weaviate/weaviate_client.py b/backend/app/db/weaviate/weaviate_client.py index b1bc7e0f..98bf241e 100644 --- a/backend/app/db/weaviate/weaviate_client.py +++ b/backend/app/db/weaviate/weaviate_client.py @@ -1,7 +1,6 @@ -import os import weaviate -# Connect to Weaviate Cloud +# Connect to local Weaviate instance client = weaviate.connect_to_local() diff --git a/backend/app/model/supabase/models.py b/backend/app/model/supabase/models.py index e5f37893..f87a354d 100644 --- a/backend/app/model/supabase/models.py +++ b/backend/app/model/supabase/models.py @@ -1,4 +1,4 @@ -from pydantic import BaseModel +from pydantic import BaseModel, Field from uuid import UUID from typing import Optional, List from datetime import datetime @@ -52,13 +52,13 @@ class User(BaseModel): is_verified: bool = False verification_token: Optional[str] = None 
verified_at: Optional[datetime] = None - skills: Optional[List[str]] = [] - github_stats: Optional[dict] = {} + skills: Optional[List[str]] = None + github_stats: Optional[dict] = None last_active_discord: Optional[datetime] = None last_active_github: Optional[datetime] = None last_active_slack: Optional[datetime] = None total_interactions_count: int = 0 - preferred_languages: List[str] = [] + preferred_languages: List[str] = Field(default_factory=list) weaviate_user_id: Optional[str] = None class Repository(BaseModel): @@ -99,7 +99,7 @@ class Repository(BaseModel): forks_count: int = 0 open_issues_count: int = 0 language: Optional[str] = None - topics: List[str] = [] + topics: List[str] = Field(default_factory=list) is_indexed: bool = False indexed_at: Optional[datetime] = None indexing_status: Optional[str] = None @@ -178,6 +178,6 @@ class Interaction(BaseModel): interaction_type: str sentiment_score: Optional[float] = None intent_classification: Optional[str] = None - topics_discussed: List[str] = [] + topics_discussed: List[str] = Field(default_factory=list) metadata: Optional[dict] = None weaviate_interaction_id: Optional[str] = None diff --git a/backend/app/scripts/weaviate/__init__.py b/backend/app/scripts/weaviate/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/scripts/weaviate/create_schemas.py b/backend/app/scripts/weaviate/create_schemas.py index 519f3396..1aca808f 100644 --- a/backend/app/scripts/weaviate/create_schemas.py +++ b/backend/app/scripts/weaviate/create_schemas.py @@ -1,56 +1,48 @@ from app.db.weaviate.weaviate_client import get_client import weaviate.classes.config as wc - -def create_user_profile_schema(client): +def create_schema(client, name, properties): client.collections.create( - name="weaviate_user_profile", - properties=[ - wc.Property(name="supabaseUserId", data_type=wc.DataType.TEXT), - wc.Property(name="profileSummary", data_type=wc.DataType.TEXT), - wc.Property(name="primaryLanguages", 
data_type=wc.DataType.TEXT_ARRAY), - wc.Property(name="expertiseAreas", data_type=wc.DataType.TEXT_ARRAY), - ], + name=name, + properties=properties, vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), generative_config=wc.Configure.Generative.openai() ) - print("Created: weaviate_user_profile") + print(f"Created: {name}") +def create_user_profile_schema(client): + properties = [ + wc.Property(name="supabaseUserId", data_type=wc.DataType.TEXT), + wc.Property(name="profileSummary", data_type=wc.DataType.TEXT), + wc.Property(name="primaryLanguages", data_type=wc.DataType.TEXT_ARRAY), + wc.Property(name="expertiseAreas", data_type=wc.DataType.TEXT_ARRAY), + ] + create_schema(client, "weaviate_user_profile", properties) def create_code_chunk_schema(client): - client.collections.create( - name="weaviate_code_chunk", - properties=[ - wc.Property(name="supabaseChunkId", data_type=wc.DataType.TEXT), - wc.Property(name="codeContent", data_type=wc.DataType.TEXT), - wc.Property(name="language", data_type=wc.DataType.TEXT), - wc.Property(name="functionNames", data_type=wc.DataType.TEXT_ARRAY), - ], - vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), - generative_config=wc.Configure.Generative.openai() - ) - print("Created: weaviate_code_chunk") + properties = [ + wc.Property(name="supabaseChunkId", data_type=wc.DataType.TEXT), + wc.Property(name="codeContent", data_type=wc.DataType.TEXT), + wc.Property(name="language", data_type=wc.DataType.TEXT), + wc.Property(name="functionNames", data_type=wc.DataType.TEXT_ARRAY), + ] + create_schema(client, "weaviate_code_chunk", properties) def create_interaction_schema(client): - client.collections.create( - name="weaviate_interaction", - properties=[ - wc.Property(name="supabaseInteractionId", data_type=wc.DataType.TEXT), - wc.Property(name="conversationSummary", data_type=wc.DataType.TEXT), - wc.Property(name="platform", data_type=wc.DataType.TEXT), - wc.Property(name="topics", data_type=wc.DataType.TEXT_ARRAY), - 
], - vectorizer_config=wc.Configure.Vectorizer.text2vec_cohere(), - generative_config=wc.Configure.Generative.openai() - ) - print("Created: weaviate_interaction") + properties = [ + wc.Property(name="supabaseInteractionId", data_type=wc.DataType.TEXT), + wc.Property(name="conversationSummary", data_type=wc.DataType.TEXT), + wc.Property(name="platform", data_type=wc.DataType.TEXT), + wc.Property(name="topics", data_type=wc.DataType.TEXT_ARRAY), + ] + create_schema(client, "weaviate_interaction", properties) def create_all_schemas(): client = get_client() existing_collections = client.collections.list_all() - if "Weaviate_code_chunk" not in existing_collections: + if "weaviate_code_chunk" not in existing_collections: create_code_chunk_schema(client) - if "Weaviate_interaction" not in existing_collections: + if "weaviate_interaction" not in existing_collections: create_interaction_schema(client) - if "Weaviate_user_profile" not in existing_collections: + if "weaviate_user_profile" not in existing_collections: create_user_profile_schema(client) print("✅ All schemas ensured.") diff --git a/backend/app/scripts/weaviate/populate_db.py b/backend/app/scripts/weaviate/populate_db.py index 9e89d9ab..8189f510 100644 --- a/backend/app/scripts/weaviate/populate_db.py +++ b/backend/app/scripts/weaviate/populate_db.py @@ -106,54 +106,74 @@ async def populate_Weaviate_code_chunk(client): async def populate_Weaviate_interaction(client): interactions = [ { - "userId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", - "message": "Hi, can you explain the code chunk with ID 095a5ff0-545a-48ff-83ad-2ea3566f5674?", - "timestamp": "2023-01-01T12:00:00Z" + "supabaseInteractionId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", + "conversationSummary": "User asked about C++ code chunk.", + "platform": "web", + "topics": ["C++", "Code Chunk"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector }, { - "userId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", - "message": "What does the function in Go do?", - 
"timestamp": "2023-01-01T12:01:00Z" + "supabaseInteractionId": "b6bbdb5a-deb1-43c7-bf99-b9f88e4af1ed", + "conversationSummary": "User inquired about Go function.", + "platform": "mobile", + "topics": ["Go", "Function"], + "embedding": [0.4, 0.5, 0.6] # Example embedding vector }, { - "userId": "1f787967-316c-4232-b251-64bcf8e3251b", - "message": "Can you summarize the module in C++?", - "timestamp": "2023-01-01T12:02:00Z" + "supabaseInteractionId": "1f787967-316c-4232-b251-64bcf8e3251b", + "conversationSummary": "User asked for a summary of the C++ module.", + "platform": "web", + "topics": ["C++", "Module"], + "embedding": [0.7, 0.8, 0.9] # Example embedding vector }, { - "userId": "233530b2-d89f-416d-a73c-40b4ebb33c50", - "message": "What is the purpose of the import in this C++ chunk?", - "timestamp": "2023-01-01T12:03:00Z" + "supabaseInteractionId": "233530b2-d89f-416d-a73c-40b4ebb33c50", + "conversationSummary": "User inquired about the import statement in C++.", + "platform": "web", + "topics": ["C++", "Import"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector }, { - "userId": "b3103899-d683-422a-9072-2ad26050d8f5", - "message": "Is this function in C++ recursive?", - "timestamp": "2023-01-01T12:04:00Z" + "supabaseInteractionId": "b3103899-d683-422a-9072-2ad26050d8f5", + "conversationSummary": "User asked if this function in C++ is recursive.", + "platform": "web", + "topics": ["C++", "Function", "Recursion"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector }, { - "userId": "28ea68b7-1f26-472c-b568-319e1d41732b", - "message": "What does this module handle in the codebase?", - "timestamp": "2023-01-01T12:05:00Z" + "supabaseInteractionId": "28ea68b7-1f26-472c-b568-319e1d41732b", + "conversationSummary": "User inquired about what this module handles in the codebase.", + "platform": "web", + "topics": ["C++", "Module"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector }, { - "userId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", - 
"message": "Can you explain the logic in this C++ module?", - "timestamp": "2023-01-01T12:06:00Z" + "supabaseInteractionId": "1cb8ccc0-db27-49c5-8dff-8d535d5a37d3", + "conversationSummary": "User asked about the logic in this C++ module.", + "platform": "web", + "topics": ["C++", "Module"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector }, { - "userId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", - "message": "What does the import statement in Java do?", - "timestamp": "2023-01-01T12:07:00Z" + "supabaseInteractionId": "9edaae8a-3d6c-47c1-8777-ff0b0002b85a", + "conversationSummary": "User inquired about the import statement in Java.", + "platform": "web", + "topics": ["Java", "Import"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector }, { - "userId": "d1927881-d0e7-4df3-a97a-18521db08ff4", - "message": "Is this a comment or code in Rust?", - "timestamp": "2023-01-01T12:08:00Z" + "supabaseInteractionId": "d1927881-d0e7-4df3-a97a-18521db08ff4", + "conversationSummary": "User asked if this is a comment or code in Rust.", + "platform": "web", + "topics": ["Rust", "Comment", "Code"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector }, { - "userId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", - "message": "What is the output of this JavaScript function?", - "timestamp": "2023-01-01T12:09:00Z" + "supabaseInteractionId": "fdda052a-ca4f-40b5-ae99-a711e2161d85", + "conversationSummary": "User inquired about the output of this JavaScript function.", + "platform": "web", + "topics": ["JavaScript", "Function", "Output"], + "embedding": [0.1, 0.2, 0.3] # Example embedding vector } ] try: diff --git a/backend/app/tests/test_supabase.py b/backend/app/tests/test_supabase.py index 74817204..f2dc257b 100644 --- a/backend/app/tests/test_supabase.py +++ b/backend/app/tests/test_supabase.py @@ -112,7 +112,7 @@ def test_user(): print(f"Deleted User: {deleted_user}") -def insert_Intercation(interaction: Interaction): +def insert_interaction(interaction: Interaction): 
interaction_dict = interaction.dict() for key in ['created_at', 'updated_at']: if interaction_dict.get(key): @@ -157,7 +157,7 @@ def test_interaction(): platform_specific_id="gh-interaction-5678", weaviate_interaction_id="weaviate-interaction-1234" ) - inserted_interaction = insert_Intercation(interaction) + inserted_interaction = insert_interaction(interaction) print(f"Inserted Interaction: {inserted_interaction}") get_interaction = read_interaction_by_id(inserted_interaction['id']) print(f"Fetched Interaction: {get_interaction}") @@ -168,7 +168,7 @@ def test_interaction(): def insert_code_chunk(code_chunk: CodeChunk): code_chunk_dict = code_chunk.dict() - for key in ['created_at', 'lines_start', 'lines_end']: + for key in ['created_at']: if code_chunk_dict.get(key): code_chunk_dict[key] = code_chunk_dict[key].isoformat() diff --git a/backend/app/tests/test_weaviate.py b/backend/app/tests/test_weaviate.py index 784f3114..796f225e 100644 --- a/backend/app/tests/test_weaviate.py +++ b/backend/app/tests/test_weaviate.py @@ -32,8 +32,10 @@ def insert_user_profile(): class_name="Weaviate_user_profile" ) print("User profile inserted successfully.") + return user_profile except Exception as e: print(f"Error inserting user profile: {e}") + return None def get_user_profile_by_id(user_id: str): @@ -62,10 +64,13 @@ def update_user_profile(user_id: str): user_profile[0]["profileSummary"] = "Updated profile summary" questions.update(user_profile[0]) print("User profile updated successfully.") + return user_profile[0] else: print("User profile not found.") + return None except Exception as e: print(f"Error updating user profile: {e}") + return None def delete_user_profile(user_id: str): questions = get_client().collections.get("Weaviate_user_profile") @@ -73,10 +78,13 @@ def delete_user_profile(user_id: str): deleted = questions.data.delete_by_id(user_id) if deleted: print("User profile deleted successfully.") + return True else: print("User profile not found.") + return False 
except Exception as e: print(f"Error deleting user profile: {e}") + return False def test_user_profile(): inserted_user = insert_user_profile() @@ -101,8 +109,10 @@ def insert_code_chunk(): class_name="Weaviate_code_chunk" ) print("Code chunk inserted successfully.") + return code_chunk except Exception as e: print(f"Error inserting code chunk: {e}") + return None def get_code_chunk_by_id(code_chunk_id: str): client = get_client() try: @@ -129,10 +139,13 @@ def update_code_chunk(code_chunk_id: str): class_name="Weaviate_code_chunk" ) print("Code chunk updated successfully.") + return WeaviateCodeChunk(**code_chunk) else: print("Code chunk not found.") + return None except Exception as e: print(f"Error updating code chunk: {e}") + return None def delete_code_chunk(code_chunk_id: str): client = get_client() try: @@ -142,10 +155,13 @@ def delete_code_chunk(code_chunk_id: str): ) if deleted: print("Code chunk deleted successfully.") + return True else: print("Code chunk not found.") + return False except Exception as e: print(f"Error deleting code chunk: {e}") + return False def test_code_chunk(): inserted_chunk = insert_code_chunk() assert inserted_chunk is not None, "Code chunk insertion failed" @@ -167,8 +183,11 @@ def insert_interaction(): class_name="Weaviate_interaction" ) print("Interaction inserted successfully.") + return interaction except Exception as e: print(f"Error inserting interaction: {e}") + return None + def get_interaction_by_id(interaction_id: str): client = get_client() try: @@ -195,10 +214,13 @@ def update_interaction(interaction_id: str): class_name="Weaviate_interaction" ) print("Interaction updated successfully.") + return WeaviateInteraction(**interaction) else: print("Interaction not found.") + return None except Exception as e: print(f"Error updating interaction: {e}") + return None def delete_interaction(interaction_id: str): client = get_client() try: @@ -208,10 +230,13 @@ def delete_interaction(interaction_id: str): ) if deleted: 
print("Interaction deleted successfully.") + return True else: print("Interaction not found.") + return False except Exception as e: print(f"Error deleting interaction: {e}") + return False def test_interaction(): inserted_interaction = insert_interaction() diff --git a/backend/main.py b/backend/main.py index 72db8b8a..277bc793 100644 --- a/backend/main.py +++ b/backend/main.py @@ -18,10 +18,14 @@ class DevRAIApplication: """Main application class""" - weaviate_client = get_client() - print(f"Weaviate client initialized: {weaviate_client.is_ready()}") def __init__(self): + try: + self.weaviate_client = get_client() + logger.info(f"Weaviate client initialized: {self.weaviate_client.is_ready()}") + except Exception as e: + logger.error(f"Error initializing Weaviate client: {str(e)}") + self.weaviate_client = None self.queue_manager = AsyncQueueManager() self.agent_coordinator = AgentCoordinator(self.queue_manager) self.discord_bot = DiscordBot(self.queue_manager) @@ -55,18 +59,24 @@ async def stop(self): logger.info("Stopping Devr.AI Application...") self.running = False + # Close Weaviate client + try: + if hasattr(self, 'weaviate_client') and self.weaviate_client is not None: + self.weaviate_client.close() + logger.info("Weaviate client closed") + except Exception as e: + logger.error(f"Error closing Weaviate client: {str(e)}") - # Stop Discord bot + # Stop Discord bot try: if not self.discord_bot.is_closed(): await self.discord_bot.close() except Exception as e: logger.error(f"Error closing Discord bot: {str(e)}") - - # Stop queue manager + # Stop queue manager await self.queue_manager.stop() - logger.info("Devr.AI Application stopped") + logger.info("Devr.AI Application stopped") # Global application instance From a13ca6be72309843562ba15bbf5680d17cf7e0e9 Mon Sep 17 00:00:00 2001 From: Eli4479 Date: Wed, 11 Jun 2025 08:38:59 +0530 Subject: [PATCH 4/5] feat: update OAuth login redirect URL and refactor Weaviate population functions to synchronous --- 
backend/app/db/supabase/auth.py | 6 +++--- backend/app/scripts/weaviate/populate_db.py | 14 ++++++------- {backend/app/tests => tests}/test_supabase.py | 20 +++++++++---------- {backend/app/tests => tests}/test_weaviate.py | 10 +++++----- 4 files changed, 25 insertions(+), 25 deletions(-) rename {backend/app/tests => tests}/test_supabase.py (96%) rename {backend/app/tests => tests}/test_weaviate.py (96%) diff --git a/backend/app/db/supabase/auth.py b/backend/app/db/supabase/auth.py index aa3f54e3..c421373b 100644 --- a/backend/app/db/supabase/auth.py +++ b/backend/app/db/supabase/auth.py @@ -5,7 +5,7 @@ async def login_with_oauth(provider: str): result = supabase_client.auth.sign_in_with_oauth({ "provider": provider, "options": { - "redirect_to": os.getenv("SUPABASE_REDIRECT_URL") + "redirect_to": os.getenv("SUPABASE_REDIRECT_URL", "http://localhost:3000/home") } }) return {"url": result.url} @@ -14,10 +14,10 @@ async def login_with_oauth(provider: str): async def login_with_github(): - return await login_with_oauth("github") + return login_with_oauth("github") async def login_with_discord(): - return await login_with_oauth("discord") + return login_with_oauth("discord") async def logout(access_token: str): try: diff --git a/backend/app/scripts/weaviate/populate_db.py b/backend/app/scripts/weaviate/populate_db.py index 8189f510..f1051730 100644 --- a/backend/app/scripts/weaviate/populate_db.py +++ b/backend/app/scripts/weaviate/populate_db.py @@ -1,7 +1,7 @@ from app.db.weaviate.weaviate_client import get_client -async def populate_Weaviate_code_chunk(client): +def populate_Weaviate_code_chunk(client): code_chunks = [ { "supabaseChunkId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", @@ -103,7 +103,7 @@ async def populate_Weaviate_code_chunk(client): print("Populated: weaviate_code_chunk with sample data.") except Exception as e: print(f"Error populating weaviate_code_chunk: {e}") -async def populate_Weaviate_interaction(client): +def 
populate_Weaviate_interaction(client): interactions = [ { "supabaseInteractionId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", @@ -186,7 +186,7 @@ async def populate_Weaviate_interaction(client): print("Populated: weaviate_interaction with sample data.") except Exception as e: print(f"Error populating weaviate_interaction: {e}") -async def populate_Weaviate_user_profile(client): +def populate_Weaviate_user_profile(client): user_profiles = [ { "supabaseUserId": "095a5ff0-545a-48ff-83ad-2ea3566f5674", @@ -259,10 +259,10 @@ async def populate_Weaviate_user_profile(client): print("Populated: weaviate_user_profile with sample data.") except Exception as e: print(f"Error populating weaviate_user_profile: {e}") -async def populate_all_collections(): +def populate_all_collections(): client = get_client() print("Populating Weaviate collections with sample data...") - await populate_Weaviate_code_chunk(client) - await populate_Weaviate_interaction(client) - await populate_Weaviate_user_profile(client) + populate_Weaviate_code_chunk(client) + populate_Weaviate_interaction(client) + populate_Weaviate_user_profile(client) print("✅ All collections populated with sample data.") diff --git a/backend/app/tests/test_supabase.py b/tests/test_supabase.py similarity index 96% rename from backend/app/tests/test_supabase.py rename to tests/test_supabase.py index f2dc257b..2cf7f1b3 100644 --- a/backend/app/tests/test_supabase.py +++ b/tests/test_supabase.py @@ -1,6 +1,6 @@ -from app.model.supabase.models import User, Interaction, CodeChunk, Repository +from ..backend.app.model.supabase.models import User, Interaction, CodeChunk, Repository from uuid import uuid4 -from app.db.supabase.supabase_client import get_supabase_client +from ..backend.app.db.supabase.supabase_client import get_supabase_client from datetime import datetime # Your User model import client = get_supabase_client() @@ -23,7 +23,7 @@ def insert_user_into_supabase(user: User): def test_create_and_save_user(): user = User( - 
id=uuid4(), + id=str(uuid4()), created_at=datetime.utcnow(), updated_at=datetime.utcnow(), discord_id="1234567890", @@ -76,7 +76,7 @@ def delete_user(user_id: str): # Test the user creation and saving functionality def test_user(): user = User( - id=uuid4(), + id=str(uuid4()), created_at=datetime.utcnow(), updated_at=datetime.utcnow(), discord_id="1234567890", @@ -145,11 +145,11 @@ def delete_interaction(interaction_id: str): def test_interaction(): interaction = Interaction( - id=uuid4(), + id=str(uuid4()), created_at=datetime.utcnow(), updated_at=datetime.utcnow(), - user_id=uuid4(), - repository_id=uuid4(), + user_id=str(uuid4()), + repository_id=str(uuid4()), interaction_type="comment", content="Hello, this is a test interaction.", metadata={"source": "test_script"}, @@ -196,8 +196,8 @@ def delete_code_chunk(code_chunk_id: str): return response.data[0] def test_code_chunk(): code_chunk = CodeChunk( - id=uuid4(), - repository_id=uuid4(), + id=str(uuid4()), + repository_id=str(uuid4()), created_at=datetime.utcnow(), file_path="/path/to/file.py", file_name="file.py", @@ -250,7 +250,7 @@ def delete_repository(repository_id: str): return response.data[0] def test_repository(): repository = Repository( - id=uuid4(), + id=str(uuid4()), created_at=datetime.utcnow(), updated_at=datetime.utcnow(), github_id=123456789, diff --git a/backend/app/tests/test_weaviate.py b/tests/test_weaviate.py similarity index 96% rename from backend/app/tests/test_weaviate.py rename to tests/test_weaviate.py index 796f225e..a6452ba7 100644 --- a/backend/app/tests/test_weaviate.py +++ b/tests/test_weaviate.py @@ -15,7 +15,7 @@ def test_weaviate_client(): ready = client.is_ready() assert ready, "Weaviate client is not ready" except Exception as e: - assert False, f"Weaviate client connection failed: {e}" + raise AssertionError(f"Weaviate client connection failed: {e}") def insert_user_profile(): user_profile = WeaviateUserProfile( @@ -29,7 +29,7 @@ def insert_user_profile(): try: 
client.data_object.create( data_object=user_profile.dict(by_alias=True), - class_name="Weaviate_user_profile" + class_name="weaviate_user_profile" ) print("User profile inserted successfully.") return user_profile @@ -41,7 +41,7 @@ def insert_user_profile(): def get_user_profile_by_id(user_id: str): client = get_client() try: - questions = client.collections.get("Weaviate_user_profile") + questions = client.collections.get("weaviate_user_profile") response = questions.query.bm25( query=user_id, properties=["supabaseUserId", "profileSummary", "primaryLanguages", "expertiseAreas"] @@ -54,7 +54,7 @@ def get_user_profile_by_id(user_id: str): return None def update_user_profile(user_id: str): - questions = get_client().collections.get("Weaviate_user_profile") + questions = get_client().collections.get("weaviate_user_profile") try: user_profile = questions.query.bm25( query=user_id, @@ -73,7 +73,7 @@ def update_user_profile(user_id: str): return None def delete_user_profile(user_id: str): - questions = get_client().collections.get("Weaviate_user_profile") + questions = get_client().collections.get("weaviate_user_profile") try: deleted = questions.data.delete_by_id(user_id) if deleted: From 9b0cde78be260e8adca5d0b4186e6f7281bc49ed Mon Sep 17 00:00:00 2001 From: Eli4479 Date: Wed, 11 Jun 2025 09:24:13 +0530 Subject: [PATCH 5/5] refactor: change async functions to synchronous for OAuth and user management --- backend/app/db/supabase/auth.py | 8 ++++---- tests/test_weaviate.py | 20 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/backend/app/db/supabase/auth.py b/backend/app/db/supabase/auth.py index c421373b..ac738246 100644 --- a/backend/app/db/supabase/auth.py +++ b/backend/app/db/supabase/auth.py @@ -1,6 +1,6 @@ from app.db.supabase.supabase_client import supabase_client import os -async def login_with_oauth(provider: str): +def login_with_oauth(provider: str): try: result = supabase_client.auth.sign_in_with_oauth({ "provider": 
provider, @@ -13,13 +13,13 @@ async def login_with_oauth(provider: str): raise Exception(f"OAuth login failed for {provider}: {str(e)}") -async def login_with_github(): +def login_with_github(): return login_with_oauth("github") -async def login_with_discord(): +def login_with_discord(): return login_with_oauth("discord") -async def logout(access_token: str): +def logout(access_token: str): try: supabase_client.auth.set_session(access_token, refresh_token="") supabase_client.auth.sign_out() diff --git a/tests/test_weaviate.py b/tests/test_weaviate.py index a6452ba7..ff8fd863 100644 --- a/tests/test_weaviate.py +++ b/tests/test_weaviate.py @@ -106,7 +106,7 @@ def insert_code_chunk(): try: client.data_object.create( data_object=code_chunk.dict(by_alias=True), - class_name="Weaviate_code_chunk" + class_name="weaviate_code_chunk" ) print("Code chunk inserted successfully.") return code_chunk @@ -118,7 +118,7 @@ def get_code_chunk_by_id(code_chunk_id: str): try: code_chunk = client.data_object.get( id=code_chunk_id, - class_name="Weaviate_code_chunk" + class_name="weaviate_code_chunk" ) if code_chunk: return WeaviateCodeChunk(**code_chunk) @@ -130,13 +130,13 @@ def update_code_chunk(code_chunk_id: str): try: code_chunk = client.data_object.get( id=code_chunk_id, - class_name="Weaviate_code_chunk" + class_name="weaviate_code_chunk" ) if code_chunk: code_chunk["codeContent"] = "Updated code content" client.data_object.update( data_object=code_chunk, - class_name="Weaviate_code_chunk" + class_name="weaviate_code_chunk" ) print("Code chunk updated successfully.") return WeaviateCodeChunk(**code_chunk) @@ -151,7 +151,7 @@ def delete_code_chunk(code_chunk_id: str): try: deleted = client.data_object.delete( id=code_chunk_id, - class_name="Weaviate_code_chunk" + class_name="weaviate_code_chunk" ) if deleted: print("Code chunk deleted successfully.") @@ -180,7 +180,7 @@ def insert_interaction(): try: client.data_object.create( data_object=interaction.dict(by_alias=True), - 
class_name="Weaviate_interaction" + class_name="weaviate_interaction" ) print("Interaction inserted successfully.") return interaction @@ -193,7 +193,7 @@ def get_interaction_by_id(interaction_id: str): try: interaction = client.data_object.get( id=interaction_id, - class_name="Weaviate_interaction" + class_name="weaviate_interaction" ) if interaction: return WeaviateInteraction(**interaction) @@ -205,13 +205,13 @@ def update_interaction(interaction_id: str): try: interaction = client.data_object.get( id=interaction_id, - class_name="Weaviate_interaction" + class_name="weaviate_interaction" ) if interaction: interaction["conversationSummary"] = "Updated interaction summary" client.data_object.update( data_object=interaction, - class_name="Weaviate_interaction" + class_name="weaviate_interaction" ) print("Interaction updated successfully.") return WeaviateInteraction(**interaction) @@ -226,7 +226,7 @@ def delete_interaction(interaction_id: str): try: deleted = client.data_object.delete( id=interaction_id, - class_name="Weaviate_interaction" + class_name="weaviate_interaction" ) if deleted: print("Interaction deleted successfully.")