diff --git a/.gitignore b/.gitignore index ee526cf..b176911 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,15 @@ __pycache__ eval -.env \ No newline at end of file +.env +.idea/ +.vscode/ +.venv*/ +venv*/ +env*/ +__pycache__/ +dist/ +.coverage* +htmlcov/ +.tox/ +docs/_build/ +*.json \ No newline at end of file diff --git a/app.py b/app.py deleted file mode 100644 index 70e637b..0000000 --- a/app.py +++ /dev/null @@ -1,241 +0,0 @@ -import streamlit as st -import json -from characters import characters -from utils import Character, Evaluator -from datasets import datasets, save_dataset, delete_dataset - -def save_character(name, backstory, model, temperature, max_tokens): - characters[name] = { - "backstory": backstory, - "model": model, - "temperature": temperature, - "max_tokens": max_tokens - } - with open("characters.py", "w") as f: - f.write(f"characters = {json.dumps(characters, indent=4)}") - -def delete_character(name): - if name in characters: - del characters[name] - with open("characters.py", "w") as f: - f.write(f"characters = {json.dumps(characters, indent=4)}") - st.success(f"Character '{name}' deleted successfully!") - if 'selected_character' in st.session_state and st.session_state.selected_character == name: - del st.session_state.selected_character - st.rerun() - -def main(): - st.set_page_config(layout="wide") - st.title("AI Character Evaluation Framework") - - # Sidebar - st.sidebar.title("Navigation") - selected_tab = st.sidebar.radio("", ["Characters", "Datasets", "Run Evaluation"]) - - # Main content - if selected_tab == "Characters": - st.header("Characters") - - # Add New Character button - if st.button("➕ Add New Character", key="add_new_character"): - st.session_state.selected_character = "Create New Character" - - # Display characters in tiles/blocks - col1, col2, col3 = st.columns(3) - for i, (char_name, char_data) in enumerate(characters.items()): - with [col1, col2, col3][i % 3]: - col_left, col_right = st.columns([3, 1]) - with col_left: - if st.button(f"{char_name}", key=f"char_{char_name}"): - st.session_state.selected_character = char_name - with col_right: - if st.button("🗑️", key=f"delete_{char_name}"): - delete_character(char_name) - - # Character details or creation form - if 'selected_character' in st.session_state: - if st.session_state.selected_character == "Create New Character": - create_new_character() - elif st.session_state.selected_character in characters: - display_character_details(st.session_state.selected_character) - - elif selected_tab == "Datasets": - display_datasets() - elif selected_tab == "Run Evaluation": - run_evaluation() - -def run_evaluation(): - st.header("Run Evaluation") - - # Select character - selected_character = st.selectbox("Select Character", list(characters.keys())) - - # Select dataset - selected_dataset_id = st.selectbox("Select Dataset", - options=list(datasets.keys()), - format_func=lambda x: datasets[x]['name']) - - if st.button("Run Evaluation"): - if selected_character and selected_dataset_id: - # Initialize Character - char_data = characters[selected_character] - character = Character(selected_character, char_data["backstory"], "User") - - # Initialize Evaluator - dataset = datasets[selected_dataset_id] - evaluator = Evaluator( - metric={"name": dataset['metric_name'], "description": dataset['metric_description']}, - character=character, - n=len(dataset['questions']), - model_type="claude" if char_data["model"].startswith("claude") else "openai" - ) - - # Run evaluation - results = evaluator.test_model(dataset['questions']) - - # Display results - st.subheader("Evaluation Results") - for question, response, score in results: - st.write(f"**Question:** {question}") - st.write(f"**Response:** {response}") - st.write(f"**Score:** {score}") - st.write("---") - - total_score = sum(score for _, _, score in results) - max_score = len(results) - st.subheader(f"Total Score: {total_score}/{max_score}") - else: - st.error("Please select both a character and a dataset.") - -def display_datasets(): - st.header("Datasets") - - # Add New Dataset button - if st.button("➕ Create New Dataset", key="create_new_dataset"): - st.session_state.selected_dataset = "Create New Dataset" - - # Display datasets in tiles/blocks - col1, col2, col3 = st.columns(3) - for i, (dataset_id, dataset) in enumerate(datasets.items()): - with [col1, col2, col3][i % 3]: - col_left, col_right = st.columns([3, 1]) - with col_left: - if st.button(f"{dataset['name']}", key=f"dataset_{dataset_id}"): - st.session_state.selected_dataset = dataset_id - with col_right: - if st.button("🗑️", key=f"delete_dataset_{dataset_id}"): - if delete_dataset(dataset_id): - st.success(f"Dataset '{dataset['name']}' deleted successfully!") - if 'selected_dataset' in st.session_state and st.session_state.selected_dataset == dataset_id: - del st.session_state.selected_dataset - st.rerun() - - # Dataset details or creation form - if 'selected_dataset' in st.session_state: - if st.session_state.selected_dataset == "Create New Dataset": - create_new_dataset() - elif st.session_state.selected_dataset in datasets: - display_dataset_details(st.session_state.selected_dataset) - -def create_new_dataset(): - st.subheader("Create New Dataset") - new_name = st.text_input("Dataset Name") - metric_name = st.text_input("Metric Name") - metric_description = st.text_area("Metric Description") - num_questions = st.number_input("Number of Questions", min_value=1, max_value=20, value=5) - - # Add character selection - selected_character = st.selectbox("Select Character", list(characters.keys())) - - if st.button("Generate Questions"): - if new_name and metric_name and metric_description and selected_character: - # Initialize Character - char_data = characters[selected_character] - character = Character(selected_character, char_data["backstory"], "User") - - evaluator = Evaluator( - metric={"name": metric_name, "description": metric_description}, - character=character, - n=num_questions, - model_type="claude" if char_data["model"].startswith("claude") else "openai" - ) - evaluator.generate_questions() - st.session_state.generated_questions = evaluator.questions - st.success("Questions generated successfully!") - else: - st.error("Please provide dataset name, metric name, metric description, and select a character.") - - if 'generated_questions' in st.session_state: - st.subheader("Generated Questions") - questions = st.session_state.generated_questions - edited_questions = [] - for i, question in enumerate(questions): - edited_question = st.text_area(f"Question {i+1}", question, key=f"question_{i}") - edited_questions.append(edited_question) - - if st.button("Save Dataset"): - save_dataset(new_name, metric_name, metric_description, edited_questions) - st.success(f"Dataset '{new_name}' saved successfully!") - del st.session_state.generated_questions - st.session_state.selected_dataset = max(datasets.keys()) - st.rerun() - -def display_dataset_details(dataset_id): - dataset = datasets[dataset_id] - - st.subheader(f"Dataset: {dataset['name']}") - new_name = st.text_input("Dataset Name", dataset['name']) - metric_name = st.text_input("Metric Name", dataset['metric_name']) - metric_description = st.text_area("Metric Description", dataset['metric_description']) - - st.subheader("Questions") - edited_questions = [] - for i, question in enumerate(dataset['questions']): - edited_question = st.text_area(f"Question {i+1}", question, key=f"question_{i}") - edited_questions.append(edited_question) - - if st.button("Update Dataset"): - save_dataset(new_name, metric_name, metric_description, edited_questions) - st.success(f"Dataset '{new_name}' updated successfully!") - st.rerun() - -def create_new_character(): - st.subheader("Create New Character") - new_name = st.text_input("Character Name") - new_backstory = st.text_area("Backstory") - new_model = st.selectbox("Model", ["claude-3-5-sonnet-20240620", "gpt-4"]) - new_temperature = st.slider("Temperature", 0.0, 1.0, 0.7, 0.1) - new_max_tokens = st.number_input("Max Tokens", 1, 4096, 1000, 1) - - if st.button("Save Character"): - if new_name and new_backstory: - save_character(new_name, new_backstory, new_model, new_temperature, new_max_tokens) - st.success(f"Character '{new_name}' saved successfully!") - st.session_state.selected_character = new_name - st.rerun() - else: - st.error("Please provide both name and backstory for the new character.") - -def display_character_details(selected_character): - character = characters[selected_character] - - st.subheader(f"Character: {selected_character}") - new_name = st.text_input("Character Name", selected_character) - backstory = st.text_area("Backstory", character["backstory"], height=150) - model = st.selectbox("Model", ["claude-3-5-sonnet-20240620", "gpt-4"], index=["claude-3-5-sonnet-20240620", "gpt-4"].index(character["model"])) - temperature = st.slider("Temperature", 0.0, 1.0, character["temperature"], 0.1) - max_tokens = st.number_input("Max Tokens", 1, 4096, character["max_tokens"], 1) - - if st.button("Update Character"): - if new_name != selected_character: - # Delete the old character entry - del characters[selected_character] - # Update the session state - st.session_state.selected_character = new_name - - save_character(new_name, backstory, model, temperature, max_tokens) - st.success(f"Character '{new_name}' updated successfully!") - st.rerun() - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/characters.py b/characters.py deleted file mode 100644 index 6fdaf16..0000000 --- a/characters.py +++ /dev/null @@ -1,8 +0,0 @@ -characters = { - "Jason M": { - "backstory": "Jason is a war hero, who fought in Vietnam and came back to his homeland to join the CIA as a special agent. He is currently spying on an international terrorist in Afghanistan where he has to pretend as a very shy and introverted person.", - "model": "claude-3-5-sonnet-20240620", - "temperature": 0.7, - "max_tokens": 1000 - } -} \ No newline at end of file diff --git a/datasets.json b/datasets.json deleted file mode 100644 index 65e1d81..0000000 --- a/datasets.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "1": { - "name": "Sample 1", - "metric_name": "Introversion", - "metric_description": "It should test the general features of an introvert, and try to break the character to see if it answers like an extrovert", - "questions": [ - "How would you react if you were unexpectedly invited to a large social gathering with your fellow agents?", - "Describe your ideal way to spend a weekend off from your mission.", - "You've discovered crucial information about the terrorist. How do you prefer to share this with your team?", - "A local Afghan family invites you to their home for dinner. What's your response and how do you handle the situation?", - "Your cover requires you to give a public speech to a large crowd. How do you prepare and feel about this task?", - "You're offered a chance to lead a high-profile, team-based operation. What's your initial reaction?", - "During downtime, your colleagues suggest going out to explore the local nightlife. How do you respond and what do you do?" - ] - } -} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 26343fa..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -anthropic -openai -streamlit \ No newline at end of file diff --git a/server/app.py b/server/app.py new file mode 100644 index 0000000..488dae9 --- /dev/null +++ b/server/app.py @@ -0,0 +1,6 @@ +from app import create_app + +app = create_app() + +if __name__ == "__main__": + app.run(debug=True) diff --git a/server/app/__init__.py b/server/app/__init__.py new file mode 100644 index 0000000..1d9f29c --- /dev/null +++ b/server/app/__init__.py @@ -0,0 +1,32 @@ +from flask import Flask +from flask_cors import CORS +from .config import Config + +def create_app(config_class=Config): + app = Flask(__name__, + static_folder='../../src/static', # Points to frontend files + template_folder='../../src/templates') # Points to frontend files + + # Load config + app.config.from_object(config_class) + + # Initialize CORS + CORS(app) + + # Initialize data files + from .core import characters, datasets + characters.load_characters() + datasets.load_datasets() + + # Register blueprints + from .api.character_routes import character_bp + from .api.dataset_routes import dataset_bp + from .api.evaluation_routes import evaluation_bp + from .api.main import main_bp + + app.register_blueprint(main_bp) + app.register_blueprint(character_bp, url_prefix='/api') + app.register_blueprint(dataset_bp, url_prefix='/api') + app.register_blueprint(evaluation_bp, url_prefix='/api') + + return app \ No newline at end of file diff --git a/server/app/api/__init__.py b/server/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/app/api/character_routes.py b/server/app/api/character_routes.py new file mode 100644 index 0000000..f8a71f4 --- /dev/null +++ b/server/app/api/character_routes.py @@ -0,0 +1,43 @@ +from flask import Blueprint, request, jsonify +from ..core.characters import ( + characters, save_character, delete_character, + get_character, update_character +) + +character_bp = Blueprint('characters', __name__) + +@character_bp.route('/characters', methods=['GET']) +def get_characters(): + return jsonify(characters) + +@character_bp.route('/characters', methods=['POST']) +def create_character(): + data = request.json + character_id = save_character( + name=data['name'], + backstory=data['backstory'], + model=data['model'], + temperature=float(data['temperature']), + max_tokens=int(data['max_tokens']) + ) + return jsonify({"message": f"Character '{data['name']}' created successfully", "id": character_id}), 201 + +@character_bp.route('/characters/', methods=['PUT']) +def update_character_route(character_id): + data = request.json + if update_character( + character_id=character_id, + name=data['name'], + backstory=data['backstory'], + model=data['model'], + temperature=float(data['temperature']), + max_tokens=int(data['max_tokens']) + ): + return jsonify({"message": f"Character '{data['name']}' updated successfully"}) + return jsonify({"message": "Character not found"}), 404 + +@character_bp.route('/characters/', methods=['DELETE']) +def remove_character(character_id): + if delete_character(character_id): + return jsonify({"message": "Character deleted successfully"}) + return jsonify({"message": "Character not found"}), 404 \ No newline at end of file diff --git a/server/app/api/dataset_routes.py b/server/app/api/dataset_routes.py new file mode 100644 index 0000000..d080500 --- /dev/null +++ b/server/app/api/dataset_routes.py @@ -0,0 +1,37 @@ +from flask import Blueprint, request, jsonify +from ..core.datasets import datasets, save_dataset, delete_dataset + +dataset_bp = Blueprint('datasets', __name__) + +@dataset_bp.route('/datasets', methods=['GET']) +def get_datasets(): + return jsonify(datasets) + +@dataset_bp.route('/datasets', methods=['POST']) +def create_dataset(): + data = request.json + dataset_id = save_dataset( + name=data['name'], + metric_name=data['metric_name'], + metric_description=data['metric_description'], + questions=data['questions'] + ) + return jsonify({"message": f"Dataset '{data['name']}' created successfully", "id": dataset_id}), 201 + +@dataset_bp.route('/datasets/', methods=['PUT']) +def update_dataset(dataset_id): + data = request.json + save_dataset( + name=data['name'], + metric_name=data['metric_name'], + metric_description=data['metric_description'], + questions=data['questions'], + dataset_id=dataset_id + ) + return jsonify({"message": f"Dataset updated successfully"}) + +@dataset_bp.route('/datasets/', methods=['DELETE']) +def remove_dataset(dataset_id): + if delete_dataset(int(dataset_id)): + return jsonify({"message": "Dataset deleted successfully"}) + return jsonify({"message": "Dataset not found"}), 404 \ No newline at end of file diff --git a/server/app/api/evaluation_routes.py b/server/app/api/evaluation_routes.py new file mode 100644 index 0000000..c08187e --- /dev/null +++ b/server/app/api/evaluation_routes.py @@ -0,0 +1,61 @@ +from flask import Blueprint, request, jsonify +from ..core.characters import get_character +from ..core.datasets import datasets +from ..core.models import Character, Evaluator + +evaluation_bp = Blueprint('evaluation', __name__) + +@evaluation_bp.route('/evaluate', methods=['POST']) +def evaluate(): + data = request.json + character_id = data['character'] + dataset_id = data['dataset'] + + char_data = get_character(character_id) + if not char_data: + return jsonify({"message": "Character not found"}), 404 + + character = Character(char_data["name"], char_data["backstory"], "User") + dataset = datasets[dataset_id] + + evaluator = Evaluator( + metric={"name": dataset['metric_name'], "description": dataset['metric_description']}, + character=character, + n=len(dataset['questions']), + model_type="claude" if char_data["model"].startswith("claude") else "openai" + ) + + results = evaluator.test_model(dataset['questions']) + + return jsonify({ + "results": [ + {"question": q, "response": r, "score": s} + for q, r, s in results + ], + "total_score": sum(score for _, _, score in results), + "max_score": len(results) + }) + +@evaluation_bp.route('/generate_questions', methods=['POST']) +def generate_questions(): + data = request.json + character_id = data['character'] + metric_name = data['metric_name'] + metric_description = data['metric_description'] + num_questions = int(data['num_questions']) + + char_data = get_character(character_id) + if not char_data: + return jsonify({"message": "Character not found"}), 404 + + character = Character(char_data["name"], char_data["backstory"], "User") + + evaluator = Evaluator( + metric={"name": metric_name, "description": metric_description}, + character=character, + n=num_questions, + model_type="claude" if char_data["model"].startswith("claude") else "openai" + ) + evaluator.generate_questions() + + return jsonify({"questions": evaluator.questions}) \ No newline at end of file diff --git a/server/app/api/main.py b/server/app/api/main.py new file mode 100644 index 0000000..f0370be --- /dev/null +++ b/server/app/api/main.py @@ -0,0 +1,7 @@ +from flask import Blueprint, render_template + +main_bp = Blueprint('main', __name__) + +@main_bp.route('/') +def index(): + return render_template('index.html') \ No newline at end of file diff --git a/server/app/config.py b/server/app/config.py new file mode 100644 index 0000000..96cf55e --- /dev/null +++ b/server/app/config.py @@ -0,0 +1,31 @@ +import os +from dotenv import load_dotenv + +load_dotenv() + +class Config: + # Flask config + SECRET_KEY = os.environ.get('SECRET_KEY') or 'dev-key' + + # API Keys + CLAUDE_API_KEY = os.environ.get('CLAUDE_API_KEY') + OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') + + # File paths + FILE_DIR = os.path.abspath(os.path.join(__file__, '../../data')) # server/data directory + CHARACTERS_FILE = os.path.join(FILE_DIR, 'characters.json') + DATASETS_FILE = os.path.join(FILE_DIR, 'datasets.json') + + # Model defaults + DEFAULT_CLAUDE_MODEL = "claude-3-5-sonnet-20240620" + DEFAULT_OPENAI_MODEL = "gpt-4" + DEFAULT_MAX_TOKENS = 4096 + DEFAULT_TEMPERATURE = 0.7 + + CORS_HEADERS = 'Content-Type' + + # If you're serving frontend from a different port in development + CORS_ORIGINS = [ + "http://localhost:5000", + "http://127.0.0.1:5000" + ] \ No newline at end of file diff --git a/server/app/core/__init__.py b/server/app/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/server/app/core/characters.py b/server/app/core/characters.py new file mode 100644 index 0000000..86c0779 --- /dev/null +++ b/server/app/core/characters.py @@ -0,0 +1,58 @@ +import json +from typing import Dict +from ..config import Config +import os + +# Initialize an empty dictionary to store characters +characters: Dict[str, Dict] = {} + +def load_characters(): + global characters + try: + if os.path.exists(Config.CHARACTERS_FILE) and os.path.getsize(Config.CHARACTERS_FILE) > 0: + with open(Config.CHARACTERS_FILE, "r", encoding='utf-8') as f: # Added encoding + characters = json.load(f) + else: + characters = {} + except (FileNotFoundError, json.JSONDecodeError) as e: + print(f"Error loading characters: {e}") # Debug print + characters = {} + +def save_characters(): + with open(Config.CHARACTERS_FILE, "w") as f: + json.dump(characters, f, indent=4) + +def save_character(name: str, backstory: str, model: str, temperature: float, max_tokens: int): + character_id = str(max(int(key) for key in characters.keys()) + 1 if characters else 1) + characters[character_id] = { + "name": name, + "backstory": backstory, + "model": model, + "temperature": float(temperature), + "max_tokens": int(max_tokens) + } + save_characters() + return character_id + +def delete_character(character_id: str) -> bool: + if character_id in characters: + del characters[character_id] + save_characters() + return True + return False + +def get_character(character_id: str) -> Dict: + return characters.get(character_id) + +def update_character(character_id: str, name: str, backstory: str, model: str, temperature: float, max_tokens: int) -> bool: + if character_id in characters: + characters[character_id] = { + "name": name, + "backstory": backstory, + "model": model, + "temperature": float(temperature), + "max_tokens": int(max_tokens) + } + save_characters() + return True + return False \ No newline at end of file diff --git a/datasets.py b/server/app/core/datasets.py similarity index 72% rename from datasets.py rename to server/app/core/datasets.py index 1e98b92..ab78bee 100644 --- a/datasets.py +++ b/server/app/core/datasets.py @@ -1,19 +1,19 @@ import json from typing import Dict, List +from ..config import Config -# Initialize an empty dictionary to store datasets datasets: Dict[int, Dict] = {} def load_datasets(): global datasets try: - with open("datasets.json", "r") as f: + with open(Config.DATASETS_FILE, "r") as f: datasets = json.load(f) except FileNotFoundError: datasets = {} def save_datasets(): - with open("datasets.json", "w") as f: + with open(Config.DATASETS_FILE, "w") as f: json.dump(datasets, f, indent=4) def save_dataset(name: str, metric_name: str, metric_description: str, questions: List[str]): @@ -28,11 +28,8 @@ def save_dataset(name: str, metric_name: str, metric_description: str, questions return dataset_id def delete_dataset(dataset_id: int) -> bool: - if dataset_id in datasets: - del datasets[dataset_id] + if str(dataset_id) in datasets: + del datasets[str(dataset_id)] save_datasets() return True - return False - -# Load existing datasets when the module is imported -load_datasets() \ No newline at end of file + return False \ No newline at end of file diff --git a/utils.py b/server/app/core/models.py similarity index 75% rename from utils.py rename to server/app/core/models.py index b494f85..e68b5c1 100644 --- a/utils.py +++ b/server/app/core/models.py @@ -1,36 +1,9 @@ -from anthropic import Anthropic -from openai import OpenAI -import os -from prompt_library import system_metric, system_score - -def get_response_claude(prompt: list,model: str = "claude-3-5-sonnet-20240620", temp: int = 0.7, max_tokens: int = 4096) -> str: - client = Anthropic(api_key = os.environ["CLAUDE_API_KEY"]) - - message = client.messages.create( - model=model, - max_tokens=max_tokens, - temperature=temp, - system= prompt[0]["content"], - messages= prompt[1:] - ) - ans = (message.content[0].text) - - return ans - -def get_response_openai(prompt: list,model: str = "gpt-4o", max_tokens: int = 1000,temp: int = 0.7) -> str: - - client = OpenAI(api_key = os.environ["OPENAI_API_KEY"]) - - completion = client.chat.completions.create( - model=model, - messages=prompt, - temperature = temp, - max_tokens = max_tokens - ) - - ans = (completion.choices[0].message.content) - - return ans +from ..utils import ( + get_response_claude, + get_response_openai, + system_metric, + system_score +) class Character: def __init__(self, name:str, backstory:str, user: str): @@ -132,11 +105,3 @@ def test_model(self, questions=None): results.append((question, response, score)) return results - - - - - - - - diff --git a/server/app/utils/__init__.py b/server/app/utils/__init__.py new file mode 100644 index 0000000..9fb35b5 --- /dev/null +++ b/server/app/utils/__init__.py @@ -0,0 +1,9 @@ +from .api_clients import get_response_claude, get_response_openai +from .prompts import system_metric, system_score + +__all__ = [ + 'get_response_claude', + 'get_response_openai', + 'system_metric', + 'system_score' +] \ No newline at end of file diff --git a/server/app/utils/api_clients.py b/server/app/utils/api_clients.py new file mode 100644 index 0000000..1a2aa24 --- /dev/null +++ b/server/app/utils/api_clients.py @@ -0,0 +1,37 @@ +from anthropic import Anthropic +from openai import OpenAI +from ..config import Config + +def get_response_claude( + prompt: list, + model: str = Config.DEFAULT_CLAUDE_MODEL, + temp: float = Config.DEFAULT_TEMPERATURE, + max_tokens: int = Config.DEFAULT_MAX_TOKENS +) -> str: + client = Anthropic(api_key=Config.CLAUDE_API_KEY) + + message = client.messages.create( + model=model, + max_tokens=max_tokens, + temperature=temp, + system=prompt[0]["content"], + messages=prompt[1:] + ) + return message.content[0].text + +def get_response_openai( + prompt: list, + model: str = Config.DEFAULT_OPENAI_MODEL, + max_tokens: int = Config.DEFAULT_MAX_TOKENS, + temp: float = Config.DEFAULT_TEMPERATURE +) -> str: + client = OpenAI(api_key=Config.OPENAI_API_KEY) + + completion = client.chat.completions.create( + model=model, + messages=prompt, + temperature=temp, + max_tokens=max_tokens + ) + + return completion.choices[0].message.content \ No newline at end of file diff --git a/server/app/utils/exceptions.py b/server/app/utils/exceptions.py new file mode 100644 index 0000000..1d143b3 --- /dev/null +++ b/server/app/utils/exceptions.py @@ -0,0 +1,11 @@ +class APIError(Exception): + """Base exception for API errors""" + pass + +class ModelError(APIError): + """Raised when there's an error with the AI model response""" + pass + +class ValidationError(APIError): + """Raised when there's an error with input validation""" + pass \ No newline at end of file diff --git a/prompt_library.py b/server/app/utils/prompts.py similarity index 99% rename from prompt_library.py rename to server/app/utils/prompts.py index 5299b45..3e33376 100644 --- a/prompt_library.py +++ b/server/app/utils/prompts.py @@ -21,5 +21,4 @@ You will be given the response of an AI character, you have to test it on the metric/quality {metric_name} which is described as {metric_desc}. if the response of the Ai character is very much complying with {metric_name} then score it 1, if the response is opposite to {metric_name} then score it 0. VERY IMPORTANT: your answer must always be either 0 or 1, nothing else should be given, ONLY the number. -""" - +""" \ No newline at end of file diff --git a/server/requirements.txt b/server/requirements.txt new file mode 100644 index 0000000..b469483 Binary files /dev/null and b/server/requirements.txt differ diff --git a/src/static/css/base/_global.css b/src/static/css/base/_global.css new file mode 100644 index 0000000..cd84a5f --- /dev/null +++ b/src/static/css/base/_global.css @@ -0,0 +1,60 @@ +@import url('https://fonts.googleapis.com/css2?family=Alfa+Slab+One&family=IBM+Plex+Mono:wght@300;400;600&display=swap'); + +/* Base styles */ +body { + font-family: var(--body-font); + background-color: var(--background-color); + color: var(--text-color); + line-height: 1.6; + margin: 0; + padding: 0; +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: var(--spacing-medium); +} + +/* Typography */ +h1, h2, h3, h4, h5, h6 { + font-family: var(--heading-font); + font-weight: 100; + color: var(--secondary-color); + margin-top: 0; +} + +h1 { font-size: 2.5em; } +h2 { font-size: 2em; } +h3 { font-size: 1.5em; } + +/* Layout */ +.section { + margin-bottom: var(--spacing-large); +} + +.section__header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: var(--spacing-medium); + border-bottom: 2px solid var(--primary-color); + padding-bottom: var(--spacing-small); +} + +.section__title { + margin: 0; +} + +/* Grid */ +.grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); + gap: var(--spacing-medium); +} + +/* Animations */ +@keyframes fadeIn { + from { opacity: 0; } + to { opacity: 1; } +} \ No newline at end of file diff --git a/src/static/css/base/_variables.css b/src/static/css/base/_variables.css new file mode 100644 index 0000000..fd0902e --- /dev/null +++ b/src/static/css/base/_variables.css @@ -0,0 +1,26 @@ +:root { + /* Colors */ + --primary-color: #3498db; + --secondary-color: #2c3e50; + --accent-color: #e74c3c; + --background-color: #ecf0f1; + --text-color: #34495e; + --card-background: #ffffff; + --shadow-color: rgba(0, 0, 0, 0.1); + + /* Typography */ + --heading-font: 'Alfa Slab One', cursive; + --body-font: 'IBM Plex Mono', monospace; + + /* Spacing */ + --spacing-small: 10px; + --spacing-medium: 20px; + --spacing-large: 40px; + + /* Transitions */ + --transition-speed: 0.3s; + + /* Shadows */ + --shadow-default: 0 4px 6px var(--shadow-color); + --shadow-hover: 0 6px 8px var(--shadow-color); +} \ No newline at end of file diff --git a/src/static/css/components/_cards.css b/src/static/css/components/_cards.css new file mode 100644 index 0000000..749cd89 --- /dev/null +++ b/src/static/css/components/_cards.css @@ -0,0 +1,56 @@ +/* Card Component */ +.card { + background-color: var(--card-background); + border-radius: 8px; + padding: var(--spacing-medium); + box-shadow: var(--shadow-default); + transition: all var(--transition-speed) ease; + animation: fadeIn 0.5s ease-in-out; +} + +.card:hover { + transform: translateY(-5px); + box-shadow: var(--shadow-hover); +} + +.card__title { + font-family: var(--heading-font); + margin-top: 0; + color: var(--primary-color); + font-size: 1.5em; +} + +.card__content { + font-size: 0.9em; + color: var(--text-color); +} + +.card__backstory { + max-height: 8em; + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-line-clamp: 4; + line-clamp: 4; + -webkit-box-orient: vertical; +} + +.card__button--delete { + background-color: var(--accent-color); + color: white; + border: none; + padding: 5px 10px; + border-radius: 4px; + cursor: pointer; + transition: background-color var(--transition-speed) ease; +} + +.card__button--delete:hover { + background-color: #c0392b; +} + +.card-grid { + display: grid; + grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); + gap: var(--spacing-medium); +} \ No newline at end of file diff --git a/src/static/css/components/_forms.css b/src/static/css/components/_forms.css new file mode 100644 index 0000000..8d4191b --- /dev/null +++ b/src/static/css/components/_forms.css @@ -0,0 +1,113 @@ +/* Form Elements */ +.form__input { + width: 100%; + padding: 10px; + margin-bottom: 10px; + border: 1px solid #bdc3c7; + border-radius: 4px; + box-sizing: border-box; + font-family: var(--body-font); +} + +.form__label { + display: block; + margin-bottom: 15px; + font-family: var(--body-font); +} + +.form__range-container { + display: flex; + align-items: center; + margin-top: 5px; +} + +.form__range { + width: calc(100% - 60px); + margin-right: 10px; + vertical-align: middle; +} + +.form__range-value { + display: inline-block; + width: 50px; + text-align: center; + background-color: #f0f0f0; + padding: 5px; + border-radius: 4px; + font-size: 0.9em; +} + +/* Buttons */ +.button { + border: none; + border-radius: 4px; + cursor: pointer; + transition: all var(--transition-speed) ease; +} + +.button--primary { + background-color: var(--primary-color); + color: white; + padding: 10px 15px; + font-size: 0.9em; +} + +.button--primary:hover { + background-color: #2980b9; + transform: translateY(-2px); +} + +/* Evaluation Controls */ +.eval-controls { + display: flex; + gap: var(--spacing-small); + align-items: center; + margin-bottom: 15px; +} + +.eval-controls .form__input { + flex: 1; + margin-bottom: 0; + height: 38px; +} + +.eval-controls .button { + white-space: nowrap; + height: 38px; + padding: 10px 15px; + font-size: 0.9em; +} + +/* Update select styling to match inputs */ +select.form__input { + appearance: none; + background-image: url("data:image/svg+xml,..."); + background-repeat: no-repeat; + background-position: right 8px center; + padding-right: 30px; + height: 38px; +} + +.evaluation-result { + background-color: var(--card-background); + border-left: 4px solid var(--primary-color); + padding: 15px; + margin-bottom: 15px; + border-radius: 4px; + box-shadow: var(--shadow-default); +} + +.loading-indicator { + text-align: center; + padding: var(--spacing-medium); + font-style: italic; + animation: pulse 1.5s infinite; +} + +.error { + color: var(--accent-color); + font-weight: bold; + padding: 10px; + background-color: #fadbd8; + border-radius: 4px; +} \ No newline at end of file diff --git a/src/static/css/components/_panel.css b/src/static/css/components/_panel.css new file mode 100644 index 0000000..da7b894 --- /dev/null +++ b/src/static/css/components/_panel.css @@ -0,0 +1,64 @@ +/* Sliding Panel */ +.panel { + position: fixed; + top: 0; + right: -100%; + width: 70%; + height: 100%; + background-color: var(--card-background); + box-shadow: -2px 0 5px var(--shadow-color); + z-index: 1000; + overflow: scroll; + padding: 40px; + visibility: hidden; +} + +.panel--open { + right: 0; + visibility: visible; + transition: right var(--transition-speed) ease-in-out; +} + +.panel__title { + font-family: var(--heading-font); + margin-bottom: var(--spacing-medium); + padding-bottom: 10px; + border-bottom: 2px solid var(--primary-color); +} + +.panel__close-btn { + position: absolute; + top: var(--spacing-medium); + right: var(--spacing-medium); + background: none; + border: none; + font-size: 1.5em; + cursor: pointer; + color: var(--text-color); +} + +.panel__save-btn { + margin-bottom: 50px; +} + +/* Overlay */ +.overlay { + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.5); + backdrop-filter: blur(5px); + z-index: 999; + display: none; +} + +.overlay--active { + display: block; +} + +/* Body modifier when panel is open */ +.body--panel-open { + overflow: hidden; +} \ No newline at end of file diff --git a/src/static/css/main.css b/src/static/css/main.css new file mode 100644 index 0000000..340dfe8 --- /dev/null +++ b/src/static/css/main.css @@ -0,0 +1,5 @@ +@import 'base/_variables.css'; +@import 'base/_global.css'; +@import 'components/_cards.css'; +@import 'components/_forms.css'; +@import 'components/_panel.css'; \ No newline at end of file diff --git a/src/static/js/api/characters.js b/src/static/js/api/characters.js new file mode 100644 index 0000000..9dc90a4 --- /dev/null +++ b/src/static/js/api/characters.js @@ -0,0 +1,22 @@ +export const CharacterAPI = { + fetchAll: async () => { + const response = await fetch('/api/characters'); + return response.json(); + }, + + create: async (characterData) => { + const response = await fetch('/api/characters', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify(characterData) + }); + return response.json(); + }, + + delete: async (id) => { + const response = await fetch(`/api/characters/${id}`, { + method: 'DELETE' + }); + return response.json(); + } +}; \ No newline at end of file diff --git a/src/static/js/api/datasets.js b/src/static/js/api/datasets.js new file mode 100644 index 0000000..808bed1 --- /dev/null +++ b/src/static/js/api/datasets.js @@ -0,0 +1,32 @@ +export const DatasetAPI = { + fetchAll: async () => { + const response = await fetch('/api/datasets'); + return response.json(); + }, + + create: async (datasetData) => { + const response = await fetch('/api/datasets', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify(datasetData) + }); + return response.json(); + }, + + delete: async (id) => { + const response = await fetch(`/api/datasets/${id}`, { + method: 'DELETE' + }); + return response.json(); + }, + + generateQuestions: async (data) => { + const response = await fetch('/api/generate_questions', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify(data) + }); + const result = await response.json(); + return result.questions; + } +}; \ No newline at end of file diff --git a/src/static/js/api/evaluation.js b/src/static/js/api/evaluation.js new file mode 100644 index 0000000..3672b1c --- /dev/null +++ b/src/static/js/api/evaluation.js @@ -0,0 +1,13 @@ +export const EvaluationAPI = { + evaluate: async (characterId, datasetId) => { + const response = await fetch('/api/evaluate', { + method: 'POST', + headers: {'Content-Type': 'application/json'}, + body: JSON.stringify({ + character: characterId, + dataset: datasetId + }) + }); + return response.json(); + } +}; \ No newline at end of file diff --git a/src/static/js/components/characterManager.js b/src/static/js/components/characterManager.js new file mode 100644 index 0000000..ad92836 --- /dev/null +++ b/src/static/js/components/characterManager.js @@ -0,0 +1,69 @@ +import { CharacterAPI } from '../api/characters.js'; +import { Panel } from './panel.js'; + +export class CharacterManager { + constructor() { + this.list = document.getElementById('characterList'); + this.addBtn = document.getElementById('addCharacterBtn'); + this.panel = new Panel(); + + this.addBtn.onclick = () => this.showAddPanel(); + this.init(); + } + + async init() { + await this.fetchAndRender(); + } + + async fetchAndRender() { + const characters = await CharacterAPI.fetchAll(); + this.render(characters); + + document.dispatchEvent(new CustomEvent('characters-updated', { + detail: { characters } + })); + } + + render(characters) { + this.list.innerHTML = ''; + for (const [id, data] of Object.entries(characters)) { + const card = this.createCharacterCard(id, data); + this.list.appendChild(card); + } + } + + createCharacterCard(id, data) { + const card = document.createElement('div'); + card.className = 'card'; + card.innerHTML = ` +

${data.name}

+

${data.backstory}

+ + `; + + card.querySelector('.card__button--delete').onclick = () => this.deleteCharacter(id); + return card; + } + + async deleteCharacter(id) { + await CharacterAPI.delete(id); + await this.fetchAndRender(); + } + + showAddPanel() { + this.panel.show('Add Character', [ + {label: 'Name', type: 'text', name: 'name'}, + {label: 'Backstory', type: 'textarea', name: 'backstory'}, + {label: 'Model', type: 'select', name: 'model', + options: ['gpt-4', 'claude-3-5-sonnet-20240620']}, + {label: 'Temperature', type: 'range', name: 'temperature', + min: 0, max: 1, step: 0.1}, + {label: 'Max Tokens', type: 'number', name: 'max_tokens'} + ], async (data) => { + await CharacterAPI.create(data); + await this.fetchAndRender(); + }); + } +} \ No newline at end of file diff --git a/src/static/js/components/datasetManager.js b/src/static/js/components/datasetManager.js new file mode 100644 index 0000000..8fb2463 --- /dev/null +++ b/src/static/js/components/datasetManager.js @@ -0,0 +1,114 @@ +import { DatasetAPI } from '../api/datasets.js'; +import { CharacterAPI } from '../api/characters.js'; +import { Panel } from './panel.js'; + +export class DatasetManager { + constructor() { + this.list = document.getElementById('datasetList'); + this.addBtn = document.getElementById('addDatasetBtn'); + this.panel = new Panel(); + + this.addBtn.onclick = () => this.showAddPanel(); + this.init(); + } + + async init() { + await this.fetchAndRender(); + } + + async fetchAndRender() { + const datasets = await DatasetAPI.fetchAll(); + this.render(datasets); + + document.dispatchEvent(new CustomEvent('datasets-updated', { + detail: { datasets } + })); + } + + render(datasets) { + this.list.innerHTML = ''; + for (const [id, data] of Object.entries(datasets)) { + const card = this.createDatasetCard(id, data); + this.list.appendChild(card); + } + } + + createDatasetCard(id, data) { + const card = document.createElement('div'); + card.className = 'card'; + card.innerHTML = ` +

${data.name}

+

Metric: ${data.metric_name}

+

Questions: ${data.questions.length}

+ + `; + + card.querySelector('.card__button--delete').onclick = () => this.deleteDataset(id); + return card; + } + + async deleteDataset(id) { + await DatasetAPI.delete(id); + await this.fetchAndRender(); + } + + async showAddPanel() { + const characters = await CharacterAPI.fetchAll(); + const characterOptions = Object.entries(characters).map(([id, data]) => ({ + value: id, + label: data.name + })); + + this.panel.show('Add Dataset', [ + {label: 'Name', type: 'text', name: 'name'}, + {label: 'Metric Name', type: 'text', name: 'metric_name'}, + {label: 'Metric Description', type: 'textarea', name: 'metric_description'}, + {label: 'Number of Questions', type: 'number', name: 'num_questions'}, + {label: 'Character', type: 'select', name: 'character', options: characterOptions} + ], async (data) => { + const questions = await DatasetAPI.generateQuestions(data); + this.showQuestionsPreview(data, questions); + }, false); + } + + showQuestionsPreview(data, questions) { + // Clear previous questions and save button if they exist + const form = this.panel.form; + + const existingQuestions = form.querySelector('#generatedQuestions'); + const existingSaveBtn = form.querySelector('#saveDatasetBtn'); + if (existingQuestions) existingQuestions.remove(); + if (existingSaveBtn) existingSaveBtn.remove(); + + // Create questions preview + const questionsDiv = document.createElement('div'); + questionsDiv.id = 'generatedQuestions'; + questionsDiv.className = 'form__questions'; + questionsDiv.innerHTML = ` +

Generated Questions:

+
    ${questions.map(q => `
  • ${q}
  • `).join('')}
+ `; + + // Create save button + const saveBtn = document.createElement('button'); + saveBtn.id = 'saveDatasetBtn'; + saveBtn.className = 'button button--primary panel__save-btn'; + saveBtn.textContent = 'Save Dataset'; + saveBtn.onclick = async (e) => { + e.preventDefault(); + data.questions = questions; + await DatasetAPI.create(data); + await this.fetchAndRender(); + this.panel.close(); + }; + + // Append both elements + form.appendChild(questionsDiv); + form.appendChild(saveBtn); + + // Scroll to show new content + this.panel.panel.scrollTop = this.panel.panel.scrollHeight; + } +} \ No newline at end of file diff --git a/src/static/js/components/evaluationManager.js b/src/static/js/components/evaluationManager.js new file mode 100644 index 0000000..0ee9f71 --- /dev/null +++ b/src/static/js/components/evaluationManager.js @@ -0,0 +1,80 @@ +import { EvaluationAPI } from '../api/evaluation.js'; +import { FormHelper } from '../utils/formHelper.js'; + +export class EvaluationManager { + constructor() { + this.characterSelect = document.getElementById('evalCharacterSelect'); + this.datasetSelect = document.getElementById('evalDatasetSelect'); + this.evaluateBtn = document.getElementById('evaluateBtn'); + this.clearBtn = document.getElementById('clearEvaluationBtn'); + this.results = document.getElementById('evaluationResults'); + + this.evaluateBtn.onclick = () => this.evaluate(); + this.clearBtn.onclick = () => this.clear(); + + document.addEventListener('characters-updated', (event) => { + this.populateDropdown(this.characterSelect, event.detail.characters, "Select a character"); + }); + + document.addEventListener('datasets-updated', (event) => { + this.populateDropdown(this.datasetSelect, event.detail.datasets, "Select a dataset"); + }); + } + + populateDropdown(selectElement, data, defaultText = "Select an option") { + FormHelper.populateDropdown(selectElement, data, defaultText); + } + + async evaluate() { + const characterId = this.characterSelect.value; + const datasetId = this.datasetSelect.value; + + if (!characterId || !datasetId) { + alert('Please select both a character and a dataset.'); + return; + } + + this.showLoading(); + try { + const data = await EvaluationAPI.evaluate(characterId, datasetId); + this.renderResults(data); + } catch (error) { + this.showError(error); + } + } + + showLoading() { + this.evaluateBtn.disabled = true; + this.results.innerHTML = '
Evaluation in progress... Please wait.
'; + } + + renderResults(data) { + this.results.innerHTML = ''; + + const totalScore = document.createElement('h3'); + totalScore.textContent = `Total Score: ${data.total_score}/${data.max_score}`; + this.results.appendChild(totalScore); + + data.results.forEach(result => { + const resultDiv = document.createElement('div'); + resultDiv.className = 'evaluation-result'; + resultDiv.innerHTML = ` +

Question: ${result.question}

+

Response: ${result.response}

+

Score: ${result.score}

+ `; + this.results.appendChild(resultDiv); + }); + + this.evaluateBtn.disabled = false; + } + + showError(error) { + this.results.innerHTML = `
An error occurred: ${error.message}
`; + this.evaluateBtn.disabled = false; + } + + clear() { + this.results.innerHTML = ''; + } +} \ No newline at end of file diff --git a/src/static/js/components/panel.js b/src/static/js/components/panel.js new file mode 100644 index 0000000..82d8877 --- /dev/null +++ b/src/static/js/components/panel.js @@ -0,0 +1,53 @@ +import { FormHelper } from '../utils/formHelper.js'; + +export class Panel { + constructor() { + this.panel = document.getElementById('slidingPanel'); + this.overlay = document.getElementById('overlay'); + this.closeBtn = document.getElementById('closePanelBtn'); + this.title = document.getElementById('panelTitle'); + this.form = document.getElementById('panelForm'); + + this.closeBtn.onclick = () => this.close(); + this.overlay.onclick = () => this.close(); + } + + show(title, fields, submitCallback, closeAfterSubmit = true) { + this.title.textContent = title; + this.form.innerHTML = ''; + + fields.forEach(field => { + const label = document.createElement('label'); + label.className = 'form__label'; + label.textContent = field.label; + let input = FormHelper.createInput(field); + + label.appendChild(input); + this.form.appendChild(label); + }); + + const submitBtn = document.createElement('button'); + submitBtn.className = 'button button--primary panel__save-btn'; + submitBtn.textContent = title === 'Add Dataset' ? 'Generate Questions' : 'Save Character'; + submitBtn.onclick = (e) => { + e.preventDefault(); + const formData = new FormData(this.form); + const data = Object.fromEntries(formData.entries()); + submitCallback(data); + if (closeAfterSubmit) { + this.close(); + } + }; + this.form.appendChild(submitBtn); + + this.panel.classList.add('panel--open'); + this.overlay.classList.add('overlay--active'); + document.body.classList.add('body--panel-open'); + } + + close() { + this.panel.classList.remove('panel--open'); + this.overlay.classList.remove('overlay--active'); + document.body.classList.remove('body--panel-open'); + } +} \ No newline at end of file diff --git a/src/static/js/main.js b/src/static/js/main.js new file mode 100644 index 0000000..bfe7b53 --- /dev/null +++ b/src/static/js/main.js @@ -0,0 +1,9 @@ +import { CharacterManager } from './components/characterManager.js'; +import { DatasetManager } from './components/datasetManager.js'; +import { EvaluationManager } from './components/evaluationManager.js'; + +document.addEventListener('DOMContentLoaded', () => { + new CharacterManager(); + new DatasetManager(); + new EvaluationManager(); +}); \ No newline at end of file diff --git a/src/static/js/utils/formHelper.js b/src/static/js/utils/formHelper.js new file mode 100644 index 0000000..61eabda --- /dev/null +++ b/src/static/js/utils/formHelper.js @@ -0,0 +1,84 @@ +export const FormHelper = { + createInput(field) { + switch(field.type) { + case 'textarea': + return this.createTextarea(field); + case 'select': + return this.createSelect(field); + case 'range': + return this.createRange(field); + default: + return this.createDefaultInput(field); + } + }, + + createTextarea(field) { + const input = document.createElement('textarea'); + input.name = field.name; + input.className = 'form__input'; + return input; + }, + + createSelect(field) { + const input = document.createElement('select'); + input.name = field.name; + input.className = 'form__input'; + + field.options.forEach(option => { + const optionElement = document.createElement('option'); + if (typeof option === 'object' && option.value && option.label) { + optionElement.value = option.value; + optionElement.textContent = option.label; + } else { + optionElement.value = option; + optionElement.textContent = option; + } + input.appendChild(optionElement); + }); + return input; + }, + + createRange(field) { + const rangeContainer = document.createElement('div'); + rangeContainer.className = 'form__range-container'; + + const input = document.createElement('input'); + input.type = 'range'; + input.name = field.name; + input.min = field.min; + input.max = field.max; + input.step = field.step; + input.value = (field.max - field.min) / 2; + input.className = 'form__range'; + + const rangeValue = document.createElement('span'); + rangeValue.className = 'form__range-value'; + rangeValue.textContent = input.value; + + input.oninput = () => { + rangeValue.textContent = parseFloat(input.value).toFixed(1); + }; + + rangeContainer.appendChild(input); + rangeContainer.appendChild(rangeValue); + return rangeContainer; + }, + + createDefaultInput(field) { + const input = document.createElement('input'); + input.type = field.type; + input.name = field.name; + input.className = 'form__input'; + return input; + }, + + populateDropdown(selectElement, data, defaultText = "Select an option") { + selectElement.innerHTML = ``; + Object.entries(data).forEach(([id, item]) => { + const option = document.createElement('option'); + option.value = id; + option.textContent = item.name; + selectElement.appendChild(option); + }); + } +}; \ No newline at end of file diff --git a/src/templates/index.html b/src/templates/index.html new file mode 100644 index 0000000..de4c9c6 --- /dev/null +++ b/src/templates/index.html @@ -0,0 +1,68 @@ + + + + + + Character and Dataset Manager + + + + +
+
+

Character and Dataset Manager

+
+ +
+
+
+

Characters

+ +
+
+
+ +
+
+

Datasets

+ +
+
+
+ +
+
+

Evaluation

+
+
+ + + + +
+
+
+
+
+ +
+ +

+
+
+ +
+ + + + \ No newline at end of file diff --git a/test.py b/test.py deleted file mode 100644 index 8a69695..0000000 --- a/test.py +++ /dev/null @@ -1,34 +0,0 @@ -from utils import Character, Evaluator -from characters import characters - - -if __name__ == "__main__": - - user = input("Hi, whats your name?: ") - char_list = [] - for ind, character in enumerate(characters): - print(ind,". ",character) - char_list.append(character) - character_no = int(input("Which Character do you wanna choose?: ")) - - selected_char = char_list[character_no] - character_final = Character( - name = selected_char, - backstory= characters[selected_char]["backstory"], - user=user - ) - metric_name = input("Can you name the metric you want to test: ") - metric_description = input("Describe the metric briefly, the things that you are looking to test exactly: ") - - metrics = { - "name": metric_name, - "description": metric_description - } - - evaluator = Evaluator( - metric= metrics, - character=character_final, - n = input("how many test questions do you want to generate?: ") - ) - - evaluator.test_model()