From ca3fedca8313c200309dbfd0db4d100173f3b8e2 Mon Sep 17 00:00:00 2001 From: calyptobai Date: Wed, 10 Jan 2024 13:15:34 -0500 Subject: [PATCH 1/3] Add repos to answer action prompt and step prompt --- server/bleep/src/agent/prompts.rs | 19 +++++++++++++++---- server/bleep/src/agent/tools/answer.rs | 7 ++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/server/bleep/src/agent/prompts.rs b/server/bleep/src/agent/prompts.rs index 0fb385d9f2..da89ade3d9 100644 --- a/server/bleep/src/agent/prompts.rs +++ b/server/bleep/src/agent/prompts.rs @@ -1,3 +1,5 @@ +use std::collections::HashSet; + use crate::agent::exchange::RepoPath; pub fn functions(add_proc: bool) -> serde_json::Value { @@ -82,13 +84,22 @@ pub fn functions(add_proc: bool) -> serde_json::Value { } pub fn system<'a>(paths: impl IntoIterator) -> String { + let paths = paths.into_iter().collect::>(); + let mut s = "".to_string(); - let mut paths = paths.into_iter().peekable(); + let repos = paths.iter().map(|rp| &rp.repo).collect::>(); + + s.push_str("## REPOS ##\n"); + for repo in repos { + s.push_str(&format!("{repo}\n")); + } + + let mut iter = paths.into_iter().peekable(); - if paths.peek().is_some() { - s.push_str("## PATHS ##\nindex, repo, path\n"); - for (i, path) in paths.enumerate() { + if iter.peek().is_some() { + s.push_str("\n## PATHS ##\nindex, repo, path\n"); + for (i, path) in iter.enumerate() { let repo = path.repo.display_name(); let path = &path.path; s.push_str(&format!("{}, {}, {}\n", i, repo, path)); diff --git a/server/bleep/src/agent/tools/answer.rs b/server/bleep/src/agent/tools/answer.rs index 20eb3bbe7d..b8e48a59a0 100644 --- a/server/bleep/src/agent/tools/answer.rs +++ b/server/bleep/src/agent/tools/answer.rs @@ -115,8 +115,13 @@ impl Agent { debug!(?paths, ?aliases, "created filtered path alias list"); + s += "##### REPOS #####\n"; + for repo in self.relevant_repos() { + s += &format!("{repo}\n"); + } + if !aliases.is_empty() { - s += "##### PATHS #####\n"; + s += "\n##### PATHS #####\n"; for alias in &aliases { let path = &paths[*alias]; From 21b1569392aeba2a8232618f1e31e3f346ff21f5 Mon Sep 17 00:00:00 2001 From: Gabriel Gordon-Hall Date: Thu, 11 Jan 2024 10:50:48 +0000 Subject: [PATCH 2/3] limit number of tokens for symbol classification --- server/bleep/src/agent/prompts.rs | 2 +- server/bleep/src/agent/symbol.rs | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/server/bleep/src/agent/prompts.rs b/server/bleep/src/agent/prompts.rs index da89ade3d9..79a6611b17 100644 --- a/server/bleep/src/agent/prompts.rs +++ b/server/bleep/src/agent/prompts.rs @@ -408,7 +408,7 @@ pub fn symbol_classification_prompt(snippets: &str) -> String { Above are code chunks and non-local symbols that have been extracted from the chunks. Each chunk is followed by an enumerated list of symbols that it contains. Given a user query, select the symbol which is most relevant to it, e.g. the references or definition of this symbol would help somebody answer the query. Symbols which are language builtins or which come from third party libraries are unlikely to be helpful. -Do not answer with the symbol name, use the symbol index. +Do not answer with the symbol name, use the symbol index. If none of the symbols are relevant, answer with 0. ### Examples ### Q: how does ranking work? diff --git a/server/bleep/src/agent/symbol.rs b/server/bleep/src/agent/symbol.rs index b3099f77f0..d40f7e90e6 100644 --- a/server/bleep/src/agent/symbol.rs +++ b/server/bleep/src/agent/symbol.rs @@ -163,7 +163,7 @@ impl Agent { format!( "```{}\n{}```\n\n{}", - c.repo_path.path.clone(), + c.repo_path, c.snippet.clone(), symbols_string ) @@ -182,6 +182,7 @@ impl Agent { .clone() .model("gpt-4-0613") .temperature(0.0) + .max_tokens(5) .chat(&messages, None) .await { From 579f083ca70810ad453a438929c0c0c258d3b8b4 Mon Sep 17 00:00:00 2001 From: Gabriel Gordon-Hall Date: Mon, 15 Jan 2024 14:24:22 +0000 Subject: [PATCH 3/3] tweak prompt text --- server/bleep/src/agent/prompts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/bleep/src/agent/prompts.rs b/server/bleep/src/agent/prompts.rs index 79a6611b17..c65dea0ab5 100644 --- a/server/bleep/src/agent/prompts.rs +++ b/server/bleep/src/agent/prompts.rs @@ -112,7 +112,7 @@ pub fn system<'a>(paths: impl IntoIterator) -> String { - ALWAYS call a function, DO NOT answer the question directly, even if the query is not in English - DO NOT call a function that you've used before with the same arguments -- DO NOT assume the structure of the codebase, or the existence of files or folders +- DO NOT assume the structure of the indexed repos (listed above), or the existence of files or folders - Your queries to functions.code or functions.path should be significantly different to previous queries - Call functions.none with paths that you are confident will help answer the user's query, include paths containing the information needed for a complete answer including definitions and references - If the user query is general (e.g. 'What does this do?', 'What is this repo?') look for READMEs, documentation and entry points in the code (main files, index files, api files etc.) @@ -132,7 +132,7 @@ pub fn answer_article_prompt(context: &str) -> String { format!( r#"{context}#### -You are an expert programmer called 'bloop' and you are helping a junior colleague answer questions about a codebase using the information above. If their query refers to 'this' or 'it' and there is no other context, assume that it refers to the information above. +You are an expert programmer called 'bloop' and you are helping a junior colleague answer questions about some repos using the information above. If their query refers to 'this' or 'it' and there is no other context, assume that it refers to the information above. Provide only as much information and code as is necessary to answer the query, but be concise. Keep number of quoted lines to a minimum when possible. If you do not have enough information needed to answer the query, do not make up an answer. Infer as much as possible from the information above. When referring to code, you must provide an example in a code block.