From aee3b41fbfd946b68b387570ccbc6a792cfc4eae Mon Sep 17 00:00:00 2001 From: Daniel Franklin Date: Mon, 18 May 2026 09:40:15 -0400 Subject: [PATCH 1/4] feat(web): repo catalog import, versioning, session notes, memory lifecycle - Skill discovery expands README/index/catalog links into additional source candidates; bulk select and install from discover results with explicit conflict behavior; catalog badge and source warnings surfaced in UI - Skills, workflows, and personas track a patch-incremented version field on each save; version exposed in list responses and dashboard - Runtime session notes: per-run messages.json artifact, list/append API endpoints, and Session Notes panel in run detail (Slice D) - Memory delete-key and purge-expired handlers with dashboard controls (Slice F); ScopedMemoryStore::delete with unit test - Fuzzy prefix token matching for skill discovery search scoring - skill_import_handler syncs Claude slash commands after write --- crates/memory/src/store.rs | 27 + crates/sharing/src/lib.rs | 28 + crates/web/Cargo.toml | 1 + crates/web/frontend/src/composables/useApi.js | 7 + .../web/frontend/src/views/DashboardView.vue | 99 ++++ crates/web/frontend/src/views/MemoryView.vue | 28 + crates/web/frontend/src/views/SkillsView.vue | 127 ++++- crates/web/src/api.rs | 534 +++++++++++++++++- crates/web/src/server.rs | 13 +- docs/github-project-hygiene.md | 59 ++ docs/milestones.md | 2 +- docs/runtime-and-tui-milestone.md | 17 +- docs/security-gaps.md | 212 +++++++ 13 files changed, 1117 insertions(+), 37 deletions(-) create mode 100644 docs/github-project-hygiene.md create mode 100644 docs/security-gaps.md diff --git a/crates/memory/src/store.rs b/crates/memory/src/store.rs index 462fd33..70bdb4e 100644 --- a/crates/memory/src/store.rs +++ b/crates/memory/src/store.rs @@ -479,6 +479,15 @@ impl MemoryStore { Ok(removed) } + fn delete_ns(&self, namespace: &str, key: &str) -> Result { + let path = self.resolve_existing_key_path(namespace, key); + if !path.exists() { + return Ok(false); + } + std::fs::remove_file(&path).map_err(|e| MemoryError::Io { path, source: e })?; + Ok(true) + } + pub fn scoped(self: &Arc, namespace: &str) -> ScopedMemoryStore { ScopedMemoryStore { inner: Arc::clone(self), @@ -569,6 +578,11 @@ impl ScopedMemoryStore { self.inner.purge_expired_ns(&self.namespace) } + /// Delete a memory key from this scope. Returns true when a file was removed. + pub fn delete(&self, key: &str) -> Result { + self.inner.delete_ns(&self.namespace, key) + } + /// Read a key plus all entries linked via `related_to` (1-hop graph expansion). /// Returns the primary entry first, followed by any related entries that exist. pub fn read_with_related(&self, key: &str) -> Result, MemoryError> { @@ -1083,4 +1097,17 @@ mod tests { assert_eq!(removed, 1); assert!(!path.exists()); } + + #[test] + fn delete_removes_existing_key_only_once() { + let dir = TempDir::new().unwrap(); + let store = Arc::new(MemoryStore::new(dir.path())); + let scoped = store.scoped("project"); + scoped.write("topic:item", "value").unwrap(); + + assert_eq!(scoped.read("topic:item").unwrap().as_deref(), Some("value")); + assert!(scoped.delete("topic:item").unwrap()); + assert_eq!(scoped.read("topic:item").unwrap(), None); + assert!(!scoped.delete("topic:item").unwrap()); + } } diff --git a/crates/sharing/src/lib.rs b/crates/sharing/src/lib.rs index 7f8cffd..de638ce 100644 --- a/crates/sharing/src/lib.rs +++ b/crates/sharing/src/lib.rs @@ -1524,6 +1524,34 @@ mod tests { .exists()); } + #[test] + fn builder_exports_selected_skill_package_directory() { + let dir = tempfile::tempdir().unwrap(); + let skills_dir = dir.path().join("skills"); + let workflows_dir = dir.path().join("workflows"); + let package_dir = skills_dir.join("ui-ux-pro-max"); + std::fs::create_dir_all(&package_dir).unwrap(); + std::fs::create_dir_all(&workflows_dir).unwrap(); + std::fs::write( + package_dir.join("SKILL.md"), + "---\nname: ui-ux-pro-max\ndescription: \"Actions: design, build\"\ntrigger: /ui-ux-pro-max\n---\nUse {{args}}\n", + ) + .unwrap(); + std::fs::write(package_dir.join("references.md"), "helper docs\n").unwrap(); + + let bundle = BundleBuilder::new([&skills_dir], [&workflows_dir]) + .build(&["ui-ux-pro-max"], &["__none__"], &[], &[]) + .unwrap(); + let filenames: HashSet = bundle + .skills + .iter() + .map(|asset| asset.filename.clone()) + .collect(); + + assert!(filenames.contains("ui-ux-pro-max/SKILL.md")); + assert!(filenames.contains("ui-ux-pro-max/references.md")); + } + #[test] fn importer_restores_project_tools() { let dir = tempfile::tempdir().unwrap(); diff --git a/crates/web/Cargo.toml b/crates/web/Cargo.toml index 5bcf820..1441a37 100644 --- a/crates/web/Cargo.toml +++ b/crates/web/Cargo.toml @@ -33,6 +33,7 @@ async-trait = { workspace = true } futures = { workspace = true } tempfile = { workspace = true } chrono = { workspace = true } +uuid = { workspace = true } reqwest = { version = "0.12", features = ["json", "rustls-tls"], default-features = false } sha2 = "0.10" ts-rs = { version = "10", features = ["serde-compat", "no-serde-warnings"] } diff --git a/crates/web/frontend/src/composables/useApi.js b/crates/web/frontend/src/composables/useApi.js index 2384a99..6c3e9b8 100644 --- a/crates/web/frontend/src/composables/useApi.js +++ b/crates/web/frontend/src/composables/useApi.js @@ -92,11 +92,18 @@ export function useApi() { if (!r.ok) throw new Error(`${r.status} ${r.statusText}`) return r.text() }), + deleteMemory: (scope, key) => + fetchJson(`/api/memory/${encodeURIComponent(scope)}/${encodeURIComponent(key)}`, { method: 'DELETE' }), + purgeExpiredMemory: (scope) => + fetchJson(`/api/memory/${encodeURIComponent(scope)}/purge-expired`, { method: 'POST' }), // Status getStatus: () => fetchJson('/api/status'), getStats: () => fetchJson('/api/stats'), getRuntimeSessions: (limit = 12) => fetchJson(`/api/runtime/sessions?limit=${encodeURIComponent(limit)}`), + getRuntimeMessages: (id) => fetchJson(`/api/runtime/sessions/${encodeURIComponent(id)}/messages`), + postRuntimeMessage: (id, data) => + fetchJson(`/api/runtime/sessions/${encodeURIComponent(id)}/messages`, { method: 'POST', body: JSON.stringify(data) }), getProviderStatus: () => fetchJson('/api/providers/status'), getScorecards: (limit = 100) => fetchJson(`/api/scorecards?limit=${encodeURIComponent(limit)}`), evaluateRegression: (params = {}) => { diff --git a/crates/web/frontend/src/views/DashboardView.vue b/crates/web/frontend/src/views/DashboardView.vue index b555e85..92dede3 100644 --- a/crates/web/frontend/src/views/DashboardView.vue +++ b/crates/web/frontend/src/views/DashboardView.vue @@ -22,6 +22,10 @@ const selectedArtifactPath = ref('') const selectedArtifactPreview = ref(null) const artifactPreviewStatus = ref('') const artifactPreviewRef = ref(null) +const runtimeMessages = ref([]) +const runtimeMessageText = ref('') +const runtimeMessageStatus = ref('') +const runtimeMessageBusy = ref(false) const approvalStatus = ref('') const approvalEditContent = ref('') const resumeStatus = ref('') @@ -528,6 +532,7 @@ async function refreshRuns() { expandedRunId.value = null selectedRunId.value = null selectedRun.value = null + clearRuntimeMessages() } } } @@ -537,6 +542,7 @@ async function toggleRun(id) { expandedRunId.value = null selectedRunId.value = null selectedRun.value = null + clearRuntimeMessages() clearArtifactPreview() approvalStatus.value = '' resumeStatus.value = '' @@ -549,11 +555,53 @@ async function toggleRun(id) { async function selectRun(id) { selectedRunId.value = id selectedRun.value = await api.getRunDetail(id) + await loadRuntimeMessages(id) approvalEditContent.value = selectedRun.value?.workflow_state?.pending_approval?.content || '' resumeStatus.value = '' clearArtifactPreview() } +function clearRuntimeMessages() { + runtimeMessages.value = [] + runtimeMessageText.value = '' + runtimeMessageStatus.value = '' + runtimeMessageBusy.value = false +} + +async function loadRuntimeMessages(id = selectedRunId.value) { + if (!id) { + runtimeMessages.value = [] + return + } + try { + const result = await api.getRuntimeMessages(id) + runtimeMessages.value = Array.isArray(result?.messages) ? result.messages : [] + } catch { + runtimeMessages.value = [] + } +} + +async function postRuntimeMessage() { + if (!selectedRunId.value || !runtimeMessageText.value.trim() || runtimeMessageBusy.value) return + runtimeMessageBusy.value = true + runtimeMessageStatus.value = 'Saving note...' + try { + await api.postRuntimeMessage(selectedRunId.value, { + author: 'operator', + kind: 'note', + body: runtimeMessageText.value.trim(), + }) + runtimeMessageText.value = '' + runtimeMessageStatus.value = '' + await loadRuntimeMessages(selectedRunId.value) + selectedRun.value = await api.getRunDetail(selectedRunId.value) + } catch (e) { + runtimeMessageStatus.value = `Error: ${e.message}` + } finally { + runtimeMessageBusy.value = false + } +} + function clearArtifactPreview() { selectedArtifactPath.value = '' selectedArtifactPreview.value = null @@ -1113,6 +1161,57 @@ async function submitTask() { v-html="renderMarkdown(selectedRunOutput)" /> + +
+
+
+
Session Notes
+
Operator and agent messages attached to this run for handoff, review, and continuation context.
+
+ {{ runtimeMessages.length }} note(s) +
+ +
+
+
+
+ {{ message.author || 'operator' }} + {{ message.kind || 'note' }} +
+ {{ fmtLocalTime(message.created_at) }} +
+
{{ message.body }}
+
+
+
+ No notes yet. Add a compact handoff, decision, or next-action note. +
+ +
+ +
+ + +
+
+
{{ runtimeMessageStatus }}
+
+
diff --git a/crates/web/frontend/src/views/MemoryView.vue b/crates/web/frontend/src/views/MemoryView.vue index 76890d5..719aa40 100644 --- a/crates/web/frontend/src/views/MemoryView.vue +++ b/crates/web/frontend/src/views/MemoryView.vue @@ -90,6 +90,28 @@ async function copyContent() { } } +async function deleteSelectedMemory() { + if (!selectedKey.value) return + if (!confirm(`Delete memory key '${selectedKey.value}' from ${activeScope.value}?`)) return + try { + await api.deleteMemory(activeScope.value, selectedKey.value) + await loadKeys(activeScope.value) + showRagToast('Memory key deleted') + } catch (e) { + showRagToast(e.message || 'Delete failed', 'error') + } +} + +async function purgeExpiredMemory() { + try { + const result = await api.purgeExpiredMemory(activeScope.value) + await loadKeys(activeScope.value) + showRagToast(`Purged ${result?.purged ?? 0} expired entr${result?.purged === 1 ? 'y' : 'ies'}`) + } catch (e) { + showRagToast(e.message || 'Purge failed', 'error') + } +} + // Render markdown via marked (GFM) + DOMPurify (XSS safe) function renderMarkdown(raw) { if (!raw) return '' @@ -533,6 +555,12 @@ watch(contentVisible, async (visible) => { + + ← select a key diff --git a/crates/web/frontend/src/views/SkillsView.vue b/crates/web/frontend/src/views/SkillsView.vue index 581ffbd..0a7108c 100644 --- a/crates/web/frontend/src/views/SkillsView.vue +++ b/crates/web/frontend/src/views/SkillsView.vue @@ -15,7 +15,11 @@ const searchQuery = ref('') const discoverQuery = ref('') const discoverSourcesText = ref('') const discoverResults = ref([]) +const discoverWarnings = ref([]) +const selectedDiscoverUrls = ref([]) const discovering = ref(false) +const bulkInstalling = ref(false) +const bulkConflictMode = ref('keep_existing') const previewLoading = ref(false) const previewError = ref(null) const previewOpen = ref(false) @@ -205,6 +209,12 @@ const filteredRegistry = computed(() => { }) const installedTriggers = computed(() => new Set(skills.value.map(s => s.trigger))) +const selectedDiscoverResults = computed(() => + discoverResults.value.filter(item => selectedDiscoverUrls.value.includes(item.url)) +) +const allDiscoverSelected = computed(() => + discoverResults.value.length > 0 && selectedDiscoverUrls.value.length === discoverResults.value.length +) function assocTools(item) { return item?.associations?.tools || [] @@ -354,24 +364,74 @@ async function importFromUrl() { async function runDiscoverSearch() { if (!discoverQuery.value.trim()) return const sources = normalizedDiscoverSources() - if (!sources.length) { - browseStatus.value = { type: 'error', message: 'Add at least one GitHub source URL' } - return - } saveDiscoverSources() discovering.value = true browseStatus.value = null + discoverWarnings.value = [] try { const result = await api.discoverSkills(discoverQuery.value.trim(), sources, 16) discoverResults.value = Array.isArray(result?.results) ? result.results : [] + selectedDiscoverUrls.value = selectedDiscoverUrls.value.filter(url => + discoverResults.value.some(item => item.url === url) + ) + discoverWarnings.value = Array.isArray(result?.warnings) ? result.warnings : [] } catch (e) { browseStatus.value = { type: 'error', message: e.message || 'Discover failed' } discoverResults.value = [] + selectedDiscoverUrls.value = [] + discoverWarnings.value = [] } finally { discovering.value = false } } +function toggleDiscoverSelection(item) { + if (!item?.url) return + const selected = new Set(selectedDiscoverUrls.value) + if (selected.has(item.url)) selected.delete(item.url) + else selected.add(item.url) + selectedDiscoverUrls.value = Array.from(selected) +} + +function toggleAllDiscoverResults() { + selectedDiscoverUrls.value = allDiscoverSelected.value + ? [] + : discoverResults.value.map(item => item.url).filter(Boolean) +} + +async function installSelectedDiscoverResults() { + const items = selectedDiscoverResults.value + if (!items.length || bulkInstalling.value) return + bulkInstalling.value = true + browseStatus.value = { type: 'loading', message: `Installing ${items.length} selected skill(s)...` } + const summary = { installed: 0, skipped: 0, failed: 0 } + try { + for (const item of items) { + try { + const opts = item.installed ? { conflict_action: bulkConflictMode.value } : {} + const result = await api.importSkill(item.url, opts) + if (result?.skipped) summary.skipped += 1 + else summary.installed += 1 + } catch { + summary.failed += 1 + } + } + await loadSkills() + await runDiscoverSearch() + selectedDiscoverUrls.value = [] + const parts = [`${summary.installed} installed`] + if (summary.skipped) parts.push(`${summary.skipped} kept`) + if (summary.failed) parts.push(`${summary.failed} failed`) + browseStatus.value = { + type: summary.failed ? 'error' : 'success', + message: `Bulk import complete: ${parts.join(', ')}`, + } + setTimeout(() => browseStatus.value = null, 4000) + } finally { + bulkInstalling.value = false + } +} + async function openPreview(url, installMode = 'preview') { previewOpen.value = true previewLoading.value = true @@ -651,10 +711,10 @@ async function installFromPreview() {
-
Use repo root, tree, or package URLs to add community or internal sources on top of the defaults.
+
Use repo root, tree, README, catalog, or package URLs to add community or internal sources on top of the defaults.
@@ -662,17 +722,67 @@ async function installFromPreview() {
+
+
source warnings
+
{{ warning }}
+
+
- No discovery results yet. Add one or more source URLs, search, then preview before install. + No discovery results yet. Search defaults or add one or more source URLs, then preview before install. +
+ +
+
+ + Duplicates are preserved by source so you can choose the better variant. +
+
+ + +
+
{{ item.name }}
{{ item.description }}
@@ -681,6 +791,7 @@ async function installFromPreview() { {{ item.repo }} v{{ item.version }} conflict + catalog
{{ item.path }}
diff --git a/crates/web/src/api.rs b/crates/web/src/api.rs index 89bf113..672287c 100644 --- a/crates/web/src/api.rs +++ b/crates/web/src/api.rs @@ -147,6 +147,60 @@ pub struct ProviderReadinessCard { pub hint: String, } +fn increment_patch_version(raw: Option<&str>) -> String { + let raw = raw.unwrap_or("0.0.0").trim(); + let mut parts = raw + .split('.') + .map(|part| part.parse::().unwrap_or(0)) + .collect::>(); + while parts.len() < 3 { + parts.push(0); + } + parts[2] = parts[2].saturating_add(1); + format!("{}.{}.{}", parts[0], parts[1], parts[2]) +} + +fn initial_or_incremented_version(existing: Option<&str>, requested: Option<&str>) -> String { + match existing { + Some(value) if !value.trim().is_empty() => increment_patch_version(Some(value)), + _ => requested + .filter(|value| !value.trim().is_empty()) + .unwrap_or("1.0.0") + .to_string(), + } +} + +fn markdown_frontmatter_version(path: &FsPath) -> Option { + let raw = std::fs::read_to_string(path).ok()?; + let parts: Vec<&str> = raw.splitn(3, "---").collect(); + if parts.len() < 3 { + return None; + } + let frontmatter = serde_yaml::from_str::(parts[1]).ok()?; + frontmatter + .get(serde_yaml::Value::String("version".to_string())) + .and_then(|value| value.as_str()) + .map(ToString::to_string) +} + +fn yaml_top_level_version(path: &FsPath) -> Option { + let raw = std::fs::read_to_string(path).ok()?; + let value = serde_yaml::from_str::(&raw).ok()?; + value + .get("version") + .and_then(|value| value.as_str()) + .map(ToString::to_string) +} + +fn toml_top_level_version(path: &FsPath) -> Option { + let raw = std::fs::read_to_string(path).ok()?; + let value = toml::from_str::(&raw).ok()?; + value + .get("version") + .and_then(|value| value.as_str()) + .map(ToString::to_string) +} + impl ProviderReadinessResponse { pub fn from_runtime( standalone_available: bool, @@ -440,6 +494,37 @@ pub struct RuntimeSessionsResponse { pub sessions: Vec, } +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "frontend/src/types/")] +pub struct RuntimeMessage { + pub id: String, + pub run_id: String, + pub created_at: String, + pub author: String, + pub kind: String, + pub body: String, +} + +#[derive(Debug, Deserialize, Serialize, TS)] +#[ts(export, export_to = "frontend/src/types/")] +pub struct RuntimeMessageRequest { + pub author: String, + #[serde(default = "default_runtime_message_kind")] + pub kind: String, + pub body: String, +} + +fn default_runtime_message_kind() -> String { + "note".to_string() +} + +#[derive(Debug, Clone, Serialize, Deserialize, TS)] +#[ts(export, export_to = "frontend/src/types/")] +pub struct RuntimeMessagesResponse { + pub run_id: String, + pub messages: Vec, +} + #[derive(Debug, Deserialize, Serialize, TS)] #[ts(export, export_to = "frontend/src/types/")] pub struct CleanupAwaitingRunsRequest { @@ -1062,6 +1147,86 @@ pub async fn runtime_sessions_handler( } } +pub async fn runtime_messages_list_handler(Path(run_id): Path) -> impl IntoResponse { + let store = run_store_for_web(); + match store.read_json_artifact_optional::>(&run_id, "messages.json") { + Ok(messages) => Json(RuntimeMessagesResponse { + run_id, + messages: messages.unwrap_or_default(), + }) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ "error": e.to_string() })), + ) + .into_response(), + } +} + +pub async fn runtime_messages_post_handler( + Path(run_id): Path, + Json(payload): Json, +) -> impl IntoResponse { + let body = payload.body.trim(); + if body.is_empty() { + return ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ "error": "message body is required" })), + ) + .into_response(); + } + let author = payload.author.trim(); + let kind = payload.kind.trim(); + let message = RuntimeMessage { + id: uuid::Uuid::new_v4().to_string(), + run_id: run_id.clone(), + created_at: Utc::now().to_rfc3339(), + author: if author.is_empty() { + "operator".to_string() + } else { + author.to_string() + }, + kind: if kind.is_empty() { + "note".to_string() + } else { + kind.to_string() + }, + body: body.to_string(), + }; + + let store = run_store_for_web(); + let mut messages = + match store.read_json_artifact_optional::>(&run_id, "messages.json") { + Ok(messages) => messages.unwrap_or_default(), + Err(e) => { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ "error": e.to_string() })), + ) + .into_response() + } + }; + messages.push(message.clone()); + if let Err(e) = store.write_json_artifact(&run_id, "messages.json", &messages) { + return ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({ "error": e.to_string() })), + ) + .into_response(); + } + let _ = store.append_note( + &run_id, + "runtime-message", + serde_json::json!({ + "id": message.id, + "author": message.author, + "kind": message.kind, + "body": message.body, + }), + ); + Json(message).into_response() +} + /// `GET /api/providers/status` — provider readiness without exposing secrets. pub async fn provider_status_handler(State(state): State) -> impl IntoResponse { Json(state.provider_readiness.clone().with_generated_at()).into_response() @@ -1956,6 +2121,7 @@ pub async fn personas_list_handler(State(_state): State) -> impl IntoR for p in registry.list() { let key = p.name.to_ascii_lowercase(); if seen.insert(key) { + let version = persona_version_for_name(dir, &p.name); personas.push(serde_json::json!({ "name": p.name, "description": p.description, @@ -1963,6 +2129,7 @@ pub async fn personas_list_handler(State(_state): State) -> impl IntoR "allowed_tools": p.allowed_tools, "system_prompt": p.system_prompt, "source": source_label, + "version": version, })); } } @@ -1981,6 +2148,7 @@ pub async fn personas_list_handler(State(_state): State) -> impl IntoR "allowed_tools": p.allowed_tools, "system_prompt": p.system_prompt, "source": "global", + "version": "1.0.0", })); } } @@ -2023,14 +2191,17 @@ pub async fn persona_save_handler( .join(", "); let model = payload.preferred_model.as_deref().unwrap_or("codex"); let prompt = payload.system_prompt.as_deref().unwrap_or(""); + let version = initial_or_incremented_version(toml_top_level_version(&path).as_deref(), None); let content = format!( "name = \"{}\"\n\ + version = \"{}\"\n\ description = \"{}\"\n\ preferred_model = \"{}\"\n\ allowed_tools = [{}]\n\n\ system_prompt = \"\"\"\n{}\n\"\"\"\n", payload.name, + version, payload.description.replace('"', "\\\""), model, tools_str, @@ -2048,6 +2219,21 @@ pub async fn persona_save_handler( } } +fn persona_version_for_name(dir: &FsPath, name: &str) -> String { + let safe = name + .chars() + .map(|c| { + if c.is_alphanumeric() || c == '-' || c == '_' { + c + } else { + '-' + } + }) + .collect::() + .to_lowercase(); + toml_top_level_version(&dir.join(format!("{safe}.toml"))).unwrap_or_else(|| "1.0.0".to_string()) +} + pub async fn persona_delete_handler( State(_state): State, Path(name): Path, @@ -2688,7 +2874,7 @@ async fn validate_with_llm(state: &AppState, workflow: &Value) -> ValidateLlm { pub async fn workflow_save_handler( State(_state): State, - Json(payload): Json, + Json(mut payload): Json, ) -> impl IntoResponse { let name = sanitize_file_stem( payload @@ -2707,6 +2893,12 @@ pub async fn workflow_save_handler( } let path = wf_dir.join(format!("{name}.yaml")); + let requested_version = payload.get("version").and_then(|value| value.as_str()); + let version = + initial_or_incremented_version(yaml_top_level_version(&path).as_deref(), requested_version); + if let Some(obj) = payload.as_object_mut() { + obj.insert("version".to_string(), Value::String(version)); + } match serde_yaml::to_string(&payload) { Ok(yaml) => match std::fs::write(&path, &yaml) { Ok(()) => { @@ -2760,6 +2952,7 @@ struct SkillFrontmatter<'a> { trigger: &'a str, description: &'a str, model: &'a str, + version: &'a str, #[serde(skip_serializing_if = "Option::is_none")] category: Option<&'a str>, } @@ -2869,12 +3062,15 @@ pub async fn skill_save_handler( let path = skills_dir.join(format!("{filename}.md")); let model = payload.model.as_deref().unwrap_or("codex"); let category = payload.category.as_deref().filter(|s| !s.is_empty()); + let version = + initial_or_incremented_version(markdown_frontmatter_version(&path).as_deref(), None); let mut frontmatter_yaml = match serde_yaml::to_string(&SkillFrontmatter { name: &payload.name, trigger: &normalized_trigger, description: &payload.description, model, + version: &version, category, }) { Ok(yaml) => yaml, @@ -2950,6 +3146,21 @@ pub struct MemoryScopeStats { pub learning_skill_count: Option, } +#[derive(Serialize)] +pub struct MemoryDeleteResponse { + pub ok: bool, + pub deleted: bool, + pub scope: String, + pub key: String, +} + +#[derive(Serialize)] +pub struct MemoryPurgeResponse { + pub ok: bool, + pub scope: String, + pub purged: usize, +} + // ── Memory list ─────────────────────────────────────────────────────────────── pub async fn memory_list_handler( @@ -3004,6 +3215,71 @@ pub async fn memory_get_handler( } } +pub async fn memory_delete_handler( + State(_state): State, + Path((scope, key)): Path<(String, String)>, +) -> impl IntoResponse { + if scope.contains("..") || scope.contains('/') || scope.contains('\\') { + return ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"error": "invalid scope"})), + ) + .into_response(); + } + if key.contains('\0') { + return ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"error": "invalid key"})), + ) + .into_response(); + } + + let store = memory_store_for_web_scope(&scope); + let namespace = web_namespace(&scope); + match store.scoped(namespace).delete(&key) { + Ok(deleted) => Json(MemoryDeleteResponse { + ok: true, + deleted, + scope, + key, + }) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": e.to_string()})), + ) + .into_response(), + } +} + +pub async fn memory_purge_expired_handler( + State(_state): State, + Path(scope): Path, +) -> impl IntoResponse { + if scope.contains("..") || scope.contains('/') || scope.contains('\\') { + return ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({"error": "invalid scope"})), + ) + .into_response(); + } + let store = memory_store_for_web_scope(&scope); + let namespace = web_namespace(&scope); + match store.scoped(namespace).purge_expired() { + Ok(purged) => Json(MemoryPurgeResponse { + ok: true, + scope, + purged, + }) + .into_response(), + Err(e) => ( + StatusCode::INTERNAL_SERVER_ERROR, + Json(serde_json::json!({"error": e.to_string()})), + ) + .into_response(), + } +} + pub async fn memory_stats_handler( State(_state): State, Path(scope): Path, @@ -3306,12 +3582,18 @@ pub async fn skill_import_handler( let _ = std::fs::create_dir_all(&skills_dir); match write_imported_skill(&skills_dir, imported) { - Ok((trigger, path)) => Json(serde_json::json!({ - "ok": true, - "trigger": trigger, - "path": path.display().to_string() - })) - .into_response(), + Ok((trigger, path)) => { + let write_home = agent007_write_home(); + let sync = sync_claude_slash_commands_for_write_home(&write_home); + Json(serde_json::json!({ + "ok": true, + "trigger": trigger, + "path": path.display().to_string(), + "slash_commands": sync.as_ref().ok(), + "slash_command_warning": sync.err(), + })) + .into_response() + } Err(e) => ( StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ "error": e.to_string() })), @@ -3474,24 +3756,29 @@ pub async fn skill_discover_handler( let tokens = query_tokens(&query); let mut results = Vec::new(); let mut seen_urls = std::collections::BTreeSet::new(); + let mut warnings = Vec::new(); - let sources = if payload.sources.is_empty() { + let source_urls = if payload.sources.is_empty() { default_skill_discovery_sources() } else { payload.sources.clone() }; - for source_url in &sources { - let source = match parse_skill_discovery_source(source_url) { - Ok(source) => source, - Err(e) => { - return ( - StatusCode::BAD_REQUEST, - Json(serde_json::json!({ "error": format!("{source_url}: {e}") })), - ) - .into_response() - } - }; + let sources = match expand_skill_discovery_sources(&client, &source_urls).await { + Ok(expanded) => expanded, + Err(e) => { + return ( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ "error": e.to_string() })), + ) + .into_response() + } + }; + + for source in &sources { + if source.from_catalog { + warnings.push(format!("expanded catalog source {}", source.source_url)); + } let tree_url = github_tree_api_url(&source.owner, &source.repo, source.reference.as_deref()); @@ -3642,7 +3929,9 @@ pub async fn skill_discover_handler( } } - Json(serde_json::json!({ "results": results })).into_response() + warnings.sort(); + warnings.dedup(); + Json(serde_json::json!({ "results": results, "warnings": warnings })).into_response() } #[derive(Debug)] @@ -3731,6 +4020,7 @@ struct SkillDiscoverSource { reference: Option, path: String, source_url: String, + from_catalog: bool, } #[derive(Debug, Clone, Serialize)] @@ -3839,6 +4129,7 @@ fn parse_skill_discovery_source(url: &str) -> anyhow::Result anyhow::Result Err(anyhow::anyhow!( "discovery sources must be GitHub repository, tree, or package URLs" @@ -3861,6 +4153,106 @@ fn parse_skill_discovery_source(url: &str) -> anyhow::Result anyhow::Result> { + let mut expanded = Vec::new(); + let mut seen = std::collections::BTreeSet::new(); + + for source_url in source_urls { + let source = parse_skill_discovery_source(source_url)?; + let source_key = format!( + "{}/{}/{}/{}", + source.owner, + source.repo, + source + .reference + .clone() + .unwrap_or_else(|| "HEAD".to_string()), + source.path + ); + if seen.insert(source_key) { + expanded.push(source.clone()); + } + + let import_source = parse_skill_import_source(source_url)?; + let SkillImportSourceKind::GitHubFile { + owner, + repo, + reference, + path, + } = import_source + else { + continue; + }; + + let filename = FsPath::new(&path) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("") + .to_ascii_lowercase(); + if !matches!(filename.as_str(), "readme.md" | "index.md" | "catalog.md") { + continue; + } + + let raw_url = github_raw_file_url(&owner, &repo, reference.as_deref(), &path); + let Ok(markdown) = fetch_text_async(client, &raw_url).await else { + continue; + }; + for linked_url in extract_github_urls_from_markdown(&markdown) + .into_iter() + .take(40) + { + let Ok(mut linked) = parse_skill_discovery_source(&linked_url) else { + continue; + }; + linked.from_catalog = true; + let key = format!( + "{}/{}/{}/{}", + linked.owner, + linked.repo, + linked + .reference + .clone() + .unwrap_or_else(|| "HEAD".to_string()), + linked.path + ); + if seen.insert(key) { + expanded.push(linked); + } + } + } + + Ok(expanded) +} + +fn extract_github_urls_from_markdown(markdown: &str) -> Vec { + let mut urls = Vec::new(); + for token in + markdown.split(|c: char| c.is_whitespace() || c == ')' || c == '(' || c == '<' || c == '>') + { + let cleaned = token + .trim_matches(|c: char| { + matches!( + c, + '"' | '\'' | '`' | ',' | ';' | ':' | '!' | '[' | ']' | '{' | '}' + ) + }) + .trim(); + if cleaned.starts_with("https://github.com/") + || cleaned.starts_with("http://github.com/") + || cleaned.starts_with("https://raw.githubusercontent.com/") + || cleaned.starts_with("http://raw.githubusercontent.com/") + { + urls.push(cleaned.to_string()); + } + } + urls.sort(); + urls.dedup(); + urls +} + fn is_discoverable_skill_markdown_path(path: &str, source_prefix: &str) -> bool { let normalized = path.replace('\\', "/"); let lower = normalized.to_ascii_lowercase(); @@ -3994,7 +4386,11 @@ fn skill_summary_match_score( let token_hits = tokens .iter() - .filter(|token| combined_tokens.iter().any(|candidate| candidate == *token)) + .filter(|token| { + combined_tokens + .iter() + .any(|candidate| token_matches(candidate, token)) + }) .count() as i32; let all_tokens_match = !tokens.is_empty() && token_hits as usize == tokens.len(); let any_token_match = token_hits > 0; @@ -4042,11 +4438,19 @@ fn skill_summary_match_score( fn field_matches_query(field: &str, query_lc: &str, exact_token_only: bool) -> bool { if exact_token_only { - return query_tokens(field).iter().any(|token| token == query_lc); + return query_tokens(field) + .iter() + .any(|token| token_matches(token, query_lc)); } field.contains(query_lc) } +fn token_matches(candidate: &str, query: &str) -> bool { + candidate == query + || (query.len() >= 4 && candidate.starts_with(query)) + || (candidate.len() >= 4 && query.starts_with(candidate)) +} + fn query_tokens(query: &str) -> Vec { query .split(|c: char| !(c.is_ascii_alphanumeric() || c == '-' || c == '_')) @@ -4390,14 +4794,33 @@ fn normalize_skill_manifest_content( ) }; - let trigger = fm.trigger.unwrap_or_else(|| url_trigger.clone()); + let trigger = normalize_skill_trigger(&fm.trigger.unwrap_or_else(|| url_trigger.clone())); let name = fm.name.unwrap_or(url_name); let description = fm.description.unwrap_or(url_description); let output = if has_original_trigger { content.to_string() } else { - format!("---\nname: {name}\ntrigger: {trigger}\ndescription: {description}\n---\n{body}") + let mut frontmatter = parts + .get(1) + .and_then(|raw| serde_yaml::from_str::(raw).ok()) + .unwrap_or_default(); + frontmatter.insert( + serde_yaml::Value::String("name".to_string()), + serde_yaml::Value::String(name), + ); + frontmatter.insert( + serde_yaml::Value::String("trigger".to_string()), + serde_yaml::Value::String(trigger.clone()), + ); + frontmatter.insert( + serde_yaml::Value::String("description".to_string()), + serde_yaml::Value::String(description), + ); + let yaml = serde_yaml::to_string(&frontmatter) + .map_err(|e| anyhow::anyhow!("failed to serialize skill frontmatter: {e}"))?; + let yaml = yaml.strip_prefix("---\n").unwrap_or(yaml.as_str()); + format!("---\n{}---\n{}", yaml, body.trim_start_matches('\n')) }; Ok((trigger, output)) @@ -4812,11 +5235,14 @@ fn generate_tool_skill( }; let skill_model = model.unwrap_or_else(|| "codex".to_string()); let category_ref = category.as_deref().filter(|value| !value.trim().is_empty()); + let version = + initial_or_incremented_version(markdown_frontmatter_version(&path).as_deref(), None); let mut frontmatter_yaml = serde_yaml::to_string(&SkillFrontmatter { name: &skill_name, trigger: &trigger, description: &skill_desc, model: &skill_model, + version: &version, category: category_ref, }) .map_err(|e| format!("failed to serialize skill frontmatter: {e}"))?; @@ -5953,9 +6379,15 @@ fn workflow_list_item( let mut skill_refs: Vec = Vec::new(); let mut agent_refs: Vec = Vec::new(); let mut associations = AssociatedAssets::default(); + let mut version = String::from("1.0.0"); if let Ok(raw) = std::fs::read_to_string(path) { extract_associations_from_text(&raw, &mut associations); + if let Ok(value) = serde_yaml::from_str::(&raw) { + if let Some(raw_version) = value.get("version").and_then(|value| value.as_str()) { + version = raw_version.to_string(); + } + } if let Some(def) = parse_workflow_def_from_path(path, &raw) { description = def.description.unwrap_or_default(); @@ -5993,6 +6425,7 @@ fn workflow_list_item( "name": name, "source": source, "description": description, + "version": version, "steps": steps_count, "skill_refs": skill_refs, "agent_refs": agent_refs, @@ -6642,6 +7075,34 @@ mod tests { ); } + #[test] + fn imported_package_manifest_without_trigger_keeps_yaml_valid() { + let source = "---\nname: ui-ux-pro-max\ndescription: \"Actions: design, build, review\"\n---\nUse {{args}}\n"; + let (trigger, normalized) = super::normalize_skill_manifest_content( + source, + "ui-ux-pro-max", + "https://github.com/example/repo/tree/main/.claude/skills/ui-ux-pro-max", + ) + .unwrap(); + assert_eq!(trigger, "/ui-ux-pro-max"); + assert!(normalized.contains("trigger: /ui-ux-pro-max")); + + let dir = tempfile::tempdir().unwrap(); + let skills_dir = dir.path().join(".agent007").join("skills"); + let package_dir = skills_dir.join("ui-ux-pro-max"); + std::fs::create_dir_all(&package_dir).unwrap(); + std::fs::write(package_dir.join("SKILL.md"), normalized).unwrap(); + + let skills = load_skills_from_dir(&skills_dir); + assert_eq!(skills.len(), 1); + assert_eq!(skills[0].trigger(), "/ui-ux-pro-max"); + assert_eq!( + skills[0].frontmatter.description, + "Actions: design, build, review" + ); + assert!(skills[0].is_package()); + } + #[tokio::test] async fn api_tool_import_quarantine_approve_and_test_flow() { let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner()); @@ -8252,6 +8713,7 @@ requires_approval = true reference: Some("main".to_string()), path: "skills".to_string(), source_url: "https://github.com/anthropics/skills/tree/main/skills".to_string(), + from_catalog: false, }; let tokens = super::query_tokens("ml engineer"); let score = super::skill_summary_match_score( @@ -8267,6 +8729,28 @@ requires_approval = true ); } + #[test] + fn version_helpers_increment_existing_patch_version() { + assert_eq!(super::increment_patch_version(Some("1.2.3")), "1.2.4"); + assert_eq!( + super::initial_or_incremented_version(Some("2.0.9"), Some("9.9.9")), + "2.0.10" + ); + assert_eq!(super::initial_or_incremented_version(None, None), "1.0.0"); + } + + #[test] + fn extract_github_urls_from_markdown_finds_catalog_links() { + let urls = super::extract_github_urls_from_markdown( + "- [Skill](https://github.com/acme/skills/tree/main/skills/foo)\n\ + - raw: https://raw.githubusercontent.com/acme/skills/main/skills/bar.md", + ); + assert_eq!(urls.len(), 2); + assert!(urls + .iter() + .any(|url| url.contains("github.com/acme/skills/tree/main/skills/foo"))); + } + #[test] fn skill_summary_match_score_rejects_unrelated_query() { let summary = super::SkillManifestSummary { @@ -8284,6 +8768,7 @@ requires_approval = true reference: Some("main".to_string()), path: "skills".to_string(), source_url: "https://github.com/openai/skills/tree/main/skills".to_string(), + from_catalog: false, }; let tokens = super::query_tokens("ml engineer"); let score = super::skill_summary_match_score( @@ -8313,6 +8798,7 @@ requires_approval = true reference: Some("main".to_string()), path: "skills".to_string(), source_url: "https://github.com/openai/skills/tree/main/skills".to_string(), + from_catalog: false, }; let tokens = super::query_tokens("ml"); let score = super::skill_summary_match_score( diff --git a/crates/web/src/server.rs b/crates/web/src/server.rs index 6b1efcc..5e7b4cd 100644 --- a/crates/web/src/server.rs +++ b/crates/web/src/server.rs @@ -190,6 +190,10 @@ impl WebServer { get(api::regression_evaluate_handler), ) .route("/api/runtime/sessions", get(api::runtime_sessions_handler)) + .route( + "/api/runtime/sessions/{id}/messages", + get(api::runtime_messages_list_handler).post(api::runtime_messages_post_handler), + ) .route("/api/providers/status", get(api::provider_status_handler)) .route("/api/runs", get(api::runs_handler)) .route( @@ -237,7 +241,14 @@ impl WebServer { ) .route("/api/memory/{scope}", get(api::memory_list_handler)) .route("/api/memory/{scope}/stats", get(api::memory_stats_handler)) - .route("/api/memory/{scope}/{key}", get(api::memory_get_handler)) + .route( + "/api/memory/{scope}/purge-expired", + post(api::memory_purge_expired_handler), + ) + .route( + "/api/memory/{scope}/{key}", + get(api::memory_get_handler).delete(api::memory_delete_handler), + ) .route( "/api/skills/{trigger}/promote", post(api::skill_promote_handler), diff --git a/docs/github-project-hygiene.md b/docs/github-project-hygiene.md new file mode 100644 index 0000000..ee9f98f --- /dev/null +++ b/docs/github-project-hygiene.md @@ -0,0 +1,59 @@ +# GitHub Project Hygiene + +agent007 uses GitHub Projects as a lightweight planning mirror, not as the source of truth for runtime state. + +## Goals + +```text +GitHub Project hygiene +├─ keep milestone cards small and actionable +├─ link PRs/issues back to repo docs +├─ avoid stale cards after PR merge +└─ make automation safe to run repeatedly +``` + +## Recommended states + +| State | Meaning | Exit action | +|---|---|---| +| Backlog | idea exists, not yet scoped | attach milestone/doc link | +| Ready | accepted next slice | create branch/PR | +| In progress | branch or PR exists | link PR | +| Review | PR checks/reviews pending | resolve comments/checks | +| Done | merged or explicitly closed | archive or move to Done | + +## Hygiene rules + +1. Every implementation PR should reference one milestone/card. +2. A merged PR should move the linked card to `Done` or be archived. +3. Closed/unmerged PRs should keep the card open only if work remains. +4. Large roadmap items should be split into small cards before coding. +5. Project automation must be idempotent: running it twice should not duplicate cards. + +## Current milestone cards to track + +```text +M4 cards +├─ repository/catalog skill import +├─ artifact versioning on save/import/edit +├─ runtime session messages +├─ memory lifecycle UX +├─ provider onboarding UX +├─ TUI operator controls +└─ .a7bundle v2 container +``` + +## Future automation contract + +A future `agent007 project sync` command should: + +```text +project sync +├─ read local milestone docs +├─ query GitHub Project items +├─ create missing cards only when explicitly requested +├─ update status for merged PR links +├─ never delete cards automatically +└─ print a dry-run diff by default +``` + diff --git a/docs/milestones.md b/docs/milestones.md index 21eeca2..345d9c7 100644 --- a/docs/milestones.md +++ b/docs/milestones.md @@ -12,7 +12,7 @@ | M1 | ✅ Complete | Consistent retrieval + execution behavior | Warmup indexing bounds, shared skill executor path, telemetry artifact generation, persona policy enforcement | none | core paths green + artifacts persisted | | M2 | ✅ Complete | User-facing observability | run-detail API extension, dashboard telemetry/policy/token cards, docs updates | M1 | artifacts visible in UI and validated | | M3 | 📋 Planned | Safe rollout | strict-mode rollout matrix, KPI baseline tracking, rollback playbook | M2 | measured rollout decision gates | -| M4 | 🚧 In Progress | Stronger long-lived runtime and operator UX | runtime session inventory, CLI status watch/filter, provider readiness, artifact/mock viewer v1, usable TUI, agent messaging, memory lifecycle improvements | M2 | sessions resumable, runtime visible in dashboard/TUI, first operator-grade terminal flow usable, visual artifacts reviewable in dashboard | +| M4 | 🚧 In Progress | Stronger long-lived runtime and operator UX | runtime session inventory, CLI status watch/filter, provider readiness, artifact/mock viewer v1, repository skill catalog import with bulk install, artifact versioning, usable TUI, session notes, memory lifecycle improvements | M2 | sessions resumable, runtime visible in dashboard/TUI, first operator-grade terminal flow usable, visual artifacts reviewable in dashboard | ## Parallel Workstreams 1. Backend: retriever/executor/policy and artifact persistence. diff --git a/docs/runtime-and-tui-milestone.md b/docs/runtime-and-tui-milestone.md index fe85799..eda0408 100644 --- a/docs/runtime-and-tui-milestone.md +++ b/docs/runtime-and-tui-milestone.md @@ -319,9 +319,10 @@ Make generated visual/design artifacts first-class in the dashboard so users can - approval queue ### Slice D — Agent Messaging Core -- message envelope -- persistence -- message inspection UI +- message envelope (v1 shipped as runtime message records) +- persistence (v1 shipped via per-run `messages.json` artifact) +- message inspection UI (v1 shipped as run-detail Session Notes) +- operator note append from dashboard ### Slice E — Provider / Browser UX - provider health/status (v1 shipped) @@ -330,6 +331,8 @@ Make generated visual/design artifacts first-class in the dashboard so users can ### Slice F — Memory Lifecycle - memory classes +- purge expired entries (v1 shipped in dashboard/API) +- delete stale keys (v1 shipped in dashboard/API) - promotion rules - retrieval summary visibility @@ -349,3 +352,11 @@ Make generated visual/design artifacts first-class in the dashboard so users can 5. Provider/browser failures are diagnosable from UI. 6. Memory reuse is more transparent and less noisy. 7. Generated visual artifacts can be reviewed directly in dashboard. + +## GitHub Project hygiene + +Project-board cleanup and idempotent sync rules are tracked in `docs/github-project-hygiene.md`. The project board mirrors milestone execution, while repo docs remain the durable source of truth. + +## Repository/catalog skill import + +Skill discovery now expands GitHub README/index/catalog links into additional source candidates, so a repo/catalog URL can surface multiple installable skills without requiring a single direct skill URL. The dashboard Browse tab preserves duplicate variants by source, supports preview-before-install, and can bulk-install selected results with explicit conflict behavior. diff --git a/docs/security-gaps.md b/docs/security-gaps.md new file mode 100644 index 0000000..c6f4d86 --- /dev/null +++ b/docs/security-gaps.md @@ -0,0 +1,212 @@ +# Security Gaps — Hardening Roadmap + +This document lists known security gaps in the current codebase, their severity for enterprise/office deployment, and what work is needed to close each one. It is maintained by the project team. Gaps are closed by removing them from this list and updating `SECURITY.md` accordingly. + +**Last updated:** 2026-05-08 +**Current version:** 0.2.0 + +--- + +## Summary Table + +| # | Gap | Severity | Effort | Status | +|---|---|---|---|---| +| 1 | Web dashboard has no authentication | High | Medium | Open | +| 2 | Web server binds `0.0.0.0` by default | High | Low | Open | +| 3 | Skill registry fetches from unpinned `main` branch | Medium | Low | Open | +| 4 | Extension installer has no signature verification | Medium | High | Open | +| 5 | Zone checker is opt-in, not enforced by default | Medium | Medium | Open | +| 6 | No corporate identity / SSO integration | Medium | High | Open | +| 7 | No secrets vault integration | Low | High | Open | +| 8 | No SBOM published with releases | Low | Low | Open | +| 9 | `agent007_git_commit` has no mandatory approval gate | Low | Low | Open | + +--- + +## Gap Details + +--- + +### Gap 1 — Web dashboard has no authentication +**Severity: High** + +**What the problem is:** +The web dashboard (`agent007 serve`) serves the dashboard UI and all REST API endpoints with no authentication. Any request that can reach the port is accepted and acted on — there is no bearer token, session cookie, or basic auth check. + +**What an attacker could do:** +Anyone on the same network segment (office LAN, shared Wi-Fi, corporate network) who knows or discovers the port can: trigger task runs, read memory/skills/personas, export bundles, write new skills, and manage the MCP server registry — all without credentials. + +**Where it is in the code:** +`crates/web/src/server.rs` — `into_router()`. No auth middleware is applied to any route. + +**Workaround until fixed:** +- Use `--no-dashboard` flag to disable the web server entirely (recommended for MCP-only use) +- Restrict port access at the firewall/network level +- Run behind a localhost-only reverse proxy with basic auth if remote access is needed + +**Work needed to close this gap:** +- Add a configurable static bearer token (set via `AGENT007_DASHBOARD_TOKEN` env var or `config.toml`) +- Apply the token check as an Axum middleware layer before all `/api/*` routes +- Optionally: add an `--auth-token ` CLI flag to `serve` command +- Document the token requirement in `SECURITY.md` and `docs/configuration.md` + +--- + +### Gap 2 — Web server binds `0.0.0.0` by default +**Severity: High** (closely related to Gap 1) + +**What the problem is:** +`TcpListener::bind("0.0.0.0:{port}")` binds all network interfaces. On a developer laptop on a corporate network, this means the dashboard is reachable from any other machine on the same subnet. + +**Where it is in the code:** +`crates/web/src/server.rs`, lines 270, 315 — `format!("0.0.0.0:{port}")`. + +**Workaround until fixed:** +Use `--no-dashboard` or firewall the port. + +**Work needed to close this gap:** +- Change the default bind address to `127.0.0.1` (localhost only) +- Add a `--bind ` CLI flag to `serve` and `serve-web` commands so operators who need LAN/remote access can opt in explicitly +- Update `docs/configuration.md` with the flag documentation + +--- + +### Gap 3 — Skill registry fetches from unpinned `main` branch +**Severity: Medium** + +**What the problem is:** +The skill registry endpoint fetches `https://raw.githubusercontent.com/danieldear/agent007/main/docs/registry.json` — always the current HEAD of the `main` branch. If the registry file or the repo were ever compromised, a malicious skill catalog could be served to users who click the Registry tab. + +**Where it is in the code:** +`crates/web/src/api.rs`, `skill_registry_handler()` — the URL is hardcoded. + +**Workaround until fixed:** +The registry is only fetched on user action (opening the Registry tab). It is not fetched automatically. Users should review skills before importing them. + +**Work needed to close this gap:** +- Pin the registry URL to a specific commit SHA rather than `main` (update on each release) +- Or: add a `skill_registry_url` config option in `config.toml` so enterprise deployments can point to an internal, vetted catalog instead of the public one +- Add SHA256 content verification of the fetched JSON against a known-good hash + +--- + +### Gap 4 — Extension installer has no signature verification +**Severity: Medium** + +**What the problem is:** +The extension system (`crates/extensions/`) can install content from GitHub repos, npm packages, and URLs. There is path traversal protection (`sanitize_relative_path`), but there is no cryptographic signature check on installed content. A compromised GitHub repo or npm package would install without warning. + +**Where it is in the code:** +`crates/web/src/extensions_api.rs` — `install_handler`. `crates/extensions/src/adapters/` — each adapter fetches and returns content without verifying signatures. + +**Workaround until fixed:** +Only install extensions from repos/packages you personally control or have reviewed. + +**Work needed to close this gap:** +- Add a `[extensions] allowlist = ["owner/repo", "npm-package-name"]` config option; reject installs not on the list +- For npm: pin to exact version + verify `npm pack` integrity hash +- For GitHub: require a specific tag or commit SHA, not a branch ref +- Consider adding a `--dry-run` flag to `install_extension` that shows the file list without writing to disk + +--- + +### Gap 5 — Zone checker is opt-in, not enforced by default +**Severity: Medium** + +**What the problem is:** +`ToolExecutor` is constructed with `zone_checker: None` by default. When no zone checker is configured, `check_zone()` returns `Ok(())` unconditionally — all file paths are effectively unrestricted. The zone system only protects when the user explicitly configures `[zones]` in `config.toml`. + +**Where it is in the code:** +`crates/core/src/tool_executor.rs` — `pub fn new()` sets `zone_checker: None`. `check_zone()` returns early with `Ok(())` when `None`. + +**Work needed to close this gap:** +- Apply a default `ZoneChecker` with sensible deny-by-default patterns even when `[zones]` is not configured: + - Forbidden: `.env`, `*.pem`, `*.key`, `secrets/`, `keys/`, `.ssh/`, `.gnupg/` + - Readonly: `.git/config`, `.agent007/config.toml` +- Document the default rules in `SECURITY.md` and `docs/configuration.md` +- Add an `--unrestricted` flag for power users who explicitly want to disable zone checks + +--- + +### Gap 6 — No corporate identity / SSO integration +**Severity: Medium (for enterprise compliance)** + +**What the problem is:** +- There is no SAML, OIDC, or LDAP integration +- API keys are per-developer environment variables with no central management or rotation +- Audit logs identify agents by name string, not by authenticated user identity +- There is no RBAC — everyone with access to the MCP server or dashboard has the same permissions + +**Work needed to close this gap:** +This is a significant feature addition, not a single fix. Suggested incremental path: +1. Add an `authenticated_user` field to audit log entries (sourced from a configurable env var like `AGENT007_USER_ID` initially) +2. Add support for `AGENT007_DASHBOARD_TOKEN` tied to a named identity (see Gap 1) +3. Document a recommended pattern for secrets management (e.g., using `direnv` + `.envrc` per-project, or a secrets manager CLI like `op` or `vault` to inject keys at startup) +4. Longer-term: OIDC integration for the dashboard + +--- + +### Gap 7 — No secrets vault integration +**Severity: Low (common for local developer tools)** + +**What the problem is:** +API keys are read from environment variables. There is no integration with secrets managers (HashiCorp Vault, AWS Secrets Manager, 1Password `op`, etc.). This means key rotation requires developers to manually update their environment, and there is no centralized audit of key usage. + +**Workaround:** +Use `direnv` + `.envrc` (gitignored) or a per-project `.env` loader that fetches secrets from a vault CLI at shell startup. agent007 reads `ANTHROPIC_API_KEY` and `OPENAI_API_KEY` from the environment regardless of how they were set. + +**Work needed to close this gap:** +- Document recommended patterns for vault-injected keys in `docs/configuration.md` +- Optionally: add a `[secrets]` config block that can run a command to retrieve a key value (e.g., `key_command = "op read op://vault/anthropic/key"`) — similar to how `pass` works with git credentials + +--- + +### Gap 8 — No SBOM published with releases +**Severity: Low** + +**What the problem is:** +No Software Bill of Materials is generated or published as part of the release process. Security teams doing SCA (software composition analysis) have to generate it themselves. + +**Where it is in the code / release process:** +`scripts/install.sh` and the CI release pipeline — no SBOM generation step. + +**Work needed to close this gap:** +- Add `cargo cyclonedx --format json --output sbom.cdx.json` to the release CI workflow +- Publish `sbom.cdx.json` as a GitHub Release artifact alongside the binary +- Add a note in `SECURITY.md` that the SBOM is available on the Releases page + +**Can be done right now manually:** +```bash +cargo install cargo-cyclonedx +cargo cyclonedx --format json +# produces sbom.cdx.json in the workspace root +``` + +--- + +### Gap 9 — `agent007_git_commit` MCP tool has no mandatory approval gate +**Severity: Low** + +**What the problem is:** +The `agent007_git_commit` MCP tool (`crates/cli/src/commands/serve.rs`) can stage and commit files to the local git repo when called by the AI editor. There is no hard-coded confirmation step. While the AI editor's own permission prompts provide one layer of protection, agent007 itself does not require a user confirmation before writing a commit. + +**Note:** The workflow system has an `approval` step type that pauses for human review. But this is per-workflow configuration — ad-hoc calls to `agent007_git_commit` bypass it. + +**Work needed to close this gap:** +- Add a `require_commit_approval = true` option to `[core]` in `config.toml` (default: `false` to preserve current behavior) +- When enabled: before executing a commit, emit a confirmation prompt to the MCP client via a tool response asking the user to approve the staged diff +- Document this option in `docs/configuration.md` + +--- + +## How to use this document + +When a gap is resolved: +1. Move it from this file to a `## Closed Gaps` section with the version it was closed in +2. Update `SECURITY.md` to reflect the new behavior +3. Add a changelog entry in `CHANGELOG.md` + +When a new gap is found: +1. Add it to the summary table with a severity and effort estimate +2. Write a detail section following the same format +3. Reference it from `SECURITY.md` if it affects the security architecture description From 4785447c25d24c0316ec4c5ae02f453886742f2f Mon Sep 17 00:00:00 2001 From: Daniel Franklin Date: Mon, 18 May 2026 09:51:22 -0400 Subject: [PATCH 2/4] chore: update Cargo.lock for uuid dependency --- Cargo.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.lock b/Cargo.lock index 30f2b1b..29391e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,6 +405,7 @@ dependencies = [ "tower-http", "tracing", "ts-rs", + "uuid", ] [[package]] From 78de0e7ff00145df410b020a9b9c100a01e495a8 Mon Sep 17 00:00:00 2001 From: Daniel Franklin Date: Mon, 18 May 2026 10:43:51 -0400 Subject: [PATCH 3/4] docs(security): expand security gaps with enterprise rejection findings Add 8 new critical/high gaps identified during enterprise security review: path traversal in memory keys, SSRF in skill discovery, skill import path traversal, post-approval execution sandbox, credential logging, LLM provider IP exfiltration risk, missing body size limits, and CI dependency scanning. Add ongoing audit process and toolchain reference. Renumber all gaps sequentially and add severity/category to summary table. --- docs/security-gaps.md | 288 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 269 insertions(+), 19 deletions(-) diff --git a/docs/security-gaps.md b/docs/security-gaps.md index c6f4d86..de49224 100644 --- a/docs/security-gaps.md +++ b/docs/security-gaps.md @@ -2,24 +2,38 @@ This document lists known security gaps in the current codebase, their severity for enterprise/office deployment, and what work is needed to close each one. It is maintained by the project team. Gaps are closed by removing them from this list and updating `SECURITY.md` accordingly. -**Last updated:** 2026-05-08 +**Last updated:** 2026-05-18 **Current version:** 0.2.0 --- +## Why this document exists + +This project was flagged by an enterprise security review citing weak code, missing authentication, and potential IP exfiltration risk. Every gap in this document maps directly to one or more of those concerns. Gaps are rated on two axes: **severity** (blast radius if exploited) and **effort** (engineering cost to close). + +--- + ## Summary Table -| # | Gap | Severity | Effort | Status | -|---|---|---|---|---| -| 1 | Web dashboard has no authentication | High | Medium | Open | -| 2 | Web server binds `0.0.0.0` by default | High | Low | Open | -| 3 | Skill registry fetches from unpinned `main` branch | Medium | Low | Open | -| 4 | Extension installer has no signature verification | Medium | High | Open | -| 5 | Zone checker is opt-in, not enforced by default | Medium | Medium | Open | -| 6 | No corporate identity / SSO integration | Medium | High | Open | -| 7 | No secrets vault integration | Low | High | Open | -| 8 | No SBOM published with releases | Low | Low | Open | -| 9 | `agent007_git_commit` has no mandatory approval gate | Low | Low | Open | +| # | Gap | Severity | Effort | Status | Category | +|---|---|---|---|---|---| +| 1 | Web dashboard has no authentication | Critical | Medium | Open | Auth | +| 2 | Web server binds `0.0.0.0` by default | Critical | Low | Open | Network | +| 3 | Path traversal in memory key parameter | High | Low | Open | Input Validation | +| 4 | SSRF in skill discovery source expansion | High | Medium | Open | SSRF | +| 5 | Skill import can write outside intended directory | High | Low | Open | Path Traversal | +| 6 | Skill execution sandbox unenforced post-approval | High | High | Open | Execution | +| 7 | Provider credentials can appear in logs | High | Low | Open | Secrets | +| 8 | Data sent to third-party LLM providers (IP risk) | High | High | Open | IP / Data | +| 9 | No request body size limits (DoS) | Medium | Low | Open | DoS | +| 10 | Skill registry fetches from unpinned `main` branch | Medium | Low | Open | Supply Chain | +| 11 | Extension installer has no signature verification | Medium | High | Open | Supply Chain | +| 12 | Zone checker is opt-in, not enforced by default | Medium | Medium | Open | Access Control | +| 13 | No corporate identity / SSO integration | Medium | High | Open | Auth | +| 14 | No secrets vault integration | Low | High | Open | Secrets | +| 15 | No SBOM published with releases | Low | Low | Open | Supply Chain | +| 16 | `agent007_git_commit` has no mandatory approval gate | Low | Low | Open | Access Control | +| 17 | No dependency vulnerability scanning in CI | Medium | Low | Open | Supply Chain | --- @@ -27,6 +41,12 @@ This document lists known security gaps in the current codebase, their severity --- +--- + +> **Reading order for a security reviewer:** Gaps 1–8 are the ones that caused the enterprise rejection. Read those first. Gaps 9–17 are real but lower urgency. + +--- + ### Gap 1 — Web dashboard has no authentication **Severity: High** @@ -71,7 +91,160 @@ Use `--no-dashboard` or firewall the port. --- -### Gap 3 — Skill registry fetches from unpinned `main` branch +--- + +### Gap 3 — Path traversal in memory key parameter +**Severity: High** + +**What the problem is:** +In `memory_delete_handler` (`crates/web/src/api.rs`), the `scope` parameter is validated for `..`, `/`, and `\`. The `key` parameter is only checked for null bytes. If `resolve_existing_key_path` does not canonicalize the full resolved path and re-anchor it to the store root, a key value of `../../../some/other/file` can reach arbitrary filesystem paths. + +**What an attacker could do:** +Delete or overwrite files outside the memory store directory, including config files, credentials, or other agent data. + +**Where it is in the code:** +`crates/web/src/api.rs` — `memory_delete_handler` and `memory_get_handler`. `crates/memory/src/store.rs` — `resolve_existing_key_path`. + +**Work needed to close this gap:** +- In `resolve_existing_key_path`: after constructing the full path, call `std::fs::canonicalize` (or equivalent) and assert the result starts with `self.root`. Return an error if the path escapes the root. +- Extend the key validation in `memory_delete_handler` to also reject keys containing `/`, `\`, and `..`. +- Add a test: key `"../escape"` must return an error, not a filesystem path. + +--- + +### Gap 4 — SSRF in skill discovery source expansion +**Severity: High** + +**What the problem is:** +`expand_skill_discovery_sources` (`crates/web/src/api.rs`) fetches arbitrary URLs after a prefix check for `https://github.com/` or `https://raw.githubusercontent.com/`. It then parses that Markdown and follows links found in it — a second-order fetch. The prefix check is evaluated on the string before DNS resolution and before following HTTP redirects. + +**Attack vectors:** +- An open redirect on `github.com` itself (GitHub has had these historically) redirects to an internal network address. +- A crafted Markdown catalog page at a legitimate GitHub URL contains links to internal-network addresses that pass the prefix check. +- The response size is unbounded — a large Markdown response causes memory exhaustion. + +**Where it is in the code:** +`crates/web/src/api.rs` — `expand_skill_discovery_sources`, `extract_github_urls_from_markdown`, `fetch_text_async`. + +**Work needed to close this gap:** +- Set a hard response size limit on `fetch_text_async` (e.g., 512 KB max). +- Set a connect and read timeout on the `reqwest::Client` used for discovery fetches. +- After following redirects, validate the final URL (not the original) still matches the allowlist. +- Limit the number of catalog-expanded URLs followed per source (currently capped at 40, which is reasonable — verify it holds after redirects). +- Add a config option to disable catalog expansion entirely for deployments that need strict source control. + +--- + +### Gap 5 — Skill import can write outside intended directory +**Severity: High** + +**What the problem is:** +`write_imported_skill` and `generate_tool_skill` construct a filesystem write path from a `sanitize_file_stem` call on the skill trigger/name. If `sanitize_file_stem` is insufficiently strict, a crafted skill manifest can name a file that resolves outside the skills directory. + +Package skills (subdirectory installs) are especially risky — the package directory name comes from the skill URL path and is used directly in `skills_dir.join(package_name)`. + +**What an attacker could do:** +Write a file to an arbitrary path on the filesystem, overwriting existing files. Combined with a crafted YAML/TOML payload, this could overwrite `config.toml`, an existing skill, or a shell config file. + +**Where it is in the code:** +`crates/web/src/api.rs` — `write_imported_skill`, `skill_import_handler`, `generate_tool_skill`. + +**Work needed to close this gap:** +- After constructing the final write path, call `Path::canonicalize` on the parent directory and assert it is strictly within `skills_dir`. Return an error before any write if it is not. +- Reject package directory names that contain `/`, `\`, `..`, or start with `.`. +- Add a test: a skill trigger of `"../../etc/malicious"` must be rejected, not written. + +--- + +### Gap 6 — Skill execution sandbox unenforced post-approval +**Severity: High** + +**What the problem is:** +The approval workflow pauses before a skill runs for the first time. Once approved, a skill's `system_prompt` can reference MCP tools, issue arbitrary shell commands (if shell tools are enabled), or access the full filesystem. There is no per-skill tool allowlist enforced at execution time — the approval is a one-time gate, not a runtime constraint. + +A malicious or compromised skill (e.g., imported from a catalog that later adds a backdoor) continues to execute with full privileges after the original approval. + +**What an attacker could do:** +- Exfiltrate memory contents, config files, or API keys via a skill that makes outbound HTTP calls. +- Modify other skills or workflows on disk. +- Use the `agent007_git_commit` MCP tool to silently commit malicious code to the repo. + +**Work needed to close this gap:** +- Add an `allowed_tools` list to each skill's frontmatter (already present in persona TOML — mirror for skills). +- At execution time, restrict the MCP tool set available to a skill to its declared `allowed_tools`. +- Treat a missing or empty `allowed_tools` as a prompt for the user to define one during the approval step. +- Document that re-approval is required if a skill's source content changes. + +--- + +### Gap 7 — Provider credentials can appear in logs +**Severity: High** + +**What the problem is:** +Provider configuration structs (containing API keys) pass through the readiness check and provider status layers. If a `tracing` call captures a provider config struct via `{:?}` or `{:#?}` debug formatting, the API key appears in the log output. The `fix(cli): redact ollama readiness source` commit in the history indicates this has already occurred at least once. + +**Where to check:** +- `crates/web/src/api.rs` — all `tracing::*` call sites that reference provider config, readiness response, or request structs +- Any `derive(Debug)` on structs that contain `api_key`, `token`, or `secret` fields + +**Work needed to close this gap:** +- Audit every `derive(Debug)` struct that could hold a credential. Override the `Debug` impl to redact sensitive fields, or use a wrapper type like `Redacted` that prints `[REDACTED]`. +- Add a CI lint or unit test that constructs a provider config with a known dummy key, formats it with `{:?}`, and asserts the key string does not appear in the output. +- Review all `tracing::debug!` and `tracing::trace!` calls in provider and config modules. + +--- + +### Gap 8 — Data sent to third-party LLM providers (IP / data exfiltration risk) +**Severity: High (enterprise blocker)** + +**What the problem is:** +This is the primary reason enterprise reviewers cite IP leakage risk. Every skill run, workflow step, and agent persona call sends data to a third-party LLM provider (Anthropic, OpenAI, etc.). This data includes: +- The skill/workflow system prompt (which may contain internal business logic) +- The full conversation context, which accumulates memory entries, run history, and user inputs +- Any documents or code snippets passed as context +- Memory store contents read for retrieval-augmented steps + +For an enterprise, this means proprietary workflows, internal data, and business-sensitive context are leaving the organization's network boundary on every inference call. + +**What an attacker or competing party could do:** +This is less about external attack and more about contractual and regulatory exposure: +- Violation of data residency requirements (GDPR, SOC 2, HIPAA depending on domain) +- Unintended disclosure of trade secrets embedded in skill prompts +- Provider terms of service may allow training on API data depending on tier + +**Work needed to close this gap:** +Short-term (required for any enterprise pilot): +- Add an `[enterprise]` config block with `allowed_providers = ["ollama"]` — when set, block inference calls to any non-listed provider and surface a clear error. +- Add a data classification field to skills (`data_classification = "internal" | "public"`) and refuse to run `internal`-classified skills against external providers. +- Document in `SECURITY.md` which data leaves the machine and to where. + +Longer-term: +- First-class Ollama and local model support so an org can run fully air-gapped. +- Prompt content filtering before send: strip patterns matching internal naming conventions, secrets patterns, or user-defined regexes. +- Audit log every outbound inference call: timestamp, provider, model, token count, skill/workflow that triggered it (no prompt content in the audit log itself). + +--- + +### Gap 9 — No request body size limits +**Severity: Medium** + +**What the problem is:** +The Axum router has no `DefaultBodyLimit` layer. Endpoints that accept JSON bodies (`RuntimeMessageRequest`, skill save, workflow save, persona save, skill import) will read an unbounded amount of data from the connection before deserializing. An unauthenticated caller (Gap 1) can send a multi-gigabyte body and exhaust memory or disk. + +**Where it is in the code:** +`crates/web/src/server.rs` — `into_router()`. No body limit layer is applied. + +**Work needed to close this gap:** +```rust +// Add to router construction in server.rs +use axum::extract::DefaultBodyLimit; +let router = router.layer(DefaultBodyLimit::max(4 * 1024 * 1024)); // 4 MB +``` +Individual endpoints that legitimately need larger bodies (e.g., bundle import) can override with `axum::extract::RequestBodyLimitLayer`. + +--- + +### Gap 10 — Skill registry fetches from unpinned `main` branch **Severity: Medium** **What the problem is:** @@ -90,7 +263,7 @@ The registry is only fetched on user action (opening the Registry tab). It is no --- -### Gap 4 — Extension installer has no signature verification +### Gap 11 — Extension installer has no signature verification **Severity: Medium** **What the problem is:** @@ -110,7 +283,7 @@ Only install extensions from repos/packages you personally control or have revie --- -### Gap 5 — Zone checker is opt-in, not enforced by default +### Gap 12 — Zone checker is opt-in, not enforced by default **Severity: Medium** **What the problem is:** @@ -128,7 +301,7 @@ Only install extensions from repos/packages you personally control or have revie --- -### Gap 6 — No corporate identity / SSO integration +### Gap 13 — No corporate identity / SSO integration **Severity: Medium (for enterprise compliance)** **What the problem is:** @@ -146,7 +319,7 @@ This is a significant feature addition, not a single fix. Suggested incremental --- -### Gap 7 — No secrets vault integration +### Gap 14 — No secrets vault integration **Severity: Low (common for local developer tools)** **What the problem is:** @@ -161,7 +334,7 @@ Use `direnv` + `.envrc` (gitignored) or a per-project `.env` loader that fetches --- -### Gap 8 — No SBOM published with releases +### Gap 15 — No SBOM published with releases **Severity: Low** **What the problem is:** @@ -184,7 +357,7 @@ cargo cyclonedx --format json --- -### Gap 9 — `agent007_git_commit` MCP tool has no mandatory approval gate +### Gap 16 — `agent007_git_commit` MCP tool has no mandatory approval gate **Severity: Low** **What the problem is:** @@ -199,6 +372,83 @@ The `agent007_git_commit` MCP tool (`crates/cli/src/commands/serve.rs`) can stag --- +--- + +### Gap 17 — No dependency vulnerability scanning in CI +**Severity: Medium** + +**What the problem is:** +`cargo audit` and related tools are not run in the CI pipeline. Known CVEs in transitive dependencies can silently enter the codebase and remain undetected until a manual check is run. + +`cargo audit` only checks for known CVEs in `Cargo.lock` — it does not catch logic vulnerabilities in your own code. But it is the minimum automated bar. + +**What each tool actually checks:** + +| Tool | What it finds | What it misses | +|---|---|---| +| `cargo audit` | Known CVEs in `Cargo.lock` via RustSec DB | Your own code, logic bugs, OWASP issues | +| `cargo geiger` | Count and location of `unsafe {}` blocks | Whether the unsafe code is actually exploitable | +| `cargo deny` | CVEs + license violations + duplicate deps | Logic vulnerabilities | +| `cargo outdated` | Deps with newer versions available | Whether the newer version is safe | +| `trufflehog` / `gitleaks` | Secrets committed to git history | Secrets in env, runtime, or logs | +| SAST / manual review | Logic bugs, path traversal, SSRF, auth issues | Nothing — this is the only thing that finds them | + +**Work needed to close this gap:** +Add to CI workflow (`.github/workflows/ci.yml`): +```yaml +- name: Security audit + run: | + cargo install cargo-audit --locked + cargo audit + +- name: Unsafe usage report + run: | + cargo install cargo-geiger --locked + cargo geiger --all-features 2>&1 | tee geiger-report.txt + +- name: Secret scan + uses: trufflesecurity/trufflehog@main + with: + path: ./ + base: main + head: HEAD +``` + +Run locally before any release: +```bash +cargo audit # CVEs in dependencies +cargo geiger # unsafe block inventory +cargo deny check # advisories + licenses + duplicates +cargo outdated # stale dependencies +trufflehog git . --since-commit HEAD~20 # secrets in recent history +gitleaks detect --source . # broader secrets scan +``` + +--- + +## Ongoing audit process + +Security is not a one-time checklist. These are the recurring activities needed to keep the gap count from growing: + +**Every PR:** +- Reviewer checks any new HTTP fetch call for SSRF risk +- Reviewer checks any new filesystem write for path traversal risk +- Reviewer checks any new struct with `derive(Debug)` for credential exposure +- `cargo audit` runs in CI (Gap 17) + +**Every release:** +- Run the full local toolchain above +- Update `Cargo.lock` (ensures `cargo audit` has current data) +- Review any new dependency added since last release +- Generate and publish SBOM (Gap 15) + +**Before any enterprise pilot:** +- Gaps 1, 2, 3, 4, 5, 6, 7, 8 must be closed or have documented mitigations accepted in writing by the customer +- Gap 8 (LLM data exfiltration) requires either local-model support or a signed data processing agreement with each provider +- Conduct a focused manual review of all code paths from HTTP request to filesystem write and from HTTP request to outbound network call + +--- + ## How to use this document When a gap is resolved: From 302915f7c5178a13da3ebd701fc63c7fa22abe68 Mon Sep 17 00:00:00 2001 From: Daniel Franklin Date: Mon, 18 May 2026 23:13:16 -0400 Subject: [PATCH 4/4] fix(web): address PR review feedback --- crates/web/frontend/src/composables/useApi.js | 2 +- crates/web/frontend/src/views/MemoryView.vue | 8 +- crates/web/frontend/src/views/SkillsView.vue | 2 +- crates/web/src/api.rs | 196 ++++++++++++------ crates/web/src/server.rs | 2 +- docs/security-gaps.md | 2 +- 6 files changed, 142 insertions(+), 70 deletions(-) diff --git a/crates/web/frontend/src/composables/useApi.js b/crates/web/frontend/src/composables/useApi.js index 6c3e9b8..874ecb2 100644 --- a/crates/web/frontend/src/composables/useApi.js +++ b/crates/web/frontend/src/composables/useApi.js @@ -95,7 +95,7 @@ export function useApi() { deleteMemory: (scope, key) => fetchJson(`/api/memory/${encodeURIComponent(scope)}/${encodeURIComponent(key)}`, { method: 'DELETE' }), purgeExpiredMemory: (scope) => - fetchJson(`/api/memory/${encodeURIComponent(scope)}/purge-expired`, { method: 'POST' }), + fetchJson(`/api/memory/${encodeURIComponent(scope)}/_actions/purge-expired`, { method: 'POST' }), // Status getStatus: () => fetchJson('/api/status'), diff --git a/crates/web/frontend/src/views/MemoryView.vue b/crates/web/frontend/src/views/MemoryView.vue index 719aa40..b76ccc8 100644 --- a/crates/web/frontend/src/views/MemoryView.vue +++ b/crates/web/frontend/src/views/MemoryView.vue @@ -483,6 +483,9 @@ watch(contentVisible, async (visible) => { /> +
@@ -555,10 +558,7 @@ watch(contentVisible, async (visible) => { - - diff --git a/crates/web/frontend/src/views/SkillsView.vue b/crates/web/frontend/src/views/SkillsView.vue index 0a7108c..1a8b12a 100644 --- a/crates/web/frontend/src/views/SkillsView.vue +++ b/crates/web/frontend/src/views/SkillsView.vue @@ -408,7 +408,7 @@ async function installSelectedDiscoverResults() { try { for (const item of items) { try { - const opts = item.installed ? { conflict_action: bulkConflictMode.value } : {} + const opts = { conflict_action: bulkConflictMode.value } const result = await api.importSkill(item.url, opts) if (result?.skipped) summary.skipped += 1 else summary.installed += 1 diff --git a/crates/web/src/api.rs b/crates/web/src/api.rs index 672287c..32ef079 100644 --- a/crates/web/src/api.rs +++ b/crates/web/src/api.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::sync::{Arc, Mutex, OnceLock}; use axum::{ extract::{Path, Query, State}, @@ -42,6 +42,31 @@ fn run_store_for_web() -> agent007_core::RunStore { agent007_core::RunStore::new(agent007_write_home().join("sessions")) } +fn runtime_messages_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) +} + +fn validate_existing_run_id(run_id: &str) -> Result<(), (StatusCode, Json)> { + if uuid::Uuid::parse_str(run_id).is_err() { + return Err(( + StatusCode::BAD_REQUEST, + Json(serde_json::json!({ "error": "invalid run id" })), + )); + } + let meta_path = agent007_write_home() + .join("sessions") + .join(run_id) + .join("meta.json"); + if !meta_path.is_file() { + return Err(( + StatusCode::NOT_FOUND, + Json(serde_json::json!({ "error": "run not found" })), + )); + } + Ok(()) +} + // ── request/response shapes ─────────────────────────────────────────────────── #[derive(Deserialize, Serialize, TS)] @@ -147,22 +172,30 @@ pub struct ProviderReadinessCard { pub hint: String, } -fn increment_patch_version(raw: Option<&str>) -> String { +fn increment_patch_version(raw: Option<&str>) -> Option { let raw = raw.unwrap_or("0.0.0").trim(); - let mut parts = raw - .split('.') - .map(|part| part.parse::().unwrap_or(0)) - .collect::>(); + let mut parts = Vec::new(); + for part in raw.split('.') { + if part.is_empty() || !part.chars().all(|ch| ch.is_ascii_digit()) { + return None; + } + parts.push(part.parse::().ok()?); + } + if parts.len() > 3 { + return None; + } while parts.len() < 3 { parts.push(0); } parts[2] = parts[2].saturating_add(1); - format!("{}.{}.{}", parts[0], parts[1], parts[2]) + Some(format!("{}.{}.{}", parts[0], parts[1], parts[2])) } fn initial_or_incremented_version(existing: Option<&str>, requested: Option<&str>) -> String { match existing { - Some(value) if !value.trim().is_empty() => increment_patch_version(Some(value)), + Some(value) if !value.trim().is_empty() => { + increment_patch_version(Some(value)).unwrap_or_else(|| value.to_string()) + } _ => requested .filter(|value| !value.trim().is_empty()) .unwrap_or("1.0.0") @@ -177,8 +210,9 @@ fn markdown_frontmatter_version(path: &FsPath) -> Option { return None; } let frontmatter = serde_yaml::from_str::(parts[1]).ok()?; + let version_key = serde_yaml::Value::String("version".to_string()); frontmatter - .get(serde_yaml::Value::String("version".to_string())) + .get(&version_key) .and_then(|value| value.as_str()) .map(ToString::to_string) } @@ -1148,6 +1182,9 @@ pub async fn runtime_sessions_handler( } pub async fn runtime_messages_list_handler(Path(run_id): Path) -> impl IntoResponse { + if let Err(response) = validate_existing_run_id(&run_id) { + return response.into_response(); + } let store = run_store_for_web(); match store.read_json_artifact_optional::>(&run_id, "messages.json") { Ok(messages) => Json(RuntimeMessagesResponse { @@ -1167,6 +1204,9 @@ pub async fn runtime_messages_post_handler( Path(run_id): Path, Json(payload): Json, ) -> impl IntoResponse { + if let Err(response) = validate_existing_run_id(&run_id) { + return response.into_response(); + } let body = payload.body.trim(); if body.is_empty() { return ( @@ -1195,6 +1235,9 @@ pub async fn runtime_messages_post_handler( }; let store = run_store_for_web(); + let _guard = runtime_messages_lock() + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()); let mut messages = match store.read_json_artifact_optional::>(&run_id, "messages.json") { Ok(messages) => messages.unwrap_or_default(), @@ -2220,18 +2263,33 @@ pub async fn persona_save_handler( } fn persona_version_for_name(dir: &FsPath, name: &str) -> String { - let safe = name - .chars() - .map(|c| { - if c.is_alphanumeric() || c == '-' || c == '_' { - c - } else { - '-' - } - }) - .collect::() - .to_lowercase(); - toml_top_level_version(&dir.join(format!("{safe}.toml"))).unwrap_or_else(|| "1.0.0".to_string()) + let entries = match std::fs::read_dir(dir) { + Ok(entries) => entries, + Err(_) => return "1.0.0".to_string(), + }; + for entry in entries.flatten() { + let path = entry.path(); + if path.extension().and_then(|ext| ext.to_str()) != Some("toml") { + continue; + } + let Ok(raw) = std::fs::read_to_string(&path) else { + continue; + }; + let Ok(value) = toml::from_str::(&raw) else { + continue; + }; + let Some(file_name) = value.get("name").and_then(|value| value.as_str()) else { + continue; + }; + if file_name == name { + return value + .get("version") + .and_then(|value| value.as_str()) + .unwrap_or("1.0.0") + .to_string(); + } + } + "1.0.0".to_string() } pub async fn persona_delete_handler( @@ -3765,7 +3823,10 @@ pub async fn skill_discover_handler( }; let sources = match expand_skill_discovery_sources(&client, &source_urls).await { - Ok(expanded) => expanded, + Ok((expanded, expansion_warnings)) => { + warnings.extend(expansion_warnings); + expanded + } Err(e) => { return ( StatusCode::BAD_REQUEST, @@ -3911,6 +3972,7 @@ pub async fn skill_discover_handler( category: summary.category, model: summary.model, installed: conflict.is_some(), + from_catalog: source.from_catalog, conflict, }, )); @@ -4048,6 +4110,7 @@ struct SkillDiscoverResult { #[serde(skip_serializing_if = "Option::is_none")] model: Option, installed: bool, + from_catalog: bool, #[serde(skip_serializing_if = "Option::is_none")] conflict: Option, } @@ -4156,12 +4219,14 @@ fn parse_skill_discovery_source(url: &str) -> anyhow::Result anyhow::Result> { +) -> anyhow::Result<(Vec, Vec)> { let mut expanded = Vec::new(); + let mut warnings = Vec::new(); let mut seen = std::collections::BTreeSet::new(); for source_url in source_urls { - let source = parse_skill_discovery_source(source_url)?; + let source = parse_skill_discovery_source(source_url) + .map_err(|e| anyhow::anyhow!("{source_url}: {e}"))?; let source_key = format!( "{}/{}/{}/{}", source.owner, @@ -4176,7 +4241,8 @@ async fn expand_skill_discovery_sources( expanded.push(source.clone()); } - let import_source = parse_skill_import_source(source_url)?; + let import_source = parse_skill_import_source(source_url) + .map_err(|e| anyhow::anyhow!("{source_url}: {e}"))?; let SkillImportSourceKind::GitHubFile { owner, repo, @@ -4197,15 +4263,23 @@ async fn expand_skill_discovery_sources( } let raw_url = github_raw_file_url(&owner, &repo, reference.as_deref(), &path); - let Ok(markdown) = fetch_text_async(client, &raw_url).await else { - continue; + let markdown = match fetch_text_async(client, &raw_url).await { + Ok(markdown) => markdown, + Err(e) => { + warnings.push(format!("failed to expand catalog {source_url}: {e}")); + continue; + } }; for linked_url in extract_github_urls_from_markdown(&markdown) .into_iter() .take(40) { - let Ok(mut linked) = parse_skill_discovery_source(&linked_url) else { - continue; + let mut linked = match parse_skill_discovery_source(&linked_url) { + Ok(linked) => linked, + Err(e) => { + warnings.push(format!("ignored catalog link {linked_url}: {e}")); + continue; + } }; linked.from_catalog = true; let key = format!( @@ -4224,7 +4298,7 @@ async fn expand_skill_discovery_sources( } } - Ok(expanded) + Ok((expanded, warnings)) } fn extract_github_urls_from_markdown(markdown: &str) -> Vec { @@ -4760,12 +4834,9 @@ fn normalize_skill_manifest_content( let url_description = format!("Imported from {original_url}"); let parts: Vec<&str> = content.splitn(3, "---").collect(); - let (fm, body, has_original_trigger) = if parts.len() >= 3 { + let (fm, body) = if parts.len() >= 3 { match serde_yaml::from_str::(parts[1]) { - Ok(f) => { - let has_trigger = f.trigger.is_some(); - (f, parts[2].to_string(), has_trigger) - } + Ok(f) => (f, parts[2].to_string()), Err(_) => ( SkillImportFrontmatter { trigger: None, @@ -4776,7 +4847,6 @@ fn normalize_skill_manifest_content( version: None, }, content.to_string(), - false, ), } } else { @@ -4790,7 +4860,6 @@ fn normalize_skill_manifest_content( version: None, }, content.to_string(), - false, ) }; @@ -4798,30 +4867,26 @@ fn normalize_skill_manifest_content( let name = fm.name.unwrap_or(url_name); let description = fm.description.unwrap_or(url_description); - let output = if has_original_trigger { - content.to_string() - } else { - let mut frontmatter = parts - .get(1) - .and_then(|raw| serde_yaml::from_str::(raw).ok()) - .unwrap_or_default(); - frontmatter.insert( - serde_yaml::Value::String("name".to_string()), - serde_yaml::Value::String(name), - ); - frontmatter.insert( - serde_yaml::Value::String("trigger".to_string()), - serde_yaml::Value::String(trigger.clone()), - ); - frontmatter.insert( - serde_yaml::Value::String("description".to_string()), - serde_yaml::Value::String(description), - ); - let yaml = serde_yaml::to_string(&frontmatter) - .map_err(|e| anyhow::anyhow!("failed to serialize skill frontmatter: {e}"))?; - let yaml = yaml.strip_prefix("---\n").unwrap_or(yaml.as_str()); - format!("---\n{}---\n{}", yaml, body.trim_start_matches('\n')) - }; + let mut frontmatter = parts + .get(1) + .and_then(|raw| serde_yaml::from_str::(raw).ok()) + .unwrap_or_default(); + frontmatter.insert( + serde_yaml::Value::String("name".to_string()), + serde_yaml::Value::String(name), + ); + frontmatter.insert( + serde_yaml::Value::String("trigger".to_string()), + serde_yaml::Value::String(trigger.clone()), + ); + frontmatter.insert( + serde_yaml::Value::String("description".to_string()), + serde_yaml::Value::String(description), + ); + let yaml = serde_yaml::to_string(&frontmatter) + .map_err(|e| anyhow::anyhow!("failed to serialize skill frontmatter: {e}"))?; + let yaml = yaml.strip_prefix("---\n").unwrap_or(yaml.as_str()); + let output = format!("---\n{}---\n{}", yaml, body.trim_start_matches('\n')); Ok((trigger, output)) } @@ -8731,12 +8796,19 @@ requires_approval = true #[test] fn version_helpers_increment_existing_patch_version() { - assert_eq!(super::increment_patch_version(Some("1.2.3")), "1.2.4"); + assert_eq!( + super::increment_patch_version(Some("1.2.3")).as_deref(), + Some("1.2.4") + ); assert_eq!( super::initial_or_incremented_version(Some("2.0.9"), Some("9.9.9")), "2.0.10" ); assert_eq!(super::initial_or_incremented_version(None, None), "1.0.0"); + assert_eq!( + super::initial_or_incremented_version(Some("1.2.3-beta.1"), None), + "1.2.3-beta.1" + ); } #[test] diff --git a/crates/web/src/server.rs b/crates/web/src/server.rs index 5e7b4cd..b77f409 100644 --- a/crates/web/src/server.rs +++ b/crates/web/src/server.rs @@ -242,7 +242,7 @@ impl WebServer { .route("/api/memory/{scope}", get(api::memory_list_handler)) .route("/api/memory/{scope}/stats", get(api::memory_stats_handler)) .route( - "/api/memory/{scope}/purge-expired", + "/api/memory/{scope}/_actions/purge-expired", post(api::memory_purge_expired_handler), ) .route( diff --git a/docs/security-gaps.md b/docs/security-gaps.md index de49224..fc108bd 100644 --- a/docs/security-gaps.md +++ b/docs/security-gaps.md @@ -79,7 +79,7 @@ Anyone on the same network segment (office LAN, shared Wi-Fi, corporate network) `TcpListener::bind("0.0.0.0:{port}")` binds all network interfaces. On a developer laptop on a corporate network, this means the dashboard is reachable from any other machine on the same subnet. **Where it is in the code:** -`crates/web/src/server.rs`, lines 270, 315 — `format!("0.0.0.0:{port}")`. +`crates/web/src/server.rs`, lines 332, 377 — `format!("0.0.0.0:{port}")`. **Workaround until fixed:** Use `--no-dashboard` or firewall the port.