From d5ad0c1a3044fab37d5f81c10ed1253095abdaae Mon Sep 17 00:00:00 2001 From: yepper Date: Thu, 9 Apr 2026 11:46:56 +0800 Subject: [PATCH] fix(resources): implement trailing slash semantics for resource URIs add support for trailing slash rules in resource URIs to control file/directory placement update CLI, API, and documentation to reflect new URI handling semantics add comprehensive tests for all URI semantics cases --- crates/ov_cli/src/client.rs | 18 +- crates/ov_cli/src/main.rs | 5 +- docs/en/api/02-resources.md | 21 +- docs/zh/api/02-resources.md | 21 +- openviking/parse/parsers/markdown.py | 12 +- openviking/parse/tree_builder.py | 65 ++++++- openviking/utils/media_processor.py | 22 ++- openviking/utils/resource_processor.py | 83 ++++++-- tests/api_test/api/client.py | 4 +- tests/api_test/resources/test_add_resource.py | 129 +++++++++++++ .../test_add_resource_to_uri_semantics.py | 182 ++++++++++++++++++ .../test_markdown_resource_name_override.py | 24 +++ tests/server/test_api_resources.py | 173 +++++++++++++++++ 13 files changed, 721 insertions(+), 38 deletions(-) create mode 100644 tests/parse/test_add_resource_to_uri_semantics.py create mode 100644 tests/parse/test_markdown_resource_name_override.py diff --git a/crates/ov_cli/src/client.rs b/crates/ov_cli/src/client.rs index aed1ffef4..7d4beffa7 100644 --- a/crates/ov_cli/src/client.rs +++ b/crates/ov_cli/src/client.rs @@ -61,6 +61,12 @@ impl HttpClient { FileOptions::default().compression_method(CompressionMethod::Deflated); let walkdir = walkdir::WalkDir::new(dir_path); + let base_name = dir_path.file_name().and_then(|n| n.to_str()).ok_or_else(|| { + Error::InvalidPath(format!( + "Non-UTF-8 directory name: {}", + dir_path.to_string_lossy() + )) + })?; for entry in walkdir.into_iter().filter_map(|e| e.ok()) { let path = entry.path(); if path.is_file() { @@ -68,7 +74,12 @@ impl HttpClient { let name_str = name.to_str().ok_or_else(|| { Error::InvalidPath(format!("Non-UTF-8 path: {}", name.to_string_lossy())) })?; - zip.start_file(name_str, options)?; + let zip_name = if name_str.is_empty() { + base_name.to_string() + } else { + format!("{}/{}", base_name, name_str) + }; + zip.start_file(zip_name, options)?; let mut file = File::open(path)?; std::io::copy(&mut file, &mut zip)?; } @@ -596,10 +607,15 @@ impl HttpClient { self.post("/api/v1/resources", &body).await } else if path_obj.is_file() { + let source_name = path_obj + .file_stem() + .and_then(|n| n.to_str()) + .map(|s| s.to_string()); let temp_file_id = self.upload_temp_file(path_obj).await?; let body = serde_json::json!({ "temp_file_id": temp_file_id, + "source_name": source_name, "to": to, "parent": parent, "reason": reason, diff --git a/crates/ov_cli/src/main.rs b/crates/ov_cli/src/main.rs index 8bcea277f..c8fcf642a 100644 --- a/crates/ov_cli/src/main.rs +++ b/crates/ov_cli/src/main.rs @@ -110,7 +110,10 @@ enum Commands { AddResource { /// Local path or URL to import path: String, - /// Exact target URI (must not exist yet) (cannot be used with --parent) + /// Target URI (cannot be used with --parent). + /// - If ends with '/': treated as a directory; preserves original file/dir name under it. + /// - Otherwise: treated as exact root URI (file path or directory root). + /// Note: 'viking://resources' (no trailing slash) is not allowed; use 'viking://resources/'. #[arg(long)] to: Option, /// Target parent URI (must already exist and be a directory) (cannot be used with --to) diff --git a/docs/en/api/02-resources.md b/docs/en/api/02-resources.md index 409a96d13..28fb7b68a 100644 --- a/docs/en/api/02-resources.md +++ b/docs/en/api/02-resources.md @@ -42,12 +42,23 @@ Add a resource to the knowledge base. |-----------|------|----------|---------|-------------| | path | str | Yes | - | SDK/CLI: local path, directory path, or URL. Raw HTTP: remote URL only | | temp_file_id | str | No | None | Upload ID returned by `POST /api/v1/resources/temp_upload` for raw HTTP local file ingestion | -| target | str | No | None | Target Viking URI (must be in `resources` scope) | +| to | str | No | None | Target Viking URI (must be in `resources` scope; cannot be used with `parent`) | +| parent | str | No | None | Target parent URI (must exist and be a directory; cannot be used with `to`) | | reason | str | No | "" | Why this resource is being added (improves search relevance) | | instruction | str | No | "" | Special processing instructions | | wait | bool | No | False | Wait for semantic processing to complete | | timeout | float | No | None | Timeout in seconds (only used when wait=True) | -| watch_interval | float | No | 0 | Watch interval (minutes). >0 enables/updates watch; <=0 disables watch. Only takes effect when target is provided | +| watch_interval | float | No | 0 | Watch interval (minutes). >0 enables/updates watch; <=0 disables watch. Only takes effect when `to` is provided | + +**Trailing slash semantics of `to` (applies after the resource is fetched; archives are extracted first)** + +- If the resource is a **file**: + - `to` ends with `/`: treat `to` as a directory; final location is `to/` + - `to` does not end with `/`: treat `to` as a file; final location is `to` +- If the resource is a **directory**: + - `to` ends with `/`: treat `to` as a directory; final location is `to//` + - `to` does not end with `/`: treat `to` as a directory; map the directory contents into `to` (no extra directory layer) +- If `to == viking://resources` and the request would map a file to a file or map directory contents into `viking://resources` (no trailing slash), the server returns an error and asks you to change `to` (e.g. add `/` or use a more specific path). **How local files and directories work** @@ -62,7 +73,7 @@ Add a resource to the knowledge base. When you call `add_resource()` repeatedly for the same resource URI, the system performs an incremental update instead of rebuilding everything from scratch: -- **Trigger**: `target` is provided and already exists in the knowledge base. +- **Trigger**: `to` is provided and already exists in the knowledge base. - **High-level idea**: each ingestion first builds a temporary resource tree from the new input. During asynchronous semantic processing, the temporary tree is compared against the existing tree at `target`, and only the changed parts are re-processed and synchronized. - **Incremental behavior in the semantic stage**: - **Unchanged files**: reuse existing L0 summaries and vector index records; skip vectorization. @@ -126,7 +137,7 @@ openviking add-resource ./documents/guide.md --reason "User guide documentation" ```python result = client.add_resource( "https://example.com/api-docs.md", - target="viking://resources/external/", + to="viking://resources/external/", reason="External API documentation" ) client.wait_processed() @@ -140,7 +151,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "X-API-Key: your-key" \ -d '{ "path": "https://example.com/api-docs.md", - "target": "viking://resources/external/", + "to": "viking://resources/external/", "reason": "External API documentation", "wait": true }' diff --git a/docs/zh/api/02-resources.md b/docs/zh/api/02-resources.md index 04a9c184d..1639e584a 100644 --- a/docs/zh/api/02-resources.md +++ b/docs/zh/api/02-resources.md @@ -42,12 +42,23 @@ Input -> Parser -> TreeBuilder -> AGFS -> SemanticQueue -> Vector Index |------|------|------|--------|------| | path | str | 是 | - | SDK/CLI 可传本地路径、目录路径或 URL;裸 HTTP 仅支持远端 URL | | temp_file_id | str | 否 | None | `POST /api/v1/resources/temp_upload` 返回的上传 ID,用于裸 HTTP 导入本地文件 | -| target | str | 否 | None | 目标 Viking URI(必须在 `resources` 作用域内) | +| to | str | 否 | None | 目标 Viking URI(必须在 `resources` 作用域内;不可与 `parent` 同时使用) | +| parent | str | 否 | None | 目标父目录 URI(必须存在且为目录;不可与 `to` 同时使用) | | reason | str | 否 | "" | 添加该资源的原因(可提升搜索相关性) | | instruction | str | 否 | "" | 特殊处理指令 | | wait | bool | 否 | False | 等待语义处理完成 | | timeout | float | 否 | None | 超时时间(秒),仅在 wait=True 时生效 | -| watch_interval | float | 否 | 0 | 定时更新间隔(分钟)。>0 开启/更新定时任务;<=0 关闭(停用)定时任务。仅在指定 target 时生效 | +| watch_interval | float | 否 | 0 | 定时更新间隔(分钟)。>0 开启/更新定时任务;<=0 关闭(停用)定时任务。仅在指定 `to` 时生效 | + +**`to` 的尾斜杠语义(资源获取完成后适用;压缩内容先解压)** + +- 资源为**文件**: + - `to` 以 `/` 结尾:`to` 视为目录,最终落点为 `to/<源文件名>` + - `to` 不以 `/` 结尾:`to` 视为文件,最终落点为 `to` +- 资源为**目录**: + - `to` 以 `/` 结尾:`to` 视为目录,最终落点为 `to/<源目录名>/` + - `to` 不以 `/` 结尾:`to` 视为目录,目录内容映射到 `to`(不额外新增一层目录) +- 当 `to == viking://resources` 且命中“文件映射到文件”或“目录内容映射到目录(无尾斜杠)”语义时,服务端会直接报错并提示修改 `to`(例如加 `/` 或指定更具体路径)。 **本地文件和目录如何处理** @@ -62,7 +73,7 @@ Input -> Parser -> TreeBuilder -> AGFS -> SemanticQueue -> Vector Index 当你为同一个资源 URI 反复调用 `add_resource()` 时,系统会走“增量更新”而不是每次全量重建: -- **触发条件**:请求里显式指定 `target`,且该 `target` 在知识库中已存在。 +- **触发条件**:请求里显式指定 `to`,且该 `to` 在知识库中已存在。 - **总体思路**:每次导入都会先把新内容解析/构建成一棵“临时资源树”,随后在异步语义处理阶段,将临时树与 `target` 对应的现有资源树进行对比,只对发生变化的部分做重算与同步。 - **语义阶段的增量**: - 对**未变化的文件**:复用已有 L0(摘要)与向量索引记录,跳过向量化。 @@ -126,7 +137,7 @@ openviking add-resource ./documents/guide.md --reason "User guide documentation" ```python result = client.add_resource( "https://example.com/api-docs.md", - target="viking://resources/external/", + to="viking://resources/external/", reason="External API documentation" ) client.wait_processed() @@ -140,7 +151,7 @@ curl -X POST http://localhost:1933/api/v1/resources \ -H "X-API-Key: your-key" \ -d '{ "path": "https://example.com/api-docs.md", - "target": "viking://resources/external/", + "to": "viking://resources/external/", "reason": "External API documentation", "wait": true }' diff --git a/openviking/parse/parsers/markdown.py b/openviking/parse/parsers/markdown.py index 772a86445..44f5b7074 100644 --- a/openviking/parse/parsers/markdown.py +++ b/openviking/parse/parsers/markdown.py @@ -174,9 +174,10 @@ async def parse_content( await viking_fs.mkdir(temp_uri) logger.debug(f"[MarkdownParser] Created temp directory: {temp_uri}") + name_hint = kwargs.get("resource_name") or kwargs.get("source_name") # Get document title doc_title = meta.get("frontmatter", {}).get( - "title", Path(source_path).stem if source_path else "Document" + "title", name_hint or (Path(source_path).stem if source_path else "Document") ) # Create root directory @@ -187,7 +188,9 @@ async def parse_content( logger.info(f"[MarkdownParser] Found {len(headings)} headings") # Parse and create directory structure - await self._parse_and_create_structure(content, headings, root_dir, source_path) + await self._parse_and_create_structure( + content, headings, root_dir, source_path, doc_name_override=name_hint + ) parse_time = time.time() - start_time logger.info(f"[MarkdownParser] Parse completed in {parse_time:.2f}s") @@ -365,6 +368,7 @@ async def _parse_and_create_structure( headings: List[Tuple[int, int, str, int]], root_dir: str, source_path: Optional[str] = None, + doc_name_override: Optional[str] = None, ) -> None: """ Parse markdown and create directory structure directly in VikingFS. @@ -395,7 +399,9 @@ async def _parse_and_create_structure( await viking_fs.mkdir(root_dir) # Get document name - doc_name = self._sanitize_for_path(Path(source_path).stem if source_path else "content") + doc_name = self._sanitize_for_path( + doc_name_override or (Path(source_path).stem if source_path else "content") + ) # Small document: save as single file (check both token and char limits) if estimated_tokens <= max_size and len(content) <= max_chars: diff --git a/openviking/parse/tree_builder.py b/openviking/parse/tree_builder.py index 6f133f75c..c8db0dc64 100644 --- a/openviking/parse/tree_builder.py +++ b/openviking/parse/tree_builder.py @@ -29,6 +29,7 @@ from openviking.server.identity import RequestContext from openviking.storage.viking_fs import get_viking_fs from openviking.utils import parse_code_hosting_url +from openviking_cli.exceptions import InvalidArgumentError from openviking_cli.utils.uri import VikingURI logger = logging.getLogger(__name__) @@ -153,9 +154,59 @@ async def finalize_from_temp( # 2. Determine base_uri and final document name with org/repo for GitHub/GitLab auto_base_uri = self._get_base_uri(scope, source_path, source_format) base_uri = parent_uri or auto_base_uri - # 3. Determine candidate_uri + temp_artifact_uri = temp_doc_uri + artifact_kind = "dir" + + # 3. Determine candidate_uri / final_uri if to_uri: - candidate_uri = to_uri + candidate_uri = VikingURI.normalize(to_uri) + to_is_dir_semantics = candidate_uri.endswith("/") + to_is_protected_resources_root = candidate_uri == "viking://resources" + + entries = await viking_fs.ls(temp_doc_uri, ctx=ctx) + visible_entries = [ + e + for e in entries + if e.get("name") not in (".", "..") and not str(e.get("name", "")).startswith(".") + ] + force_directory_resource = source_format in ("directory", "repository") + is_single_file_resource = ( + not force_directory_resource + and len(visible_entries) == 1 + and not visible_entries[0].get("isDir", False) + ) + + if is_single_file_resource: + artifact_kind = "file" + source_filename = str(visible_entries[0].get("name", "")).strip() + if not source_filename: + raise ValueError(f"[TreeBuilder] Empty filename in {temp_doc_uri}") + temp_artifact_uri = f"{temp_doc_uri.rstrip('/')}/{source_filename}" + + if to_is_dir_semantics: + final_uri = VikingURI(candidate_uri).join(source_filename).uri + else: + if to_is_protected_resources_root: + raise InvalidArgumentError( + "`to` 不允许为 viking://resources(无尾斜杠)用于文件资源。" + "请修改 to:例如使用 viking://resources/ 以保留原文件名," + "或使用 viking://resources/<目录>/ 或 viking://resources/<目录>/<文件名>。" + ) + final_uri = candidate_uri + else: + artifact_kind = "dir" + temp_artifact_uri = temp_doc_uri + + if to_is_dir_semantics: + final_uri = VikingURI(candidate_uri).join(final_doc_name).uri.rstrip("/") + "/" + else: + if to_is_protected_resources_root: + raise InvalidArgumentError( + "`to` 不允许为 viking://resources(无尾斜杠)用于目录资源。" + "请修改 to:例如使用 viking://resources/ 以保留目录名," + "或使用 viking://resources/<目录名>/ 或 viking://resources/<目标目录>。" + ) + final_uri = candidate_uri else: if parent_uri: # Parent URI must exist and be a directory @@ -166,10 +217,6 @@ async def finalize_from_temp( if not stat_result.get("isDir"): raise ValueError(f"Parent URI is not a directory: {parent_uri}") candidate_uri = VikingURI(base_uri).join(final_doc_name).uri - - if to_uri: - final_uri = candidate_uri - else: final_uri = await self._resolve_unique_uri(candidate_uri) tree = BuildingTree( @@ -181,7 +228,11 @@ async def finalize_from_temp( tree._candidate_uri = candidate_uri # Create a minimal Context object for the root so that tree.root is not None - root_context = Context(uri=final_uri, temp_uri=temp_doc_uri) + root_context = Context( + uri=final_uri, + temp_uri=temp_artifact_uri, + meta={"artifact_kind": artifact_kind}, + ) tree.add_context(root_context) return tree diff --git a/openviking/utils/media_processor.py b/openviking/utils/media_processor.py index 1fb13b8ce..a8960acf6 100644 --- a/openviking/utils/media_processor.py +++ b/openviking/utils/media_processor.py @@ -167,7 +167,25 @@ async def _process_file( try: with zipfile.ZipFile(file_path, "r") as zipf: safe_extract_zip(zipf, temp_dir) - return await self._process_directory(temp_dir, instruction, **kwargs) + visible = [ + p + for p in temp_dir.iterdir() + if p.name not in {".", ".."} and not p.name.startswith(".") + ] + dirs = [p for p in visible if p.is_dir()] + files = [p for p in visible if p.is_file()] + + if len(dirs) == 1 and not files: + inferred_root = dirs[0] + kwargs_for_dir = dict(kwargs) + kwargs_for_dir.pop("source_name", None) + return await self._process_directory(inferred_root, instruction, **kwargs_for_dir) + if len(files) == 1 and not dirs: + return await self._process_file(files[0], instruction, **kwargs) + + kwargs_for_dir = dict(kwargs) + kwargs_for_dir["source_name"] = kwargs_for_dir.get("source_name") or file_path.stem + return await self._process_directory(temp_dir, instruction, **kwargs_for_dir) finally: pass # Don't delete temp_dir yet, it will be used by TreeBuilder return await parse( @@ -175,5 +193,5 @@ async def _process_file( instruction=instruction, vlm_processor=self._get_vlm_processor(), storage=self.storage, - resource_name=file_path.stem, + resource_name=kwargs.get("source_name") or kwargs.get("resource_name") or file_path.stem, ) diff --git a/openviking/utils/resource_processor.py b/openviking/utils/resource_processor.py index 01aaf75df..07498f016 100644 --- a/openviking/utils/resource_processor.py +++ b/openviking/utils/resource_processor.py @@ -18,6 +18,7 @@ from openviking.telemetry import get_current_telemetry from openviking.utils.embedding_utils import index_resource from openviking.utils.summarizer import Summarizer +from openviking_cli.exceptions import InvalidArgumentError from openviking_cli.exceptions import OpenVikingError from openviking_cli.utils import get_logger from openviking_cli.utils.storage import StoragePath @@ -199,6 +200,14 @@ async def process_resource( "resource.finalize.duration_ms", round((time.perf_counter() - finalize_start) * 1000, 3), ) + except InvalidArgumentError as e: + telemetry.set_error("resource_processor.finalize", "INVALID_ARGUMENT", str(e)) + try: + if parse_result.temp_dir_path: + await get_viking_fs().delete_temp(parse_result.temp_dir_path, ctx=ctx) + except Exception: + pass + raise except Exception as e: result["status"] = "error" result["errors"].append(f"Finalize from temp error: {e}") @@ -225,6 +234,12 @@ async def process_resource( viking_fs = get_viking_fs() lock_manager = get_lock_manager() target_exists = await viking_fs.exists(root_uri, ctx=ctx) + temp_is_dir = False + try: + temp_stat = await viking_fs.stat(temp_uri, ctx=ctx) + temp_is_dir = bool(temp_stat.get("isDir", False)) if isinstance(temp_stat, dict) else False + except Exception: + temp_is_dir = False if not target_exists: # 第一次添加:锁保护下将 temp 移到 final @@ -249,7 +264,7 @@ async def process_resource( # 在 POINT 锁内获取 SUBTREE 锁(消除竞态窗口) lifecycle_lock_handle_id = await self._try_acquire_lifecycle_lock( - lock_manager, dst_path + lock_manager, dst_path, is_dir=temp_is_dir ) try: @@ -259,10 +274,19 @@ async def process_resource( result["temp_uri"] = root_uri else: - # 增量更新:对目标目录加 SUBTREE 锁 + try: + existing_stat = await viking_fs.stat(root_uri, ctx=ctx) + existing_is_dir = ( + bool(existing_stat.get("isDir", False)) + if isinstance(existing_stat, dict) + else False + ) + except Exception: + existing_is_dir = True + resource_path = viking_fs._uri_to_path(root_uri, ctx=ctx) lifecycle_lock_handle_id = await self._try_acquire_lifecycle_lock( - lock_manager, resource_path + lock_manager, resource_path, is_dir=existing_is_dir ) # ============ Phase 4: Optional Steps ============ @@ -274,15 +298,43 @@ async def process_resource( is_code_repo = parse_result.source_format == "repository" try: with telemetry.measure("resource.summarize"): - await self._get_summarizer().summarize( - resource_uris=[result["root_uri"]], - ctx=ctx, - skip_vectorization=skip_vec, - lifecycle_lock_handle_id=lifecycle_lock_handle_id, - temp_uris=[temp_uri_for_summarize], - is_code_repo=is_code_repo, - **kwargs, + viking_fs = get_viking_fs() + root_stat = await viking_fs.stat(result["root_uri"], ctx=ctx) + root_is_dir = ( + bool(root_stat.get("isDir", False)) if isinstance(root_stat, dict) else False ) + + if root_is_dir: + await self._get_summarizer().summarize( + resource_uris=[result["root_uri"]], + ctx=ctx, + skip_vectorization=skip_vec, + lifecycle_lock_handle_id=lifecycle_lock_handle_id, + temp_uris=[temp_uri_for_summarize], + is_code_repo=is_code_repo, + **kwargs, + ) + else: + if build_index: + from openviking.utils.embedding_utils import vectorize_file + from openviking_cli.utils.uri import VikingURI + + parent = VikingURI(result["root_uri"]).parent + parent_uri = parent.uri if parent else "viking://resources" + await vectorize_file( + file_path=result["root_uri"], + summary_dict={"name": result["root_uri"].split("/")[-1]}, + parent_uri=parent_uri, + ctx=ctx, + context_type="resource", + ) + if lifecycle_lock_handle_id: + from openviking.storage.transaction import get_lock_manager + + handle = get_lock_manager().get_handle(lifecycle_lock_handle_id) + if handle: + await get_lock_manager().release(handle) + lifecycle_lock_handle_id = "" except Exception as e: logger.error(f"Summarization failed: {e}") result["warnings"] = result.get("warnings", []) + [f"Summarization failed: {e}"] @@ -297,10 +349,15 @@ async def process_resource( return result @staticmethod - async def _try_acquire_lifecycle_lock(lock_manager, path: str) -> str: + async def _try_acquire_lifecycle_lock(lock_manager, path: str, is_dir: bool = True) -> str: """尝试获取 SUBTREE 生命周期锁,失败时优雅降级返回空字符串。""" handle = lock_manager.create_handle() - if await lock_manager.acquire_subtree(handle, path): + acquired = ( + await lock_manager.acquire_subtree(handle, path) + if is_dir + else await lock_manager.acquire_point(handle, path.rsplit("/", 1)[0] if "/" in path else path) + ) + if acquired: return handle.id logger.warning(f"[ResourceProcessor] Failed to acquire lifecycle lock on {path}") await lock_manager.release(handle) diff --git a/tests/api_test/api/client.py b/tests/api_test/api/client.py index 1994e84f5..d79a42806 100644 --- a/tests/api_test/api/client.py +++ b/tests/api_test/api/client.py @@ -144,7 +144,7 @@ def _zip_directory_for_upload(self, directory_path: str) -> str: base_path = Path(directory_path) for file_path in base_path.rglob("*"): if file_path.is_file(): - zf.write(file_path, arcname=file_path.relative_to(base_path)) + zf.write(file_path, arcname=Path(base_path.name) / file_path.relative_to(base_path)) return temp_zip_path except Exception: Path(temp_zip_path).unlink(missing_ok=True) @@ -259,9 +259,11 @@ def add_resource( cleanup_path = None if os.path.isfile(path): payload["temp_file_id"] = self._upload_temp_file(path) + payload["source_name"] = Path(path).stem elif os.path.isdir(path): cleanup_path = self._zip_directory_for_upload(path) payload["temp_file_id"] = self._upload_temp_file(cleanup_path) + payload["source_name"] = Path(path).name else: payload["path"] = path if to: diff --git a/tests/api_test/resources/test_add_resource.py b/tests/api_test/resources/test_add_resource.py index e25ce25f8..341111914 100644 --- a/tests/api_test/resources/test_add_resource.py +++ b/tests/api_test/resources/test_add_resource.py @@ -1,6 +1,8 @@ import json import os import tempfile +import uuid +import zipfile class TestAddResource: @@ -72,3 +74,130 @@ def test_add_resource_simple(self, api_client): except Exception as e: print(f"Error: {e}") raise + + def test_add_resource_to_dir_semantics_rule1_file(self, api_client): + with tempfile.TemporaryDirectory() as temp_dir: + test_file_path = os.path.join(temp_dir, "a.txt") + with open(test_file_path, "w", encoding="utf-8") as f: + f.write("rule1") + + to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/" + response = api_client.add_resource(path=test_file_path, to=to_dir, wait=True) + assert response.status_code == 200 + data = response.json() + assert data.get("status") == "ok" + root_uri = data["result"]["root_uri"] + assert root_uri == f"{to_dir}a.txt" + + stat_resp = api_client.fs_stat(root_uri) + assert stat_resp.status_code == 200 + stat = stat_resp.json() + assert stat.get("status") == "ok" + + def test_add_resource_to_file_semantics_rule2_file(self, api_client): + with tempfile.TemporaryDirectory() as temp_dir: + test_file_path = os.path.join(temp_dir, "a.txt") + with open(test_file_path, "w", encoding="utf-8") as f: + f.write("rule2") + + to_file = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/b.txt" + response = api_client.add_resource(path=test_file_path, to=to_file, wait=True) + assert response.status_code == 200 + data = response.json() + assert data.get("status") == "ok" + root_uri = data["result"]["root_uri"] + assert root_uri == to_file + + stat_resp = api_client.fs_stat(root_uri) + assert stat_resp.status_code == 200 + stat = stat_resp.json() + assert stat.get("status") == "ok" + + def test_add_resource_to_dir_semantics_rule3_directory(self, api_client): + with tempfile.TemporaryDirectory() as temp_dir: + myproj = os.path.join(temp_dir, "myproj") + os.makedirs(myproj, exist_ok=True) + with open(os.path.join(myproj, "README.md"), "w", encoding="utf-8") as f: + f.write("# myproj\n") + + to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/" + response = api_client.add_resource(path=myproj, to=to_dir, wait=True) + assert response.status_code == 200 + data = response.json() + assert data.get("status") == "ok" + root_uri = data["result"]["root_uri"] + assert root_uri == f"{to_dir}myproj/" + + tree_resp = api_client.fs_tree(root_uri) + assert tree_resp.status_code == 200 + tree = tree_resp.json() + assert tree.get("status") == "ok" + + def test_add_resource_to_no_trailing_slash_rule4_directory(self, api_client): + with tempfile.TemporaryDirectory() as temp_dir: + myproj = os.path.join(temp_dir, "myproj") + os.makedirs(myproj, exist_ok=True) + with open(os.path.join(myproj, "README.md"), "w", encoding="utf-8") as f: + f.write("# myproj\n") + + to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}" + response = api_client.add_resource(path=myproj, to=to_dir, wait=True) + assert response.status_code == 200 + data = response.json() + assert data.get("status") == "ok" + root_uri = data["result"]["root_uri"] + assert root_uri == to_dir + + tree_resp = api_client.fs_tree(root_uri) + assert tree_resp.status_code == 200 + tree = tree_resp.json() + assert tree.get("status") == "ok" + + def test_add_resource_zip_then_apply_rules_rule5(self, api_client): + with tempfile.TemporaryDirectory() as temp_dir: + proj = os.path.join(temp_dir, "myproj") + os.makedirs(proj, exist_ok=True) + with open(os.path.join(proj, "README.md"), "w", encoding="utf-8") as f: + f.write("# zip\n") + + zip_path = os.path.join(temp_dir, "myproj.zip") + with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf: + zf.write(os.path.join(proj, "README.md"), arcname="myproj/README.md") + + to_dir = f"viking://resources/test-add-resource-{uuid.uuid4().hex}/" + response = api_client.add_resource(path=zip_path, to=to_dir, wait=True) + assert response.status_code == 200 + data = response.json() + assert data.get("status") == "ok" + root_uri = data["result"]["root_uri"] + assert root_uri == f"{to_dir}myproj/" + + tree_resp = api_client.fs_tree(root_uri) + assert tree_resp.status_code == 200 + tree = tree_resp.json() + assert tree.get("status") == "ok" + + def test_add_resource_protect_resources_root_rule6_file(self, api_client): + with tempfile.TemporaryDirectory() as temp_dir: + test_file_path = os.path.join(temp_dir, "a.txt") + with open(test_file_path, "w", encoding="utf-8") as f: + f.write("rule6-file") + + response = api_client.add_resource(path=test_file_path, to="viking://resources", wait=True) + assert response.status_code == 400 + data = response.json() + assert data.get("status") == "error" + assert data.get("error", {}).get("code") == "INVALID_ARGUMENT" + + def test_add_resource_protect_resources_root_rule6_dir(self, api_client): + with tempfile.TemporaryDirectory() as temp_dir: + myproj = os.path.join(temp_dir, "myproj") + os.makedirs(myproj, exist_ok=True) + with open(os.path.join(myproj, "README.md"), "w", encoding="utf-8") as f: + f.write("# myproj\n") + + response = api_client.add_resource(path=myproj, to="viking://resources", wait=True) + assert response.status_code == 400 + data = response.json() + assert data.get("status") == "error" + assert data.get("error", {}).get("code") == "INVALID_ARGUMENT" diff --git a/tests/parse/test_add_resource_to_uri_semantics.py b/tests/parse/test_add_resource_to_uri_semantics.py new file mode 100644 index 000000000..ce00baba5 --- /dev/null +++ b/tests/parse/test_add_resource_to_uri_semantics.py @@ -0,0 +1,182 @@ +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +def _make_ctx(): + from openviking.server.identity import RequestContext, Role + from openviking_cli.session.user_id import UserIdentifier + + return RequestContext(user=UserIdentifier("default", "default", "default"), role=Role.USER) + + +def _make_viking_fs_mock(ls_map: dict[str, list[dict]]): + fs = MagicMock() + + async def _ls(uri: str, **kwargs): + return ls_map.get(uri, []) + + fs.ls = AsyncMock(side_effect=_ls) + fs.stat = AsyncMock(return_value={"isDir": True}) + return fs + + +class TestAddResourceToUriSemantics: + @pytest.mark.asyncio + async def test_file_resource_to_dir_semantics_rule1(self): + from openviking.parse.tree_builder import TreeBuilder + + temp_base = "viking://temp/t1" + temp_doc = f"{temp_base}/doc" + fs = _make_viking_fs_mock( + { + temp_base: [{"name": "doc", "isDir": True}], + temp_doc: [{"name": "a.txt", "isDir": False}], + } + ) + + builder = TreeBuilder() + with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): + tree = await builder.finalize_from_temp( + temp_dir_path=temp_base, + ctx=_make_ctx(), + scope="resources", + to_uri="viking://resources/mydir/", + ) + + assert tree.root is not None + assert tree.root.uri == "viking://resources/mydir/a.txt" + assert tree.root.temp_uri == f"{temp_doc}/a.txt" + assert tree.root.meta.get("artifact_kind") == "file" + + @pytest.mark.asyncio + async def test_directory_resource_to_dir_semantics_rule3(self): + from openviking.parse.tree_builder import TreeBuilder + + temp_base = "viking://temp/t2" + temp_doc = f"{temp_base}/myproj" + fs = _make_viking_fs_mock( + { + temp_base: [{"name": "myproj", "isDir": True}], + temp_doc: [ + {"name": "README.md", "isDir": False}, + {"name": "src", "isDir": True}, + ], + } + ) + + builder = TreeBuilder() + with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): + tree = await builder.finalize_from_temp( + temp_dir_path=temp_base, + ctx=_make_ctx(), + scope="resources", + to_uri="viking://resources/target/", + ) + + assert tree.root is not None + assert tree.root.uri == "viking://resources/target/myproj/" + assert tree.root.temp_uri == temp_doc + assert tree.root.meta.get("artifact_kind") == "dir" + + @pytest.mark.asyncio + async def test_directory_resource_to_no_trailing_slash_rule4(self): + from openviking.parse.tree_builder import TreeBuilder + + temp_base = "viking://temp/t3" + temp_doc = f"{temp_base}/myproj" + fs = _make_viking_fs_mock( + { + temp_base: [{"name": "myproj", "isDir": True}], + temp_doc: [{"name": "README.md", "isDir": False}, {"name": "src", "isDir": True}], + } + ) + + builder = TreeBuilder() + with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): + tree = await builder.finalize_from_temp( + temp_dir_path=temp_base, + ctx=_make_ctx(), + scope="resources", + to_uri="viking://resources/target", + ) + + assert tree.root is not None + assert tree.root.uri == "viking://resources/target" + assert tree.root.temp_uri == temp_doc + + @pytest.mark.asyncio + async def test_force_directory_for_directory_source_even_single_file(self): + from openviking.parse.tree_builder import TreeBuilder + + temp_base = "viking://temp/t6" + temp_doc = f"{temp_base}/tt_a" + fs = _make_viking_fs_mock( + { + temp_base: [{"name": "tt_a", "isDir": True}], + temp_doc: [{"name": "aa.md", "isDir": False}], + } + ) + + builder = TreeBuilder() + with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): + tree = await builder.finalize_from_temp( + temp_dir_path=temp_base, + ctx=_make_ctx(), + scope="resources", + to_uri="viking://resources/", + source_format="directory", + ) + + assert tree.root is not None + assert tree.root.uri == "viking://resources/tt_a/" + assert tree.root.temp_uri == temp_doc + assert tree.root.meta.get("artifact_kind") == "dir" + + @pytest.mark.asyncio + async def test_protect_resources_root_file_rule6(self): + from openviking.parse.tree_builder import TreeBuilder + from openviking_cli.exceptions import InvalidArgumentError + + temp_base = "viking://temp/t4" + temp_doc = f"{temp_base}/doc" + fs = _make_viking_fs_mock( + { + temp_base: [{"name": "doc", "isDir": True}], + temp_doc: [{"name": "a.txt", "isDir": False}], + } + ) + + builder = TreeBuilder() + with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): + with pytest.raises(InvalidArgumentError): + await builder.finalize_from_temp( + temp_dir_path=temp_base, + ctx=_make_ctx(), + scope="resources", + to_uri="viking://resources", + ) + + @pytest.mark.asyncio + async def test_protect_resources_root_dir_rule6(self): + from openviking.parse.tree_builder import TreeBuilder + from openviking_cli.exceptions import InvalidArgumentError + + temp_base = "viking://temp/t5" + temp_doc = f"{temp_base}/myproj" + fs = _make_viking_fs_mock( + { + temp_base: [{"name": "myproj", "isDir": True}], + temp_doc: [{"name": "README.md", "isDir": False}, {"name": "src", "isDir": True}], + } + ) + + builder = TreeBuilder() + with patch("openviking.parse.tree_builder.get_viking_fs", return_value=fs): + with pytest.raises(InvalidArgumentError): + await builder.finalize_from_temp( + temp_dir_path=temp_base, + ctx=_make_ctx(), + scope="resources", + to_uri="viking://resources", + ) diff --git a/tests/parse/test_markdown_resource_name_override.py b/tests/parse/test_markdown_resource_name_override.py new file mode 100644 index 000000000..41b00a6fa --- /dev/null +++ b/tests/parse/test_markdown_resource_name_override.py @@ -0,0 +1,24 @@ +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +@pytest.mark.asyncio +async def test_markdown_parser_uses_resource_name_for_output_naming(): + from openviking.parse.parsers.markdown import MarkdownParser + + fs = MagicMock() + fs.mkdir = AsyncMock() + fs.write_file = AsyncMock() + + parser = MarkdownParser() + with patch.object(parser, "_get_viking_fs", return_value=fs): + result = await parser.parse_content( + "# Title\n\nHello\n", + source_path="/tmp/upload_deadbeef.md", + resource_name="aa", + ) + + assert result is not None + written_paths = [call.args[0] for call in fs.write_file.call_args_list] + assert any(p.endswith("/aa.md") for p in written_paths) diff --git a/tests/server/test_api_resources.py b/tests/server/test_api_resources.py index 2357fd5a4..28d6a574d 100644 --- a/tests/server/test_api_resources.py +++ b/tests/server/test_api_resources.py @@ -3,11 +3,20 @@ """Tests for resource management endpoints.""" +import zipfile +from pathlib import Path + import httpx from openviking.telemetry import get_current_telemetry +def _write_zip(zip_path: Path, files: dict[str, str]) -> None: + with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for arcname, content in files.items(): + zf.writestr(arcname, content) + + async def test_add_resource_success( client: httpx.AsyncClient, sample_markdown_file, @@ -207,6 +216,170 @@ async def test_add_resource_with_to( assert "custom" in body["result"]["root_uri"] +async def test_add_resource_file_to_trailing_slash_rule_1( + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, +): + resp = await client.post( + "/api/v1/resources", + json={ + "temp_file_id": sample_markdown_file.name, + "to": "viking://resources/custom_dir/", + "reason": "rule 1", + "wait": True, + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + root_uri = body["result"]["root_uri"] + assert root_uri == "viking://resources/custom_dir/sample.md" + + stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) + assert stat_resp.status_code == 200 + assert stat_resp.json()["result"]["isDir"] is False + + +async def test_add_resource_file_to_no_trailing_slash_rule_2( + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, +): + target = "viking://resources/custom_dir/renamed.md" + resp = await client.post( + "/api/v1/resources", + json={ + "temp_file_id": sample_markdown_file.name, + "to": target, + "reason": "rule 2", + "wait": True, + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + root_uri = body["result"]["root_uri"] + assert root_uri == target + + stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) + assert stat_resp.status_code == 200 + assert stat_resp.json()["result"]["isDir"] is False + + +async def test_add_resource_dir_to_trailing_slash_rule_3( + client: httpx.AsyncClient, + upload_temp_dir, +): + zip_path = upload_temp_dir / "demo_dir.zip" + _write_zip( + zip_path, + { + "demo_dir/a.md": "# a\n", + "demo_dir/b.md": "# b\n", + }, + ) + resp = await client.post( + "/api/v1/resources", + json={ + "temp_file_id": zip_path.name, + "to": "viking://resources/custom_dir/", + "reason": "rule 3", + "wait": True, + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + root_uri = body["result"]["root_uri"] + assert root_uri == "viking://resources/custom_dir/demo_dir/" + + stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) + assert stat_resp.status_code == 200 + assert stat_resp.json()["result"]["isDir"] is True + + +async def test_add_resource_dir_to_no_trailing_slash_rule_4( + client: httpx.AsyncClient, + upload_temp_dir, +): + zip_path = upload_temp_dir / "demo_dir.zip" + _write_zip( + zip_path, + { + "demo_dir/a.md": "# a\n", + "demo_dir/b.md": "# b\n", + }, + ) + target = "viking://resources/custom_dir/explicit_root" + resp = await client.post( + "/api/v1/resources", + json={ + "temp_file_id": zip_path.name, + "to": target, + "reason": "rule 4", + "wait": True, + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + root_uri = body["result"]["root_uri"] + assert root_uri == target + + stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) + assert stat_resp.status_code == 200 + assert stat_resp.json()["result"]["isDir"] is True + + +async def test_add_resource_zip_single_file_follows_rules_rule_5( + client: httpx.AsyncClient, + upload_temp_dir, +): + zip_path = upload_temp_dir / "single_file.zip" + _write_zip(zip_path, {"hello.md": "# hello\n"}) + resp = await client.post( + "/api/v1/resources", + json={ + "temp_file_id": zip_path.name, + "to": "viking://resources/custom_dir/", + "reason": "rule 5", + "wait": True, + }, + ) + assert resp.status_code == 200 + body = resp.json() + assert body["status"] == "ok" + root_uri = body["result"]["root_uri"] + assert root_uri == "viking://resources/custom_dir/hello.md" + + stat_resp = await client.get("/api/v1/fs/stat", params={"uri": root_uri}) + assert stat_resp.status_code == 200 + assert stat_resp.json()["result"]["isDir"] is False + + +async def test_add_resource_to_resources_root_is_rejected_rule_6( + client: httpx.AsyncClient, + sample_markdown_file, + upload_temp_dir, +): + resp = await client.post( + "/api/v1/resources", + json={ + "temp_file_id": sample_markdown_file.name, + "to": "viking://resources", + "reason": "rule 6", + "wait": True, + }, + ) + assert resp.status_code == 400 + body = resp.json() + assert body["status"] == "error" + assert body["error"]["code"] == "INVALID_ARGUMENT" + assert "viking://resources" in body["error"]["message"] + assert "viking://resources/" in body["error"]["message"] + + async def test_wait_processed_empty_queue(client: httpx.AsyncClient): resp = await client.post( "/api/v1/system/wait",