diff --git a/shortcuts/common/artifact_path.go b/shortcuts/common/artifact_path.go new file mode 100644 index 000000000..42ba3beb0 --- /dev/null +++ b/shortcuts/common/artifact_path.go @@ -0,0 +1,30 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +// This file defines artifact-path conventions shared between +// `minutes +download` and `vc +notes`. Callers outside those two shortcuts +// should not take a dependency on these symbols. + +package common + +import "path/filepath" + +// DefaultMinuteArtifactSubdir is the top-level directory for minute-scoped +// artifacts under the default layout. +const DefaultMinuteArtifactSubdir = "minutes" + +// DefaultTranscriptFileName is the fixed transcript filename under the +// default layout. Recording files keep the server-provided name. +const DefaultTranscriptFileName = "transcript.txt" + +// ArtifactTypeRecording is the artifact_type value emitted by +// `minutes +download` so that callers can index results by kind without +// parsing saved_path. +const ArtifactTypeRecording = "recording" + +// DefaultMinuteArtifactDir returns the default output directory for an +// artifact keyed by minuteToken. The same path is shared across commands so +// that related artifacts of one meeting land together. +func DefaultMinuteArtifactDir(minuteToken string) string { + return filepath.Join(DefaultMinuteArtifactSubdir, minuteToken) +} diff --git a/shortcuts/common/validate.go b/shortcuts/common/validate.go index a50b7ff10..39bbf21b0 100644 --- a/shortcuts/common/validate.go +++ b/shortcuts/common/validate.go @@ -83,13 +83,12 @@ func ParseIntBounded(rt *RuntimeContext, name string, min, max int) int { return v } -// ValidateSafeOutputDir ensures outputDir is a relative path that resolves -// within the current working directory, preventing path traversal attacks -// (including symlink-based escape). -// It delegates all validation to FileIO.ResolvePath which already performs -// cwd-boundary checks, symlink resolution, and control-character rejection. -func ValidateSafeOutputDir(fio fileio.FileIO, outputDir string) error { - _, err := fio.ResolvePath(outputDir) +// ValidateSafePath ensures path is relative and resolves within the current +// working directory. It catches traversal, symlink escape, and control +// characters by delegating to FileIO.ResolvePath. Works for both file and +// directory paths. +func ValidateSafePath(fio fileio.FileIO, path string) error { + _, err := fio.ResolvePath(path) return err } diff --git a/shortcuts/common/validate_test.go b/shortcuts/common/validate_test.go index 572e9c959..89eb72e88 100644 --- a/shortcuts/common/validate_test.go +++ b/shortcuts/common/validate_test.go @@ -172,7 +172,7 @@ func TestParseIntBounded(t *testing.T) { } // --------------------------------------------------------------------------- -// ValidateSafeOutputDir — symlink escape prevention +// ValidateSafePath — symlink escape prevention // --------------------------------------------------------------------------- // chdirForTest changes CWD to dir and restores the original CWD on cleanup. @@ -188,9 +188,9 @@ func chdirForTest(t *testing.T, dir string) { t.Cleanup(func() { os.Chdir(orig) }) } -// TestValidateSafeOutputDir_RejectsSymlinkEscape verifies that a relative path +// TestValidateSafePath_RejectsSymlinkEscape verifies that a relative path // that resolves to a symlink pointing outside CWD is rejected. -func TestValidateSafeOutputDir_RejectsSymlinkEscape(t *testing.T) { +func TestValidateSafePath_RejectsSymlinkEscape(t *testing.T) { outside := t.TempDir() // target outside CWD workDir := t.TempDir() chdirForTest(t, workDir) @@ -200,14 +200,14 @@ func TestValidateSafeOutputDir_RejectsSymlinkEscape(t *testing.T) { t.Fatalf("Symlink: %v", err) } - if err := ValidateSafeOutputDir(&localfileio.LocalFileIO{}, "evil_out"); err == nil { + if err := ValidateSafePath(&localfileio.LocalFileIO{}, "evil_out"); err == nil { t.Fatal("expected error for symlink pointing outside CWD, got nil") } } -// TestValidateSafeOutputDir_RejectsDanglingSymlink verifies that a dangling +// TestValidateSafePath_RejectsDanglingSymlink verifies that a dangling // symlink (target does not exist) is rejected to prevent future escapes. -func TestValidateSafeOutputDir_RejectsDanglingSymlink(t *testing.T) { +func TestValidateSafePath_RejectsDanglingSymlink(t *testing.T) { workDir := t.TempDir() chdirForTest(t, workDir) @@ -215,14 +215,14 @@ func TestValidateSafeOutputDir_RejectsDanglingSymlink(t *testing.T) { t.Fatalf("Symlink: %v", err) } - if err := ValidateSafeOutputDir(&localfileio.LocalFileIO{}, "dangling"); err == nil { + if err := ValidateSafePath(&localfileio.LocalFileIO{}, "dangling"); err == nil { t.Fatal("expected error for dangling symlink, got nil") } } -// TestValidateSafeOutputDir_AllowsNormalSubdir verifies that an existing real +// TestValidateSafePath_AllowsNormalSubdir verifies that an existing real // subdirectory within CWD is accepted. -func TestValidateSafeOutputDir_AllowsNormalSubdir(t *testing.T) { +func TestValidateSafePath_AllowsNormalSubdir(t *testing.T) { workDir := t.TempDir() chdirForTest(t, workDir) @@ -231,18 +231,18 @@ func TestValidateSafeOutputDir_AllowsNormalSubdir(t *testing.T) { t.Fatalf("Mkdir: %v", err) } - if err := ValidateSafeOutputDir(&localfileio.LocalFileIO{}, "output"); err != nil { + if err := ValidateSafePath(&localfileio.LocalFileIO{}, "output"); err != nil { t.Fatalf("expected no error for real subdir, got: %v", err) } } -// TestValidateSafeOutputDir_AllowsNonExistentPath verifies that a path that +// TestValidateSafePath_AllowsNonExistentPath verifies that a path that // does not yet exist (new output directory) is accepted. -func TestValidateSafeOutputDir_AllowsNonExistentPath(t *testing.T) { +func TestValidateSafePath_AllowsNonExistentPath(t *testing.T) { workDir := t.TempDir() chdirForTest(t, workDir) - if err := ValidateSafeOutputDir(&localfileio.LocalFileIO{}, "new_output_dir"); err != nil { + if err := ValidateSafePath(&localfileio.LocalFileIO{}, "new_output_dir"); err != nil { t.Fatalf("expected no error for non-existent path, got: %v", err) } } diff --git a/shortcuts/minutes/minutes_download.go b/shortcuts/minutes/minutes_download.go index 3def454f6..ac9bcd0c6 100644 --- a/shortcuts/minutes/minutes_download.go +++ b/shortcuts/minutes/minutes_download.go @@ -5,10 +5,13 @@ package minutes import ( "context" + "errors" "fmt" "io" + "io/fs" "mime" "net/http" + "path" "path/filepath" "regexp" "strings" @@ -43,7 +46,8 @@ var MinutesDownload = common.Shortcut{ HasFormat: true, Flags: []common.Flag{ {Name: "minute-tokens", Desc: "minute tokens, comma-separated for batch download (max 50)", Required: true}, - {Name: "output", Desc: "output path: file path for single token, directory for batch (default: current dir)"}, + {Name: "output", Desc: "output file path (single token)"}, + {Name: "output-dir", Desc: "output directory (default: ./minutes/{minute_token}/)"}, {Name: "overwrite", Type: "bool", Desc: "overwrite existing output file"}, {Name: "url-only", Type: "bool", Desc: "only print the download URL(s) without downloading"}, }, @@ -60,6 +64,22 @@ var MinutesDownload = common.Shortcut{ return output.ErrValidation("invalid minute token %q: must contain only lowercase alphanumeric characters (e.g. obcnq3b9jl72l83w4f149w9c)", token) } } + // Cheap checks first, then path-safety resolution. + out := runtime.Str("output") + outDir := runtime.Str("output-dir") + if out != "" && outDir != "" { + return output.ErrValidation("--output and --output-dir cannot both be set") + } + if out != "" { + if err := common.ValidateSafePath(runtime.FileIO(), out); err != nil { + return err + } + } + if outDir != "" { + if err := common.ValidateSafePath(runtime.FileIO(), outDir); err != nil { + return err + } + } return nil }, DryRun: func(ctx context.Context, runtime *common.RuntimeContext) *common.DryRunAPI { @@ -70,29 +90,53 @@ var MinutesDownload = common.Shortcut{ }, Execute: func(ctx context.Context, runtime *common.RuntimeContext) error { tokens := common.SplitCSV(runtime.Str("minute-tokens")) - outputPath := runtime.Str("output") + rawOutput := runtime.Str("output") + rawOutputDir := runtime.Str("output-dir") overwrite := runtime.Bool("overwrite") urlOnly := runtime.Bool("url-only") errOut := runtime.IO().ErrOut single := len(tokens) == 1 - // Batch mode: --output must be a directory, not an existing file. - if !single && outputPath != "" { - if fi, err := runtime.FileIO().Stat(outputPath); err == nil && !fi.IsDir() { - return output.ErrValidation("--output %q is a file; batch mode expects a directory path", outputPath) + // Re-interpret --output based on what the path points to. An existing + // directory is promoted to --output-dir so single-token cp semantics + // work. An existing file is rejected in batch mode (the flag carries + // directory semantics there). Unknown filesystem errors are surfaced + // eagerly rather than deferred to Save. + explicitOutputPath := rawOutput + explicitOutputDir := rawOutputDir + if explicitOutputPath != "" { + fi, statErr := runtime.FileIO().Stat(explicitOutputPath) + switch { + case statErr == nil && fi.IsDir(): + explicitOutputDir = explicitOutputPath + explicitOutputPath = "" + case statErr == nil && !fi.IsDir(): + if !single { + return output.ErrValidation("--output %q is a file; batch mode expects a directory (use --output-dir)", explicitOutputPath) + } + case errors.Is(statErr, fs.ErrNotExist): + if !single { + explicitOutputDir = explicitOutputPath + explicitOutputPath = "" + } + default: + return output.Errorf(output.ExitAPI, "io_error", "cannot access --output %q: %s", explicitOutputPath, statErr) } } + useDefaultLayout := explicitOutputPath == "" && explicitOutputDir == "" + if !single { fmt.Fprintf(errOut, "[minutes +download] batch: %d token(s)\n", len(tokens)) } type result struct { - MinuteToken string `json:"minute_token"` - SavedPath string `json:"saved_path,omitempty"` - SizeBytes int64 `json:"size_bytes,omitempty"` - DownloadURL string `json:"download_url,omitempty"` - Error string `json:"error,omitempty"` + MinuteToken string `json:"minute_token"` + ArtifactType string `json:"artifact_type,omitempty"` + SavedPath string `json:"saved_path,omitempty"` + SizeBytes int64 `json:"size_bytes,omitempty"` + DownloadURL string `json:"download_url,omitempty"` + Error string `json:"error,omitempty"` } results := make([]result, len(tokens)) @@ -160,12 +204,18 @@ var MinutesDownload = common.Shortcut{ fmt.Fprintf(errOut, "Downloading media: %s\n", common.MaskToken(token)) - // single token: --output is a file path; batch: --output is a directory - opts := downloadOpts{fio: runtime.FileIO(), overwrite: overwrite, usedNames: usedNames} - if single { - opts.outputPath = outputPath - } else { - opts.outputDir = outputPath + opts := downloadOpts{fio: runtime.FileIO(), overwrite: overwrite} + switch { + case useDefaultLayout: + // Per-token subdirectory guarantees unique paths, so no dedup map. + opts.outputDir = common.DefaultMinuteArtifactDir(token) + case explicitOutputPath != "" && single: + opts.outputPath = explicitOutputPath + default: + opts.outputDir = explicitOutputDir + if !single { + opts.usedNames = usedNames + } } dl, err := downloadMediaFile(ctx, dlClient, downloadURL, token, opts) @@ -173,7 +223,12 @@ var MinutesDownload = common.Shortcut{ results[i] = result{MinuteToken: token, Error: err.Error()} continue } - results[i] = result{MinuteToken: token, SavedPath: dl.savedPath, SizeBytes: dl.sizeBytes} + results[i] = result{ + MinuteToken: token, + ArtifactType: common.ArtifactTypeRecording, + SavedPath: dl.savedPath, + SizeBytes: dl.sizeBytes, + } } // output @@ -183,9 +238,17 @@ var MinutesDownload = common.Shortcut{ return output.ErrAPI(0, r.Error, nil) } if urlOnly { - runtime.Out(map[string]interface{}{"download_url": r.DownloadURL}, nil) + runtime.Out(map[string]interface{}{ + "minute_token": r.MinuteToken, + "download_url": r.DownloadURL, + }, nil) } else { - runtime.Out(map[string]interface{}{"saved_path": r.SavedPath, "size_bytes": r.SizeBytes}, nil) + runtime.Out(map[string]interface{}{ + "minute_token": r.MinuteToken, + "artifact_type": r.ArtifactType, + "saved_path": r.SavedPath, + "size_bytes": r.SizeBytes, + }, nil) } return nil } @@ -230,7 +293,7 @@ type downloadResult struct { type downloadOpts struct { fio fileio.FileIO // file I/O abstraction outputPath string // explicit output file path (single mode only) - outputDir string // output directory (batch mode) + outputDir string // output directory (single or batch) overwrite bool usedNames map[string]bool // tracks used filenames to deduplicate in batch mode } @@ -300,7 +363,7 @@ func downloadMediaFile(ctx context.Context, client *http.Client, downloadURL, mi func resolveFilenameFromResponse(resp *http.Response, minuteToken string) string { if cd := resp.Header.Get("Content-Disposition"); cd != "" { if _, params, err := mime.ParseMediaType(cd); err == nil { - if filename := params["filename"]; filename != "" { + if filename := sanitizeServerFilename(params["filename"]); filename != "" { return filename } } @@ -311,6 +374,20 @@ func resolveFilenameFromResponse(resp *http.Response, minuteToken string) string return minuteToken + ".media" } +// sanitizeServerFilename reduces a server-provided filename to its basename, +// defending against Content-Disposition payloads that embed directory +// separators (e.g. "../other.mp4") and would otherwise escape the intended +// artifact directory after filepath.Join. Empty or dot-only names return "" +// so the caller can fall back to the next naming strategy. +func sanitizeServerFilename(filename string) string { + filename = strings.ReplaceAll(filename, "\\", "/") + filename = path.Base(filename) + if filename == "" || filename == "." || filename == ".." { + return "" + } + return filename +} + // preferredExt overrides Go's mime.ExtensionsByType which returns alphabetically sorted // results (e.g. .m4v before .mp4 for video/mp4). var preferredExt = map[string]string{ diff --git a/shortcuts/minutes/minutes_download_test.go b/shortcuts/minutes/minutes_download_test.go index 8bf398dde..b4ddb888e 100644 --- a/shortcuts/minutes/minutes_download_test.go +++ b/shortcuts/minutes/minutes_download_test.go @@ -159,6 +159,60 @@ func TestResolveFilenameFromResponse_InvalidContentDisposition(t *testing.T) { } } +func TestResolveFilenameFromResponse_RejectsTraversalInDisposition(t *testing.T) { + tests := []struct { + disposition string + wantBase string + }{ + {`attachment; filename="../evil.mp4"`, "evil.mp4"}, + {`attachment; filename="../../etc/passwd"`, "passwd"}, + {`attachment; filename="subdir/inner.mp4"`, "inner.mp4"}, + {`attachment; filename=".."`, "tok001.media"}, + {`attachment; filename="."`, "tok001.media"}, + } + for _, tt := range tests { + resp := &http.Response{ + Header: http.Header{ + "Content-Disposition": []string{tt.disposition}, + }, + } + got := resolveFilenameFromResponse(resp, "tok001") + if got != tt.wantBase { + t.Errorf("disposition=%q: got %q, want %q", tt.disposition, got, tt.wantBase) + } + } +} + +func TestDownload_ServerFilenameTraversalStaysInOutputDir(t *testing.T) { + // Integration: server returns Content-Disposition with "../evil.mp4"; + // file must land inside minutes/{token}/ not the parent directory. + chdir(t, t.TempDir()) + + f, _, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(mediaStub("tok001", "https://example.com/presigned/download")) + reg.Register(&httpmock.Stub{ + URL: "example.com/presigned/download", + RawBody: []byte("content"), + Headers: http.Header{ + "Content-Type": []string{"video/mp4"}, + "Content-Disposition": []string{`attachment; filename="../evil.mp4"`}, + }, + }) + + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001", "--as", "bot", + }, f, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, err := os.Stat("minutes/tok001/evil.mp4"); err != nil { + t.Errorf("expected file inside per-token subdir, got err: %v", err) + } + if _, err := os.Stat("minutes/evil.mp4"); err == nil { + t.Error("file escaped per-token subdir into parent: minutes/evil.mp4 exists") + } +} + func TestResolveFilenameFromResponse_EmptyDispositionFilename(t *testing.T) { resp := &http.Response{ Header: http.Header{ @@ -200,13 +254,20 @@ func TestDownload_Validation_InvalidToken(t *testing.T) { } } -func TestDownload_Validation_OutputWithBatch(t *testing.T) { +func TestDownload_Validation_OutputIsFileInBatchMode(t *testing.T) { + chdir(t, t.TempDir()) + if err := os.WriteFile("already.mp4", []byte("x"), 0644); err != nil { + t.Fatalf("setup: %v", err) + } f, _, _, _ := cmdutil.TestFactory(t, defaultConfig()) err := mountAndRun(t, MinutesDownload, []string{ - "+download", "--minute-tokens", "t1,t2", "--output", "file.mp4", "--as", "user", + "+download", "--minute-tokens", "t1,t2", "--output", "already.mp4", "--as", "user", }, f, nil) if err == nil { - t.Fatal("expected validation error for --output with --minute-tokens") + t.Fatal("expected error for --output pointing at an existing file in batch mode") + } + if !strings.Contains(err.Error(), "batch mode expects a directory") { + t.Errorf("error should mention batch-mode directory expectation, got: %v", err) } } @@ -354,12 +415,12 @@ func TestDownload_Batch_Download(t *testing.T) { t.Fatalf("unexpected error: %v", err) } - // verify output structure var result struct { Data struct { Downloads []struct { - MinuteToken string `json:"minute_token"` - SavedPath string `json:"saved_path"` + MinuteToken string `json:"minute_token"` + ArtifactType string `json:"artifact_type"` + SavedPath string `json:"saved_path"` } `json:"downloads"` } `json:"data"` } @@ -369,6 +430,15 @@ func TestDownload_Batch_Download(t *testing.T) { if len(result.Data.Downloads) != 2 { t.Fatalf("expected 2 downloads, got %d", len(result.Data.Downloads)) } + for _, d := range result.Data.Downloads { + if d.ArtifactType != "recording" { + t.Errorf("token=%s: artifact_type=%q, want recording", d.MinuteToken, d.ArtifactType) + } + wantPrefix := "minutes/" + d.MinuteToken + "/" + if !strings.Contains(d.SavedPath, wantPrefix) { + t.Errorf("token=%s: saved_path=%q, want contain %q", d.MinuteToken, d.SavedPath, wantPrefix) + } + } } func TestDownload_Batch_PartialFailure(t *testing.T) { @@ -417,6 +487,200 @@ func TestDownload_Batch_DuplicateToken(t *testing.T) { } } +// --------------------------------------------------------------------------- +// Integration tests: unified default layout (./minutes/{token}/) +// --------------------------------------------------------------------------- + +func TestDownload_DefaultLayout_Single(t *testing.T) { + chdir(t, t.TempDir()) + + f, stdout, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(mediaStub("tok001", "https://example.com/presigned/download")) + reg.Register(downloadStub("example.com/presigned/download", []byte("fake-video"), "video/mp4")) + + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001", "--as", "bot", + }, f, stdout) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // The stub omits Content-Disposition, so filename resolution falls back + // to {token}{ext} derived from Content-Type. + wantPath := "minutes/tok001/tok001.mp4" + data, err := os.ReadFile(wantPath) + if err != nil { + t.Fatalf("expected file at %s: %v", wantPath, err) + } + if string(data) != "fake-video" { + t.Errorf("content mismatch: %q", string(data)) + } + + var result struct { + Data struct { + MinuteToken string `json:"minute_token"` + ArtifactType string `json:"artifact_type"` + SavedPath string `json:"saved_path"` + } `json:"data"` + } + if err := json.Unmarshal(stdout.Bytes(), &result); err != nil { + t.Fatalf("failed to parse output: %v\nraw: %s", err, stdout.String()) + } + if result.Data.MinuteToken != "tok001" { + t.Errorf("minute_token = %q, want tok001", result.Data.MinuteToken) + } + if result.Data.ArtifactType != "recording" { + t.Errorf("artifact_type = %q, want recording", result.Data.ArtifactType) + } + if !strings.Contains(result.Data.SavedPath, "minutes/tok001/tok001.mp4") { + t.Errorf("saved_path = %q, want contain minutes/tok001/tok001.mp4", result.Data.SavedPath) + } +} + +func TestDownload_DefaultLayout_Batch(t *testing.T) { + chdir(t, t.TempDir()) + + f, stdout, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(mediaStub("tok001", "https://example.com/download/1")) + reg.Register(mediaStub("tok002", "https://example.com/download/2")) + reg.Register(downloadStub("example.com/download/1", []byte("content-1"), "video/mp4")) + reg.Register(downloadStub("example.com/download/2", []byte("content-2"), "video/mp4")) + + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001,tok002", "--as", "bot", + }, f, stdout) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + for _, tok := range []string{"tok001", "tok002"} { + p := "minutes/" + tok + "/" + tok + ".mp4" + if _, err := os.Stat(p); err != nil { + t.Errorf("expected file %s: %v", p, err) + } + } +} + +func TestDownload_OutputDirFlag_SingleToken(t *testing.T) { + chdir(t, t.TempDir()) + if err := os.MkdirAll("dl", 0755); err != nil { + t.Fatalf("setup: %v", err) + } + + f, _, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(mediaStub("tok001", "https://example.com/download/1")) + reg.Register(downloadStub("example.com/download/1", []byte("content-1"), "video/mp4")) + + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001", "--output-dir", "dl", "--as", "bot", + }, f, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, err := os.Stat("dl/tok001.mp4"); err != nil { + t.Errorf("expected dl/tok001.mp4, got err: %v", err) + } + if _, err := os.Stat("minutes"); err == nil { + t.Errorf("minutes/ should not be created when --output-dir is explicit") + } +} + +func TestDownload_Batch_OutputNonExistentPath(t *testing.T) { + // Batch mode with --output pointing at a path that doesn't exist yet: + // auto-upgrade to --output-dir semantics and create the directory. + chdir(t, t.TempDir()) + + f, stdout, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(mediaStub("tok001", "https://example.com/download/1")) + reg.Register(mediaStub("tok002", "https://example.com/download/2")) + reg.Register(downloadStub("example.com/download/1", []byte("c1"), "video/mp4")) + reg.Register(downloadStub("example.com/download/2", []byte("c2"), "video/mp4")) + + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001,tok002", "--output", "new_dir", "--as", "bot", + }, f, stdout) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for _, tok := range []string{"tok001", "tok002"} { + p := "new_dir/" + tok + ".mp4" + if _, err := os.Stat(p); err != nil { + t.Errorf("expected %s to exist: %v", p, err) + } + } +} + +func TestDownload_Validation_RejectsTraversalPath(t *testing.T) { + // --output / --output-dir escaping the working directory must be blocked + // at Validate time, before any API call or file write. + chdir(t, t.TempDir()) + f, _, _, _ := cmdutil.TestFactory(t, defaultConfig()) + + for _, flag := range []string{"--output", "--output-dir"} { + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001", flag, "../escape", "--as", "bot", + }, f, nil) + if err == nil { + t.Errorf("%s ../escape: expected validation error, got nil", flag) + } + } +} + +func TestDownload_Bug_OutputIsExistingDir(t *testing.T) { + chdir(t, t.TempDir()) + if err := os.MkdirAll("existing", 0755); err != nil { + t.Fatalf("setup: %v", err) + } + + f, _, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(mediaStub("tok001", "https://example.com/download/1")) + reg.Register(downloadStub("example.com/download/1", []byte("x"), "video/mp4")) + + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001", "--output", "existing", "--as", "bot", + }, f, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, err := os.Stat("existing/tok001.mp4"); err != nil { + t.Errorf("expected existing/tok001.mp4, got err: %v", err) + } +} + +func TestDownload_Validation_OutputAndOutputDirBothSet(t *testing.T) { + f, _, _, _ := cmdutil.TestFactory(t, defaultConfig()) + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001", "--output", "a.mp4", "--output-dir", "b", "--as", "bot", + }, f, nil) + if err == nil { + t.Fatal("expected validation error when both --output and --output-dir are set") + } + if !strings.Contains(err.Error(), "output-dir") { + t.Errorf("error should mention output-dir, got: %v", err) + } +} + +func TestDownload_ExplicitOutputFile_PreservesPath(t *testing.T) { + chdir(t, t.TempDir()) + + f, _, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(mediaStub("tok001", "https://example.com/download/1")) + reg.Register(downloadStub("example.com/download/1", []byte("x"), "video/mp4")) + + err := mountAndRun(t, MinutesDownload, []string{ + "+download", "--minute-tokens", "tok001", "--output", "my.mp4", "--as", "bot", + }, f, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if _, err := os.Stat("my.mp4"); err != nil { + t.Errorf("expected my.mp4, got err: %v", err) + } + if _, err := os.Stat("minutes"); err == nil { + t.Errorf("minutes/ should not be created when --output is explicit file path") + } +} + func TestDownload_Batch_DryRun(t *testing.T) { f, stdout, _, _ := cmdutil.TestFactory(t, defaultConfig()) err := mountAndRun(t, MinutesDownload, []string{ diff --git a/shortcuts/vc/vc_notes.go b/shortcuts/vc/vc_notes.go index 592ff2f5f..2c1455e27 100644 --- a/shortcuts/vc/vc_notes.go +++ b/shortcuts/vc/vc_notes.go @@ -19,6 +19,7 @@ import ( "io" "net/http" "path/filepath" + "regexp" "strings" "time" @@ -57,6 +58,10 @@ const ( const logPrefix = "[vc +notes]" +// validMinuteToken matches the server's minute-token format and blocks any +// user-supplied token from reaching filesystem paths unsanitized. +var validMinuteToken = regexp.MustCompile(`^[a-z0-9]+$`) + // sanitizeLogValue strips newlines and ANSI escape sequences from user input for safe logging. func sanitizeLogValue(s string) string { s = strings.ReplaceAll(s, "\n", " ") @@ -299,7 +304,7 @@ func fetchNoteByMinuteToken(ctx context.Context, runtime *common.RuntimeContext, } } - // path 2 & 3: AI 产物统一归到 artifacts 字段下 + // path 2 & 3: AI artifacts are collected under the artifacts field. artifacts := map[string]any{} fetchInlineArtifacts(runtime, minuteToken, artifacts) transcriptPath := downloadTranscriptFile(runtime, minuteToken, title) @@ -336,12 +341,15 @@ func sanitizeDirName(title, minuteToken string) string { func downloadTranscriptFile(runtime *common.RuntimeContext, minuteToken string, title string) string { errOut := runtime.IO().ErrOut - base := "." + // With no --output-dir the default layout shares the directory with + // `minutes +download`. Legacy layout is preserved when the flag is set. + var dirName string if outDir := runtime.Str("output-dir"); outDir != "" { - base = outDir + dirName = filepath.Join(outDir, sanitizeDirName(title, minuteToken)) + } else { + dirName = common.DefaultMinuteArtifactDir(minuteToken) } - dirName := filepath.Join(base, sanitizeDirName(title, minuteToken)) - transcriptPath := filepath.Join(dirName, "transcript.txt") + transcriptPath := filepath.Join(dirName, common.DefaultTranscriptFileName) // Overwrite check via FileIO.Stat if !runtime.Bool("overwrite") { @@ -498,7 +506,7 @@ var VCNotes = common.Shortcut{ {Name: "meeting-ids", Desc: "meeting IDs, comma-separated for batch"}, {Name: "minute-tokens", Desc: "minute tokens, comma-separated for batch"}, {Name: "calendar-event-ids", Desc: "calendar event instance IDs, comma-separated for batch"}, - {Name: "output-dir", Desc: "output directory for artifact files (default: current dir)"}, + {Name: "output-dir", Desc: "output directory for artifact files (default: ./minutes/{minute_token}/)"}, {Name: "overwrite", Type: "bool", Desc: "overwrite existing artifact files"}, }, Validate: func(ctx context.Context, runtime *common.RuntimeContext) error { @@ -514,12 +522,19 @@ var VCNotes = common.Shortcut{ } } } - // output-dir 路径安全校验 if outDir := runtime.Str("output-dir"); outDir != "" { - if err := common.ValidateSafeOutputDir(runtime.FileIO(), outDir); err != nil { + if err := common.ValidateSafePath(runtime.FileIO(), outDir); err != nil { return err } } + // Reject malformed minute tokens before they flow into filesystem paths. + if v := runtime.Str("minute-tokens"); v != "" { + for _, token := range common.SplitCSV(v) { + if !validMinuteToken.MatchString(token) { + return output.ErrValidation("invalid minute token %q: must contain only lowercase alphanumeric characters", token) + } + } + } // dynamic scope check based on which flag is provided var required []string switch { diff --git a/shortcuts/vc/vc_notes_test.go b/shortcuts/vc/vc_notes_test.go index 83569d240..9027cce17 100644 --- a/shortcuts/vc/vc_notes_test.go +++ b/shortcuts/vc/vc_notes_test.go @@ -8,6 +8,8 @@ import ( "context" "encoding/json" "fmt" + "os" + "path/filepath" "strings" "sync" "testing" @@ -145,6 +147,16 @@ func transcriptStub(token string) *httpmock.Stub { } } +// transcriptRawStub returns an actual transcript body so downloadTranscriptFile +// writes a file to disk. Used by path-layout tests. +func transcriptRawStub(token string, body []byte) *httpmock.Stub { + return &httpmock.Stub{ + Method: "GET", + URL: "/open-apis/minutes/v1/minutes/" + token + "/transcript", + RawBody: body, + } +} + func minuteGetStub(token, noteID, title string) *httpmock.Stub { minute := map[string]interface{}{"title": title} if noteID != "" { @@ -639,3 +651,79 @@ func TestNotes_CalendarPath_NeedNotes_RequestBody(t *testing.T) { t.Errorf("need_ai_meeting_notes should not be requested") } } + +// --------------------------------------------------------------------------- +// Transcript path layout tests (unified ./minutes/{token}/ default) +// --------------------------------------------------------------------------- + +// chdirForTest switches cwd to a temp dir for the test; restored on cleanup. +func chdirForTest(t *testing.T) string { + t.Helper() + orig, err := os.Getwd() + if err != nil { + t.Fatalf("getwd: %v", err) + } + dir := t.TempDir() + if err := os.Chdir(dir); err != nil { + t.Fatalf("chdir: %v", err) + } + t.Cleanup(func() { os.Chdir(orig) }) + return dir +} + +func TestNotes_TranscriptDefaultLayout(t *testing.T) { + chdirForTest(t) + + f, stdout, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(minuteGetStub("tok001", "", "Meeting Title")) + reg.Register(emptyArtifactsStub("tok001")) + reg.Register(transcriptRawStub("tok001", []byte("speaker1: hello world\n"))) + + err := mountAndRun(t, VCNotes, []string{ + "+notes", "--minute-tokens", "tok001", "--as", "user", + }, f, stdout) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + wantPath := "minutes/tok001/transcript.txt" + data, err := os.ReadFile(wantPath) + if err != nil { + t.Fatalf("expected file at %s: %v", wantPath, err) + } + if string(data) != "speaker1: hello world\n" { + t.Errorf("content mismatch: %q", string(data)) + } + + if _, err := os.Stat("artifact-Meeting Title-tok001"); err == nil { + t.Errorf("legacy artifact dir should not appear under default layout") + } +} + +func TestNotes_TranscriptExplicitOutputDir_PreservesLegacyLayout(t *testing.T) { + chdirForTest(t) + + f, _, _, reg := cmdutil.TestFactory(t, defaultConfig()) + reg.Register(minuteGetStub("tok001", "", "Meeting Title")) + reg.Register(emptyArtifactsStub("tok001")) + reg.Register(transcriptRawStub("tok001", []byte("content"))) + + if err := os.MkdirAll("out", 0755); err != nil { + t.Fatalf("setup: %v", err) + } + + err := mountAndRun(t, VCNotes, []string{ + "+notes", "--minute-tokens", "tok001", "--output-dir", "out", "--as", "user", + }, f, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + wantPath := filepath.Join("out", "artifact-Meeting Title-tok001", "transcript.txt") + if _, err := os.Stat(wantPath); err != nil { + t.Errorf("expected legacy path %s preserved, got err: %v", wantPath, err) + } + if _, err := os.Stat("minutes"); err == nil { + t.Errorf("minutes/ should not be created when --output-dir is explicit") + } +} diff --git a/skills/lark-minutes/SKILL.md b/skills/lark-minutes/SKILL.md index 283621729..281a97dfd 100644 --- a/skills/lark-minutes/SKILL.md +++ b/skills/lark-minutes/SKILL.md @@ -15,7 +15,7 @@ metadata: ## 核心概念 - **妙记(Minutes)**:来源于飞书视频会议的录制产物或用户上传的音视频文件,通过 `minute_token` 标识。 -- **妙记 Token(minute\_token)**:妙记的唯一标识符,可从妙记 URL 末尾提取(例如 `https://*.feishu.cn/minutes/obcnq3b9jl72l83w4f14xxxx` 中的 `obcnq3b9jl72l83w4f14xxxx`)。如果 URL 中包含额外参数(如 `?xxx`),应截取路径最后一段。 +- **妙记 Token(minute\_token)**:妙记的唯一标识符,可从妙记 URL 末尾提取(例如 `https://*.feishu.cn/minutes/obcnxxxxxxxxxxxxxxxxxxxx` 中的 `obcnxxxxxxxxxxxxxxxxxxxx`)。如果 URL 中包含额外参数(如 `?xxx`),应截取路径最后一段。 ## 核心场景 @@ -40,6 +40,7 @@ metadata: 1. 下载妙记音视频文件到本地,或获取有效期 1 天的下载链接。详见 [minutes +download](references/lark-minutes-download.md)。 2. `minutes +download` 只负责音视频媒体文件。 3. 用户只想拿可分享的下载地址时,使用 `--url-only`;用户要落地到本地文件时,直接下载。 +4. 未显式指定路径时,文件默认落到 `./minutes/{minute_token}/`,与 `vc +notes` 的逐字稿共享同一目录便于聚合。 > **注意**:`+download` 只负责音视频媒体文件。如果用户需要的是逐字稿、总结、待办、章节等纪要内容,请使用 [vc +notes --minute-tokens](../lark-vc/references/lark-vc-notes.md)。 diff --git a/skills/lark-minutes/references/lark-minutes-download.md b/skills/lark-minutes/references/lark-minutes-download.md index 64402728b..280fce750 100644 --- a/skills/lark-minutes/references/lark-minutes-download.md +++ b/skills/lark-minutes/references/lark-minutes-download.md @@ -10,23 +10,26 @@ ## 命令 ```bash -# 下载单个妙记的音视频文件 -lark-cli minutes +download --minute-tokens obcnq3b9jl72l83w4f149w9c +# 下载妙记(默认布局,落到 ./minutes/{minute_token}/) +lark-cli minutes +download --minute-tokens obcnxxxxxxxxxxxxxxxxxxxx -# 指定输出路径 -lark-cli minutes +download --minute-tokens obcnq3b9jl72l83w4f149w9c --output ./meeting.mp4 +# 指定输出文件(单 token,文件路径) +lark-cli minutes +download --minute-tokens obcnxxxxxxxxxxxxxxxxxxxx --output ./meeting.mp4 + +# 指定输出目录(单/批量均可,目录路径) +lark-cli minutes +download --minute-tokens obcnxxxxxxxxxxxxxxxxxxxx --output-dir ./downloads # 仅获取下载链接(有效期 1 天),不下载文件 -lark-cli minutes +download --minute-tokens obcnq3b9jl72l83w4f149w9c --url-only +lark-cli minutes +download --minute-tokens obcnxxxxxxxxxxxxxxxxxxxx --url-only -# 批量下载多个妙记 -lark-cli minutes +download --minute-tokens obcnq3b9jl72l83w4f149w9c,obcnexa7814k4t41c446fzwj +# 批量下载多个妙记(默认布局,逐个落到 ./minutes/{minute_token}/) +lark-cli minutes +download --minute-tokens obcnxxxxxxxxxxxxxxxxxxxx,obcnyyyyyyyyyyyyyyyyyyyy -# 批量下载到指定目录 -lark-cli minutes +download --minute-tokens obcnq3b9jl72l83w4f149w9c,obcnexa7814k4t41c446fzwj --output ./downloads +# 批量下载到同一指定目录 +lark-cli minutes +download --minute-tokens obcnxxxxxxxxxxxxxxxxxxxx,obcnyyyyyyyyyyyyyyyyyyyy --output-dir ./downloads # 预览 API 调用 -lark-cli minutes +download --minute-tokens obcnq3b9jl72l83w4f149w9c --dry-run +lark-cli minutes +download --minute-tokens obcnxxxxxxxxxxxxxxxxxxxx --dry-run ``` ## 参数 @@ -34,11 +37,14 @@ lark-cli minutes +download --minute-tokens obcnq3b9jl72l83w4f149w9c --dry-run | 参数 | 必填 | 说明 | |------|------|------| | `--minute-tokens ` | 是 | 妙记 Token,逗号分隔支持批量(最多 50 个) | -| `--output ` | 否 | 输出路径:单个 token 时为文件路径,批量时为目录(默认当前目录) | +| `--output ` | 否 | 输出文件路径(单 token)。若传入的是已存在目录,等价于 `--output-dir`。与 `--output-dir` 互斥 | +| `--output-dir ` | 否 | 输出目录(单/批量均可)。与 `--output` 互斥 | | `--overwrite` | 否 | 覆盖已存在的输出文件 | | `--url-only` | 否 | 仅返回下载链接,不下载文件 | | `--dry-run` | 否 | 预览 API 调用,不执行 | +> **默认落点**:未指定 `--output` / `--output-dir` 时,文件落到 `./minutes/{minute_token}/`。文件名沿用服务端 Content-Disposition / Content-Type 推断,Agent 可从 `saved_path` 字段读取实际路径。同一 minute_token 的录像和 `vc +notes` 的逐字稿默认会落在**同一目录**下,方便聚合。 + ## 核心约束 ### 1. 妙记必须已完成转写 @@ -63,35 +69,45 @@ API 限流 5 次/秒,批量下载时需注意控制频率。 ### 下载模式(默认) +单 token: + ```json { - "saved_path": "访谈一则 & 澳成.mp4", + "minute_token": "obcnxxxxxxxxxxxxxxxxxxxx", + "artifact_type": "recording", + "saved_path": "/path/to/minutes/obcnxxxxxxxxxxxxxxxxxxxx/访谈一则.m4a", "size_bytes": 52428800 } ``` +批量:`downloads` 数组,每条与上面结构一致,失败项带 `error` 字段。 + | 字段 | 说明 | |------|------| -| `saved_path` | 文件保存的本地路径 | +| `minute_token` | 妙记 Token(用于 Agent 索引) | +| `artifact_type` | 固定为 `"recording"`(与 `vc +notes` 的 `"transcript"` 区分) | +| `saved_path` | 文件保存的本地路径(绝对路径) | | `size_bytes` | 文件大小(字节) | ### URL 模式(--url-only) ```json { + "minute_token": "obcnxxxxxxxxxxxxxxxxxxxx", "download_url": "https://..." } ``` | 字段 | 说明 | |------|------| +| `minute_token` | 妙记 Token | | `download_url` | 媒体文件下载链接(有效期 1 天) | ## 如何获取 minute_token | 来源 | 获取方式 | |------|---------| -| 妙记 URL | 从 URL 末尾提取,如 `https://sample.feishu.cn/minutes/obcnq3b9jl72l83w4f149w9c` → `obcnq3b9jl72l83w4f149w9c` | +| 妙记 URL | 从 URL 末尾提取,如 `https://sample.feishu.cn/minutes/obcnxxxxxxxxxxxxxxxxxxxx` → `obcnxxxxxxxxxxxxxxxxxxxx` | | 妙记元信息查询 | `lark-cli minutes minutes get --params '{"minute_token": "obcn..."}'` | | 会议纪要查询 | `lark-cli vc +notes --meeting-ids ` 返回结果中关联的妙记 token | @@ -109,7 +125,9 @@ API 限流 5 次/秒,批量下载时需注意控制频率。 ## 提示 - 音视频文件可能较大,下载无固定超时限制(由用户 Ctrl+C 控制取消)。 -- 未指定 `--output` 时,默认使用妙记原始标题作为文件名(如 `Office Oncall流程2.0宣讲.mp4`)。 +- 默认落点 `./minutes/{minute_token}/` 与 `vc +notes` 的逐字稿共享同一目录,方便 Agent 聚合同一会议的所有产物。 +- 单 token 模式下 `--output` 若传入已存在目录(如 `--output ./existing-dir`),等价于 `--output-dir`,文件落入该目录(cp 语义)。 +- 批量模式下 `--output` 不接受已存在的文件路径(会报错),应改用 `--output-dir`。 - 如需获取妙记的纪要内容(逐字稿、AI 总结等),请使用 [vc +notes](../../lark-vc/references/lark-vc-notes.md)。 ## 参考 diff --git a/skills/lark-vc/SKILL.md b/skills/lark-vc/SKILL.md index 8d67fbf68..ed2659dca 100644 --- a/skills/lark-vc/SKILL.md +++ b/skills/lark-vc/SKILL.md @@ -37,11 +37,11 @@ metadata: # 1. 读取纪要内容 lark-cli docs +fetch --doc # 2. 从返回的 markdown 中提取第一个 的 token -# 3. 下载封面图到 artifact 目录(和逐字稿同目录,保持产物归拢) +# 3. 下载封面图到聚合目录(和逐字稿、录像同目录,保持产物归拢) # 并非所有纪要都有封面画板,没有 标签时跳过即可 -lark-cli docs +media-download --type whiteboard --token --output ./artifact-/cover +lark-cli docs +media-download --type whiteboard --token <whiteboard_token> --output ./minutes/<minute_token>/cover ``` -> **产物目录规范**:同一会议的所有下载产物(封面图、逐字稿等)统一放到 `artifact-<title>/` 目录下,不要散落在当前工作目录。 +> **产物目录规范**:同一会议的所有下载产物(录像、逐字稿、封面图等)统一放到 `./minutes/{minute_token}/` 目录下。这与 `minutes +download` 和 `vc +notes --minute-tokens` 的默认落点保持一致,便于 Agent 聚合。显式路径(如封面图)需手动对齐到同一目录。 > **纪要相关文档 — 根据用户意图选择:** > - `note_doc_token` → **AI 智能纪要**(AI 总结 + 待办 + 章节) diff --git a/skills/lark-vc/references/lark-vc-notes.md b/skills/lark-vc/references/lark-vc-notes.md index 7e082f9cc..1ada83534 100644 --- a/skills/lark-vc/references/lark-vc-notes.md +++ b/skills/lark-vc/references/lark-vc-notes.md @@ -39,7 +39,7 @@ lark-cli vc +notes --meeting-ids 69xxxxxxxxxxxxx28 --dry-run | `--meeting-ids <ids>` | 三选一 | 会议 ID,逗号分隔支持批量 | | `--minute-tokens <tokens>` | 三选一 | 妙记 Token,逗号分隔支持批量 | | `--calendar-event-ids <ids>` | 三选一 | 日程事件 ID,逗号分隔支持批量 | -| `--output-dir <dir>` | 否 | 逐字稿输出目录(默认当前目录),仅 `--minute-tokens` 路径有效 | +| `--output-dir <dir>` | 否 | 逐字稿输出目录。未指定时默认落到 `./minutes/{minute_token}/transcript.txt`(与 `minutes +download` 共享目录);显式指定时沿用旧布局 `./{output-dir}/artifact-{title}-{token}/transcript.txt`。仅 `--minute-tokens` 路径有效 | | `--overwrite` | 否 | 覆盖已存在的逐字稿文件,仅 `--minute-tokens` 路径有效 | | `--dry-run` | 否 | 预览 API 调用,不执行 | @@ -93,7 +93,7 @@ lark-cli vc +notes --meeting-ids 69xxxxxxxxxxxxx28 --dry-run | `artifacts.summary` | AI 总结(JSON 内联) | | `artifacts.todos` | 待办事项(JSON 内联) | | `artifacts.chapters` | 章节纪要(JSON 内联) | -| `artifacts.transcript_file` | 逐字稿本地文件路径(下载到 `./artifact-<title>/transcript.txt`) | +| `artifacts.transcript_file` | 逐字稿本地文件路径。默认落到 `./minutes/{minute_token}/transcript.txt`(与 `minutes +download` 聚合);显式 `--output-dir` 时走旧布局 `./{output-dir}/artifact-{title}-{token}/transcript.txt` | ## 如何获取输入参数