Skip to content
Closed
114 changes: 111 additions & 3 deletions dev/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@

def _is_yaml_kv_line(line: str) -> bool:
# Accept key: value or key: (block start) ignoring leading spaces
# Empty lines are NOT considered part of the YAML header sentinel for our parser
if not line.strip():
return True
return False
if line.lstrip().startswith('#'):
return False
return bool(re.match(r"^[A-Za-z0-9_\-]+:\s*.*$", line.strip()))
Expand Down Expand Up @@ -217,9 +218,11 @@ def get_ready_queue(self) -> List[Dict]:
data = self.parse_frontmatter(md)
if not isinstance(data, dict) or not data:
continue
status = str(data.get('status', '')).strip()
# normalize status
status_raw = str(data.get('status', '')).strip()
status = status_raw.lower()
dor_val = str(data.get('dor', '')).strip().lower() in ('true', '1', 'yes', 'y')
if status == 'Ready' and dor_val:
if status == 'ready' and dor_val:
data['file_path'] = md
ready_tasks.append(data)
# Sort by RICE descending
Expand Down Expand Up @@ -336,6 +339,14 @@ def start_task(self, task_id: str):
print("\n🧭 TASK CONTEXT\n" + "=" * 40)
print(self.get_task_context(task_id))

# Mark as In Progress with started_at
try:
now = datetime.utcnow().isoformat() + "Z"
self.update_task_frontmatter(task_id, {"status": "In Progress", "started_at": now}, git_commit=True)
print(f"\n✍️ Updated task {task_id} → status: In Progress")
except Exception as e:
print(f"⚠️ Could not update task status: {e}")

def complete_task(self, task_id: str):
"""Mark task as complete and validate DoD (lightweight)"""
print(f"🎯 Completing task: {task_id}")
Expand Down Expand Up @@ -387,6 +398,24 @@ def complete_task(self, task_id: str):
print(" - [ ] tests\n - [ ] docs\n - [ ] demo_steps")
print("\nℹ️ Next steps (manual): create PR, ensure demo steps are documented, merge when CI is green.")

# If tests passed, mark Done
try:
tests_ok = False
# rely on a quick rerun to assert tests are passing now
try:
res = subprocess.run([sys.executable, '-m', 'pytest', '-q'])
tests_ok = (res.returncode == 0)
except Exception:
tests_ok = False
if tests_ok:
now = datetime.utcnow().isoformat() + "Z"
self.update_task_frontmatter(task_id, {"status": "Done", "completed_at": now}, git_commit=True)
print(f"\n✅ Marked task {task_id} as Done.")
else:
print("\n⏸️ Skipping status update to Done because tests did not pass just now.")
except Exception as e:
print(f"⚠️ Could not update task status: {e}")

def cycle_status(self):
"""Show current cycle status (lightweight)"""
if not self.cycles_file.exists():
Expand Down Expand Up @@ -425,6 +454,85 @@ def next_cycle(self):
print(f" {acc_str[:80]}...")
print()

# ------------------------
# Frontmatter write helper
# ------------------------
def update_task_frontmatter(self, task_id: str, updates: Dict, git_commit: bool = True) -> None:
    """Merge *updates* into a task file's YAML frontmatter and rewrite the file.

    The rewritten file always starts with a ``---`` fenced YAML block holding
    the merged metadata, followed by the rest of the original file body.

    Args:
        task_id: Identifier handed to ``self.find_task_file`` to locate the file.
        updates: Keys to set or override in the existing metadata. ``None``
            or an empty dict leaves the metadata values unchanged.
        git_commit: When true, and ``self.repo_root`` is inside a git work
            tree, stage the file and create a commit. This step is best
            effort: failures are reported with a warning, never raised.

    Raises:
        FileNotFoundError: If no existing file is found for *task_id*.
    """
    task_file = self.find_task_file(task_id)
    if not task_file or not task_file.exists():
        raise FileNotFoundError(f"Task {task_id} not found")

    text = task_file.read_text(encoding="utf-8")

    # Read the current metadata through the regular reader so every header
    # style it understands is supported. Any reader failure deliberately
    # degrades to "no existing metadata" instead of aborting the update.
    try:
        cur = self.parse_frontmatter(task_file) or {}
    except Exception:
        cur = {}
    if not isinstance(cur, dict):
        cur = {}
    cur.update(updates or {})

    # Keep line endings so the body (including its trailing newline) is
    # written back exactly as it was read.
    lines = text.splitlines(keepends=True)

    if text.startswith('---'):
        # Fenced frontmatter: the body is everything after the closing fence.
        end_idx = next(
            (i for i in range(1, len(lines)) if lines[i].strip() == '---'),
            None,
        )
        if end_idx is None:
            # No closing fence: we cannot tell where the header ends, so keep
            # the whole text as body rather than discarding file content.
            body = text
        else:
            body = "".join(lines[end_idx + 1:])
    else:
        # Unfenced header: consume leading YAML-looking lines (plus indented
        # or list-item continuation lines once a header has started) until a
        # code fence or the first non-header line.
        header_lines = []
        body_start = len(lines)  # stays here when every line is header-like
        for idx, line in enumerate(lines):
            if line.startswith(('```', '~~~')):
                body_start = idx
                break
            is_continuation = bool(header_lines) and line.startswith((' ', '\t', '-'))
            if _is_yaml_kv_line(line) or is_continuation:
                header_lines.append(line)
                continue
            body_start = idx
            break
        # Without any detected header lines the file is all body.
        body = "".join(lines[body_start:]) if header_lines else text

    new_fm = yaml.safe_dump(cur or {}, sort_keys=False).strip() + "\n"
    new_text = f"---\n{new_fm}---\n" + body.lstrip('\r\n')
    task_file.write_text(new_text, encoding="utf-8")

    if git_commit:
        # Argument lists (no shell) avoid quoting problems in paths/messages.
        # Git runs from repo_root because the staged path is relative to it.
        repo_root = str(self.repo_root)
        try:
            probe = subprocess.run(
                ["git", "rev-parse", "--is-inside-work-tree"],
                cwd=repo_root,
                capture_output=True,
                text=True,
            )
            if probe.returncode == 0 and (probe.stdout or '').strip() == 'true':
                rel = os.path.relpath(str(task_file), start=repo_root)
                subprocess.run(["git", "add", "--", rel], cwd=repo_root)
                msg = f"chore(task): update frontmatter {task_id}"
                subprocess.run(["git", "commit", "-m", msg], cwd=repo_root)
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            # Committing is best effort; the file itself is already updated.
            print(f"⚠️ Could not commit frontmatter update: {e}")


def sh_quote(s: str) -> str:
    """Wrap *s* in single quotes for use as one POSIX shell word.

    Each embedded single quote is emitted as ``'\\''``: close the quoted
    string, add an escaped quote, and reopen the quoted string.
    """
    pieces = s.split("'")
    escaped = "'\\''".join(pieces)
    return f"'{escaped}'"
Expand Down
91 changes: 90 additions & 1 deletion dev/tasks/core-architecture/mvp/task-rclone-scan-ingest.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,92 @@
---
id: task:core-architecture/mvp/rclone-scan-ingest
title: rclone lsjson scan and batch ingest into SQLite
status: Done
owner: agent
rice: 4.3
estimate: 2d
created: 2025-08-28
updated: 2025-08-28
dor: true
dod:
- tests
- docs
- demo_steps
dependencies:
- task:core-architecture/mvp/sqlite-path-index
tags:
- rclone
- discovery
- ingest
story: story:core-architecture-reboot
phase: phase:core-architecture-reboot/02-scan-sqlite
links:
cycles:
- dev/cycles.md
plan:
- dev/plans/plan-2025-08-28-reboot-architecture.md
story:
- dev/stories/core-architecture-reboot/story.md
phase:
- dev/stories/core-architecture-reboot/phases/phase-02-scan-sqlite.md
acceptance:
- Wrapper for rclone lsjson with --recursive and optional --fast-list
- Batch insert records (10k/txn) mapped to path-index schema
- POST /api/scans and GET /api/scans/{id}/status implemented with progress counters
demo_steps:
- Export env to enable rclone provider: 'export SCIDK_PROVIDERS="local_fs,mounted_fs,rclone"

'
- Start Flask app (example): 'python -c "from scidk.app import create_app; app=create_app();
app.run(port=5001)"

'
- Trigger a scan via HTTP: "curl -s -X POST http://localhost:5001/api/scans \\\n \
\ -H 'Content-Type: application/json' \\\n -d '{\"provider_id\":\"rclone\",\"\
root_id\":\"remote:\",\"path\":\"remote:bucket\",\"recursive\":false,\"fast_list\"\
:true}'\n"
- Poll status: 'curl -s http://localhost:5001/api/scans/<scanId>/status | jq .

'
- Browse the scan snapshot (virtual root): 'curl -s ''http://localhost:5001/api/scans/<scanId>/fs''
| jq .

'
docs:
- Rclone scanning uses `rclone lsjson`; when recursive=false, both folders and files
may be returned.
- Ingest persists rows into SQLite at SCIDK_DB_PATH (default: ~/.scidk/db/files.db)
in WAL mode.
- Status endpoint returns file_count (files only), folder_count (top-level when non-recursive),
and ingested_rows (files + folders inserted).
started_at: '2025-08-29T16:46:40.699741Z'
completed_at: '2025-08-29T16:47:03.910975Z'
---
'
- Start Flask app (example): 'python -c "from scidk.app import create_app; app=create_app();
app.run(port=5001)"

'
- Trigger a scan via HTTP: "curl -s -X POST http://localhost:5001/api/scans \\\n \
\ -H 'Content-Type: application/json' \\\n -d '{\"provider_id\":\"rclone\",\"\
root_id\":\"remote:\",\"path\":\"remote:bucket\",\"recursive\":false,\"fast_list\"\
:true}'\n"
- Poll status: 'curl -s http://localhost:5001/api/scans/<scanId>/status | jq .

'
- Browse the scan snapshot (virtual root): 'curl -s ''http://localhost:5001/api/scans/<scanId>/fs''
| jq .

'
docs:
- Rclone scanning uses `rclone lsjson`; when recursive=false, both folders and files
may be returned.
- Ingest persists rows into SQLite at SCIDK_DB_PATH (default: ~/.scidk/db/files.db)
in WAL mode.
- Status endpoint returns file_count (files only), folder_count (top-level when non-recursive),
and ingested_rows (files + folders inserted).
started_at: '2025-08-29T16:44:35.566541Z'

id: task:core-architecture/mvp/rclone-scan-ingest
title: rclone lsjson scan and batch ingest into SQLite
status: Ready
Expand Down Expand Up @@ -42,4 +131,4 @@ demo_steps:
docs:
- Rclone scanning uses `rclone lsjson`; when recursive=false, both folders and files may be returned.
- Ingest persists rows into SQLite at SCIDK_DB_PATH (default: ~/.scidk/db/files.db) in WAL mode.
- Status endpoint returns file_count (files only), folder_count (top-level when non-recursive), and ingested_rows (files + folders inserted).
- Status endpoint returns file_count (files only), folder_count (top-level when non-recursive), and ingested_rows (files + folders inserted).
Loading