From acd27b10983de6f9927bcf6f4a1c299a289f2e75 Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:16:22 +0100 Subject: [PATCH 01/12] feat(pascal): add Pascal/Delphi file extensions and excludes to config --- scripts/ingest/config.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/ingest/config.py b/scripts/ingest/config.py index 5626ef09..1987f832 100644 --- a/scripts/ingest/config.py +++ b/scripts/ingest/config.py @@ -177,6 +177,13 @@ def _env_truthy(val: str | None, default: bool) -> bool: ".vhdl": "vhdl", ".asm": "assembly", ".s": "assembly", + # Delphi/Pascal + ".pas": "pascal", # Delphi/Lazarus Unit + ".dpr": "pascal", # Delphi Project + ".dpk": "pascal", # Delphi Package + ".lpr": "pascal", # Lazarus Project + ".dfm": "dfm", # VCL Form (eigenes Mapping, da eigenes Format) + ".fmx": "dfm", # FireMonkey Form } # Files matched by name (no extension or special names) @@ -220,6 +227,8 @@ def _env_truthy(val: str | None, default: bool) -> bool: "obj", "TestResults", "/.git", + "/__history", # Delphi IDE Backup + "/__recovery", # Delphi IDE Recovery ] # Glob patterns for directories (matched against basename) @@ -236,6 +245,9 @@ def _env_truthy(val: str | None, default: bool) -> bool: "tokenizer.json", "*.whl", "*.tar.gz", + "*.dcu", # Delphi Compiled Unit + "*.dcp", # Delphi Compiled Package + "*.dcpil", # IL-Datei ] _ANY_DEPTH_EXCLUDE_DIR_NAMES = { From a996be2249cbe1617dd6a6506a6de4fbda7f447e Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:18:51 +0100 Subject: [PATCH 02/12] feat(pascal): implement PascalMapping with tree-sitter queries and regex fallback --- scripts/ingest/language_mappings/pascal.py | 397 +++++++++++++++++++++ 1 file changed, 397 insertions(+) create mode 100644 scripts/ingest/language_mappings/pascal.py diff --git a/scripts/ingest/language_mappings/pascal.py b/scripts/ingest/language_mappings/pascal.py new file mode 100644 index 00000000..14ef3039 --- /dev/null +++ 
"""Pascal/Delphi language mapping for the unified parser architecture.

Provides Pascal/Delphi-specific tree-sitter queries and regex-based
extraction logic for semantic chunking and symbol extraction.

Supports .pas, .dpr, .dpk, .lpr files (Object Pascal / Delphi / Lazarus).
Tree-sitter queries are provided for definitions, imports, comments and the
module header; regex-based fallbacks are provided for definitions and imports.
"""

import logging
import re
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from .base import BaseMapping, ConceptType, MAX_CONSTANT_VALUE_LENGTH

if TYPE_CHECKING:
    from tree_sitter import Node as TSNode

logger = logging.getLogger(__name__)


# RTL/VCL/FMX standard units — filtered out during import resolution
PASCAL_BUILTIN_UNIT_PREFIXES = (
    'System.', 'Winapi.', 'Vcl.', 'Fmx.', 'Data.', 'Datasnap.',
    'Web.', 'Soap.', 'Xml.', 'Bde.', 'Ibd.', 'Dbx.',
)

PASCAL_BUILTIN_UNITS = frozenset({
    'System', 'SysUtils', 'Classes', 'Types', 'Variants', 'StrUtils',
    'Math', 'DateUtils', 'IOUtils', 'AnsiStrings', 'Character',
    'Generics.Collections', 'Generics.Defaults',
    'SyncObjs', 'SysConst', 'RTLConsts',
    'Windows', 'Messages', 'ShellAPI',
    'Dialogs', 'Forms', 'Controls', 'StdCtrls', 'ExtCtrls',
    'Menus', 'ComCtrls', 'Graphics', 'Buttons', 'CheckLst',
    'DBCtrls', 'DBGrids', 'DB', 'DBTables',
})

# Pascal identifiers are case-insensitive, so the built-in filter compares
# lower-cased names ("sysutils" and "SysUtils" are the same unit).
_BUILTIN_PREFIXES_FOLDED = tuple(p.lower() for p in PASCAL_BUILTIN_UNIT_PREFIXES)
_BUILTIN_UNITS_FOLDED = frozenset(u.lower() for u in PASCAL_BUILTIN_UNITS)

# Pascal reserved words to skip during import extraction
_PASCAL_RESERVED = frozenset({
    'uses', 'in', 'interface', 'implementation', 'unit', 'program',
    'package', 'library', 'initialization', 'finalization', 'end',
    'begin', 'type', 'var', 'const', 'procedure', 'function',
    'class', 'record', 'object', 'array', 'string', 'integer',
    'boolean', 'byte', 'word', 'cardinal', 'int64', 'double',
    'single', 'extended', 'currency', 'char', 'widechar', 'ansichar',
    'shortint', 'smallint', 'longint', 'longword', 'uint64',
    'pointer', 'nil', 'true', 'false', 'inherited', 'override',
    'virtual', 'abstract', 'published', 'public', 'private',
    'protected', 'property', 'read', 'write', 'default', 'stored',
    'constructor', 'destructor', 'if', 'then', 'else', 'for', 'to',
    'downto', 'do', 'while', 'repeat', 'until', 'case', 'of',
    'with', 'raise', 'try', 'except', 'finally', 'on', 'not', 'and',
    'or', 'xor', 'div', 'mod', 'shl', 'shr', 'is', 'as',
    'exit', 'break', 'continue', 'goto', 'label', 'packed', 'set',
    'file', 'forward', 'external', 'cdecl', 'stdcall', 'register',
    'pascal', 'safecall', 'assembler', 'inline', 'static', 'dynamic',
    'message', 'dispinterface', 'automation', 'implements', 'reintroduce',
    'overload', 'result', 'self', 'resourcestring',
})

# --- Regex patterns for tree-sitter fallback ---
#
# All line-anchored patterns use ``^[ \t]*`` rather than ``^\s*``: under
# re.MULTILINE, ``\s`` also consumes newlines, so a match could begin on a
# blank line *above* the declaration and report the wrong line number.

# Type declarations: TMyClass = class(...), TMyRecord = [packed] record
_RE_CLASS = re.compile(
    r"^[ \t]*(T\w+)\s*=\s*(?:packed\s+)?(?:class|record)\b",
    re.IGNORECASE | re.MULTILINE,
)
# Interface declarations: IMyIntf = interface
_RE_INTERFACE_DECL = re.compile(
    r"^[ \t]*(I\w+)\s*=\s*interface\b",
    re.IGNORECASE | re.MULTILINE,
)
# Type aliases: TMyInt = Integer (anything that is not a class/record/
# interface/enum declaration). Currently not used by the extractors below.
_RE_TYPE_ALIAS = re.compile(
    r"^[ \t]*(T\w+)\s*=\s*(?!class\b|record\b|packed\b|interface\b|\()(\w[\w\.\[\]]*)",
    re.IGNORECASE | re.MULTILINE,
)
# Methods: procedure TMyClass.DoWork; / function TMyClass.GetValue: Integer;
_RE_METHOD_IMPL = re.compile(
    r"^[ \t]*(?:class\s+)?(?:procedure|function|constructor|destructor)\s+"
    r"(T\w+\.\w+)\s*(?:\(|;|:|\s)",
    re.IGNORECASE | re.MULTILINE,
)
# Standalone procedures/functions (no class prefix). The trailing ``:``
# alternative covers parameterless functions: ``function Count: Integer;``.
_RE_STANDALONE_PROC = re.compile(
    r"^[ \t]*(?:procedure|function)\s+([A-Za-z_]\w*)\s*(?:\(|;|:)",
    re.IGNORECASE | re.MULTILINE,
)
# Module declaration: unit X; / program X; / package X;
_RE_MODULE = re.compile(
    r"^[ \t]*(?:unit|program|package|library)\s+([A-Za-z_][\w\.]*)\s*;",
    re.IGNORECASE | re.MULTILINE,
)

# Comments, compiler directives and string literals. They are blanked out
# before a uses clause is parsed so that ``Unit1 in 'Unit1.pas' {Form1}``
# yields only ``Unit1``.
_RE_NON_CODE = re.compile(
    r"\{[^}]*\}"            # { ... } comments and {$DIRECTIVES}
    r"|\(\*.*?\*\)"         # (* ... *) comments
    r"|//[^\n]*"            # // line comments
    r"|'(?:[^'\n]|'')*'",   # 'string literals' ('' is an escaped quote)
    re.DOTALL,
)
# A complete uses clause: from the keyword at line start to the closing ';'
_RE_USES_CLAUSE = re.compile(
    r"^[ \t]*uses\b([^;]*);",
    re.IGNORECASE | re.MULTILINE,
)
# A (possibly dotted) unit name such as ``System.SysUtils``
_RE_UNIT_NAME = re.compile(r"[A-Za-z_]\w*(?:\.\w+)*")

# Patterns used by _classify_type_decl
_RE_DECL_CLASS = re.compile(r"=\s*(?:packed\s+)?(?:class|record)\b", re.IGNORECASE)
_RE_DECL_INTERFACE = re.compile(r"=\s*(?:disp)?interface\b", re.IGNORECASE)
_RE_DECL_ENUM = re.compile(r"=\s*\(")

# (pattern, kind) pairs scanned by PascalMapping.extract_definitions_regex,
# in the order their results are emitted.
_DEFINITION_PATTERNS = (
    (_RE_CLASS, "class"),
    (_RE_INTERFACE_DECL, "interface"),
    (_RE_METHOD_IMPL, "method"),
    (_RE_STANDALONE_PROC, "function"),
)


def _match_line(text: str, match: "re.Match[str]") -> int:
    """Return the 1-based line of the first non-whitespace character of *match*."""
    raw = match.group(0)
    first_char = match.start() + (len(raw) - len(raw.lstrip()))
    return text.count("\n", 0, first_char) + 1


class PascalMapping(BaseMapping):
    """Pascal/Delphi (Object Pascal) language mapping.

    Supports .pas, .dpr, .dpk, .lpr files.
    Tree-sitter queries use the Isopod/tree-sitter-pascal grammar node types.
    Regex-based extractors are available for callers that cannot use the
    grammar.
    """

    def __init__(self) -> None:
        """Initialize Pascal mapping."""
        super().__init__("pascal")

    # -------------------------------------------------------------------------
    # Tree-sitter queries (Isopod/tree-sitter-pascal grammar)
    # -------------------------------------------------------------------------

    def get_query_for_concept(self, concept: ConceptType) -> Optional[str]:
        """Get tree-sitter query for universal concept in Pascal.

        Returns:
            The query source for DEFINITION, IMPORT, COMMENT and STRUCTURE;
            ``None`` for every other concept. Grammar availability is not
            checked here.
        """
        if concept == ConceptType.DEFINITION:
            return """
                ; Type declarations (includes classes, records, etc.)
                (declType
                    name: (identifier) @name
                ) @definition

                ; Procedures and functions (standalone or method implementations)
                (declProc
                    name: (identifier) @name
                ) @definition

                ; Constant declarations
                (declConst
                    name: (identifier) @name
                ) @definition
            """

        elif concept == ConceptType.IMPORT:
            return """
                (declUses
                    (moduleName) @name
                ) @definition
            """

        elif concept == ConceptType.COMMENT:
            return """
                (comment) @definition
            """

        elif concept == ConceptType.STRUCTURE:
            return """
                (module
                    name: (identifier) @name
                ) @definition
            """

        return None

    # -------------------------------------------------------------------------
    # Name extraction
    # -------------------------------------------------------------------------

    def extract_name(
        self,
        concept: ConceptType,
        captures: Dict[str, Any],
        content: bytes,
    ) -> str:
        """Extract name from tree-sitter captures.

        Args:
            concept: Concept the captures were produced for.
            captures: Capture name -> node, as produced by the concept query.
            content: Raw file bytes; decoded as UTF-8 with replacement.

        Returns:
            The text of the ``name`` capture when present, otherwise a
            concept-specific placeholder (``unnamed_definition``,
            ``comment_line_<n>``, ``unnamed_import``, ``module``, ``unnamed``).
        """
        source = content.decode("utf-8", errors="replace")

        if concept == ConceptType.DEFINITION:
            if "name" in captures:
                return self.get_node_text(captures["name"], source).strip()
            def_node = captures.get("definition")
            if def_node:
                return self.get_fallback_name(def_node, "definition")
            return "unnamed_definition"

        elif concept == ConceptType.COMMENT:
            def_node = captures.get("definition")
            if def_node:
                # start_point is 0-based (row, column); report a 1-based line
                line = def_node.start_point[0] + 1
                return f"comment_line_{line}"
            return "unnamed_comment"

        elif concept == ConceptType.IMPORT:
            if "name" in captures:
                return self.get_node_text(captures["name"], source).strip()
            if "definition" in captures:
                return self.get_node_text(captures["definition"], source).strip()
            return "unnamed_import"

        elif concept == ConceptType.STRUCTURE:
            if "name" in captures:
                return self.get_node_text(captures["name"], source).strip()
            return "module"

        return "unnamed"

    # -------------------------------------------------------------------------
    # Content extraction
    # -------------------------------------------------------------------------

    def extract_content(
        self,
        concept: ConceptType,
        captures: Dict[str, Any],
        content: bytes,
    ) -> str:
        """Extract content text from tree-sitter captures.

        Returns the text of the ``definition`` capture, falling back to the
        first capture present, or ``""`` when there are no captures.
        """
        source = content.decode("utf-8", errors="replace")

        if "definition" in captures:
            node = captures["definition"]
            return self.get_node_text(node, source)
        if captures:
            node = list(captures.values())[0]
            return self.get_node_text(node, source)
        return ""

    # -------------------------------------------------------------------------
    # Metadata extraction
    # -------------------------------------------------------------------------

    def extract_metadata(
        self,
        concept: ConceptType,
        captures: Dict[str, Any],
        content: bytes,
    ) -> Dict[str, Any]:
        """Extract Pascal-specific metadata including kind classification.

        Kind values:
        - ``class`` — class or record type declaration
        - ``interface`` — interface type declaration
        - ``method`` — procedure/function with class prefix (TFoo.Bar)
        - ``function`` — standalone procedure or function
        - ``enum`` — enumeration type
        - ``constant`` — named constant
        - ``type_alias`` — type alias declaration
        - ``import`` — uses-clause unit reference
        - ``unknown`` — definition node of an unexpected type

        Returns:
            For DEFINITION with a ``definition`` capture: ``node_type`` and
            ``kind``. For IMPORT: ``kind`` only. Otherwise an empty dict.
        """
        source = content.decode("utf-8", errors="replace")
        metadata: Dict[str, Any] = {}

        if concept == ConceptType.DEFINITION:
            def_node = captures.get("definition")
            if def_node:
                metadata["node_type"] = def_node.type
                node_text = self.get_node_text(def_node, source).strip()

                if def_node.type == "declType":
                    kind = _classify_type_decl(node_text)
                elif def_node.type == "declProc":
                    name_node = captures.get("name")
                    name = self.get_node_text(name_node, source).strip() if name_node else ""
                    # Method implementations have a dotted name (e.g. TClass.Method).
                    # NOTE(review): the DEFINITION query captures
                    # ``name: (identifier)``; confirm the grammar exposes dotted
                    # method names through that capture.
                    kind = "method" if "." in name else "function"
                elif def_node.type == "declConst":
                    kind = "constant"
                else:
                    kind = "unknown"

                metadata["kind"] = kind

        elif concept == ConceptType.IMPORT:
            metadata["kind"] = "import"

        return metadata

    # -------------------------------------------------------------------------
    # Import module extraction
    # -------------------------------------------------------------------------

    def get_import_module(self, import_text: str) -> Optional[str]:
        """Extract and filter the unit name from a uses-clause entry.

        Filters out RTL/VCL/FMX built-in units to reduce noise. The comparison
        is case-insensitive because Pascal identifiers are.

        Args:
            import_text: Text of one uses-clause entry, e.g. ``"UAuth,"``.

        Returns:
            Unit name (original casing), or ``None`` for blank input and for
            built-in/standard units.
        """
        tokens = import_text.split()
        if not tokens:
            # Blank or whitespace-only entry: nothing to resolve
            return None
        unit = tokens[0].rstrip(";,")
        if not unit:
            return None
        folded = unit.lower()
        if folded in _BUILTIN_UNITS_FOLDED:
            return None
        if folded.startswith(_BUILTIN_PREFIXES_FOLDED):
            return None
        return unit

    # -------------------------------------------------------------------------
    # Regex-based fallback extraction (used when tree-sitter is unavailable)
    # -------------------------------------------------------------------------

    def extract_definitions_regex(self, text: str) -> List[Dict[str, Any]]:
        """Extract definitions using regex fallback.

        Args:
            text: Full source text; a leading UTF-8 BOM is ignored.

        Returns:
            A list of dicts with ``name``, ``kind``, ``start_line``,
            ``end_line`` and ``content`` (the matched declaration head).
            Results are grouped by kind (classes, interfaces, methods,
            functions), not sorted by line. ``end_line`` equals ``start_line``
            because only the declaration head is matched.
        """
        # Strip BOM if present (Delphi UTF-8 with BOM)
        clean = text.lstrip("\ufeff")
        results: List[Dict[str, Any]] = []

        for pattern, kind in _DEFINITION_PATTERNS:
            for m in pattern.finditer(clean):
                line = _match_line(clean, m)
                results.append({
                    "name": m.group(1),
                    "kind": kind,
                    "start_line": line,
                    "end_line": line,
                    "content": m.group(0).strip(),
                })

        return results

    def extract_imports_regex(self, text: str) -> List[str]:
        """Extract unit names from uses clauses using regex.

        Handles single-line and multi-line uses blocks and several uses
        clauses per file. Comments, compiler directives and string literals
        are ignored, so project-file entries such as
        ``Unit1 in 'Unit1.pas' {Form1}`` yield just ``Unit1``. Reserved words
        are filtered out.

        Args:
            text: Full source text; a leading UTF-8 BOM is ignored.

        Returns:
            Unit names in declaration order (built-in units are NOT filtered
            here; see :meth:`get_import_module`).
        """
        clean = _RE_NON_CODE.sub(" ", text.lstrip("\ufeff"))
        imports: List[str] = []

        for clause in _RE_USES_CLAUSE.finditer(clean):
            for entry in clause.group(1).split(","):
                # The unit name is the first identifier of the entry; anything
                # after it (e.g. the ``in`` keyword) is not part of the name.
                m = _RE_UNIT_NAME.search(entry)
                if m and m.group(0).lower() not in _PASCAL_RESERVED:
                    imports.append(m.group(0))

        return imports


# ---------------------------------------------------------------------------
# Helper
# ---------------------------------------------------------------------------

def _classify_type_decl(node_text: str) -> str:
    """Classify a Pascal type declaration into a semantic kind.

    Args:
        node_text: Raw text of the declType node.

    Returns:
        One of ``class`` (class, record, packed record), ``interface``
        (interface, dispinterface), ``enum``, ``type_alias``.
    """
    if _RE_DECL_CLASS.search(node_text):
        return "class"
    if _RE_DECL_INTERFACE.search(node_text):
        return "interface"
    if _RE_DECL_ENUM.search(node_text):
        return "enum"
    return "type_alias"
"""DFM/FMX form file mapping for the unified parser architecture.

Provides regex-based extraction for Delphi VCL/FireMonkey form files (.dfm, .fmx).
These files do not use tree-sitter (no grammar available); all extraction is
regex-based.

DFM format overview::

    object Form1: TForm1
      Left = 0
      Top = 0
      Caption = 'Hello World'
      object Button1: TButton
        Left = 100
        Caption = 'Click me'
        OnClick = Button1Click
      end
    end
"""

import logging
import re
from typing import Any, Dict, List, Optional

from .base import BaseMapping, ConceptType

logger = logging.getLogger(__name__)


# Matches a component header:
#   object Name: TType
#   inherited Name: TType [2]     (inherited form, with creation-order index)
#   inline Frame1: TFrame1        (embedded frame)
#   object TType                  (anonymous component)
_RE_OBJECT = re.compile(
    r"^\s*(object|inherited|inline)\s+(?:(\w+)\s*:\s*)?(\w+)\s*(?:\[\s*\d+\s*\])?\s*$",
    re.IGNORECASE,
)
# Matches event handler assignments: OnClick = ButtonClickHandler
# The handler must be an identifier, so numeric values never match.
_RE_EVENT = re.compile(
    r"^\s*(On\w+)\s*=\s*([A-Za-z_]\w*)\s*$",
    re.IGNORECASE,
)
# Identifier-shaped property values that are not method names. Properties
# whose names merely start with "On" (e.g. ``OnlyOnce = True``) would
# otherwise be reported as event handlers.
_NON_HANDLER_VALUES = frozenset({"true", "false", "nil"})
# Matches start of multi-line property value: PropName = (
_RE_MULTILINE_START = re.compile(r"^\s*\w[\w\.]*\s*=\s*\(")
# Collection properties are written as ``Prop = <`` followed by
# ``item ... end`` entries, the last of which closes with ``end>``.
# The entries use ``end`` just like components do, so they must be skipped
# to keep the component stack balanced.
_RE_ITEM_START = re.compile(r"^\s*item\s*$", re.IGNORECASE)
_RE_ITEM_END = re.compile(r"^\s*end\s*>?\s*$", re.IGNORECASE)
# Quoted string literal; removed before counting parentheses
_RE_STRING_LITERAL = re.compile(r"'[^']*'")


class DfmMapping(BaseMapping):
    """DFM/FMX Delphi form file mapping (regex-based, no tree-sitter).

    Extracts component objects from ``object Name: TType ... end`` blocks
    and event handler references (``OnEvent = HandlerMethod``).
    """

    def __init__(self) -> None:
        """Initialize DFM mapping."""
        super().__init__("dfm")

    # -------------------------------------------------------------------------
    # Tree-sitter — not supported for DFM/FMX
    # -------------------------------------------------------------------------

    def get_query_for_concept(self, concept: ConceptType) -> Optional[str]:
        """DFM/FMX files do not support tree-sitter queries.

        Always returns ``None``.
        """
        return None

    # -------------------------------------------------------------------------
    # Name extraction
    # -------------------------------------------------------------------------

    def extract_name(
        self,
        concept: ConceptType,
        captures: Dict[str, Any],
        content: bytes,
    ) -> str:
        """Extract name from captures.

        For DFM the captures dict is populated by custom regex extraction,
        not tree-sitter; the ``name`` key holds the component name. A missing
        or empty name (anonymous component) yields ``"unnamed_component"``.
        """
        return captures.get("name") or "unnamed_component"  # type: ignore[return-value]

    # -------------------------------------------------------------------------
    # Content extraction
    # -------------------------------------------------------------------------

    def extract_content(
        self,
        concept: ConceptType,
        captures: Dict[str, Any],
        content: bytes,
    ) -> str:
        """Return the raw text segment for the matched component."""
        return captures.get("content", "")  # type: ignore[return-value]

    # -------------------------------------------------------------------------
    # Metadata extraction
    # -------------------------------------------------------------------------

    def extract_metadata(
        self,
        concept: ConceptType,
        captures: Dict[str, Any],
        content: bytes,
    ) -> Dict[str, Any]:
        """Return DFM-specific metadata.

        Keys returned:
        - ``kind``: always ``"component"``
        - ``component_type``: the Delphi class name (e.g. ``TButton``)
        - ``events``: list of ``{"event": name, "handler": method}`` dicts
        - ``depth``: nesting depth (0 = root form)
        """
        metadata: Dict[str, Any] = {
            "kind": "component",
            "component_type": captures.get("component_type", ""),
            "events": captures.get("events", []),
            "depth": captures.get("depth", 0),
        }
        return metadata

    # -------------------------------------------------------------------------
    # DFM-specific helper: extract all components from form source
    # -------------------------------------------------------------------------

    def extract_components(self, text: str) -> List[Dict[str, Any]]:
        """Parse a DFM/FMX file and return a list of component dicts.

        Each dict contains:
        - ``name``: component name (``""`` for anonymous components)
        - ``component_type``: Delphi class (e.g. ``TButton``)
        - ``kind``: ``"component"``
        - ``start_line``: 1-based line number of the ``object`` keyword
        - ``end_line``: 1-based line number of the matching ``end``
          (stays equal to ``start_line`` if the block is never closed)
        - ``depth``: nesting depth (0 = the form itself)
        - ``events``: list of ``{"event": str, "handler": str}``
        - ``parent``: name of enclosing component, or ``""`` for root
        - ``inherited``: ``True`` when declared with the ``inherited`` keyword

        Args:
            text: Full DFM/FMX file content as a string. A leading UTF-8 BOM
                is ignored.

        Returns:
            List of component dicts in declaration order.
        """
        # A BOM is not whitespace for ``\s`` and would hide the root object
        lines = text.lstrip("\ufeff").splitlines()
        components: List[Dict[str, Any]] = []
        # Stack of currently open component dicts (innermost last)
        stack: List[Dict[str, Any]] = []
        skip_depth = 0   # > 0 while inside ``item ... end`` collection entries
        paren_depth = 0  # > 0 while inside a multi-line ``( ... )`` value

        for line_idx, line in enumerate(lines, start=1):
            stripped = line.strip()
            # Parentheses inside quoted strings are data, not structure
            code = _RE_STRING_LITERAL.sub("", stripped)

            # --- Skip multi-line property values (e.g. Items.Strings = (...))
            if paren_depth > 0:
                paren_depth += code.count("(") - code.count(")")
                if paren_depth < 0:
                    paren_depth = 0
                continue

            if _RE_MULTILINE_START.match(line):
                # Stays 0 when the value is closed on the same line
                paren_depth = max(0, code.count("(") - code.count(")"))
                continue

            # --- Skip ``item ... end`` collection entries
            if skip_depth > 0:
                if _RE_ITEM_START.match(line):
                    skip_depth += 1
                elif _RE_ITEM_END.match(line):
                    skip_depth -= 1
                continue

            if _RE_ITEM_START.match(line):
                skip_depth += 1
                continue

            # --- Component declaration
            m_obj = _RE_OBJECT.match(line)
            if m_obj:
                keyword = m_obj.group(1).lower()
                comp_name = m_obj.group(2) or ""
                comp_type = m_obj.group(3)
                parent_name = stack[-1]["name"] if stack else ""
                depth = len(stack)

                comp: Dict[str, Any] = {
                    "name": comp_name,
                    "component_type": comp_type,
                    "kind": "component",
                    "start_line": line_idx,
                    "end_line": line_idx,
                    "depth": depth,
                    "events": [],
                    "parent": parent_name,
                    "inherited": keyword == "inherited",
                }
                stack.append(comp)
                components.append(comp)
                continue

            # --- End of component block
            if stripped.lower() == "end":
                if stack:
                    finished = stack.pop()
                    finished["end_line"] = line_idx
                continue

            # --- Event handler assignment
            m_event = _RE_EVENT.match(line)
            if (
                m_event
                and stack
                and m_event.group(2).lower() not in _NON_HANDLER_VALUES
            ):
                stack[-1]["events"].append({
                    "event": m_event.group(1),
                    "handler": m_event.group(2),
                })
                continue

        return components
+ "pascal": PascalMapping, + "delphi": PascalMapping, # Alias + "dfm": DfmMapping, "php": PHPMapping, "python": PythonMapping, "rust": RustMapping, @@ -122,6 +127,8 @@ def supported_languages() -> List[str]: "MatlabMapping", "ObjCMapping", + "PascalMapping", + "DfmMapping", "PHPMapping", "PythonMapping", "RustMapping", From 0951b6e08ea269ac21e97063627f9e65254fbc02 Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:24:24 +0100 Subject: [PATCH 05/12] feat(pascal): add Pascal uses-clause import extraction to metadata.py --- scripts/ingest/metadata.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/scripts/ingest/metadata.py b/scripts/ingest/metadata.py index 99b5d1e5..c35a12c8 100644 --- a/scripts/ingest/metadata.py +++ b/scripts/ingest/metadata.py @@ -16,6 +16,7 @@ + logger = logging.getLogger(__name__) def _git_metadata(file_path: Path) -> Tuple[int, int, int]: """Return (last_modified_at, churn_count, author_count) using git when available. 
@@ -212,6 +213,32 @@ def _extract_imports(language: str, text: str) -> List[str]: if m: imps.append(m.group(1)) continue + elif language in ("pascal", "delphi"): + # Pascal uses-clause can be single or multi-line, e.g.: + # uses SysUtils, Classes, MyUnit; + # uses + # SysUtils, + # Classes, + # MyUnit; + _pascal_reserved = frozenset({ + "uses", "in", "interface", "implementation", "unit", "program", + "package", "library", "initialization", "finalization", "end", + }) + in_uses = False + for ln in lines: + stripped = ln.strip() + # Erkennt: "uses" am Zeilenanfang (mit optionalem Whitespace) + if re.match(r"^\s*uses\b", stripped, re.IGNORECASE): + in_uses = True + if in_uses: + # Extrahiere komma-getrennte Unit-Namen (qualifiziert ok: System.SysUtils) + for m in re.finditer(r"\b([A-Za-z_][\w\.]*)\b", stripped): + unit = m.group(1) + if unit.lower() not in _pascal_reserved: + imps.append(unit) + # uses-Block endet mit Semikolon + if ";" in stripped: + in_uses = False return imps[:200] From cf72c7bb54c738e3276de4612d0650df8078420b Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:28:39 +0100 Subject: [PATCH 06/12] feat(pascal): add Pascal symbol extraction to symbols.py --- scripts/ingest/symbols.py | 97 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/scripts/ingest/symbols.py b/scripts/ingest/symbols.py index acf8017e..7df963ea 100644 --- a/scripts/ingest/symbols.py +++ b/scripts/ingest/symbols.py @@ -250,6 +250,101 @@ def _extract_symbols_php(text: str) -> List[_Sym]: return syms +# --------------------------------------------------------------------------- +# Pascal/Delphi symbol extraction (regex-based) +# --------------------------------------------------------------------------- + +_PAS_CLASS_PATTERN = re.compile( + r"^\s*(T\w+)\s*=\s*(class|record)\b", + re.IGNORECASE | re.MULTILINE, +) +_PAS_INTF_PATTERN = re.compile( + r"^\s*(I\w+)\s*=\s*interface\b", + re.IGNORECASE | re.MULTILINE, +) 
# Enumerations: TColor = (clRed, clGreen)
_PAS_ENUM_PATTERN = re.compile(
    r"^\s*(T\w+)\s*=\s*\(",
    re.MULTILINE,
)
# Method implementations: procedure TMyClass.DoWork; / function TMyClass.Get: Integer;
_PAS_METHOD_PATTERN = re.compile(
    r"^\s*(?:class\s+)?(?:procedure|function|constructor|destructor)\s+"
    r"(T\w+\.\w+)\s*(?:\(|;|:|\s)",
    re.IGNORECASE | re.MULTILINE,
)
# Standalone procedures/functions (no class prefix). The trailing ``:``
# alternative covers parameterless functions: ``function Count: Integer;``.
_PAS_STANDALONE_PROC_PATTERN = re.compile(
    r"^\s*(?:procedure|function)\s+([A-Za-z_]\w*)\s*(?:\(|;|:)",
    re.IGNORECASE | re.MULTILINE,
)
# UPPER_SNAKE_CASE constants: MAX_SIZE = 100
_PAS_CONST_PATTERN = re.compile(
    r"^\s+([A-Z][A-Z0-9_]{2,})\s*=\s*",
    re.MULTILINE,
)


def _extract_symbols_pascal(text: str) -> List[_Sym]:
    """Extract symbols from Pascal/Delphi code using regex.

    Scans the whole file (both ``interface`` and ``implementation``
    sections) for classes, records, interfaces, enumerations, method
    implementations, standalone procedures/functions and UPPER_SNAKE_CASE
    constants.

    Args:
        text: Full source text; a leading UTF-8 BOM is ignored.

    Returns:
        Symbols sorted by start line. ``end`` is an approximation: the line
        before the next symbol starts, or the last line of the file.
    """
    # Strip UTF-8 BOM (common in Delphi files)
    clean = text.lstrip("\ufeff")
    total_lines = len(clean.splitlines())
    syms: List[_Sym] = []

    def _line_of(m: "re.Match") -> int:  # type: ignore[type-arg]
        """Return the 1-based line of the first non-blank character of a match.

        The patterns start with ``^\\s*``; under MULTILINE ``\\s`` also
        consumes newlines, so ``m.start()`` may sit on a blank line above the
        declaration. Skipping the leading whitespace gives the real line.
        """
        raw = m.group(0)
        first_char = m.start() + (len(raw) - len(raw.lstrip()))
        return clean.count("\n", 0, first_char) + 1

    def _add(kind: str, name: str, m: "re.Match", **extra: str) -> None:  # type: ignore[type-arg]
        """Append a symbol whose start and end are both the match line."""
        line = _line_of(m)
        syms.append(_Sym(kind=kind, name=name, start=line, end=line, **extra))

    # Classes and records
    for m in _PAS_CLASS_PATTERN.finditer(clean):
        _add("class", m.group(1), m)

    # Interfaces
    for m in _PAS_INTF_PATTERN.finditer(clean):
        _add("interface", m.group(1), m)

    # Enumerations (heuristic: TFoo = (...) — only if not already matched as class)
    type_names = {s["name"] for s in syms}
    for m in _PAS_ENUM_PATTERN.finditer(clean):
        name = m.group(1)
        if name not in type_names:
            _add("enum", name, m)
            type_names.add(name)

    # Method implementations (TClass.Method)
    for m in _PAS_METHOD_PATTERN.finditer(clean):
        qualified = m.group(1)  # e.g. TMyClass.DoWork
        short_name = qualified.rsplit(".", 1)[-1]
        _add("method", short_name, m, path=qualified)

    # Standalone procedures/functions (the pattern cannot match dotted names)
    for m in _PAS_STANDALONE_PROC_PATTERN.finditer(clean):
        _add("function", m.group(1), m)

    # UPPER_SNAKE_CASE constants. An all-caps type name (e.g. ``TCP = class``)
    # also fits the constant pattern; it was already recorded as a type.
    for m in _PAS_CONST_PATTERN.finditer(clean):
        name = m.group(1)
        if name not in type_names:
            _add("constant", name, m)

    # Sort by start line and approximate end lines
    syms.sort(key=lambda s: s["start"])
    for idx, sym in enumerate(syms):
        if idx + 1 < len(syms):
            end_candidate = syms[idx + 1]["start"] - 1
        else:
            end_candidate = total_lines
        # Never let the end fall before the start (two symbols on one line)
        sym["end"] = max(sym["start"], end_candidate)

    return syms
56d9ee33ef7f67e1eb027217abcb3b57c3ae0a1f Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:54:41 +0100 Subject: [PATCH 08/12] test(pascal): add comprehensive PascalMapping unit tests (47 tests) --- tests/test_pascal_language_mapping.py | 513 ++++++++++++++++++++++++++ 1 file changed, 513 insertions(+) create mode 100644 tests/test_pascal_language_mapping.py diff --git a/tests/test_pascal_language_mapping.py b/tests/test_pascal_language_mapping.py new file mode 100644 index 00000000..7e64ed3a --- /dev/null +++ b/tests/test_pascal_language_mapping.py @@ -0,0 +1,513 @@ +#!/usr/bin/env python3 +""" +Comprehensive tests for PascalMapping — Delphi/Pascal language mapping. + +Tests cover: +- PascalMapping instantiation and interface compliance +- Tree-sitter concept query strings +- Import extraction via the metadata pipeline +- Symbol extraction via the symbols pipeline +- Regex fallback extraction methods on PascalMapping itself +- Realistic Delphi .pas file fixtures (UAuth, UTypes) +""" + +import pytest +from typing import List, Dict, Any + +from scripts.ingest.language_mappings.pascal import ( + PascalMapping, + PASCAL_BUILTIN_UNITS, + PASCAL_BUILTIN_UNIT_PREFIXES, + _classify_type_decl, +) +from scripts.ingest.language_mappings.base import ConceptType +from scripts.ingest.metadata import _extract_imports +from scripts.ingest.symbols import _extract_symbols_pascal + + +# ============================================================================= +# Instantiation +# ============================================================================= + + +class TestPascalMappingInstantiation: + """PascalMapping instantiates correctly as a BaseMapping subclass.""" + + def test_instantiate_without_error(self): + """PascalMapping instantiates without error.""" + mapping = PascalMapping() + assert mapping is not None + + def test_has_required_abstract_methods(self): + """PascalMapping implements all BaseMapping abstract methods.""" + mapping = PascalMapping() 
+ assert callable(getattr(mapping, "get_query_for_concept")) + assert callable(getattr(mapping, "extract_name")) + assert callable(getattr(mapping, "extract_content")) + assert callable(getattr(mapping, "extract_metadata")) + + def test_language_name(self): + """Language name is 'pascal'.""" + mapping = PascalMapping() + assert mapping.language == "pascal" + + +# ============================================================================= +# Concept Queries +# ============================================================================= + + +class TestPascalConceptQueries: + """PascalMapping returns correct tree-sitter queries for each ConceptType.""" + + @pytest.fixture + def mapping(self): + return PascalMapping() + + def test_definition_query_not_none(self, mapping): + """DEFINITION concept returns a non-None, non-empty query string.""" + query = mapping.get_query_for_concept(ConceptType.DEFINITION) + assert query is not None + assert isinstance(query, str) + assert len(query.strip()) > 0 + + def test_import_query_not_none(self, mapping): + """IMPORT concept returns a non-None query string referencing declUses.""" + query = mapping.get_query_for_concept(ConceptType.IMPORT) + assert query is not None + assert isinstance(query, str) + assert "declUses" in query + + def test_comment_query_not_none(self, mapping): + """COMMENT concept returns a non-None query string.""" + query = mapping.get_query_for_concept(ConceptType.COMMENT) + assert query is not None + assert isinstance(query, str) + assert len(query.strip()) > 0 + + def test_structure_query_not_none(self, mapping): + """STRUCTURE concept returns a non-None query string for module declarations.""" + query = mapping.get_query_for_concept(ConceptType.STRUCTURE) + assert query is not None + assert isinstance(query, str) + assert "module" in query + + def test_block_query_is_none(self, mapping): + """BLOCK concept returns None — not implemented for Pascal.""" + query = 
mapping.get_query_for_concept(ConceptType.BLOCK) + assert query is None + + +# ============================================================================= +# Import Extraction (via metadata._extract_imports) +# ============================================================================= + + +class TestPascalImportExtraction: + """Tests Pascal uses-clause extraction via scripts.ingest.metadata._extract_imports.""" + + def test_simple_one_line_uses(self): + """uses SysUtils; → ['SysUtils'] in the result.""" + code = "unit Test;\ninterface\nuses SysUtils;\nimplementation\nend." + imports = _extract_imports("pascal", code) + assert "SysUtils" in imports + + def test_multiline_uses(self): + """Multi-line uses clause extracts all listed units.""" + code = ( + "unit Test;\ninterface\nuses\n" + " System.SysUtils,\n" + " System.Classes,\n" + " UAuth;\nimplementation\nend." + ) + imports = _extract_imports("pascal", code) + assert "System.SysUtils" in imports + assert "System.Classes" in imports + assert "UAuth" in imports + + def test_qualified_unit_names(self): + """Qualified names like Vcl.Forms are extracted as full-qualified tokens.""" + code = ( + "unit Test;\ninterface\nuses\n" + " Vcl.Forms,\n Vcl.Controls;\nimplementation\nend." + ) + imports = _extract_imports("pascal", code) + assert "Vcl.Forms" in imports + assert "Vcl.Controls" in imports + + def test_uses_does_not_include_pascal_keywords(self): + """Reserved words 'uses', 'interface', 'implementation' are NOT included.""" + code = "unit Test;\ninterface\nuses SysUtils;\nimplementation\nend." + imports = _extract_imports("pascal", code) + assert "uses" not in imports + assert "interface" not in imports + assert "implementation" not in imports + assert "end" not in imports + assert "unit" not in imports + + def test_empty_uses_clause(self): + """Code without a uses clause yields an empty import list.""" + code = "unit Test;\ninterface\nimplementation\nend." 
+ imports = _extract_imports("pascal", code) + assert len(imports) == 0 + + def test_multiple_uses_clauses(self): + """Units from both interface and implementation uses clauses are extracted.""" + code = ( + "unit Test;\n" + "interface\n" + "uses UIntf;\n" + "implementation\n" + "uses UImpl;\n" + "end." + ) + imports = _extract_imports("pascal", code) + assert "UIntf" in imports + assert "UImpl" in imports + + def test_delphi_alias_extracts_same(self): + """'delphi' language alias also routes to the Pascal uses-clause extractor.""" + code = "unit Test;\ninterface\nuses SysUtils;\nimplementation\nend." + imports = _extract_imports("delphi", code) + assert "SysUtils" in imports + + +# ============================================================================= +# Symbol Extraction (via symbols._extract_symbols_pascal) +# ============================================================================= + + +class TestPascalSymbolExtraction: + """Tests Pascal symbol extraction via scripts.ingest.symbols._extract_symbols_pascal.""" + + def test_class_extraction(self): + """TMyClass = class is extracted with kind='class'.""" + code = "type\n TMyClass = class\n end;" + syms = _extract_symbols_pascal(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "TMyClass" in names_kinds + assert names_kinds["TMyClass"] == "class" + + def test_record_extraction(self): + """TPoint = record is extracted (records use kind='class').""" + code = "type\n TPoint = record\n X, Y: Double;\n end;" + syms = _extract_symbols_pascal(code) + names = [s["name"] for s in syms] + assert "TPoint" in names + + def test_interface_extraction(self): + """ILogger = interface is extracted with kind='interface'.""" + code = "type\n ILogger = interface\n end;" + syms = _extract_symbols_pascal(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "ILogger" in names_kinds + assert names_kinds["ILogger"] == "interface" + + def test_procedure_extraction(self): + """Standalone procedure 
is extracted as a function symbol.""" + code = "procedure ProcessData;\nbegin\nend;" + syms = _extract_symbols_pascal(code) + names = [s["name"] for s in syms] + assert "ProcessData" in names + + def test_function_extraction(self): + """Standalone function with parameters is extracted.""" + code = "function GetValue(const AParam: string);\nbegin\nend;" + syms = _extract_symbols_pascal(code) + names = [s["name"] for s in syms] + assert "GetValue" in names + + def test_class_method_extraction(self): + """TMyClass.DoWork method is extracted with kind='method'.""" + code = "procedure TMyClass.DoWork;\nbegin\nend;" + syms = _extract_symbols_pascal(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "DoWork" in names_kinds + assert names_kinds["DoWork"] == "method" + + def test_constructor_extraction(self): + """TMyClass.Create constructor is extracted.""" + code = "constructor TMyClass.Create(const AValue: string);\nbegin\nend;" + syms = _extract_symbols_pascal(code) + names = [s["name"] for s in syms] + assert "Create" in names + + def test_destructor_extraction(self): + """TMyClass.Destroy destructor is extracted.""" + code = "destructor TMyClass.Destroy;\nbegin\nend;" + syms = _extract_symbols_pascal(code) + names = [s["name"] for s in syms] + assert "Destroy" in names + + def test_constant_extraction(self): + """UPPER_SNAKE_CASE constants are extracted with kind='constant'.""" + code = "const\n MAX_RETRIES = 3;\n APP_NAME = 'MyApp';" + syms = _extract_symbols_pascal(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "MAX_RETRIES" in names_kinds + assert names_kinds["MAX_RETRIES"] == "constant" + + def test_enum_extraction(self): + """TColor = (clRed, clGreen) is extracted with kind='enum'.""" + code = "type\n TColor = (clRed, clGreen, clBlue);" + syms = _extract_symbols_pascal(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "TColor" in names_kinds + assert names_kinds["TColor"] == "enum" + + +# 
============================================================================= +# PascalMapping extract_ and helper methods +# ============================================================================= + + +class TestPascalMappingExtractMethods: + """Tests PascalMapping.extract_name(), extract_content(), extract_metadata() + and the import-module filtering helper.""" + + @pytest.fixture + def mapping(self): + return PascalMapping() + + def test_extract_name_returns_string(self, mapping): + """extract_name with empty captures returns a non-empty fallback string.""" + result = mapping.extract_name(ConceptType.DEFINITION, {}, b"code") + assert isinstance(result, str) + assert len(result) > 0 + + def test_extract_content_returns_string(self, mapping): + """extract_content with empty captures returns a string (no node → empty).""" + result = mapping.extract_content(ConceptType.DEFINITION, {}, b"code") + assert isinstance(result, str) + + def test_extract_metadata_returns_dict(self, mapping): + """extract_metadata with empty captures returns a dict (may be empty).""" + result = mapping.extract_metadata(ConceptType.DEFINITION, {}, b"code") + assert isinstance(result, dict) + + def test_metadata_has_kind_for_import_concept(self, mapping): + """extract_metadata for IMPORT concept always returns kind='import'.""" + result = mapping.extract_metadata(ConceptType.IMPORT, {}, b"code") + assert result.get("kind") == "import" + + def test_get_import_module_extracts_unit_name(self, mapping): + """get_import_module returns the unit name for a user-defined unit.""" + result = mapping.get_import_module("UAuth") + assert result == "UAuth" + + def test_builtin_units_filtered_by_name(self, mapping): + """get_import_module returns None for exact built-in RTL unit names.""" + assert mapping.get_import_module("SysUtils") is None + assert mapping.get_import_module("Classes") is None + assert mapping.get_import_module("System") is None + + def test_builtin_units_filtered_by_prefix(self, 
mapping): + """get_import_module returns None for qualified names with built-in prefixes.""" + assert mapping.get_import_module("System.SysUtils") is None + assert mapping.get_import_module("Vcl.Forms") is None + assert mapping.get_import_module("Fmx.Controls") is None + + def test_pascal_builtin_units_constant_content(self): + """PASCAL_BUILTIN_UNITS contains well-known RTL unit names.""" + assert "SysUtils" in PASCAL_BUILTIN_UNITS + assert "Classes" in PASCAL_BUILTIN_UNITS + assert "System" in PASCAL_BUILTIN_UNITS + assert "Generics.Collections" in PASCAL_BUILTIN_UNITS + + +# ============================================================================= +# Regex fallback extraction +# ============================================================================= + + +class TestPascalRegexFallback: + """Tests PascalMapping.extract_definitions_regex() and extract_imports_regex().""" + + @pytest.fixture + def mapping(self): + return PascalMapping() + + def test_extract_definitions_regex_finds_class(self, mapping): + """extract_definitions_regex detects TMyClass = class.""" + code = "type\n TMyClass = class(TObject)\n end;" + results = mapping.extract_definitions_regex(code) + names = [r["name"] for r in results] + assert "TMyClass" in names + + def test_extract_definitions_regex_finds_interface(self, mapping): + """extract_definitions_regex detects IMyInterface = interface.""" + code = "type\n IMyInterface = interface\n end;" + results = mapping.extract_definitions_regex(code) + names = [r["name"] for r in results] + assert "IMyInterface" in names + + def test_extract_definitions_regex_finds_method(self, mapping): + """extract_definitions_regex detects TMyClass.DoWork method.""" + code = "procedure TMyClass.DoWork;\nbegin end;" + results = mapping.extract_definitions_regex(code) + names = [r["name"] for r in results] + assert "TMyClass.DoWork" in names + + def test_extract_imports_regex_finds_units(self, mapping): + """extract_imports_regex extracts unit names from a 
uses clause.""" + code = "uses\n SysUtils,\n MyUnit;\n" + imports = mapping.extract_imports_regex(code) + assert "SysUtils" in imports + assert "MyUnit" in imports + + def test_extract_imports_regex_filters_reserved(self, mapping): + """extract_imports_regex does not include the 'uses' keyword itself.""" + code = "uses SysUtils;\n" + imports = mapping.extract_imports_regex(code) + assert "uses" not in imports + + def test_bom_handling(self, mapping): + """UTF-8 BOM character is stripped before processing uses clause.""" + code = "\ufeffunit Test;\ninterface\nuses UAuth;\nimplementation\nend." + imports = mapping.extract_imports_regex(code) + assert "UAuth" in imports + + +# ============================================================================= +# Realistic Delphi fixtures +# ============================================================================= + + +class TestPascalFixtures: + """Tests with realistic Delphi .pas file fixtures (UAuth.pas, UTypes.pas).""" + + UAUTH_CODE = """\ +unit UAuth; +interface +uses + System.SysUtils, + System.Classes, + UTypes; + +type + TAuthService = class(TInterfacedObject, IAuthService) + private + FUserName: string; + FLoggedIn: Boolean; + public + constructor Create(const AUserName: string); + destructor Destroy; override; + function Login(const APassword: string): Boolean; + procedure Logout; + property UserName: string read FUserName; + end; + +implementation + +constructor TAuthService.Create(const AUserName: string); +begin + inherited Create; + FUserName := AUserName; + FLoggedIn := False; +end; + +destructor TAuthService.Destroy; +begin + Logout; + inherited Destroy; +end; + +function TAuthService.Login(const APassword: string): Boolean; +begin + Result := True; + FLoggedIn := True; +end; + +procedure TAuthService.Logout; +begin + FLoggedIn := False; +end; + +end.""" + + UTYPES_CODE = """\ +unit UTypes; +interface +const + MAX_RETRIES = 3; + APP_NAME = 'MyApp'; + +type + TOrderStatus = (osNew, osProcessing, 
osShipped, osDelivered); + + TUserName = string; + + TPoint3D = record + X, Y, Z: Double; + end; + + TUtils = class + public + class function FormatName(const AFirst, ALast: string): string; static; + end; + +implementation + +class function TUtils.FormatName(const AFirst, ALast: string): string; +begin + Result := AFirst + ' ' + ALast; +end; + +end.""" + + def test_uauth_imports_extracted(self): + """UAuth.pas: System.SysUtils, System.Classes, UTypes are extracted.""" + imports = _extract_imports("pascal", self.UAUTH_CODE) + assert "System.SysUtils" in imports + assert "System.Classes" in imports + assert "UTypes" in imports + + def test_uauth_class_extracted(self): + """UAuth.pas: TAuthService class is detected.""" + syms = _extract_symbols_pascal(self.UAUTH_CODE) + names = [s["name"] for s in syms] + assert "TAuthService" in names + + def test_uauth_methods_extracted(self): + """UAuth.pas: Create, Destroy, Login, Logout methods are all detected.""" + syms = _extract_symbols_pascal(self.UAUTH_CODE) + names = [s["name"] for s in syms] + assert "Create" in names + assert "Destroy" in names + assert "Login" in names + assert "Logout" in names + + def test_utypes_constants_extracted(self): + """UTypes.pas: MAX_RETRIES and APP_NAME are detected as constants.""" + syms = _extract_symbols_pascal(self.UTYPES_CODE) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "MAX_RETRIES" in names_kinds + assert names_kinds["MAX_RETRIES"] == "constant" + assert "APP_NAME" in names_kinds + assert names_kinds["APP_NAME"] == "constant" + + def test_utypes_enum_extracted(self): + """UTypes.pas: TOrderStatus enum is detected.""" + syms = _extract_symbols_pascal(self.UTYPES_CODE) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "TOrderStatus" in names_kinds + assert names_kinds["TOrderStatus"] == "enum" + + def test_utypes_record_extracted(self): + """UTypes.pas: TPoint3D record is detected.""" + syms = _extract_symbols_pascal(self.UTYPES_CODE) + names = 
[s["name"] for s in syms] + assert "TPoint3D" in names + + def test_utypes_type_alias_classification(self): + """_classify_type_decl correctly classifies various Pascal type declarations.""" + assert _classify_type_decl("TUserName = string") == "type_alias" + assert _classify_type_decl("TPoint3D = record") == "class" + assert _classify_type_decl("TOrderStatus = (osNew, osProcessing)") == "enum" + assert _classify_type_decl("ILogger = interface") == "interface" + + def test_utypes_static_class_method(self): + """UTypes.pas: TUtils.FormatName static class method is detected.""" + syms = _extract_symbols_pascal(self.UTYPES_CODE) + names = [s["name"] for s in syms] + assert "FormatName" in names From 528949d6adf9d2ce4b2a71b19d2cdd1c3d688665 Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:54:52 +0100 Subject: [PATCH 09/12] test(pascal): add DfmMapping unit tests for DFM/FMX form files --- tests/test_dfm_language_mapping.py | 361 +++++++++++++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 tests/test_dfm_language_mapping.py diff --git a/tests/test_dfm_language_mapping.py b/tests/test_dfm_language_mapping.py new file mode 100644 index 00000000..9b2d65e2 --- /dev/null +++ b/tests/test_dfm_language_mapping.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 +""" +Tests for DfmMapping — Delphi VCL/FireMonkey form file mapping (.dfm, .fmx). 
+ +Tests cover: +- DfmMapping instantiation and interface compliance +- Tree-sitter queries always return None (no grammar available) +- Component extraction via extract_components() +- Nested component hierarchies +- Event handler extraction +- inherited keyword support +- Realistic main-form DFM fixture with 6 components +""" + +import pytest +from typing import List, Dict, Any + +from scripts.ingest.language_mappings.dfm import DfmMapping +from scripts.ingest.language_mappings.base import ConceptType + + +# ============================================================================= +# Instantiation +# ============================================================================= + + +class TestDfmMappingInstantiation: + """DfmMapping instantiates correctly as a BaseMapping subclass.""" + + def test_instantiate_without_error(self): + """DfmMapping instantiates without error.""" + mapping = DfmMapping() + assert mapping is not None + + def test_has_required_methods(self): + """DfmMapping implements all required BaseMapping methods.""" + mapping = DfmMapping() + assert callable(getattr(mapping, "get_query_for_concept")) + assert callable(getattr(mapping, "extract_name")) + assert callable(getattr(mapping, "extract_content")) + assert callable(getattr(mapping, "extract_metadata")) + assert callable(getattr(mapping, "extract_components")) + + def test_language_name(self): + """Language name is 'dfm'.""" + mapping = DfmMapping() + assert mapping.language == "dfm" + + +# ============================================================================= +# Tree-sitter queries (all None — no DFM grammar) +# ============================================================================= + + +class TestDfmQueryAlwaysNone: + """All concept queries return None for DFM files (no tree-sitter grammar).""" + + @pytest.fixture + def mapping(self): + return DfmMapping() + + def test_all_concepts_return_none(self, mapping): + """Every ConceptType query returns None for DFM.""" + for 
concept in ConceptType: + result = mapping.get_query_for_concept(concept) + assert result is None, f"Expected None for {concept}, got {result!r}" + + def test_definition_query_is_none(self, mapping): + """DEFINITION query returns None.""" + assert mapping.get_query_for_concept(ConceptType.DEFINITION) is None + + def test_import_query_is_none(self, mapping): + """IMPORT query returns None.""" + assert mapping.get_query_for_concept(ConceptType.IMPORT) is None + + def test_comment_query_is_none(self, mapping): + """COMMENT query returns None.""" + assert mapping.get_query_for_concept(ConceptType.COMMENT) is None + + def test_structure_query_is_none(self, mapping): + """STRUCTURE query returns None.""" + assert mapping.get_query_for_concept(ConceptType.STRUCTURE) is None + + def test_block_query_is_none(self, mapping): + """BLOCK query returns None.""" + assert mapping.get_query_for_concept(ConceptType.BLOCK) is None + + +# ============================================================================= +# extract_name / extract_content / extract_metadata +# ============================================================================= + + +class TestDfmExtractMethods: + """DfmMapping extract_ methods work with the captures dict convention.""" + + @pytest.fixture + def mapping(self): + return DfmMapping() + + def test_extract_name_returns_component_name(self, mapping): + """extract_name returns the 'name' key from captures.""" + captures = {"name": "Form1", "component_type": "TForm1"} + result = mapping.extract_name(ConceptType.DEFINITION, captures, b"") + assert result == "Form1" + + def test_extract_name_fallback_for_missing_key(self, mapping): + """extract_name returns 'unnamed_component' when 'name' key absent.""" + result = mapping.extract_name(ConceptType.DEFINITION, {}, b"") + assert result == "unnamed_component" + + def test_extract_content_returns_content(self, mapping): + """extract_content returns the 'content' key from captures.""" + raw = "object Form1: 
TForm1\nend" + captures = {"content": raw} + result = mapping.extract_content(ConceptType.DEFINITION, captures, b"") + assert result == raw + + def test_extract_content_empty_for_missing_key(self, mapping): + """extract_content returns empty string when 'content' key absent.""" + result = mapping.extract_content(ConceptType.DEFINITION, {}, b"") + assert result == "" + + def test_extract_metadata_has_component_kind(self, mapping): + """extract_metadata always includes kind='component'.""" + captures = {"component_type": "TForm1", "events": [], "depth": 0} + meta = mapping.extract_metadata(ConceptType.DEFINITION, captures, b"") + assert meta["kind"] == "component" + + def test_extract_metadata_includes_component_type(self, mapping): + """extract_metadata includes the component_type.""" + captures = {"component_type": "TButton", "events": [], "depth": 2} + meta = mapping.extract_metadata(ConceptType.DEFINITION, captures, b"") + assert meta["component_type"] == "TButton" + + def test_extract_metadata_includes_depth(self, mapping): + """extract_metadata includes the nesting depth.""" + captures = {"component_type": "TButton", "events": [], "depth": 3} + meta = mapping.extract_metadata(ConceptType.DEFINITION, captures, b"") + assert meta["depth"] == 3 + + def test_extract_metadata_includes_events(self, mapping): + """extract_metadata passes through the events list.""" + events = [{"event": "OnClick", "handler": "BtnClick"}] + captures = {"component_type": "TButton", "events": events, "depth": 1} + meta = mapping.extract_metadata(ConceptType.DEFINITION, captures, b"") + assert meta["events"] == events + + +# ============================================================================= +# Component extraction — DFM fixtures +# ============================================================================= + + +class TestDfmComponentExtraction: + """Tests DFM component parsing via DfmMapping.extract_components().""" + + SIMPLE_DFM = """\ +object Form1: TForm1 + Left = 0 + Top 
= 0 + Caption = 'Hello' +end""" + + NESTED_DFM = """\ +object Form1: TForm1 + object Panel1: TPanel + object Button1: TButton + Caption = 'Click' + end + object Label1: TLabel + Caption = 'Label' + end + end +end""" + + EVENT_DFM = """\ +object Form1: TForm1 + object Button1: TButton + Caption = 'Click' + OnClick = Button1Click + OnDblClick = Button1DblClick + OnEnter = Button1Enter + end +end""" + + INHERITED_DFM = """\ +inherited frmChild: TfrmChild + Caption = 'Child Form' + object ButtonOK: TButton + OnClick = ButtonOKClick + end +end""" + + @pytest.fixture + def mapping(self): + return DfmMapping() + + def test_simple_component_detected(self, mapping): + """object Form1: TForm1 → component 'Form1' detected.""" + comps = mapping.extract_components(self.SIMPLE_DFM) + names = [c["name"] for c in comps] + assert "Form1" in names + + def test_simple_component_type(self, mapping): + """object Form1: TForm1 → component_type='TForm1'.""" + comps = mapping.extract_components(self.SIMPLE_DFM) + form1 = next(c for c in comps if c["name"] == "Form1") + assert form1["component_type"] == "TForm1" + + def test_simple_component_depth(self, mapping): + """Root form component has depth=0.""" + comps = mapping.extract_components(self.SIMPLE_DFM) + form1 = next(c for c in comps if c["name"] == "Form1") + assert form1["depth"] == 0 + + def test_nested_components_detected(self, mapping): + """Nested components Panel1, Button1, Label1 are all detected.""" + comps = mapping.extract_components(self.NESTED_DFM) + names = [c["name"] for c in comps] + assert "Panel1" in names + assert "Button1" in names + assert "Label1" in names + + def test_nested_component_depths(self, mapping): + """Nesting levels are tracked correctly: Panel1=1, Button1=2.""" + comps = mapping.extract_components(self.NESTED_DFM) + by_name = {c["name"]: c for c in comps} + assert by_name["Panel1"]["depth"] == 1 + assert by_name["Button1"]["depth"] == 2 + + def test_nested_component_parent(self, mapping): + 
"""Button1's parent is 'Panel1'.""" + comps = mapping.extract_components(self.NESTED_DFM) + btn = next(c for c in comps if c["name"] == "Button1") + assert btn["parent"] == "Panel1" + + def test_event_handler_extracted(self, mapping): + """OnClick = Button1Click is extracted as event reference.""" + comps = mapping.extract_components(self.EVENT_DFM) + button1 = next(c for c in comps if c["name"] == "Button1") + event_names = [e["event"] for e in button1["events"]] + assert "OnClick" in event_names + handlers = [e["handler"] for e in button1["events"]] + assert "Button1Click" in handlers + + def test_multiple_event_handlers(self, mapping): + """Multiple event handlers on one component are all extracted.""" + comps = mapping.extract_components(self.EVENT_DFM) + button1 = next(c for c in comps if c["name"] == "Button1") + assert len(button1["events"]) == 3 + + def test_inherited_form_detected(self, mapping): + """'inherited' keyword is parsed like 'object' — component detected.""" + comps = mapping.extract_components(self.INHERITED_DFM) + names = [c["name"] for c in comps] + assert "frmChild" in names + + def test_inherited_flag_set(self, mapping): + """Component created with 'inherited' has inherited=True.""" + comps = mapping.extract_components(self.INHERITED_DFM) + frmchild = next(c for c in comps if c["name"] == "frmChild") + assert frmchild["inherited"] is True + + def test_regular_component_not_inherited(self, mapping): + """Standard 'object' declaration has inherited=False.""" + comps = mapping.extract_components(self.SIMPLE_DFM) + form1 = next(c for c in comps if c["name"] == "Form1") + assert form1["inherited"] is False + + def test_component_count_nested(self, mapping): + """NESTED_DFM produces exactly 4 components.""" + comps = mapping.extract_components(self.NESTED_DFM) + assert len(comps) == 4 + + +# ============================================================================= +# Realistic main-form DFM fixture +# 
============================================================================= + + +class TestDfmFixtures: + """Tests with a realistic main-form DFM fixture.""" + + MAIN_FORM_DFM = """\ +object frmMain: TfrmMain + Left = 0 + Top = 0 + Caption = 'Main Application' + ClientHeight = 600 + ClientWidth = 800 + object pnlTop: TPanel + Align = alTop + Height = 50 + object btnLogin: TButton + Left = 10 + Top = 10 + Caption = 'Login' + OnClick = btnLoginClick + end + object btnLogout: TButton + Left = 100 + Top = 10 + Caption = 'Logout' + OnClick = btnLogoutClick + end + end + object pnlMain: TPanel + Align = alClient + object grdData: TDBGrid + Align = alClient + end + end +end""" + + @pytest.fixture + def mapping(self): + return DfmMapping() + + def test_all_six_components_found(self, mapping): + """All 6 components (frmMain, pnlTop, btnLogin, btnLogout, pnlMain, grdData) found.""" + comps = mapping.extract_components(self.MAIN_FORM_DFM) + names = [c["name"] for c in comps] + assert len(comps) == 6 + assert "frmMain" in names + assert "pnlTop" in names + assert "btnLogin" in names + assert "btnLogout" in names + assert "pnlMain" in names + assert "grdData" in names + + def test_event_handlers_found(self, mapping): + """btnLoginClick and btnLogoutClick event handlers extracted.""" + comps = mapping.extract_components(self.MAIN_FORM_DFM) + all_handlers = [] + for c in comps: + all_handlers.extend(e["handler"] for e in c.get("events", [])) + assert "btnLoginClick" in all_handlers + assert "btnLogoutClick" in all_handlers + + def test_root_component_is_form(self, mapping): + """The root component is frmMain of type TfrmMain at depth 0.""" + comps = mapping.extract_components(self.MAIN_FORM_DFM) + root = comps[0] + assert root["name"] == "frmMain" + assert root["component_type"] == "TfrmMain" + assert root["depth"] == 0 + + def test_panel_depth_is_one(self, mapping): + """Top-level panels (pnlTop, pnlMain) are at depth 1.""" + comps = 
mapping.extract_components(self.MAIN_FORM_DFM) + by_name = {c["name"]: c for c in comps} + assert by_name["pnlTop"]["depth"] == 1 + assert by_name["pnlMain"]["depth"] == 1 + + def test_buttons_depth_is_two(self, mapping): + """Buttons inside panels are at depth 2.""" + comps = mapping.extract_components(self.MAIN_FORM_DFM) + by_name = {c["name"]: c for c in comps} + assert by_name["btnLogin"]["depth"] == 2 + assert by_name["btnLogout"]["depth"] == 2 From c2afaf8894945a8014fb77fa7524ee1e207d216b Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:58:30 +0100 Subject: [PATCH 10/12] test(pascal): integrate Pascal into language_coverage tests --- tests/test_language_coverage.py | 67 +++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/test_language_coverage.py b/tests/test_language_coverage.py index c16138d5..eae82082 100644 --- a/tests/test_language_coverage.py +++ b/tests/test_language_coverage.py @@ -228,6 +228,26 @@ def test_php_usings(self, extract_imports): assert any("Illuminate" in imp for imp in imports) assert "vendor/autoload.php" in imports assert "config.php" in imports + def test_pascal_uses_imports(self, extract_imports): + """Test Pascal uses-clause import extraction.""" + code = '''unit Test; +interface +uses + System.SysUtils, + System.Classes, + UAuth; +implementation +end.''' + imports = extract_imports("pascal", code) + assert "System.SysUtils" in imports + assert "System.Classes" in imports + assert "UAuth" in imports + + def test_delphi_alias_uses_imports(self, extract_imports): + """Test 'delphi' language alias for Pascal uses-clause extraction.""" + code = 'unit Test;\ninterface\nuses SysUtils;\nimplementation\nend.' 
+ imports = extract_imports("delphi", code) + assert "SysUtils" in imports # ============================================================================== @@ -975,3 +995,50 @@ def test_csharp_namespace_and_type(self, ts_extract_imports, ts_languages): pytest.skip("Enhanced C# type name extraction not available in this env") assert "Qdrant.Client" in imports assert "Client" in imports + + +# ============================================================================== +# Integration Tests for Pascal Symbol Extraction +# ============================================================================== + +class TestPascalSymbolCoverage: + """Integration tests for Pascal symbol extraction via _extract_symbols_pascal.""" + + @pytest.fixture + def extract_symbols(self): + """Return the Pascal symbol extraction function.""" + from scripts.ingest.symbols import _extract_symbols_pascal + return _extract_symbols_pascal + + def test_pascal_class_symbol(self, extract_symbols): + """Pascal class declaration is extracted with kind='class'.""" + code = "type\n TMyService = class(TInterfacedObject)\n end;" + syms = extract_symbols(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "TMyService" in names_kinds + assert names_kinds["TMyService"] == "class" + + def test_pascal_method_symbol(self, extract_symbols): + """Pascal class method is extracted with kind='method' and dotted path.""" + code = "procedure TMyService.Execute;\nbegin\nend;" + syms = extract_symbols(code) + method = next((s for s in syms if s["name"] == "Execute"), None) + assert method is not None + assert method["kind"] == "method" + assert "TMyService.Execute" in method.get("path", "") + + def test_pascal_enum_symbol(self, extract_symbols): + """Pascal enumeration is extracted with kind='enum'.""" + code = "type\n TStatus = (stNew, stActive, stClosed);" + syms = extract_symbols(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "TStatus" in names_kinds + assert names_kinds["TStatus"] ==
"enum" + + def test_pascal_constant_symbol(self, extract_symbols): + """Pascal UPPERCASE constant is extracted with kind='constant'.""" + code = "const\n MAX_CONNECTIONS = 100;" + syms = extract_symbols(code) + names_kinds = {s["name"]: s["kind"] for s in syms} + assert "MAX_CONNECTIONS" in names_kinds + assert names_kinds["MAX_CONNECTIONS"] == "constant" From 4aca9525f6f4fa489553119d7cbc54dacf5529de Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 01:58:41 +0100 Subject: [PATCH 11/12] test(pascal): update expected mapping count from 32 to 35 in ast_analyzer_mappings test --- tests/test_ast_analyzer_mappings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ast_analyzer_mappings.py b/tests/test_ast_analyzer_mappings.py index 88017ec2..17f1ea03 100644 --- a/tests/test_ast_analyzer_mappings.py +++ b/tests/test_ast_analyzer_mappings.py @@ -29,7 +29,7 @@ # ============================================================================= class TestLanguageMappingsComplete: - """Verify all 32 language mappings can be instantiated.""" + """Verify all 35 language mappings can be instantiated.""" def test_all_mappings_instantiate(self): """Every registered mapping class should instantiate without error.""" @@ -46,7 +46,7 @@ def test_all_mappings_instantiate(self): failed.append((lang, str(e))) assert len(failed) == 0, f"Failed mappings: {failed}" - assert len(passed) == 32, f"Expected 32 mappings, got {len(passed)}" + assert len(passed) == 35, f"Expected 35 mappings, got {len(passed)}" def test_all_mappings_have_definition_query(self): """All mappings should provide a DEFINITION query.""" From 1646709a9714917b6d8d14fe501675168c1d357d Mon Sep 17 00:00:00 2001 From: Olaf Monien Date: Wed, 18 Feb 2026 02:48:19 +0100 Subject: [PATCH 12/12] docs(pascal): add Delphi/Pascal support documentation for PR review Co-Authored-By: Claude Opus 4.6 --- docs/DELPHI_SUPPORT.md | 148 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 
insertions(+) create mode 100644 docs/DELPHI_SUPPORT.md diff --git a/docs/DELPHI_SUPPORT.md b/docs/DELPHI_SUPPORT.md new file mode 100644 index 00000000..c381ea76 --- /dev/null +++ b/docs/DELPHI_SUPPORT.md @@ -0,0 +1,148 @@ +# Delphi/Pascal Support for Context-Engine + +## Summary + +This PR adds full support for **Delphi/Pascal** source files and **DFM/FMX form files** to the Context-Engine. The implementation follows the existing `language_mappings` system and integrates seamlessly into the current architecture. + +### About Delphi/Pascal + +[Delphi](https://www.embarcadero.com/products/delphi) is a commercial IDE and compiler for Object Pascal, widely used for building native Windows desktop applications, cross-platform mobile apps (via FireMonkey), and server-side systems. It has a large legacy codebase footprint, particularly in enterprise and industrial environments. + +Key characteristics relevant to indexing: +- **`.pas` units** contain Object Pascal source code with a distinctive `interface`/`implementation` section structure and `uses` clauses for dependency management. +- **`.dfm`/`.fmx` form files** are Delphi-specific declarative files that describe UI layouts (component trees, properties, and event handler bindings). They are not code but have strong cross-references into `.pas` units. +- **`.dpr`/`.dpk` project files** are Pascal source files that define the entry point for applications and packages respectively. +- **`.lpr` project files** are the [Lazarus](https://www.lazarus-ide.org/) (Free Pascal) equivalent of `.dpr` files, used by the open-source Lazarus IDE. + +**Scope:** 11 files changed, ~1700 lines added, of which ~940 are tests. + +--- + +## What Was Implemented? + +### 1. 
File Detection (`config.py`) + +New file extensions registered in `CODE_EXTS`: + +| Extension | Language | Description | +|-----------|----------|-------------| +| `.pas` | `pascal` | Delphi/Lazarus unit | +| `.dpr` | `pascal` | Delphi project file | +| `.dpk` | `pascal` | Delphi package file | +| `.lpr` | `pascal` | Lazarus project file | +| `.dfm` | `dfm` | VCL form file | +| `.fmx` | `dfm` | FireMonkey form file | + +Additional exclusions for Delphi-specific artifacts: +- Directories: `__history`, `__recovery` (Delphi IDE backups) +- Files: `*.dcu`, `*.dcp`, `*.dcpil` (compiled binaries) + +### 2. PascalMapping (`language_mappings/pascal.py`) + +Full language mapping for Pascal/Delphi using a **dual approach**: + +- **Tree-sitter queries** (when `tree_sitter_pascal` is installed) for AST nodes: `declProc`, `declClass`, `declIntf`, `declEnum`, `declType`, `declConst`, `defProc` +- **Regex fallback** (always available), which currently acts as the primary implementation because no compatible `tree_sitter_pascal` package is published yet + +Extracted concepts: + +| Concept | Example | Metadata Kind | +|---------|---------|---------------| +| Classes | `TMyClass = class(TBase)` | `class` | +| Records | `TPoint = record` | `record` | +| Interfaces | `ILogger = interface` | `interface` | +| Enumerations | `TStatus = (stNew, stActive)` | `enum` | +| Procedures/Functions | `procedure Execute;` | `function` | +| Methods | `procedure TMyClass.Execute;` | `method` | +| Constants | `const MAX = 100;` | `constant` | +| Type aliases | `TStringList = TList;` | `type_alias` | +| Uses clauses | `uses System.SysUtils;` | `import` | + +**Built-in filter:** RTL/VCL/FMX standard units (System, SysUtils, Classes, etc.) are recognized as built-ins to prevent false cross-references. + +### 3.
DfmMapping (`language_mappings/dfm.py`) + +Standalone mapping for DFM/FMX form files — purely regex-based (no tree-sitter needed): + +- Detects component declarations (`object ButtonLogin: TButton`) +- Detects nested components with hierarchy tracking +- Extracts event handler bindings (`OnClick = ButtonLoginClick`) as cross-file references +- Handles `inherited` forms correctly +- Skips multiline properties and item collections + +### 4. Import Extraction (`metadata.py`) + +Pascal `uses` clauses are correctly extracted — both single-line and multi-line: + +```pascal +uses + System.SysUtils, + System.Classes, + UAuth; +``` + +Keywords like `uses`, `in`, `interface`, `implementation` are filtered out. + +### 5. Symbol Extraction (`symbols.py`) + +`_extract_symbols_pascal()` extracts all symbol types with correct `kind`, `name`, and `path` (e.g., `TMyClass.Execute` for methods). + +### 6. Tree-sitter Integration (`tree_sitter.py`) + +`tree_sitter_pascal` has been added as an optional entry in the language loader. Since loading is wrapped in `try/except`, the loader gracefully falls back to the regex implementation when the `tree_sitter_pascal` package is not installed. + +### 7. Language Registry (`language_mappings/__init__.py`) + +Three new entries: +- `"pascal"` → `PascalMapping` +- `"delphi"` → `PascalMapping` (alias) +- `"dfm"` → `DfmMapping` + +--- + +## Design Decisions + +### Why a Dual Approach (Regex + Optional Tree-sitter)? + +There is no official `tree_sitter_pascal` Python package on PyPI compatible with the `tree_sitter` 0.25+ API. The regex fallback therefore serves as the primary implementation. Once a compatible package becomes available, the tree-sitter integration will activate automatically — without any code changes. + +### Why Separate Mappings for `.pas` and `.dfm`? + +DFM/FMX files have a completely different format from Pascal code (declarative property listings rather than program code).
A separate `DfmMapping` with its own `"dfm"` language key is cleaner than mixing everything into `PascalMapping`. + +### Reference: Codegraph + +The Delphi support in Codegraph (TypeScript/web-tree-sitter) served as the reference implementation for AST node types, built-in filters, and DFM parsing. + +--- + +## Tests + +**~120 new tests** across four test files: + +| Test File | Tests | Coverage | +|-----------|-------|----------| +| `test_pascal_language_mapping.py` | 47 | PascalMapping: instantiation, queries, import extraction, symbol extraction, metadata, realistic fixtures | +| `test_dfm_language_mapping.py` | ~30 | DfmMapping: instantiation, components, events, hierarchy, multiline properties, collections | +| `test_language_coverage.py` | 7 | Integration: Pascal uses-imports, Delphi alias, symbol coverage | +| `test_ast_analyzer_mappings.py` | 1 | Mapping count updated from 32 → 35 | + +All tests pass (`1197 passed`). + +--- + +## Files Changed + +| File | Type | Description | +|------|------|-------------| +| `scripts/ingest/config.py` | Modified | +6 extensions, +5 exclusions | +| `scripts/ingest/language_mappings/pascal.py` | **New** | PascalMapping (~400 lines) | +| `scripts/ingest/language_mappings/dfm.py` | **New** | DfmMapping (~220 lines) | +| `scripts/ingest/language_mappings/__init__.py` | Modified | +3 registry entries | +| `scripts/ingest/metadata.py` | Modified | +Pascal uses-clause extraction | +| `scripts/ingest/symbols.py` | Modified | +`_extract_symbols_pascal()` | +| `scripts/ingest/tree_sitter.py` | Modified | +optional `tree_sitter_pascal` entry | +| `tests/test_pascal_language_mapping.py` | **New** | 47 unit tests | +| `tests/test_dfm_language_mapping.py` | **New** | DFM tests | +| `tests/test_language_coverage.py` | Modified | +Pascal integration tests | +| `tests/test_ast_analyzer_mappings.py` | Modified | Mapping count updated |