From 38748882cd57704a950fa7b3558b0e09be7f9090 Mon Sep 17 00:00:00 2001 From: Ryan Orendorff <12442942+ryanorendorff@users.noreply.github.com> Date: Thu, 16 Oct 2025 14:59:34 -0600 Subject: [PATCH] Add file extension metadata Replace get_default_fence_markers with get_language_metadata that returns both fence markers and file extensions for all languages. --- src/compilation.rs | 2 +- src/config.rs | 2 +- src/language.rs | 617 ++++++++++++++++++++++++------------------ src/reporting.rs | 10 +- src/task_collector.rs | 8 +- 5 files changed, 362 insertions(+), 277 deletions(-) diff --git a/src/compilation.rs b/src/compilation.rs index 57169bf..be9d291 100644 --- a/src/compilation.rs +++ b/src/compilation.rs @@ -37,7 +37,7 @@ impl CompilationTask { /// This method performs the actual compilation, measures duration, /// and converts any errors into the appropriate result format. pub async fn compile(self) -> CompilationResult { - log::debug!("Compiling {} block", self.language.name()); + log::debug!("Compiling {} block", self.language); let start = Instant::now(); let compile_result = self.language.compile(&self.code, &self.temp_path).await; diff --git a/src/config.rs b/src/config.rs index 3132f9a..d5a7384 100644 --- a/src/config.rs +++ b/src/config.rs @@ -155,7 +155,7 @@ impl LanguageConfig { /// A vector of fence marker strings for this language. pub fn get_fence_markers(&self, lang_name: &str) -> Vec { if self.fence_markers.is_empty() { - crate::language::get_default_fence_markers(lang_name) + crate::language::get_language_metadata(lang_name).fence_markers } else { self.fence_markers.clone() } diff --git a/src/language.rs b/src/language.rs index 970f464..24ba680 100644 --- a/src/language.rs +++ b/src/language.rs @@ -1,15 +1,55 @@ use crate::config::{CheckCodeConfig, LanguageConfig}; use anyhow::{Context, Result}; +use std::borrow::Cow; +use std::fmt; use std::path::Path; use tokio::fs::File; use tokio::io::AsyncWriteExt; use tokio::process::Command; -/// Get default fence markers for a language based on highlight.js language definitions. +/// Metadata for a programming language including fence markers and file extension. +/// +/// Uses `Cow<'static, str>` for the file extension to avoid allocations for known +/// languages while supporting dynamic extensions for custom languages. +/// +/// # Special Cases +/// +/// - **Makefile**: Returns "Makefile" (no dot prefix) instead of ".makefile" since +/// Makefiles conventionally use this exact filename rather than an extension. +#[derive(Debug, Clone)] +pub struct LanguageMetadata { + pub fence_markers: Vec, + pub file_extension: Cow<'static, str>, +} + +impl LanguageMetadata { + /// Returns whether this file extension represents a complete filename rather than an extension. + /// + /// Some languages like Makefile use a specific filename convention without a dot prefix. + /// + /// # Examples + /// + /// ```ignore + /// let makefile_meta = get_language_metadata("makefile"); + /// assert!(makefile_meta.is_complete_filename()); + /// assert_eq!(makefile_meta.file_extension, "Makefile"); + /// + /// let c_meta = get_language_metadata("c"); + /// assert!(!c_meta.is_complete_filename()); + /// assert_eq!(c_meta.file_extension, ".c"); + /// ``` + #[allow(dead_code)] + pub fn is_complete_filename(&self) -> bool { + !self.file_extension.starts_with('.') + } +} + +/// Get language metadata (fence markers and file extension) for a language. /// /// This function returns the canonical language name plus common aliases that highlight.js -/// recognizes for syntax highlighting. If no built-in mapping exists, returns just the -/// language name itself. +/// recognizes for syntax highlighting, along with the standard file extension for the language. +/// If no built-in mapping exists, returns the language name as the fence marker and uses it +/// as the file extension with a dot prefix. /// /// # Arguments /// @@ -17,278 +57,300 @@ use tokio::process::Command; /// /// # Returns /// -/// A vector of fence marker strings that should recognize this language in markdown. +/// `LanguageMetadata` containing fence markers and file extension. /// /// # Examples /// /// ```ignore -/// assert_eq!(get_default_fence_markers("c"), vec!["c", "h"]); -/// assert_eq!(get_default_fence_markers("typescript"), vec!["typescript", "ts", "tsx", "mts", "cts"]); -/// assert_eq!(get_default_fence_markers("unknown"), vec!["unknown"]); +/// let metadata = get_language_metadata("c"); +/// assert_eq!(metadata.fence_markers, vec!["c", "h"]); +/// assert_eq!(metadata.file_extension, ".c"); +/// +/// let metadata = get_language_metadata("typescript"); +/// assert_eq!(metadata.file_extension, ".ts"); /// ``` /// /// # Reference /// -/// Language aliases are based on highlight.js SUPPORTED_LANGUAGES.md: -/// https://github.com/highlightjs/highlight.js/blob/main/SUPPORTED_LANGUAGES.md -pub fn get_default_fence_markers(lang_name: &str) -> Vec { - match lang_name { +/// - Fence markers based on highlight.js SUPPORTED_LANGUAGES.md +/// - File extensions based on GitHub Linguist languages.yml +pub fn get_language_metadata(lang_name: &str) -> LanguageMetadata { + let (markers, ext) = match lang_name { // Numbers & Special - "1c" => vec!["1c"], - "4d" => vec!["4d"], + "1c" => (vec!["1c"], ".bsl"), + "4d" => (vec!["4d"], ".4dm"), // A - "abap" => vec!["sap-abap", "abap"], - "abc" => vec!["abc"], - "abnf" => vec!["abnf"], - "accesslog" => vec!["accesslog"], - "actionscript" => vec!["actionscript", "as"], - "ada" => vec!["ada"], - "aiken" => vec!["aiken", "ak"], - "alan" => vec!["alan", "i", "ln"], - "angelscript" => vec!["angelscript", "asc"], - "apache" => vec!["apache", "apacheconf"], - "apex" => vec!["apex"], - "applescript" => vec!["applescript", "osascript"], - "arcade" => vec!["arcade"], - "arduino" => vec!["arduino", "ino"], - "armasm" => vec!["armasm", "arm"], - "asciidoc" => vec!["asciidoc", "adoc"], - "aspectj" => vec!["aspectj"], - "autohotkey" => vec!["autohotkey"], - "autoit" => vec!["autoit"], - "avrasm" => vec!["avrasm"], - "awk" => vec!["awk", "mawk", "nawk", "gawk"], + "abap" => (vec!["sap-abap", "abap"], ".abap"), + "abc" => (vec!["abc"], ".abc"), + "abnf" => (vec!["abnf"], ".abnf"), + "accesslog" => (vec!["accesslog"], ".log"), + "actionscript" => (vec!["actionscript", "as"], ".as"), + "ada" => (vec!["ada"], ".adb"), + "aiken" => (vec!["aiken", "ak"], ".ak"), + "alan" => (vec!["alan", "i", "ln"], ".alan"), + "angelscript" => (vec!["angelscript", "asc"], ".as"), + "apache" => (vec!["apache", "apacheconf"], ".conf"), + "apex" => (vec!["apex"], ".cls"), + "applescript" => (vec!["applescript", "osascript"], ".scpt"), + "arcade" => (vec!["arcade"], ".arcade"), + "arduino" => (vec!["arduino", "ino"], ".ino"), + "armasm" => (vec!["armasm", "arm"], ".s"), + "asciidoc" => (vec!["asciidoc", "adoc"], ".adoc"), + "aspectj" => (vec!["aspectj"], ".aj"), + "autohotkey" => (vec!["autohotkey"], ".ahk"), + "autoit" => (vec!["autoit"], ".au3"), + "avrasm" => (vec!["avrasm"], ".asm"), + "awk" => (vec!["awk", "mawk", "nawk", "gawk"], ".awk"), // B - "bash" => vec!["bash", "sh", "zsh"], - "basic" => vec!["basic"], - "bnf" => vec!["bnf"], - "brainfuck" => vec!["brainfuck", "bf"], + "bash" => (vec!["bash", "sh", "zsh"], ".sh"), + "basic" => (vec!["basic"], ".bas"), + "bnf" => (vec!["bnf"], ".bnf"), + "brainfuck" => (vec!["brainfuck", "bf"], ".bf"), // C - "c" => vec!["c", "h"], - "cal" => vec!["cal"], - "capnproto" => vec!["capnproto", "capnp"], - "ceylon" => vec!["ceylon"], - "clean" => vec!["clean", "icl", "dcl"], - "clojure" => vec!["clojure", "clj"], - "clojurerepl" => vec!["clojure-repl"], - "cmake" => vec!["cmake", "cmake.in"], - "coffeescript" => vec!["coffeescript", "coffee", "cson", "iced"], - "coq" => vec!["coq"], - "cos" => vec!["cos", "cls"], - "cpp" => vec!["cpp", "hpp", "cc", "hh", "c++", "h++", "cxx", "hxx"], - "crmsh" => vec!["crmsh", "crm", "pcmk"], - "crystal" => vec!["crystal", "cr"], - "csharp" => vec!["csharp", "cs"], - "csp" => vec!["csp"], - "css" => vec!["css"], + "c" => (vec!["c", "h"], ".c"), + "cal" => (vec!["cal"], ".cal"), + "capnproto" => (vec!["capnproto", "capnp"], ".capnp"), + "ceylon" => (vec!["ceylon"], ".ceylon"), + "clean" => (vec!["clean", "icl", "dcl"], ".icl"), + "clojure" => (vec!["clojure", "clj"], ".clj"), + "clojurerepl" => (vec!["clojure-repl"], ".clj"), + "cmake" => (vec!["cmake", "cmake.in"], ".cmake"), + "coffeescript" => (vec!["coffeescript", "coffee", "cson", "iced"], ".coffee"), + "coq" => (vec!["coq"], ".v"), + "cos" => (vec!["cos", "cls"], ".cls"), + "cpp" => ( + vec!["cpp", "hpp", "cc", "hh", "c++", "h++", "cxx", "hxx"], + ".cpp", + ), + "crmsh" => (vec!["crmsh", "crm", "pcmk"], ".crmsh"), + "crystal" => (vec!["crystal", "cr"], ".cr"), + "csharp" => (vec!["csharp", "cs"], ".cs"), + "csp" => (vec!["csp"], ".csp"), + "css" => (vec!["css"], ".css"), // D - "d" => vec!["d"], - "dart" => vec!["dart"], - "delphi" => vec!["delphi", "dpr", "dfm", "pas", "pascal"], - "diff" => vec!["diff", "patch"], - "django" => vec!["django", "jinja"], - "dns" => vec!["dns", "zone", "bind"], - "dockerfile" => vec!["dockerfile", "docker"], - "dos" => vec!["dos", "bat", "cmd"], - "dsconfig" => vec!["dsconfig"], - "dts" => vec!["dts"], - "dust" => vec!["dust", "dst"], + "d" => (vec!["d"], ".d"), + "dart" => (vec!["dart"], ".dart"), + "delphi" => (vec!["delphi", "dpr", "dfm", "pas", "pascal"], ".pas"), + "diff" => (vec!["diff", "patch"], ".diff"), + "django" => (vec!["django", "jinja"], ".html"), + "dns" => (vec!["dns", "zone", "bind"], ".zone"), + "dockerfile" => (vec!["dockerfile", "docker"], ".dockerfile"), + "dos" => (vec!["dos", "bat", "cmd"], ".bat"), + "dsconfig" => (vec!["dsconfig"], ".dsconfig"), + "dts" => (vec!["dts"], ".dts"), + "dust" => (vec!["dust", "dst"], ".dust"), // E - "ebnf" => vec!["ebnf"], - "elixir" => vec!["elixir"], - "elm" => vec!["elm"], - "erb" => vec!["erb"], - "erlang" => vec!["erlang", "erl"], - "erlang-repl" => vec!["erlang-repl"], - "excel" => vec!["excel", "xls", "xlsx"], + "ebnf" => (vec!["ebnf"], ".ebnf"), + "elixir" => (vec!["elixir"], ".ex"), + "elm" => (vec!["elm"], ".elm"), + "erb" => (vec!["erb"], ".erb"), + "erlang" => (vec!["erlang", "erl"], ".erl"), + "erlang-repl" => (vec!["erlang-repl"], ".erl"), + "excel" => (vec!["excel", "xls", "xlsx"], ".xlsx"), // F - "fix" => vec!["fix"], - "flix" => vec!["flix"], - "fortran" => vec!["fortran", "f90", "f95"], - "fsharp" => vec!["fsharp", "fs", "fsx", "fsi", "fsscript"], + "fix" => (vec!["fix"], ".fix"), + "flix" => (vec!["flix"], ".flix"), + "fortran" => (vec!["fortran", "f90", "f95"], ".f90"), + "fsharp" => (vec!["fsharp", "fs", "fsx", "fsi", "fsscript"], ".fs"), // G - "gams" => vec!["gams", "gms"], - "gauss" => vec!["gauss", "gss"], - "gcode" => vec!["gcode", "nc"], - "gherkin" => vec!["gherkin"], - "glsl" => vec!["glsl"], - "gml" => vec!["gml"], - "go" => vec!["go", "golang"], - "golo" => vec!["golo", "gololang"], - "gradle" => vec!["gradle"], - "graphql" => vec!["graphql", "gql"], - "groovy" => vec!["groovy"], + "gams" => (vec!["gams", "gms"], ".gms"), + "gauss" => (vec!["gauss", "gss"], ".gss"), + "gcode" => (vec!["gcode", "nc"], ".gcode"), + "gherkin" => (vec!["gherkin"], ".feature"), + "glsl" => (vec!["glsl"], ".glsl"), + "gml" => (vec!["gml"], ".gml"), + "go" => (vec!["go", "golang"], ".go"), + "golo" => (vec!["golo", "gololang"], ".golo"), + "gradle" => (vec!["gradle"], ".gradle"), + "graphql" => (vec!["graphql", "gql"], ".graphql"), + "groovy" => (vec!["groovy"], ".groovy"), // H - "haml" => vec!["haml"], - "handlebars" => vec!["handlebars", "hbs", "html.hbs", "html.handlebars"], - "haskell" => vec!["haskell", "hs"], - "haxe" => vec!["haxe", "hx"], - "hsp" => vec!["hsp"], - "html" => vec!["html", "xhtml"], - "http" => vec!["http", "https"], - "hy" => vec!["hy", "hylang"], + "haml" => (vec!["haml"], ".haml"), + "handlebars" => ( + vec!["handlebars", "hbs", "html.hbs", "html.handlebars"], + ".hbs", + ), + "haskell" => (vec!["haskell", "hs"], ".hs"), + "haxe" => (vec!["haxe", "hx"], ".hx"), + "hsp" => (vec!["hsp"], ".hsp"), + "html" => (vec!["html", "xhtml"], ".html"), + "http" => (vec!["http", "https"], ".http"), + "hy" => (vec!["hy", "hylang"], ".hy"), // I - "inform7" => vec!["inform7", "i7"], - "ini" => vec!["ini", "toml"], - "irpf90" => vec!["irpf90"], - "isbl" => vec!["isbl"], + "inform7" => (vec!["inform7", "i7"], ".ni"), + "ini" => (vec!["ini", "toml"], ".ini"), + "irpf90" => (vec!["irpf90"], ".irpf90"), + "isbl" => (vec!["isbl"], ".isbl"), // J - "java" => vec!["java", "jsp"], - "javascript" => vec!["javascript", "js", "jsx"], - "jbosscli" => vec!["jboss-cli", "wildfly-cli"], - "json" => vec!["json", "jsonc", "json5"], - "julia" => vec!["julia", "julia-repl"], + "java" => (vec!["java", "jsp"], ".java"), + "javascript" => (vec!["javascript", "js", "jsx"], ".js"), + "jbosscli" => (vec!["jboss-cli", "wildfly-cli"], ".cli"), + "json" => (vec!["json", "jsonc", "json5"], ".json"), + "julia" => (vec!["julia", "julia-repl"], ".jl"), // K - "kotlin" => vec!["kotlin", "kt"], + "kotlin" => (vec!["kotlin", "kt"], ".kt"), // L - "lasso" => vec!["lasso", "ls", "lassoscript"], - "latex" => vec!["tex"], - "ldif" => vec!["ldif"], - "leaf" => vec!["leaf"], - "less" => vec!["less"], - "lisp" => vec!["lisp"], - "livecodeserver" => vec!["livecodeserver"], - "livescript" => vec!["livescript", "ls"], - "llvm" => vec!["llvm"], - "lsl" => vec!["lsl"], - "lua" => vec!["lua", "pluto"], + "lasso" => (vec!["lasso", "ls", "lassoscript"], ".lasso"), + "latex" => (vec!["tex"], ".tex"), + "ldif" => (vec!["ldif"], ".ldif"), + "leaf" => (vec!["leaf"], ".leaf"), + "less" => (vec!["less"], ".less"), + "lisp" => (vec!["lisp"], ".lisp"), + "livecodeserver" => (vec!["livecodeserver"], ".livecodescript"), + "livescript" => (vec!["livescript", "ls"], ".ls"), + "llvm" => (vec!["llvm"], ".ll"), + "lsl" => (vec!["lsl"], ".lsl"), + "lua" => (vec!["lua", "pluto"], ".lua"), // M - "makefile" => vec!["makefile", "mk", "mak", "make"], - "markdown" => vec!["markdown", "md", "mkdown", "mkd"], - "mathematica" => vec!["mathematica", "mma", "wl"], - "matlab" => vec!["matlab"], - "maxima" => vec!["maxima"], - "mel" => vec!["mel"], - "mercury" => vec!["mercury"], - "mipsasm" => vec!["mipsasm", "mips"], - "mizar" => vec!["mizar"], - "mojolicious" => vec!["mojolicious"], - "monkey" => vec!["monkey"], - "moonscript" => vec!["moonscript", "moon"], + "makefile" => (vec!["makefile", "mk", "mak", "make"], "Makefile"), + "markdown" => (vec!["markdown", "md", "mkdown", "mkd"], ".md"), + "mathematica" => (vec!["mathematica", "mma", "wl"], ".m"), + "matlab" => (vec!["matlab"], ".m"), + "maxima" => (vec!["maxima"], ".mac"), + "mel" => (vec!["mel"], ".mel"), + "mercury" => (vec!["mercury"], ".m"), + "mipsasm" => (vec!["mipsasm", "mips"], ".s"), + "mizar" => (vec!["mizar"], ".miz"), + "mojolicious" => (vec!["mojolicious"], ".pm"), + "monkey" => (vec!["monkey"], ".monkey"), + "moonscript" => (vec!["moonscript", "moon"], ".moon"), // N - "n1ql" => vec!["n1ql"], - "nestedtext" => vec!["nestedtext", "nt"], - "nginx" => vec!["nginx", "nginxconf"], - "nim" => vec!["nim", "nimrod"], - "nix" => vec!["nix"], - "node-repl" => vec!["node-repl"], - "nsis" => vec!["nsis"], + "n1ql" => (vec!["n1ql"], ".n1ql"), + "nestedtext" => (vec!["nestedtext", "nt"], ".nt"), + "nginx" => (vec!["nginx", "nginxconf"], ".conf"), + "nim" => (vec!["nim", "nimrod"], ".nim"), + "nix" => (vec!["nix"], ".nix"), + "node-repl" => (vec!["node-repl"], ".js"), + "nsis" => (vec!["nsis"], ".nsi"), // O - "objectivec" => vec!["objectivec", "mm", "objc", "obj-c"], - "ocaml" => vec!["ocaml", "ml"], - "openscad" => vec!["openscad", "scad"], - "oxygene" => vec!["oxygene"], + "objectivec" => (vec!["objectivec", "mm", "objc", "obj-c"], ".m"), + "ocaml" => (vec!["ocaml", "ml"], ".ml"), + "openscad" => (vec!["openscad", "scad"], ".scad"), + "oxygene" => (vec!["oxygene"], ".pas"), // P - "parser3" => vec!["parser3"], - "perl" => vec!["perl", "pl", "pm"], - "pf" => vec!["pf", "pf.conf"], - "pgsql" => vec!["pgsql", "postgres", "postgresql"], - "php" => vec!["php"], - "phptemplate" => vec!["php-template"], - "plaintext" => vec!["plaintext", "txt", "text"], - "pony" => vec!["pony"], - "powershell" => vec!["powershell", "ps", "ps1"], - "processing" => vec!["processing"], - "profile" => vec!["profile"], - "prolog" => vec!["prolog"], - "properties" => vec!["properties"], - "protobuf" => vec!["protobuf"], - "puppet" => vec!["puppet", "pp"], - "purebasic" => vec!["purebasic", "pb", "pbi"], - "python" => vec!["python", "py", "gyp"], - "pythonrepl" => vec!["python-repl", "pycon"], + "parser3" => (vec!["parser3"], ".p"), + "perl" => (vec!["perl", "pl", "pm"], ".pl"), + "pf" => (vec!["pf", "pf.conf"], ".conf"), + "pgsql" => (vec!["pgsql", "postgres", "postgresql"], ".sql"), + "php" => (vec!["php"], ".php"), + "phptemplate" => (vec!["php-template"], ".php"), + "plaintext" => (vec!["plaintext", "txt", "text"], ".txt"), + "pony" => (vec!["pony"], ".pony"), + "powershell" => (vec!["powershell", "ps", "ps1"], ".ps1"), + "processing" => (vec!["processing"], ".pde"), + "profile" => (vec!["profile"], ".profile"), + "prolog" => (vec!["prolog"], ".pl"), + "properties" => (vec!["properties"], ".properties"), + "protobuf" => (vec!["protobuf"], ".proto"), + "puppet" => (vec!["puppet", "pp"], ".pp"), + "purebasic" => (vec!["purebasic", "pb", "pbi"], ".pb"), + "python" => (vec!["python", "py", "gyp"], ".py"), + "pythonrepl" => (vec!["python-repl", "pycon"], ".py"), // Q - "q" => vec!["k", "kdb"], - "qml" => vec!["qml"], + "q" => (vec!["k", "kdb"], ".q"), + "qml" => (vec!["qml"], ".qml"), // R - "r" => vec!["r"], - "reasonml" => vec!["reasonml", "re"], - "rib" => vec!["rib"], - "roboconf" => vec!["graph", "instances"], - "routeros" => vec!["routeros", "mikrotik"], - "rsl" => vec!["rsl"], - "ruby" => vec!["ruby", "rb", "gemspec", "podspec", "thor", "irb"], - "ruleslanguage" => vec!["ruleslanguage"], - "rust" => vec!["rust", "rs"], + "r" => (vec!["r"], ".r"), + "reasonml" => (vec!["reasonml", "re"], ".re"), + "rib" => (vec!["rib"], ".rib"), + "roboconf" => (vec!["graph", "instances"], ".graph"), + "routeros" => (vec!["routeros", "mikrotik"], ".rsc"), + "rsl" => (vec!["rsl"], ".rsl"), + "ruby" => ( + vec!["ruby", "rb", "gemspec", "podspec", "thor", "irb"], + ".rb", + ), + "ruleslanguage" => (vec!["ruleslanguage"], ".rule"), + "rust" => (vec!["rust", "rs"], ".rs"), // S - "sas" => vec!["sas"], - "scala" => vec!["scala"], - "scheme" => vec!["scheme"], - "scilab" => vec!["scilab", "sci"], - "scss" => vec!["scss"], - "shell" => vec!["shell", "console"], - "smali" => vec!["smali"], - "smalltalk" => vec!["smalltalk", "st"], - "sml" => vec!["sml", "ml"], - "solidity" => vec!["solidity", "sol"], - "sqf" => vec!["sqf"], - "sql" => vec!["sql"], - "stan" => vec!["stan", "stanfuncs"], - "stata" => vec!["stata"], - "step21" => vec!["step21", "p21", "step", "stp"], - "stylus" => vec!["stylus", "styl"], - "subunit" => vec!["subunit"], - "swift" => vec!["swift"], + "sas" => (vec!["sas"], ".sas"), + "scala" => (vec!["scala"], ".scala"), + "scheme" => (vec!["scheme"], ".scm"), + "scilab" => (vec!["scilab", "sci"], ".sci"), + "scss" => (vec!["scss"], ".scss"), + "shell" => (vec!["shell", "console"], ".sh"), + "smali" => (vec!["smali"], ".smali"), + "smalltalk" => (vec!["smalltalk", "st"], ".st"), + "sml" => (vec!["sml", "ml"], ".sml"), + "solidity" => (vec!["solidity", "sol"], ".sol"), + "sqf" => (vec!["sqf"], ".sqf"), + "sql" => (vec!["sql"], ".sql"), + "stan" => (vec!["stan", "stanfuncs"], ".stan"), + "stata" => (vec!["stata"], ".do"), + "step21" => (vec!["step21", "p21", "step", "stp"], ".stp"), + "stylus" => (vec!["stylus", "styl"], ".styl"), + "subunit" => (vec!["subunit"], ".subunit"), + "swift" => (vec!["swift"], ".swift"), // T - "taggerscript" => vec!["taggerscript"], - "tap" => vec!["tap"], - "tcl" => vec!["tcl", "tk"], - "thrift" => vec!["thrift"], - "toml" => vec!["toml"], - "tp" => vec!["tp"], - "twig" => vec!["twig", "craftcms"], - "typescript" => vec!["typescript", "ts", "tsx", "mts", "cts"], + "taggerscript" => (vec!["taggerscript"], ".tagger"), + "tap" => (vec!["tap"], ".t"), + "tcl" => (vec!["tcl", "tk"], ".tcl"), + "thrift" => (vec!["thrift"], ".thrift"), + "toml" => (vec!["toml"], ".toml"), + "tp" => (vec!["tp"], ".tp"), + "twig" => (vec!["twig", "craftcms"], ".twig"), + "typescript" => (vec!["typescript", "ts", "tsx", "mts", "cts"], ".ts"), // V - "vala" => vec!["vala"], - "vbnet" => vec!["vbnet", "vb"], - "vbscript" => vec!["vbscript", "vbs"], - "vbscripthtmlvbscript" => vec!["vbscript-html"], - "verilog" => vec!["verilog", "v"], - "vhdl" => vec!["vhdl"], - "vim" => vec!["vim"], - "wasm" => vec!["wasm"], - "wren" => vec!["wren"], + "vala" => (vec!["vala"], ".vala"), + "vbnet" => (vec!["vbnet", "vb"], ".vb"), + "vbscript" => (vec!["vbscript", "vbs"], ".vbs"), + "vbscripthtmlvbscript" => (vec!["vbscript-html"], ".vbs"), + "verilog" => (vec!["verilog", "v"], ".v"), + "vhdl" => (vec!["vhdl"], ".vhd"), + "vim" => (vec!["vim"], ".vim"), + "wasm" => (vec!["wasm"], ".wat"), + "wren" => (vec!["wren"], ".wren"), // X - "x86asm" => vec!["x86asm"], - "xl" => vec!["xl", "tao"], - "xml" => vec!["xml", "rss", "atom", "xjb", "xsd", "xsl", "plist", "svg"], - "xquery" => vec!["xquery", "xpath", "xq"], + "x86asm" => (vec!["x86asm"], ".asm"), + "xl" => (vec!["xl", "tao"], ".xl"), + "xml" => ( + vec!["xml", "rss", "atom", "xjb", "xsd", "xsl", "plist", "svg"], + ".xml", + ), + "xquery" => (vec!["xquery", "xpath", "xq"], ".xq"), // Y - "yaml" => vec!["yaml", "yml"], + "yaml" => (vec!["yaml", "yml"], ".yaml"), // Z - "zephir" => vec!["zephir", "zep"], - "zig" => vec!["zig"], + "zephir" => (vec!["zephir", "zep"], ".zep"), + "zig" => (vec!["zig"], ".zig"), // Default: use the language name itself - _ => vec![lang_name], + _ => { + return LanguageMetadata { + fence_markers: vec![lang_name.to_string()], + file_extension: Cow::Owned(format!(".{}", lang_name)), + } + } + }; + + LanguageMetadata { + fence_markers: markers.iter().map(|s| s.to_string()).collect(), + file_extension: Cow::Borrowed(ext), } - .iter() - .map(|s| s.to_string()) - .collect() } /// A language implementation configured from `book.toml`. @@ -304,49 +366,71 @@ pub fn get_default_fence_markers(lang_name: &str) -> Vec { /// - Compiler flags (array of strings) /// - Optional preamble (prepended to all blocks) /// - Fence markers (which markdown fences map to this language) +/// +/// # Display +/// +/// The language can be formatted for display using the `Display` trait, which +/// combines the base language and variant (if present) into a string like "c" or "c-parasol". pub struct ConfiguredLanguage { - name: String, + base_language: String, + variant: Option, config: LanguageConfig, file_extension: String, } +impl fmt::Display for ConfiguredLanguage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.base_language)?; + if let Some(ref v) = self.variant { + write!(f, "-{}", v)?; + } + Ok(()) + } +} + impl ConfiguredLanguage { - pub fn new(name: String, config: LanguageConfig) -> Self { - let file_extension = Self::determine_file_extension(&config.fence_markers); + pub fn new(base_language: String, variant: Option, config: LanguageConfig) -> Self { + // Get metadata for the base language to determine file extension + let metadata = get_language_metadata(&base_language); + let file_extension = metadata.file_extension.into_owned(); + Self { - name, + base_language, + variant, config, file_extension, } } - /// Returns the name of this language (e.g., "c", "c-parasol", "typescript"). - pub fn name(&self) -> &str { - &self.name - } - /// Returns the file extension for this language (e.g., ".c", ".ts"). pub fn file_extension(&self) -> &str { &self.file_extension } - /// Determine file extension from fence markers (first marker + common extensions) - fn determine_file_extension(fence_markers: &[String]) -> String { - if let Some(first_marker) = fence_markers.first() { - match first_marker.as_str() { - "c" | "parasol-c" | "parasol" => ".c".to_string(), - "cpp" | "c++" | "cxx" => ".cpp".to_string(), - "rust" | "rs" => ".rs".to_string(), - "python" | "py" => ".py".to_string(), - "javascript" | "js" => ".js".to_string(), - "typescript" | "ts" => ".ts".to_string(), - "go" => ".go".to_string(), - "java" => ".java".to_string(), - _ => format!(".{}", first_marker), - } - } else { - ".txt".to_string() + /// Writes source code with optional preamble to a temporary file. + /// + /// # Arguments + /// + /// * `code` - The source code to write + /// * `temp_file` - Path where the code should be written + /// + /// # Errors + /// + /// Returns an error if the file cannot be created or written. + async fn write_source_file(&self, code: &str, temp_file: &Path) -> Result<()> { + let mut file = File::create(temp_file) + .await + .with_context(|| format!("Failed to create temporary file: {}", temp_file.display()))?; + + if let Some(ref preamble) = self.config.preamble { + file.write_all(preamble.as_bytes()).await?; + file.write_all(b"\n\n").await?; } + + file.write_all(code.as_bytes()).await?; + file.flush().await?; + + Ok(()) } /// Compiles or validates the given code asynchronously. @@ -368,22 +452,10 @@ impl ConfiguredLanguage { /// - The compiler executable cannot be found or executed /// - The code fails to compile pub async fn compile(&self, code: &str, temp_file: &Path) -> Result<()> { - // Write code with optional preamble to temp file (async) - { - let mut file = File::create(temp_file) - .await - .context("Failed to create temporary file")?; - - if let Some(ref preamble) = self.config.preamble { - file.write_all(preamble.as_bytes()).await?; - file.write_all(b"\n\n").await?; - } - - file.write_all(code.as_bytes()).await?; - file.flush().await?; - } // file is automatically dropped here + // Write code with optional preamble to temp file + self.write_source_file(code, temp_file).await?; - // Execute compiler with configured flags (async) + // Execute compiler with configured flags let output = Command::new(&self.config.compiler) .args(&self.config.flags) .arg(temp_file) @@ -391,8 +463,11 @@ impl ConfiguredLanguage { .await .with_context(|| { format!( - "Failed to execute compiler '{}' for language '{}'", - self.config.compiler, self.name + "Failed to execute compiler '{}' for language '{}'\nFlags: {:?}\nFile: {}", + self.config.compiler, + self, + self.config.flags, + temp_file.display() ) })?; @@ -404,7 +479,14 @@ impl ConfiguredLanguage { } else { stdout.to_string() }; - anyhow::bail!("{} compilation failed:\n{}", self.name, error_msg); + anyhow::bail!( + "{} compilation failed\nCompiler: {}\nFlags: {:?}\nFile: {}\n\n{}", + self, + self.config.compiler, + self.config.flags, + temp_file.display(), + error_msg + ); } Ok(()) @@ -465,12 +547,12 @@ impl LanguageRegistry { /// ```ignore /// // Base C language /// if let Some(lang) = registry.find_by_fence("c", None) { - /// println!("Found language: {}", lang.name()); + /// println!("Found language: {}", lang); /// } /// /// // Parasol variant of C /// if let Some(lang) = registry.find_by_fence("c", Some("parasol")) { - /// println!("Found language: {}", lang.name()); + /// println!("Found language: {}", lang); /// } /// ``` pub fn find_by_fence(&self, fence: &str, variant: Option<&str>) -> Option { @@ -499,7 +581,11 @@ impl LanguageRegistry { variants: base_config.variants.clone(), }; - return Some(ConfiguredLanguage::new(lang_name.clone(), resolved_config)); + return Some(ConfiguredLanguage::new( + lang_name.clone(), + None, + resolved_config, + )); } Some(v) => v, }; @@ -520,8 +606,11 @@ impl LanguageRegistry { variants: std::collections::HashMap::new(), // Variants don't inherit variants }; - // Create a new language with variant-specific name - let variant_lang_name = format!("{}-{}", lang_name, variant_name); - Some(ConfiguredLanguage::new(variant_lang_name, merged_config)) + // Create a new language with the base language and variant + Some(ConfiguredLanguage::new( + lang_name.clone(), + Some(variant_name.to_string()), + merged_config, + )) } } diff --git a/src/reporting.rs b/src/reporting.rs index bbacf38..f4b2148 100644 --- a/src/reporting.rs +++ b/src/reporting.rs @@ -42,7 +42,7 @@ pub fn report_compilation_errors(failed_results: &[&CompilationResult]) -> Resul print_error(format!( "Block: #{} ({})", result.block_index(), - result.language().name() + result.language() )); print_error(""); @@ -54,7 +54,7 @@ pub fn report_compilation_errors(failed_results: &[&CompilationResult]) -> Resul print_error(""); print_error("Code block:"); - print_error(format!("```{}", result.language().name())); + print_error(format!("```{}", result.language())); for line in result.code().lines() { print_error(line); @@ -88,7 +88,7 @@ pub fn print_compilation_statistics(results: &[CompilationResult], parallel_dura let mut lang_counts: HashMap = HashMap::new(); for result in &successful_results { *lang_counts - .entry(result.language().name().to_string()) + .entry(result.language().to_string()) .or_insert(0) += 1; } @@ -127,7 +127,7 @@ pub fn print_compilation_statistics(results: &[CompilationResult], parallel_dura for (lang, count) in sorted_stats { let lang_results: Vec<_> = successful_results .iter() - .filter(|r| r.language().name() == lang) + .filter(|r| &r.language().to_string() == lang) .collect(); let lang_total: Duration = lang_results.iter().map(|r| r.duration()).sum(); let lang_avg_ms = lang_total.as_millis() / *count as u128; @@ -138,7 +138,7 @@ pub fn print_compilation_statistics(results: &[CompilationResult], parallel_dura for result in results { log::debug!( "[CODE_COMPILE_TIME] [{}] {} block #{}: {}ms", - result.language().name(), + result.language(), result.chapter_path().display(), result.block_index(), result.duration().as_millis() diff --git a/src/task_collector.rs b/src/task_collector.rs index bc907ec..cf6d8ae 100644 --- a/src/task_collector.rs +++ b/src/task_collector.rs @@ -81,12 +81,8 @@ pub fn collect_compilation_tasks( } }; - let block_name = format!( - "{}_{}_block_{}", - language.name(), - chapter_name, - task_counter - ); + let block_name = + format!("{}_{}_block_{}", language, chapter_name, task_counter); task_counter += 1; let temp_file_path = temp_dir.path().join(format!(