From c62fad043981486ddb4641b148079a870abe537f Mon Sep 17 00:00:00 2001 From: Christoph Hegemann Date: Tue, 17 Jun 2025 11:10:26 +0200 Subject: [PATCH] wip: lex documentation comments, and attach them to declarations --- rust/candid_parser/src/grammar.lalrpop | 30 ++++++++++++++++----- rust/candid_parser/src/lib.rs | 4 +-- rust/candid_parser/src/test.rs | 2 +- rust/candid_parser/src/token.rs | 36 +++++++++++++++++++++----- rust/candid_parser/src/types.rs | 23 +++++++++++----- rust/candid_parser/src/typing.rs | 12 ++++----- 6 files changed, 78 insertions(+), 29 deletions(-) diff --git a/rust/candid_parser/src/grammar.lalrpop b/rust/candid_parser/src/grammar.lalrpop index 3a3f3bb05..aee822ac6 100644 --- a/rust/candid_parser/src/grammar.lalrpop +++ b/rust/candid_parser/src/grammar.lalrpop @@ -1,12 +1,12 @@ -use super::types::{IDLType, PrimType, TypeField, FuncType, Binding, Dec, IDLProg, IDLTypes, IDLInitArgs, IDLArgType}; +use super::types::{IDLType, PrimType, TypeField, FuncType, Binding, Dec, DecInner, IDLProg, IDLTypes, IDLInitArgs, IDLArgType}; use super::test::{Assert, Input, Test}; -use super::token::{Token, error, error2, LexicalError, Span}; +use super::token::{Token, error, error2, LexicalError, Span, TriviaMap}; use candid::{Principal, types::Label}; use candid::types::value::{IDLField, IDLValue, IDLArgs, VariantValue}; use candid::types::{TypeEnv, FuncMode}; use candid::utils::check_unique; -grammar; +grammar(trivia: Option<&TriviaMap>); extern { type Location = usize; @@ -257,11 +257,17 @@ MethTyp: Binding = { ":" => Binding { id: n, typ: IDLType::VarT(id) }, } -// Type declarations Def: Dec = { - "type" "=" => Dec::TypD(Binding { id: id, typ: t }), - "import" => Dec::ImportType(<>), - "import" "service" => Dec::ImportServ(<>), + => { + Dec { doc_comment, inner } + } +} + +// Type declarations +DefInner: DecInner = { + "type" "=" => DecInner::TypD(Binding { id: id, typ: t }), + "import" => DecInner::ImportType(<>), + "import" "service" => 
DecInner::ImportServ(<>), } Actor: IDLType = { @@ -327,3 +333,13 @@ SepBy: Vec = { #[inline] Sp: (T, Span) = => (t, l..r); + +#[inline] +DocComment: Option> = + => { + let mut comment = None; + if let Some(trivia) = &trivia { + comment = trivia.borrow().get(&l).cloned(); + } + comment + }; diff --git a/rust/candid_parser/src/lib.rs b/rust/candid_parser/src/lib.rs index ca850bcdf..ad911c00b 100644 --- a/rust/candid_parser/src/lib.rs +++ b/rust/candid_parser/src/lib.rs @@ -144,10 +144,10 @@ pub mod test; pub fn parse_idl_args(s: &str) -> crate::Result { let lexer = token::Tokenizer::new(s); - Ok(grammar::ArgsParser::new().parse(lexer)?) + Ok(grammar::ArgsParser::new().parse(None, lexer)?) } pub fn parse_idl_value(s: &str) -> crate::Result { let lexer = token::Tokenizer::new(s); - Ok(grammar::ArgParser::new().parse(lexer)?) + Ok(grammar::ArgParser::new().parse(None, lexer)?) } diff --git a/rust/candid_parser/src/test.rs b/rust/candid_parser/src/test.rs index 563b756a7..6df82be9b 100644 --- a/rust/candid_parser/src/test.rs +++ b/rust/candid_parser/src/test.rs @@ -78,7 +78,7 @@ impl std::str::FromStr for Test { type Err = Error; fn from_str(str: &str) -> std::result::Result { let lexer = super::token::Tokenizer::new(str); - Ok(super::grammar::TestParser::new().parse(lexer)?) + Ok(super::grammar::TestParser::new().parse(None, lexer)?) 
} } diff --git a/rust/candid_parser/src/token.rs b/rust/candid_parser/src/token.rs index db7ae2d61..5338e4584 100644 --- a/rust/candid_parser/src/token.rs +++ b/rust/candid_parser/src/token.rs @@ -1,3 +1,4 @@ +use std::{cell::RefCell, collections::HashMap, mem, rc::Rc}; use lalrpop_util::ParseError; use logos::{Lexer, Logos}; @@ -5,6 +6,8 @@ use logos::{Lexer, Logos}; #[logos(skip r"[ \t\r\n]+")] #[logos(skip r"//[^\n]*")] // line comment pub enum Token { + #[regex(r"///[^\n]*")] + DocComment, #[token("/*")] StartComment, #[token("=")] @@ -118,23 +121,27 @@ fn parse_number(lex: &mut Lexer) -> String { } } +pub type TriviaMap = Rc>>>; + pub struct Tokenizer<'input> { lex: Lexer<'input, Token>, + comment_buffer: Vec, + trivia: Option, } impl<'input> Tokenizer<'input> { pub fn new(input: &'input str) -> Self { let lex = Token::lexer(input); - Tokenizer { lex } + Tokenizer { lex, comment_buffer: vec![], trivia: None } + } + + pub fn new_with_trivia(input: &'input str, trivia: TriviaMap) -> Self { + let lex = Token::lexer(input); + Tokenizer { lex, comment_buffer: vec![], trivia: Some(trivia) } } } pub type Span = std::ops::Range; #[derive(Clone, Debug, Eq, PartialEq)] -pub struct Spanned { - pub span: Span, - pub value: T, -} -#[derive(Clone, Debug, Eq, PartialEq)] pub struct LexicalError { pub err: String, pub span: Span, @@ -179,6 +186,13 @@ impl Iterator for Tokenizer<'_> { let err = format!("Unknown token {}", self.lex.slice()); Some(Err(LexicalError::new(err, span))) } + Ok(Token::DocComment) => { + let content = self.lex.slice(); + if self.trivia.is_some() { + self.comment_buffer.push(content.to_string()); + } + self.next() + } Ok(Token::StartComment) => { let mut lex = self.lex.to_owned().morph::(); let mut nesting = 1; @@ -278,7 +292,15 @@ impl Iterator for Tokenizer<'_> { self.lex = lex.morph::(); Some(Ok((span.start, Token::Text(result), self.lex.span().end))) } - Ok(token) => Some(Ok((span.start, token, span.end))), + Ok(token) => { + if let Some(trivia) = 
&mut self.trivia { + if !self.comment_buffer.is_empty() { + let content: Vec<String> = mem::take(&mut self.comment_buffer); + trivia.borrow_mut().insert(span.start, content); + } + } + Some(Ok((span.start, token, span.end))) + } } } } diff --git a/rust/candid_parser/src/types.rs b/rust/candid_parser/src/types.rs index 91ed6efdb..32ae7587e 100644 --- a/rust/candid_parser/src/types.rs +++ b/rust/candid_parser/src/types.rs @@ -1,3 +1,5 @@ +use std::{cell::RefCell, collections::HashMap, rc::Rc}; + use crate::Result; use candid::types::{FuncMode, Label}; @@ -98,7 +100,13 @@ pub struct TypeField { } #[derive(Debug)] -pub enum Dec { +pub struct Dec { + pub doc_comment: Option<Vec<String>>, + pub inner: DecInner, +} + +#[derive(Debug)] +pub enum DecInner { TypD(Binding), ImportType(String), ImportServ(String), @@ -125,15 +133,16 @@ pub struct IDLInitArgs { impl std::str::FromStr for IDLProg { type Err = crate::Error; fn from_str(str: &str) -> Result<Self> { - let lexer = super::token::Tokenizer::new(str); - Ok(super::grammar::IDLProgParser::new().parse(lexer)?) + let trivia: super::token::TriviaMap = Rc::new(RefCell::new(HashMap::new())); + let lexer = super::token::Tokenizer::new_with_trivia(str, trivia.clone()); + Ok(super::grammar::IDLProgParser::new().parse(Some(&trivia), lexer)?) } } impl std::str::FromStr for IDLInitArgs { type Err = crate::Error; fn from_str(str: &str) -> Result<Self> { let lexer = super::token::Tokenizer::new(str); - Ok(super::grammar::IDLInitArgsParser::new().parse(lexer)?) + Ok(super::grammar::IDLInitArgsParser::new().parse(None, lexer)?) } } @@ -141,7 +152,7 @@ impl std::str::FromStr for IDLType { type Err = crate::Error; fn from_str(str: &str) -> Result<Self> { let lexer = super::token::Tokenizer::new(str); - Ok(super::grammar::TypParser::new().parse(lexer)?) + Ok(super::grammar::TypParser::new().parse(None, lexer)?) 
} } @@ -149,6 +160,6 @@ impl std::str::FromStr for IDLTypes { type Err = crate::Error; fn from_str(str: &str) -> Result { let lexer = super::token::Tokenizer::new(str); - Ok(super::grammar::TypsParser::new().parse(lexer)?) + Ok(super::grammar::TypsParser::new().parse(None, lexer)?) } } diff --git a/rust/candid_parser/src/typing.rs b/rust/candid_parser/src/typing.rs index 3cc20367f..52ac82af1 100644 --- a/rust/candid_parser/src/typing.rs +++ b/rust/candid_parser/src/typing.rs @@ -138,12 +138,12 @@ fn check_meths(env: &Env, ms: &[Binding]) -> Result> { fn check_defs(env: &mut Env, decs: &[Dec]) -> Result<()> { for dec in decs.iter() { - match dec { - Dec::TypD(Binding { id, typ }) => { + match &dec.inner { + DecInner::TypD(Binding { id, typ }) => { let t = check_type(env, typ)?; env.te.0.insert(id.to_string(), t); } - Dec::ImportType(_) | Dec::ImportServ(_) => (), + DecInner::ImportType(_) | DecInner::ImportServ(_) => (), } } Ok(()) @@ -174,7 +174,7 @@ fn check_cycle(env: &TypeEnv) -> Result<()> { fn check_decs(env: &mut Env, decs: &[Dec]) -> Result<()> { for dec in decs.iter() { - if let Dec::TypD(Binding { id, typ: _ }) = dec { + if let DecInner::TypD(Binding { id, typ: _ }) = &dec.inner { let duplicate = env.te.0.insert(id.to_string(), TypeInner::Unknown.into()); if duplicate.is_some() { return Err(Error::msg(format!("duplicate binding for {id}"))); @@ -227,8 +227,8 @@ fn load_imports( list: &mut Vec<(PathBuf, String)>, ) -> Result<()> { for dec in prog.decs.iter() { - let include_serv = matches!(dec, Dec::ImportServ(_)); - if let Dec::ImportType(file) | Dec::ImportServ(file) = dec { + let include_serv = matches!(dec.inner, DecInner::ImportServ(_)); + if let DecInner::ImportType(file) | DecInner::ImportServ(file) = &dec.inner { let path = resolve_path(base, file); match visited.get_mut(&path) { Some(x) => *x = *x || include_serv,