diff --git a/Cargo.lock b/Cargo.lock index dec692e..ad31626 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,9 +2,28 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "colored" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "commons" +version = "0.1.0" +dependencies = [ + "colored", +] + [[package]] name = "lexer" version = "0.1.0" +dependencies = [ + "commons", +] [[package]] name = "parser" @@ -25,3 +44,18 @@ dependencies = [ [[package]] name = "utils" version = "0.1.0" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] diff --git a/Cargo.toml b/Cargo.toml index 114b75d..2abb98a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,2 +1,2 @@ [workspace] -members = ["lexer", "parser", "test-main", "utils"] +members = ["commons","lexer", "parser", "test-main", "utils"] diff --git a/commons/Cargo.toml b/commons/Cargo.toml new file mode 100644 index 0000000..bf73e78 --- /dev/null +++ b/commons/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "commons" +version = "0.1.0" +edition = "2024" + +[dependencies] +colored = "3.1.1" diff --git a/commons/src/err/mod.rs b/commons/src/err/mod.rs new file mode 100644 index 0000000..47759a4 --- /dev/null +++ b/commons/src/err/mod.rs @@ -0,0 +1,45 @@ +//! +//! Error handling code for most of the Quickfall compiler! +//! 
+ +use core::fmt; + +use colored::Colorize; + +use crate::Position; + +pub type PositionedResult<T> = Result<T, PositionedError>; + +/// An error that has a position +pub struct PositionedError { + pub start: Position, + pub end: Position, + pub reason: String +} + +impl PositionedError { + pub fn new(start: Position, end: Position, reason: String) -> Self { + return PositionedError { start, end, reason } + } +} + +impl fmt::Display for PositionedError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "{} at {}", "ERR".bright_red().bold(), self.start)?; + + let line = match self.start.get_line_content() { + Ok(v) => v, + Err(_) => format!("{}","Couldn't read file contents!".red().bold()) + }; + + let before = &line[..self.start.col - 1]; + let target = &line[self.start.col - 1..self.end.col - 1].cyan().underline(); + let after = &line[self.end.col - 1..]; + + writeln!(f, "{}{}{}", before, target, after)?; + writeln!(f, "")?; + writeln!(f, "{}", self.reason.bright_red())?; + + Ok(()) + } +} \ No newline at end of file diff --git a/commons/src/lib.rs b/commons/src/lib.rs new file mode 100644 index 0000000..3f7e032 --- /dev/null +++ b/commons/src/lib.rs @@ -0,0 +1,39 @@ +use core::fmt; +use std::{fs, io::Error}; + +pub mod err; + +#[derive(Debug, Clone)] +pub struct Position { + pub line: usize, + pub col: usize, + pub file_path: String +} + +impl Position { + fn get_line_content(&self) -> Result<String, Error> { + let contents = fs::read_to_string(&self.file_path)?; + + let spl: Vec<&str> = contents.split('\n').collect(); + + return Ok(String::from(spl[self.line - 1])); + } +} + +impl Position { + pub fn new(path: String, line: usize, col: usize) -> Self { + return Position { line, col, file_path: path }; + } + + pub fn increment_by(&self, count: usize) -> Self { + return Position::new(self.file_path.clone(), self.line, self.col + count); + } +} + +impl fmt::Display for Position { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}:{} in {}", 
self.line, self.col, self.file_path)?; + + Ok(()) + } +} \ No newline at end of file diff --git a/lexer/Cargo.toml b/lexer/Cargo.toml index c88bd7c..f5a29e8 100644 --- a/lexer/Cargo.toml +++ b/lexer/Cargo.toml @@ -4,3 +4,4 @@ version = "0.1.0" edition = "2024" [dependencies] +commons = { path = "../commons" } \ No newline at end of file diff --git a/lexer/src/lexer.rs b/lexer/src/lexer.rs index f9bdcd3..9143903 100644 --- a/lexer/src/lexer.rs +++ b/lexer/src/lexer.rs @@ -4,7 +4,9 @@ use std::{fs, hash::{DefaultHasher, Hash, Hasher}, io::Error}; -use crate::{LexerParseResult, LexerParsingError, token::LexerToken}; +use commons::Position; + +use crate::{LexerParseResult, LexerParsingError, token::LexerToken, token::LexerTokenType}; const FUNC_KEYWORD_HASH: u64 = 17439195341824537259; const RET_KEYWORD_HASH: u64 = 9222097151127739705; @@ -35,52 +37,66 @@ pub fn lexer_parse_file(file_path: &String) -> LexerParseResult<Vec<LexerToken>> let mut tokens: Vec<LexerToken> = Vec::new(); let mut i: usize = 0; + + let mut line: usize = 1; + let mut col: usize = 0; while i < contents.len() { let c: char = contents.chars().nth(i).unwrap(); + + col += 1; + + if c == '\n' { + line += 1; + col = 0; + i += c.len_utf8(); + continue; + } if c.is_numeric() { - tokens.push(parse_number_token(&contents, &mut i)?); + tokens.push(parse_number_token(&contents, &mut i, Position::new(file_path.to_string(), line, col))?); continue; } if c == '"' { - tokens.push(parse_string_token(&contents, &mut i)); + tokens.push(parse_string_token(&contents, &mut i, Position::new(file_path.to_string(), line, col))); continue; } if c.is_alphabetic() { - tokens.push(parse_keyword(&contents, &mut i)); + tokens.push(parse_keyword(&contents, &mut i, Position::new(file_path.to_string(), line, col))); continue; } i += c.len_utf8(); + + let pos = Position::new(file_path.to_string(), line, col); + match c { - '{' => tokens.push(LexerToken::BRACKET_OPEN), - '}' => tokens.push(LexerToken::BRACKET_CLOSE), - '(' => tokens.push(LexerToken::PAREN_OPEN), - ')' => 
tokens.push(LexerToken::PAREN_CLOSE), - '[' => tokens.push(LexerToken::ARRAY_OPEN), - ']' => tokens.push(LexerToken::ARRAY_CLOSE), - '=' => tokens.push(LexerToken::EQUAL_SIGN), - ',' => tokens.push(LexerToken::COMMA), - '.' => tokens.push(LexerToken::DOT), - '!' => tokens.push(LexerToken::EXCLAMATION_MARK), - '&' => tokens.push(LexerToken::AMPERSAND), - '<' => tokens.push(LexerToken::ANGEL_BRACKET_OPEN), - '>' => tokens.push(LexerToken::ANGEL_BRACKET_CLOSE), + '{' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::BRACKET_OPEN)), + '}' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::BRACKET_CLOSE)), + '(' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::PAREN_OPEN)), + ')' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::PAREN_CLOSE)), + '[' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::ARRAY_OPEN)), + ']' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::ARRAY_CLOSE)), + '=' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::EQUAL_SIGN)), + ',' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::COMMA)), + '.' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::DOT)), + '!' 
=> tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::EXCLAMATION_MARK)), + '&' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::AMPERSAND)), + '<' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::ANGEL_BRACKET_OPEN)), + '>' => tokens.push(LexerToken::make_single_sized(pos, LexerTokenType::ANGEL_BRACKET_CLOSE)), _ => continue } } - tokens.push(LexerToken::END_OF_FILE); + tokens.push(LexerToken::make_single_sized(Position::new(file_path.to_string(), line, col), LexerTokenType::END_OF_FILE)); Ok(tokens) } -fn parse_number_token(str: &String, ind: &mut usize) -> LexerParseResult<LexerToken> { +fn parse_number_token(str: &String, ind: &mut usize, start_pos: Position) -> LexerParseResult<LexerToken> { let start = *ind + 1; let mut end: usize = start; @@ -100,10 +114,11 @@ fn parse_number_token(str: &String, ind: &mut usize) -> LexerParseResult LexerToken { +fn parse_string_token(str: &String, ind: &mut usize, start_pos: Position) -> LexerToken { let start = *ind + 1; let mut end: usize = start; @@ -120,10 +135,11 @@ fn parse_string_token(str: &String, ind: &mut usize) -> LexerToken { *ind = end; - return LexerToken::STRING_LIT(slice.to_string()); + let endpos: Position = start_pos.increment_by(end - start); + return LexerToken::new(start_pos, endpos, LexerTokenType::STRING_LIT(slice.to_string())); } -fn parse_keyword(str: &String, ind: &mut usize) -> LexerToken { +fn parse_keyword(str: &String, ind: &mut usize, start_pos: Position) -> LexerToken { let start = *ind; let mut end: usize = start; @@ -144,22 +160,22 @@ fn parse_keyword(str: &String, ind: &mut usize) -> LexerToken { *ind = end; - match hash { - FUNC_KEYWORD_HASH => return LexerToken::FUNCTION, - RET_KEYWORD_HASH => return LexerToken::RETURN, - STRUCT_KEYWORD_HASH => return LexerToken::STRUCT, - LAYOUT_KEYWORD_HASH => return LexerToken::LAYOUT, - LAY_KEYWORD_HASH => return LexerToken::LAY, - TRUE_KEYWORD_HASH => return LexerToken::TRUE, - FALSE_KEYWORD_HASH => return LexerToken::FALSE, - 
VAR_KEYWORD_HASH => return LexerToken::VAR, - IF_KEYWORD_HASH => return LexerToken::IF, - ELSE_KEYWORD_HASH => return LexerToken::ELSE, - WHILE_KEYWORD_HASH => return LexerToken::WHILE, - FOR_KEYWORD_HASH => return LexerToken::FOR, - - _ => { - return LexerToken::KEYWORD(slice.to_string(), hash); - } - } + let token_type = match hash { + FUNC_KEYWORD_HASH => LexerTokenType::FUNCTION, + RET_KEYWORD_HASH => LexerTokenType::RETURN, + STRUCT_KEYWORD_HASH => LexerTokenType::STRUCT, + LAYOUT_KEYWORD_HASH => LexerTokenType::LAYOUT, + LAY_KEYWORD_HASH => LexerTokenType::LAY, + TRUE_KEYWORD_HASH => LexerTokenType::TRUE, + FALSE_KEYWORD_HASH => LexerTokenType::FALSE, + VAR_KEYWORD_HASH => LexerTokenType::VAR, + IF_KEYWORD_HASH => LexerTokenType::IF, + ELSE_KEYWORD_HASH => LexerTokenType::ELSE, + WHILE_KEYWORD_HASH => LexerTokenType::WHILE, + FOR_KEYWORD_HASH => LexerTokenType::FOR, + _ => LexerTokenType::KEYWORD(slice.to_string(), hash) + }; + + let endpos: Position = start_pos.increment_by(end - start); + return LexerToken::new(start_pos, endpos, token_type); } \ No newline at end of file diff --git a/lexer/src/token.rs b/lexer/src/token.rs index aaf13de..148eb69 100644 --- a/lexer/src/token.rs +++ b/lexer/src/token.rs @@ -2,11 +2,15 @@ //! Module containing lexer token-based utilities and classes //! 
+use std::any::Any; + +use commons::{Position, err::{PositionedError, PositionedResult}}; + use crate::{LexerParseResult, LexerParsingError}; /// The token type for the lexer #[derive(PartialEq, Debug)] -pub enum LexerToken { +pub enum LexerTokenType { /// Represent the func keyword FUNCTION, @@ -52,17 +56,69 @@ pub enum LexerToken { END_OF_FILE } +pub struct LexerToken { + pub tok_type: LexerTokenType, + pub pos: Position, // Valid tokens require a position + pub end_pos: Position +} + impl LexerToken { + pub fn make_single_sized(pos: Position, t: LexerTokenType) -> Self { + let end = pos.increment_by(1); + return LexerToken { tok_type: t, pos, end_pos: end }; + } + + pub fn new(start: Position, end: Position, t: LexerTokenType) -> Self { + return LexerToken { tok_type:t , pos: start, end_pos: end } + } + + pub fn is(&self, t: LexerTokenType) -> bool { + return self.tok_type == t; + } + + pub fn expects(&self, t: LexerTokenType) -> PositionedResult { + if self.tok_type != t { + return Err(PositionedError::new(self.pos.clone(), self.end_pos.clone(), format!("Expected {:#?} token but instead got {:#?}!", t, self.tok_type))) + } + + return Ok(true); + } + + pub fn expects_int_lit(&self) -> PositionedResult { + match &self.tok_type { + LexerTokenType::INT_LIT(v) => return Ok(*v), + _ => return Err(self.make_err("Expected int litteral here!")) + }; + } + + pub fn expects_string_lit(&self) -> PositionedResult { + match &self.tok_type { + LexerTokenType::STRING_LIT(v) => return Ok(v.to_string()), + _ => return Err(self.make_err("Expected string litteral here!")) + }; + } + + pub fn expects_keyword(&self) -> PositionedResult<(String, u64)> { + match &self.tok_type { + LexerTokenType::KEYWORD(s, h) => return Ok((s.to_string(), *h)), + _ => return Err(self.make_err("Expected keyword here!")) + }; + } + + pub fn make_err(&self, err: &str) -> PositionedError { + return PositionedError::new(self.pos.clone(), self.end_pos.clone(), String::from(err)); + } + pub fn 
as_keyword(&self) -> LexerParseResult<(String, u64)> { - match self { - LexerToken::KEYWORD(str, hash) => Ok((str.clone(), *hash)), + match &self.tok_type { + LexerTokenType::KEYWORD(str, hash) => Ok((str.clone(), *hash)), _ => Err(LexerParsingError::new(String::from("Token is not a keyword!"), 0)) } } pub fn is_keyword(&self) -> bool { - match self { - LexerToken::KEYWORD(_, _) => true, + match &self.tok_type { + LexerTokenType::KEYWORD(_, _) => true, _ => false } } diff --git a/parser/src/ast/literals.rs b/parser/src/ast/literals.rs index 2fbd0a5..3cfca32 100644 --- a/parser/src/ast/literals.rs +++ b/parser/src/ast/literals.rs @@ -1,23 +1,21 @@ -use lexer::token::LexerToken; +use lexer::token::{LexerToken, LexerTokenType}; use crate::{ParserError, ParserResult, ast::tree::ASTTreeNode}; pub fn parse_integer_literal(tokens: &Vec<LexerToken>, ind: &mut usize) -> ParserResult<Box<ASTTreeNode>> { - match &tokens[*ind] { - LexerToken::INT_LIT(val) => { - *ind += 1; - return Ok(Box::new(ASTTreeNode::IntegerLit(*val))) - }, - _ => Err(ParserError::new(String::from("Given token is not an integer literal"), 0)) - } + let val = tokens[*ind].expects_int_lit() + .map_err(|e| ParserError::new(e.reason, 0))?; + + *ind += 1; + + return Ok(Box::new(ASTTreeNode::IntegerLit(val))); } pub fn parse_string_literal(tokens: &Vec<LexerToken>, ind: &mut usize) -> ParserResult<Box<ASTTreeNode>> { - match &tokens[*ind] { - LexerToken::STRING_LIT(val) => { - *ind += 1; - return Ok(Box::new(ASTTreeNode::StringLit(String::clone(val)))) - }, - _ => return Err(ParserError::new(String::from("Given token is not an string literal"), 0)) - }; + let val = tokens[*ind].expects_string_lit() + .map_err(|e| ParserError::new(e.reason, 0))?; + + *ind += 1; + + return Ok(Box::new(ASTTreeNode::StringLit(val))); } \ No newline at end of file diff --git a/parser/src/ast/mod.rs b/parser/src/ast/mod.rs index 77b428d..42c605d 100644 --- a/parser/src/ast/mod.rs +++ b/parser/src/ast/mod.rs @@ -8,7 +8,7 @@ use std::fmt::Debug; -use lexer::token::LexerToken; +use lexer::token::{LexerToken, LexerTokenType}; use utils::hash::WithHash; use crate::{ParserError, ParserResult, 
ast::{cond::operators::parse_condition_operator, control::{forloop::parse_for_loop, ifelse::parse_if_statement, whileblock::parse_while_block}, func::{call::parse_function_call, decl::parse_function_declaraction}, literals::{parse_integer_literal, parse_string_literal}, tree::ASTTreeNode, var::decl::parse_variable_declaration}}; @@ -21,8 +21,8 @@ pub mod cond; pub mod control; pub fn parse_ast_value_post_l(tokens: &Vec, ind: &mut usize, original: ParserResult>) -> ParserResult> { - match &tokens[*ind] { - LexerToken::DOT => { + match &tokens[*ind].tok_type { + LexerTokenType::DOT => { let o = &original?; let k = Box::new(ASTTreeNode::clone(o.as_ref())); @@ -42,7 +42,7 @@ pub fn parse_ast_value_post_l(tokens: &Vec, ind: &mut usize, origina return Err(ParserError::new(String::from("Next member isn't any valid field/func access type!"), 0)); }, - LexerToken::ANGEL_BRACKET_CLOSE | LexerToken::EQUAL_SIGN | LexerToken::ANGEL_BRACKET_OPEN => { + LexerTokenType::ANGEL_BRACKET_CLOSE | LexerTokenType::EQUAL_SIGN | LexerTokenType::ANGEL_BRACKET_OPEN => { let operator = parse_condition_operator(tokens, ind)?; let o = &original?; @@ -59,9 +59,9 @@ pub fn parse_ast_value_post_l(tokens: &Vec, ind: &mut usize, origina } pub fn parse_ast_value(tokens: &Vec, ind: &mut usize) -> ParserResult> { - match &tokens[*ind] { + match &tokens[*ind].tok_type { - LexerToken::EXCLAMATION_MARK => { + LexerTokenType::EXCLAMATION_MARK => { *ind += 1; let ast = parse_ast_value(tokens, ind)?; @@ -72,18 +72,18 @@ pub fn parse_ast_value(tokens: &Vec, ind: &mut usize) -> ParserResul return Err(ParserError::new(String::from("Boolean negation requires either function or variable usage!"), 0)); }, - LexerToken::INT_LIT(_) => { + LexerTokenType::INT_LIT(_) => { let int = parse_integer_literal(tokens, ind); return parse_ast_value_post_l(tokens, ind, int); }, - LexerToken::STRING_LIT(_) => { + LexerTokenType::STRING_LIT(_) => { let str = parse_string_literal(tokens, ind); return parse_ast_value_post_l(tokens, 
ind, str); }, - LexerToken::KEYWORD(str, _) => { - if tokens[*ind + 1] == LexerToken::PAREN_OPEN { + LexerTokenType::KEYWORD(str, _) => { + if tokens[*ind + 1].tok_type == LexerTokenType::PAREN_OPEN { let call = parse_function_call(tokens, ind); return parse_ast_value_post_l(tokens, ind, call); } @@ -100,31 +100,31 @@ pub fn parse_ast_value(tokens: &Vec, ind: &mut usize) -> ParserResul } pub fn parse_ast_node(tokens: &Vec, ind: &mut usize) -> ParserResult> { - println!("Ind: {}, tok at: {:#?}", ind, tokens[*ind]); + println!("Ind: {}, tok at: {:#?}", ind, tokens[*ind].tok_type); - match &tokens[*ind] { - LexerToken::FUNCTION => { + match &tokens[*ind].tok_type { + LexerTokenType::FUNCTION => { return parse_function_declaraction(tokens, ind); } - LexerToken::VAR => { + LexerTokenType::VAR => { return parse_variable_declaration(tokens, ind); }, - LexerToken::IF => { + LexerTokenType::IF => { return parse_if_statement(tokens, ind); }, - LexerToken::WHILE => { + LexerTokenType::WHILE => { return parse_while_block(tokens, ind); }, - LexerToken::FOR => { + LexerTokenType::FOR => { return parse_for_loop(tokens, ind); } _ => { - return Err(ParserError::new(format!("err: {:#?}", tokens[*ind]), 0)); + return Err(ParserError::new(format!("err: {:#?}", tokens[*ind].tok_type), 0)); } }