- 词法分析
- 基于递归下降的语法分析
- 基于 LL(1) 的语法分析
- 抽象语法树 AST 的生成
- 符号检查
- 类型检查与类型推导
- LLVM IR 中间代码生成
- 利用 LLVM 进行 JIT 即时编译
- 三元式生成
- 目标代码生成
- 表达式优化
- 基于 AST 的代码优化
extern crate inkwell;
extern crate parser;
use self::inkwell::targets::{Target, InitializationConfig};
use self::inkwell::execution_engine::Symbol;
use parser::parser::*;
use parser::parser::recursive_descent::*;
use parser::parser::llvm_ir_generater::*;
use parser::lexer::*;
// Example driver: parses a small C-like function `f`, generates LLVM IR for
// it, and calls the resulting function through an execution engine.
// Every `unwrap()` below panics if the corresponding step fails.
fn main () {
// Initialize LLVM's native target with the default initialization config.
Target::initialize_native(&InitializationConfig::default()).unwrap();
// Source under test: `f` returns `a` when `a >= b`, otherwise `a + b`.
let src = "
int f(int a, int b)
{
if (a >= b)
return a;
return a + b;
}
";
// Lex and parse the source bytes with the recursive-descent parser.
let mut parser = RecursiveDescentParser::new(Lexer::new(src.as_bytes()));
parser.run().unwrap();
// Build the IR generator from the parser's syntax tree.
let mut generater = LLVMIRGenerater::new(parser.syntax_tree());
// NOTE(review): `module` is not used again in this function — confirm whether
// the binding is only needed to keep the generated module alive.
let module = generater.ir_gen();
let ee = generater.execution_engine().unwrap();
// Look up `f` by name as a C-ABI function taking and returning `i64`.
// NOTE(review): assumes the source-level `int` is lowered to a 64-bit
// integer so this signature matches the generated code — confirm.
let f: Symbol<unsafe extern "C" fn(i64, i64) -> i64> = unsafe {
ee.get_function("f").unwrap()
};
// a < b: expect a + b.
assert_eq!(5, unsafe { f(2, 3) });
// a > b: expect a.
assert_eq!(6, unsafe { f(6, 5) });
// a < b: expect a + b.
assert_eq!(7, unsafe { f(3, 4) });
// a == b satisfies `a >= b`: expect a.
assert_eq!(5, unsafe { f(5, 5) });
} let src = "
int func_add(int a, int b)
{
return a + b;
}
int main()
{
return 0;
}
";
let lexer = Lexer::new(src.as_bytes());
let mut parser = RecursiveDescentParser::new(lexer);
parser.run();
parser.dump();

output AST:
SyntaxTree
FuncDefine
Terminal(KeyWord(Int))
Terminal(Identifier("func_add"))
FuncArg
Terminal(KeyWord(Int))
Terminal(Identifier("a"))
FuncArg
Terminal(KeyWord(Int))
Terminal(Identifier("b"))
ReturnStmt
Expr
Terminal(Identifier("a"))
Terminal(Operator(Add))
Terminal(Identifier("b"))
FuncDefine
Terminal(KeyWord(Int))
Terminal(Identifier("main"))
ReturnStmt
Terminal(Number("0"))
let src = "
struct S { int a, b; };
struct S1 { double S1; int b; };
int a(int a, char b) { }
";
let mut parser = RecursiveDescentParser::new(Lexer::new(src.as_bytes()));
println!("result: {:?}\n", parser.run());

output: result: Ok(())
let src = "
// multi define of variable S::a
struct S { int a, b; char a; };
";
let mut parser = RecursiveDescentParser::new(Lexer::new(src.as_bytes()));
println!("result: {:?}\n", parser.run());

output: result: Err(ParseErrInfo { err_type: MultiDefineError })
if, else, for, ...
short, int, long, unsigned, ...
- number = `\d+`
- identifier = `[a-z][a-z|0-9]*`
- ident = `number|identifier`
- `expr_opt`: `bool_expr` | `epsilon`
- `expr`: `expr add_op expr_mul` => `expr_mul expr_fix`(消除左递归后的产生式,下同)
- `expr_fix`: `add_op expr_mul expr_fix` | `epsilon`
- `expr_mul`: `expr_mul mul_op expr_factor` => `expr_factor expr_mul_fix`
- `expr_mul_fix`: `mul_op expr_factor expr_mul_fix` | `epsilon`
- `expr_factor`: `( expr )` | `ident`
引入的 Tokens
- add_op = `+|-`
- mul_op = `*|/`
- single_op = `!|~`
原始定义
- `bool_expr`:
  - `bool_expr || bool_expr`
  - `bool_expr && bool_expr`
  - `bool_expr equal_op bool_expr`
  - `bool_expr cmp_op bool_expr`
  - `! expr`
  - `expr`
消除左递归及添加优先级后的定义
- `bool_expr`: `bool_expr || bool_expr_and` => `bool_expr_and bool_expr_fix`
- `bool_expr_fix`: `|| bool_expr_and bool_expr_fix` | `epsilon`
- `bool_expr_and`: `bool_expr_and && bool_expr_equal` => `bool_expr_equal bool_expr_and_fix`
- `bool_expr_and_fix`: `&& bool_expr_equal bool_expr_and_fix` | `epsilon`
- `bool_expr_equal`: `bool_expr_equal equal_op bool_expr_cmp` => `bool_expr_cmp bool_expr_equal_fix`
- `bool_expr_equal_fix`: `equal_op bool_expr_cmp bool_expr_equal_fix` | `epsilon`
- `bool_expr_cmp`: `bool_expr_cmp cmp_op bool_expr_factor` => `bool_expr_factor bool_expr_cmp_fix`
- `bool_expr_cmp_fix`: `cmp_op bool_expr_factor bool_expr_cmp_fix` | `epsilon`
- `bool_expr_factor`:
  - `! bool_expr`
  - `( bool_expr )`
  - `expr`
引入的 Tokens
- cmp_op: `>` | `>=` | `<` | `<=`
- equal_op: `==` | `!=`
- `stmt`: `stmt_factor`
- `stmt_factor`: `stmt_single ;` | `stmt_block` | `stmt_control ;`
- `stmt_single`: `assign_stmt` | `break_stmt` | `return_stmt`
- `stmt_control`: `if_stmt` | `while_loop` | `for_loop`
- `stmt_list`: `stmt stmt_list` | `epsilon`
- `stmt_block`: `{ stmt_list }`
- `assign_stmt`: `left_value = right_value`
- `if_stmt`: `if ( bool_expr ) stmt else stmt`
- `while_loop`: `while ( bool_expr ) stmt`
- `for_loop`: `for ( expr_opt ; expr_opt ; expr_opt ) stmt`
- `break_stmt`: `break`
- `return_stmt`: `return return_expr`
- `return_expr`: `bool_expr` | `epsilon`
- `left_value`: `identifier`
- `right_value`: `bool_expr`
- `variable_define`: `variable_type variable_def_list`
- `variable_def_list`: `identifier ;` | `identifier , variable_def_list`
- `variable_type`: `variable_prefix variable_suffix` | `float` | `double`
- `variable_prefix`: `unsigned` | `signed` | `long` | `long long`
- `variable_suffix`: `int`
- `func_declare`: `func_ret_type func_name ( func_param_list ) ;`
- `func_name`: `identifier`
- `func_ret_type`: `type`
- `func_param_list`: `func_param func_param_list_tail` | `epsilon`
- `func_param_list_tail`: `, func_param func_param_list_tail` | `epsilon`
- `func_param`: `func_param_type func_param_name`
- `func_param_type`: `type`
- `func_param_name`: `identifier`
- `struct_define`: `struct { struct_vars } ;` | `struct identifier { struct_vars } ;`
- `struct_vars`: `struct_var ;` | `struct_var struct_vars` | `epsilon`
- `struct_var`: `variable_define`
- `function`: `func_ret_type func_name ( func_param_list ) { func_body }`
- `func_body`: `stmt_list`
- `func_call`: `func_name ( func_arg_list )`
- `func_arg_list`: `func_arg , func_arg_list` | `epsilon`
- `func_arg`: `identifier`