Workable!…Only the assignment part
It is so damn hard to debug for it!
#include"Lexer.h" namespace parser { class Parser { private: lexer::Lexer lex; const lexer::Token *look; symbols::Env* top = nullptr; int used = 0; public: Parser(lexer::Lexer &l) : lex(l), look(l.scan()) {}; void move() { look = lex.scan(); } void error(std::string s) { throw std::runtime_error("near line " + std::to_string(lex.line) + ": " + s); } void match(int t) { if (look->tag == t) move(); else error("syntax error"); } void program() { inter::Stmt* s = block(); int begin = s->newlabel(); int after = s->newlabel(); s->emitlabel(begin); s->gen(begin, after); s->emitlabel(after); } inter::Stmt* block() { match('{'); symbols::Env *savedEnv = top; top = new symbols::Env(top); decls(); inter::Stmt *s = stmts(); match('}'); top = savedEnv; return s; } void decls() { while (look->tag == lexer::Tag::BASIC) { symbols::Type* p = type(); const lexer::Word *tok = (lexer::Word*)look; match(lexer::Tag::ID); match(';'); inter::Id *id = new inter::Id(tok, p, used); top->put(id->lex, id); used = used + p->width; } } symbols::Type *type() { symbols::Type *p = (symbols::Type*)look; match(lexer::Tag::BASIC); if (look->tag != '[') return p; else return dims(*p); } symbols::Type* dims(symbols::Type p) { match('['); const lexer::Token *tok = look; match(lexer::Tag::NUM); match(']'); if (look->tag == '[') p = *dims(p); return new symbols::Array(((lexer::Num*)look)->value, p); } inter::Stmt* stmts() { if (look->tag == '}') return &inter::Null; else { inter::Stmt *s1 = stmt(); inter::Stmt *s2 = stmts(); return new inter::Seq(s1, s2); } } inter::Stmt* stmt() { inter::Expr *x; inter::Stmt *s, *s1, *s2; inter::Stmt savedStmt; inter::Do *donode = nullptr; inter::While *whilenode = nullptr; switch (look->tag) { case ';': move(); return &inter::Null; case lexer::Tag::IF: match(lexer::Tag::IF); match('('); x = boolvalue(); match(')'); s1 = stmt(); if (look->tag != lexer::Tag::ELSE) return new inter::If(x, s1); match(lexer::Tag::ELSE); s2 = stmt(); return new inter::Else(x, s1, s2); case lexer::Tag::WHILE: whilenode = new inter::While(); savedStmt = inter::Enclosing; inter::Enclosing = *whilenode; match(lexer::Tag::WHILE); match('('); x = boolvalue(); match(')'); s1 = stmt(); whilenode->init(x, s1); inter::Enclosing = savedStmt; case lexer::Tag::DO: donode = new inter::Do(); savedStmt = inter::Enclosing; inter::Enclosing = *donode; match(lexer::Tag::DO); s1 = stmt(); match(lexer::Tag::WHILE); match('('); x = boolvalue(); match(')'); match(':'); donode->init(x, s1); inter::Enclosing = savedStmt; return donode; case lexer::Tag::BREAK: match(lexer::Tag::BREAK); match(';'); return new inter::Break(); case '{': return block(); default: return assign(); } } inter::Stmt* assign() { inter::Stmt * stmt; const lexer::Token *t = look; match(lexer::Tag::ID); inter::Id* id = top->get(((lexer::Word*)t)->lexeme); if (id == nullptr) error(t->toString() + " undeclard"); if (look->tag == '=') { move(); stmt = new inter::Set(id, boolvalue()); } else { inter::Access* x = offset(id); match('='); stmt = new inter::SetElem(x, boolvalue()); } match(';'); return stmt; } inter::Expr * boolvalue() { inter::Expr* x = join(); while (look->tag == lexer::Tag::OR) { const lexer::Token *tok = look; move(); x = new inter::Or(tok, x, join()); } return x; } inter::Expr* join() { inter::Expr* x = equality(); while (look->tag == lexer::Tag::AND) { const lexer::Token *tok = look; move(); x = new inter::And(tok, x, equality()); } return x; } inter::Expr * equality() { inter::Expr* x = rel(); while (look->tag == lexer::Tag::EQ) { const lexer::Token *tok = look; move(); x = new inter::Rel(tok, x, join()); } return x; } inter::Expr * rel() { inter::Expr* x = expr(); const lexer::Token *tok = look; switch (look->tag) { case '<':case lexer::Tag::LE:case lexer::Tag::GE: case'>': move(); return new inter::Rel(tok, x, expr()); default: return x; } } inter::Expr * expr() { inter::Expr* x = term(); while (look->tag == '+' || look->tag == '-'){ const lexer::Token * tok = look; move(); x = new inter::Arith(tok, x, term()); } return x; } inter::Expr * term() { inter::Expr* x = unary(); while (look->tag == '*' || look->tag == '/') { const lexer::Token *tok = look; move(); x = new inter::Arith(tok, x, unary()); } return x; } inter::Expr* unary() { if (look->tag == '-') { move(); return new inter::Unary(&lexer::minus, unary()); } else if (look->tag == '!') { const lexer::Token* tok = look; move(); return new inter::Not(tok, unary()); } else return factor(); } inter::Expr * factor() { inter::Expr *x = nullptr; switch (look->tag) { case '(': move(); x = boolvalue(); match(')'); return x; case lexer::Tag::NUM: x = new inter::Constant(look, &symbols::Int); move(); return x; case lexer::Tag::REAL: x = new inter::Constant(look, &symbols::Double); move(); return x; case lexer::Tag::TRUE: x = &inter::True; move(); return x; case lexer::Tag::FALSE: x = new inter::Constant(look, &symbols::Int); x = &inter::False; move(); return x; default: error("syntax error"); return x; case lexer::Tag::ID: std::string s = look->toString(); inter::Id *idx = top->get(((lexer::Word*)look)->lexeme); if (idx == nullptr) error(look->toString() + " undeclared"); move(); if (look->tag != '[') return idx; else return offset(idx); } } inter::Access* offset(inter::Id * a) { inter::Expr *i; inter::Expr *w; inter::Expr *t1, *t2; inter::Expr *loc; symbols::Type type = *(a->type); match('['); i = boolvalue(); match(']'); type = ((symbols::Array*)a->type) -> of; w = new inter::Constant(type.width); t1 = new inter::Arith(new lexer::Token('*'), i, w); loc = t1; while (look->tag == ']') { match('['); i = boolvalue(); match(']'); type = ((symbols::Array*)a->type)->of; w = new inter::Constant(type.width); t1 = new inter::Arith(new lexer::Token('*'), i, w); t2 = new inter::Arith(new lexer::Token('+'), loc, t1); loc = t2; } return new inter::Access(a, loc, &type); } }; }
/* Developer: Minghua Deng Latest update: 2015/3/8 12:11 */ #ifndef LEXER_H #define LEXER_H #include<string> #include<map> #include<utility> #include<iostream> #include<cctype> #include<exception> namespace symbols { } namespace lexer { //词法分析器单元 class Tag { public: enum { AND = 256, BASIC = 257, BREAK = 258, DO = 259, ELSE = 260, EQ = 261, FALSE = 262, GE = 263, ID = 264, IF = 265, INDEX = 266, LE = 267, MINUS = 268, NE = 269, NUM = 270, OR = 271, REAL = 272, TEMP = 273, TRUE = 274, WHILE = 275 }; }; //枚举各个词法单元对应的的标识符 class Token { public: int tag; Token(int t) :tag(t) {}; virtual std::string toString() const { return "" + (char)tag; } bool operator== (Token &rhs) { if (this->tag == rhs.tag) return true; else return false; } bool operator<(const Token &rhs) const { if (this->tag < rhs.tag) return true; else return false; } }; //Token类,tag标记了词法单元类型,重载了==运算符 class Num :public Token { public: int value; Num(int v) : Token(Tag::NUM), value(v) {}; virtual std::string toString() const { return "" + std::to_string(value); } }; //Num类,整数词法单元,由Token派生 class Word :public Token { public: std::string lexeme = ""; Word(std::string s, int tag) :Token(tag), lexeme(s) {}; virtual std::string toString() const { return lexeme; } Word(Token tok) :Token(tok) {}; }; //Word类,用于保存标识符、保留字和复合词法单元,由Token派生 const Word and = Word("&&", Tag::AND), or = Word("||", Tag::OR), eq = Word("==", Tag::EQ), ne = Word("!=", Tag::NE), le = Word("<=", Tag::LE), ge = Word(">=", Tag::GE), minus = Word("minus", Tag::MINUS), True = Word("true", Tag::TRUE), False = Word("false", Tag::FALSE), temp = Word("t", Tag::TEMP); //枚举了逻辑运算符以及一些特殊词法单元常量 } namespace symbols { class Type :public lexer::Word { public: int width = 0; Type(std::string s, int tag, int w) :lexer::Word(s, tag), width(w) {}; bool operator==(Type const & rhs) const { if (rhs.lexeme == this->lexeme && rhs.tag == this->tag) return true; else return false; } }; //Type类保存类型符,重载==用于比较,由word派生 const Type Int("int", lexer::Tag::BASIC, 4), Double("double", lexer::Tag::BASIC, 16), Char("char", lexer::Tag::BASIC, 1), Bool("bool", lexer::Tag::BASIC, 1), Null("NULL", -1, 0); //枚举一些内置类型常量 //Null用作空白类型标示 } namespace lexer { class Real :public Token { public: double value; Real(double v) :Token(Tag::REAL), value(v) {}; virtual std::string toString() const { return "" + std::to_string(value); } }; //Real类,用于保存实数类型 class Lexer { public: int line = 1; //标记行数 char peek = ' '; std::multimap<std::string, const Word*> words; //通过<string, word>对的map保存词素到词法单元的映射 void reserve(const Word* w) { words.insert(std::make_pair(w->lexeme, w)); } Lexer() { reserve(new Word("if", Tag::IF)); reserve(new Word("else", Tag::ELSE)); reserve(new Word("while", Tag::DO)); reserve(new Word("break", Tag::BREAK)); reserve(&True); reserve(&False); reserve(&symbols::Int); reserve(&symbols::Char); reserve(&symbols::Bool); reserve(&symbols::Double); //预先加入一些保留字 } void readch() {peek = getchar(); } bool readch(char c) { readch(); if (peek != c) return false; peek = ' '; return true; } //读字符辅助函数 const Token* scan() { for (;; readch()) { if (peek == ' ' || peek == '\t') continue; else if (peek == '\n') line++; else break; } //处理blank switch (peek) { case '&': if (readch('&')) return ∧ else return new Token('&'); case '|': if (readch('|')) return ∨ else return new Token('|'); case '=': if (readch('=')) return &eq; else return new Token('='); case '!': if (readch('=')) return ≠ else return new Token('!'); case '<': if (readch('=')) return ≤ else return new Token('<'); case '>': if (readch('=')) return ≥ else return new Token('>'); } //处理复合运算符 if (std::isdigit(peek)) { int v = 0; do { v = 10 * v + peek - '0'; readch(); } while (std::isdigit(peek)); if (peek != '.') return new Num(v); //处理整数 double x = v; double d = 10; for (;;) { readch(); if (!std::isdigit(peek)) break; x = x + (peek - '0') / d; d *= 10; } return new Real(x); //处理浮点数 } if (std::isalpha(peek)) { std::string s; do { s += peek; readch(); } while (std::isalpha((peek))); auto w = words.find(s); if (w != words.end()) return w->second; //若词素已经存在直接返回词法单元 Word *newWord = new Word(s, Tag::ID); words.insert(std::make_pair(s, newWord)); return newWord; //词素不存在新建词素并返回词法单元 } //处理关键字、标识符 Token tok = Token(peek); peek = ' '; return new Token(tok); //处理其他特殊词素 } }; } namespace inter { class Node { public: int lexline = 0; //保存节点对应行号 Node(int l = 0) :lexline(l) {}; void error(std::string s) { throw(std::runtime_error(" near line " + std::to_string(lexline) + ": " + s)); } //报错辅助函数 static int labels; //节点标记,自动分配 int newlabel() { return ++labels; } void emitlabel(int i) { std::cout << "L" + std::to_string(i) + ":"; } void emit(std::string s) { std::cout << "\t" + s << std::endl; } //输出节点对应代码 }; int Node::labels = 0; //基类,AST中的基本节点 class Expr : public Node { public: const lexer::Token *op; const symbols::Type *type; Expr(const lexer::Token *tok, const symbols::Type *p) :op(tok), type(p) {}; virtual Expr *gen() { return this; } virtual Expr *reduce() { return this; } //gen用于生成该表达式对应的地址 //reduce用于化简表达式 //gen和reduce在不同的Expr派生类中给出自己的实现 virtual void jumping(int t, int f) { emitjumps(toString(), t, f); } void emitjumps(std::string test, int t, int f) { if (t != 0 && f != 0) { emit("if " + test + " goto L" + std::to_string(t)); emit("goto L" + std::to_string(f)); } else if (t != 0) emit("if " + test + " goto L" + std::to_string(t)); else if (f != 0) emit("iffalse " + test + " goto L" + std::to_string(f)); else; } //跳转代码,用于实现条件和循环 virtual std::string toString() { return op->toString(); } }; //Expr, 派生自Node,增加了运算符和运算类型 class Id : public Expr { public: int offset; std::string lex; Id(const lexer::Word *id, const symbols::Type *p, int b) : Expr(id, p), offset(b), lex(id->lexeme) {}; }; //派生自Expr, 单个标识符,offset保存相对地址 } namespace symbols { //符号表单元 class Env { private: std::multimap<std::string, inter::Id *> table; protected: Env *prev; public: Env(Env *n) :prev(n) {}; void put(std::string s, inter::Id *i) { table.insert(std::make_pair(s, i)); } inter::Id* get(std::string s) { for (Env *e = this; e != NULL; e = e->prev) { auto found = (e->table).find(s); if (found != e->table.end()) return found->second; } return NULL; } }; //处理作用域,以链表方式储存并逐级向上查找词素 bool numeric(Type p) { if (p == Char || p == Int || p == Double) return true; else return false; } //辅助函数,判断是否为数字 Type max(Type p1, Type p2) { if (!numeric(p1) || !numeric(p2)) return Null; else if (p1 == Double || p2 == Double) return Double; else if (p1 == Int || p2 == Int) return Int; else return Char; } //数字类型的类型转换,向上转换规则 char -> int -> double class Array : public Type { public: Type of; int size = 1; Array(int sz, Type p) : Type("[]", lexer::Tag::INDEX, sz*p.width), size(sz), of(p) {}; std::string toString() { return "[" + std::to_string(size) + "] " + of.toString(); } }; //数组类型,为一般类型的复合,保存了类型及数组大小 } namespace inter { //中间代码生成单元 class Temp :public Expr { public: static int count; int number = 0; Temp(const symbols::Type *p) :Expr((const lexer::Token*)&lexer::temp, p), number(++count) {}; virtual std::string toString() { return "t" + number; } }; int Temp::count = 0; //临时储存符,用于储存化简的中间结果并给出唯一标号 class Op : public Expr { public: Op(const lexer::Token *tok, const symbols::Type *p) :Expr(tok, p) {}; virtual Expr * reduce() { Expr* x = gen(); Temp *t = new Temp(type); emit(t->toString() + " = " + x->toString()); return t; } }; //派生自Expr,运算类型,给出reduce的另一实现 class Arith : public Op { public: Expr *expr1, *expr2; Arith(const lexer::Token *tok, Expr *x1, Expr *x2) : expr1(x1), expr2(x2), Op(tok,&symbols::Null) { type = &symbols::max(*(expr1->type), *(expr2->type)); if (*type == symbols::Null) error("type error"); } //构造Arithmetic节点,并完成类型转换 virtual Expr* gen() { return new Arith(op, expr1->reduce(), expr2->reduce()); //重载gen,对二目运算符两侧的操作数(表达式)进行进一步化简并构造Arith节点返回 } }; //派生自Op,二目运算符 class Unary : public Op { public: Expr* expr; Unary(const lexer::Token *tok, Expr *x) : Op(tok, &symbols::Null), expr(x) { type = &symbols::max(symbols::Int, *(expr->type)); if (*type == symbols::Null) error("type error"); } virtual Expr* gen() { return new Unary(op, expr->reduce()); } virtual std::string toString() { return op->toString() + " " + expr->toString(); } }; //派生自Op, 单目运算符 class Constant : public Expr { public: Constant(const lexer::Token *tok, const symbols::Type *p) :Expr(tok, p) {}; Constant(int i) :Expr(new lexer::Num(i), &symbols::Int) {}; virtual void jumping(int t, int f); bool operator==(Constant &rhs) { if (rhs.labels == this->labels && rhs.lexline == this->labels && rhs.op == this->op && rhs.type == this->type) return true; else return false; } }; //Constant派生自Expr用于处理逻辑运算产生的布尔值节点 Constant True(&lexer::True, &symbols::Bool), False(&lexer::False, &symbols::Bool); void Constant::jumping(int t, int f) { if (*this == True && t != 0) emit("goto L" + std::to_string(t)); else if (*this == False && f != 0) emit("goto L" + std::to_string(f)); } //枚举True和False节点用于比较 class Logical : public Expr { public: Expr *expr1, *expr2; Logical(const lexer::Token *tok, Expr *x1, Expr *x2) : Expr(tok, &symbols::Null), expr1(x1), expr2(x2) { type = &check(*(expr1->type), *(expr2->type)); if (*type == symbols::Null) error("type error"); } symbols::Type check(symbols::Type p1, symbols::Type p2) { if (p1 == symbols::Bool && p2 == symbols::Bool) return symbols::Bool; else return symbols::Null; } virtual Expr* gen() { int f = newlabel(); int a = newlabel(); Temp *temp = new Temp(type); this->jumping(0, f); emit(temp->toString() + " = true"); emit("goto L" + a); emitlabel(f); emit(temp->toString() + " = false"); emitlabel(a); return temp; } virtual std::string toString() { return expr1->toString() + " " + op->toString() + " " + expr2->toString(); } }; class Or :public Logical { public: Or(const lexer::Token *tok, Expr *x1, Expr *x2) :Logical(tok, x1, x2) {}; virtual void jumping(int t, int f) { int label = t != 0 ? t : newlabel(); expr1->jumping(label, 0); expr2->jumping(t, f); if (t == 0) emitlabel(label); } }; class And :public Logical { public: And(const lexer::Token *tok, Expr *x1, Expr *x2) : Logical(tok, x1, x2) {}; virtual void jumping(int t, int f) { int label = f != 0 ? f : newlabel(); expr1->jumping(0, label); expr2->jumping(t, f); if (f == 0) emitlabel(label); } }; class Not :public Logical { public: Not(const lexer::Token *tok, Expr *x2) : Logical(tok, x2, x2) {}; virtual void jumping(int t, int f) { expr2->jumping(f, t); } virtual std::string toString() { return op->toString() + " " + expr2->toString(); } }; class Rel : public Logical { public: Rel(const lexer::Token *tok, Expr *x1, Expr *x2) : Logical(tok, x1, x2) {}; symbols::Type check(symbols::Type &p1, symbols::Type &p2) { //Todo 检查数组类型 if (p1 == (p2)) return symbols::Bool; else return symbols::Null; } virtual void jumping(int t, int f) { Expr *a = expr1->reduce(); Expr *b = expr2->reduce(); std::string test = a->toString() + " " + op->toString() + " " + b->toString(); emitjumps(test, t, f); } }; class Access : public Op { public: Id* array; Expr* index; Access(Id *a, Expr *i, const symbols::Type *p) : Op(new lexer::Word("[]", lexer::Tag::INDEX), p), array(a), index(i) {}; virtual Expr* gen() { return new Access(array, index->reduce(), type); } virtual void jumping(int t, int f) { emitjumps(reduce()->toString(), t, f); } virtual std::string toString() { return array->toString() + " [ " + index->toString() + " ] "; } }; class Stmt : public Node { public: Stmt(){}; virtual void gen(int b, int a) {} int after = 0; bool operator==(Stmt & rhs) { if (rhs.after == this->after && rhs.labels == this->labels && rhs.lexline == this->lexline) return true; else return false; } }; Stmt Null,Enclosing = Null; class If : public Stmt { public: Expr *expr; Stmt *stmt; If(Expr *x, Stmt *s) : expr(x), stmt(s) { if (!(*(expr->type) == symbols::Bool)) expr->error("boolean required in if"); } virtual void gen(int b, int a) { int label = newlabel(); expr->jumping(0, a); emitlabel(label); stmt->gen(label, a); } }; class Else : public Stmt { public: Expr *expr; Stmt *stmt1, *stmt2; Else(Expr *x, Stmt *s1, Stmt *s2) { if (!(*(expr->type) == symbols::Bool)) expr->error("boolean required in if"); } virtual void gen(int b, int a) { int label1 = newlabel(); int label2 = newlabel(); expr->jumping(0, label2); emitlabel(label1); stmt1->gen(label1, a); emit("go to L" + a); emitlabel(label2); stmt2->gen(label2, a); } }; class While : public Stmt { public: Expr * expr; Stmt * stmt; While() {}; void init(Expr *x, Stmt *s) { expr = x; stmt = s; if (!(*(expr->type) == symbols::Bool)) expr->error("boolean requeired in while"); } virtual void gen(int b, int a) { after = a; expr->jumping(0, a); int label = newlabel(); emitlabel(label); stmt->gen(label, b); emit("goto L" + b); } }; class Do : public Stmt { public: Expr * expr; Stmt * stmt; Do() {}; void init(Expr *x, Stmt *s) { expr = x; stmt = s; if (!(*(expr->type) == symbols::Bool)) expr->error("boolean requeired in while"); } virtual void gen(int b, int a) { after = a; int label = newlabel(); stmt->gen(label, b); emitlabel(label); expr->jumping(b, 0); } }; class Set : public Stmt { public: Id *id; Expr *expr; Set(Id *i, Expr *x) : id(i), expr(x){ if (check(*(id->type), *(expr->type)) == symbols::Null) error("type error"); } virtual symbols::Type check(symbols::Type p1, symbols::Type p2) { if (symbols::numeric(p1) && symbols::numeric(p2)) return p2; else if (p1 == symbols::Bool && p2 == symbols::Bool) return p2; else return symbols::Null; } virtual void gen(int b, int a) { inter::Expr * ex = expr->gen(); emit(id->toString() + " = " + ex -> toString()); } }; class SetElem : public Stmt { public: Id *array; Expr *index; Expr *expr; SetElem(Access *x, Expr *y) : array(x->array), index(x->index), expr(y) { if (check(*(x->type), *(expr->type)) == symbols::Null) error("type error"); } virtual symbols::Type check(symbols::Type p1, symbols::Type p2) { if (p1 == p2) return p2; else if (symbols::numeric(p2) && symbols::numeric(p1)) return p2; else return symbols::Null; } virtual void gen(int b, int a) { std::string s1 = (index->reduce())->toString(); std::string s2 = (index->reduce())->toString(); emit(array->toString() + " [ " + s1 + " ] = " + s2); } }; class Seq : public Stmt { public: Stmt *stmt1; Stmt *stmt2; Seq(Stmt *s1, Stmt *s2) :stmt1(s1), stmt2(s2) {}; virtual void gen(int b, int a) { if (stmt1 == nullptr) stmt2->gen(b, a); else { int label = newlabel(); stmt1->gen(b, label); emitlabel(label); stmt2->gen(label, a); } } }; class Break : public Stmt { public: Stmt *stmt; Break() { if (Enclosing == Null) error("unenclosed break"); stmt = &Enclosing; } virtual void gen(int b, int a) { emit("goto L" + stmt->after); } }; } #endif