Implementation of A Simple Compiler (2) Lexer

Parser has been finished without comments.

/*

Developer: Minghua Deng
Latest update: 2015/3/8 12:11

*/

#include<string>
#include<unordered_map>
#include<utility>
#include<iostream>
#include<cctype>
#include<exception>
namespace lexer {
    //词法分析器单元
    class Tag {
    public:
        enum {
            AND = 256, BASIC = 257, BREAK = 258, DO = 259, ELSE = 260,
            EQ = 261, FALSE = 262, GE = 263, ID = 264, IF = 265,
            INDEX = 266, LE = 267, MINUS = 268, NE = 269, NUM = 270,
            OR = 271, REAL = 272, TEMP = 273, TRUE = 274, WHILE = 275
        };
    };
    //枚举各个词法单元对应的的标识符
    class Token {
    public:
        int tag;
        Token(int t) :tag(t) {};
        std::string toString() { return "" + (char)tag; }
        bool operator== (Token &rhs) { if (this->tag == rhs.tag) return true; else return false; }
    };
    //Token类,tag标记了词法单元类型,重载了==运算符
    class Num :public Token {
    public:
        int value;
        Num(int v) : Token(Tag::NUM), value(v) {};
        std::string toString() { return "" + value; }
    };
    //Num类,整数词法单元,由Token派生
    class Word :public Token {
    public:
        std::string lexeme = "";
        Word(std::string s, int tag) :Token(tag), lexeme(s) {};
        std::string toString() { return lexeme; }
    };
    //Word类,用于保存标识符、保留字和复合词法单元,由Token派生
    const Word and = Word("&&", Tag::AND), or = Word("||", Tag::OR),
        eq = Word("==", Tag::EQ), ne = Word("!=", Tag::NE),
        le = Word("<=", Tag::LE), ge = Word(">=", Tag::GE),
        minus = Word("minus", Tag::MINUS),
        True = Word("true", Tag::TRUE),
        False = Word("false", Tag::FALSE),
        temp = Word("t", Tag::TEMP);
    //枚举了逻辑运算符以及一些特殊词法单元常量
    class Real :public Token {
    public:
        double value;
        Real(double v) :Token(Tag::REAL), value(v) {};
        std::string toString() { return "" + std::to_string(value); }
    };
    //Real类,用于保存实数类型
    class Lexer {
    public:
        int line = 1;
        //标记行数
        char peek = ' ';
        std::unordered_multimap<std::string, Word> words;
        //通过<string, word>对的map保存词素到词法单元的映射
        void reserve(Word w) { words.insert(std::make_pair(w.lexeme, w)); }
        Lexer() {
            reserve(Word("if", Tag::IF));
            reserve(Word("else", Tag::ELSE));
            reserve(Word("while", Tag::DO));
            reserve(Word("break", Tag::BREAK));
            reserve(True);
            reserve(False);
            reserve(symbols::Int);
            reserve(symbols::Char);
            reserve(symbols::Bool);
            reserve(symbols::Double);
            //预先加入一些保留字
        }
        void readch() { std::cin >> peek; }
        bool readch(char c) {
            readch();
            if (peek != c) return false;
            peek = ' ';
            return true;
        }
        //读字符辅助函数
        Token scan() {
            for (;; readch()) {
                if (peek == ' ' || peek == '\t') continue;
                else if (peek == '\n') line++;
                else break;
            }
            //处理blank
            switch (peek) {
            case '&':
                if (readch('&')) return and; else return Token('&');
            case '|':
                if (readch('|')) return or; else return Token('|');
            case '=':
                if (readch('=')) return eq; else return Token('=');
            case '!':
                if (readch('=')) return ne; else return Token('!');
            case '<':
                if (readch('=')) return le; else return Token('<');
            case '>':
                if (readch('=')) return ge; else return Token('>');
            }
            //处理复合运算符
            if (std::isdigit(peek)) {
                int v = 0;
                do {
                    v = 10 * v + peek - '0'; readch();
                } while (std::isdigit(peek));
                if (peek != '.') return Num(v);
                //处理整数
                double x = v;
                double d = 10;
                for (;;) {
                    readch();
                    if (!std::isdigit(peek)) break;
                    x = x + (peek - '0') / d;
                    d *= 10;
                }
                return Real(x);
                //处理浮点数
            }
            if (std::isalpha(peek)) {
                std::string s;
                do {
                    s += peek;
                    readch();
                } while (std::isalpha((peek)));
                auto w = words.find(s);
                if (w == words.end) return w->second;
                //若词素已经存在直接返回词法单元
                Word newWord(s, Tag::ID);
                words.insert(std::make_pair(s, newWord));
                return newWord;
                //词素不存在新建词素并返回词法单元
            }
            //处理关键字、标识符
            Token tok = Token(peek); peek = ' ';
            return tok;
            //处理其他特殊词素
        }
    };

}

namespace symbols {
    //符号表单元
    class Env {
    private:
        std::unordered_multimap<lexer::Token, inter::Id> table;
    protected:
        Env *prev;
    public:
        Env(Env *n) :prev(n) {};
        void put(lexer::Token w, inter::Id i) { table.insert(std::make_pair(w, i)); }
        inter::Id* get(lexer::Token w) {
            for (Env *e = this; e != NULL; e = e->prev) {
                auto found = (e->table).find(w);
                if (found != e->table.end) return &(found->second);
            }
            return NULL;
        }
    };
    //处理作用域,以链表方式储存并逐级向上查找词素
    class Type :public lexer::Word {
    public:
        int width = 0;
        Type(std::string s, int tag, int w) :lexer::Word(s, tag), width(w) {};
        bool operator==(Type const & rhs) {
            if (rhs.lexeme == this->lexeme && rhs.tag == this->tag && rhs.width == this->width) return true;
            else return false;
        }
    };
    //Type类保存类型符,重载==用于比较,由word派生
    const Type
        Int("int", lexer::Tag::BASIC, 4),
        Double("double", lexer::Tag::BASIC, 16),
        Char("char", lexer::Tag::BASIC, 1),
        Bool("bool", lexer::Tag::BASIC, 1),
        Null("NULL", -1, 0);
    //枚举一些内置类型常量
    //Null用作空白类型标示
    bool numeric(Type p) {
        if (p == Char || p == Int || p == Double) return true;
        else return false;
    }
    //辅助函数,判断是否为数字
    Type max(Type p1, Type p2) {
        if (!numeric(p1) || !numeric(p2)) return Null;
        else if (p1 == Double || p2 == Double) return Double;
        else if (p1 == Int || p2 == Int) return Int;
        else return Char;
    }
    //数字类型的类型转换,向上转换规则 char -> int -> double
    class Array : public Type {
    public:
        Type of;
        int size = 1;
        Array(int sz, Type p) : Type("[]", lexer::Tag::INDEX, sz*p.width), size(sz), of(p) {};
        std::string toString() { return "[" + std::to_string(size) + "] " + of.toString(); }
    };
    //数组类型,为一般类型的复合,保存了类型及数组大小
}

namespace inter {
    //中间代码生成单元
    class Node {
    public:
        int lexline = 0;
        //保存节点对应行号
        Node(int i) :lexline(i) {};
        void error(std::string s) { throw(std::runtime_error(" near line " + std::to_string(lexline) + ": " + s)); }
        //报错辅助函数
        static int labels;
        //节点标记,自动分配
        int newlabel() { return ++labels; }
        void emitlabel(int i) { std::cout << "L" + std::to_string(i) + ":"; }
        void emit(std::string s) { std::cout << "\t" + s; }
        //输出节点对应代码
    };
    int Node::labels = 0;
    //基类,AST中的基本节点
    class Expr : public Node {
    public:
        lexer::Token op;
        symbols::Type type;
        Expr(lexer::Token tok, symbols::Type p) :op(tok), type(p) {};
        virtual Expr gen() { return *this; }
        virtual Expr reduce() { return *this; }
        //gen用于生成该表达式对应的地址
        //reduce用于化简表达式
        //gen和reduce在不同的Expr派生类中给出自己的实现
        virtual void jumping(int t, int f) { emitjumps(toString(), t, f); }
        void emitjumps(std::string test, int t, int f) {
            if (t != 0 && f != 0) {
                emit("if " + test + " goto L" + std::to_string(t));
                emit("goto L" + std::to_string(f));
            }
            else if (t != 0) emit("if " + test + " goto L" + std::to_string(t));
            else if (f != 0) emit("iffalse " + test + " goto L" + std::to_string(f));
            else;
        }
        //跳转代码,用于实现条件和循环
        virtual std::string toString() {
            return op.toString();
        }
    };
    //Expr, 派生自Node,增加了运算符和运算类型
    class Id : public Expr {
    public: int offset;
            Id(lexer::Word id, symbols::Type p, int b) : Expr(id, p), offset(b) {};
    };
    //派生自Expr, 单个标识符,offset保存相对地址
    class Op : public Expr {
    public:
        Op(lexer::Token tok, symbols::Type p) :Expr(tok, p) {};
        virtual Expr reduce() {
            Expr x = gen();
            Temp t = Temp(type);
            emit(t.toString() + " = " + x.toString());
            return t;
        }
    };
    //派生自Expr,运算类型,给出reduce的另一实现
    class Arith : public Op {
    public:
        Expr &expr1, &expr2;
        Arith(lexer::Token tok, Expr &x1, Expr &x2) : expr1(x1), expr2(x2), Op(tok,symbols::Null) {
            type = symbols::max(expr1.type, expr2.type);
            if (type == symbols::Null) error("type error");
        }
        //构造Arithmetic节点,并完成类型转换
        virtual Expr gen() {
            return Arith(op, expr1.reduce(), expr2.reduce());
        //重载gen,对二目运算符两侧的操作数(表达式)进行进一步化简并构造Arith节点返回
        }
    };
    //派生自Op,二目运算符
    class Temp :public Expr {
    public:
        static int count;
        int number = 0;
        Temp(symbols::Type p) :Expr(lexer::temp, p), number(++count) {};
        virtual std::string toString() { return "t" + number; }
    };
    int Temp::count = 0;
    //临时储存符,用于储存化简的中间结果并给出唯一标号
    class Unary : public Op {
    public:
        Expr& expr;
        Unary(lexer::Token tok, Expr &x) : Op(tok, symbols::Null), expr(x) {
            type = symbols::max(symbols::Int, expr.type);
            if (type == symbols::Null) error("type error");
        }
        virtual Expr gen() { return Unary(op, expr.reduce()); }
        virtual std::string toString() { return op.toString() + " " + expr.toString(); }
    };
    //派生自Op, 单目运算符
    class Constant : public Expr {
    public:
        Constant(lexer::Token tok, symbols::Type p) :Expr(tok, p) {};
        Constant(int i) :Expr(lexer::Num(i), symbols::Int) {};
        virtual void jumping(int t, int f) {
            if (*this == True && t != 0) emit("goto L" + std::to_string(t));
            else if (*this == False && f != 0) emit("goto L" + std::to_string(f));
        }
        bool operator==(Constant &rhs) {
            if (rhs.labels == this->labels && rhs.lexline == this->labels && rhs.op == this->op && rhs.type == this->type)
                return true;
            else return false;
        }
    };
    //Constant派生自Expr用于处理逻辑运算产生的布尔值节点
    Constant
        True(lexer::True, symbols::Bool),
        False(lexer::False, symbols::Bool);
    //枚举True和False节点用于比较
    class Logical : public Expr {
    public:
        Expr &expr1, &expr2;
        Logical(lexer::Token tok, Expr &x1, Expr &x2) : Expr(tok, symbols::Null), expr1(x1), expr2(x2) {
            type = check(expr1.type, expr2.type);
            if (type == symbols::Null) error("type error");
        }

        symbols::Type check(symbols::Type p1, symbols::Type p2) {
            if (p1 == symbols::Bool && p2 == symbols::Bool) return symbols::Bool;
            else return symbols::Null;
        }

        virtual Expr gen() {
            int f = newlabel(); int a = newlabel();
            Temp temp(type);
            this->jumping(0, f);
            emit(temp.toString() + " = true");
            emit("goto L" + a);
            emitlabel(f);
            emit(temp.toString() + " = false");
            emitlabel(a);
            return temp;
        }

        virtual std::string toString() {
            return expr1.toString() + " " + op.toString() + " " + expr2.toString();
        }
    };

    class Or :public Logical {
    public:
        Or(lexer::Token tok, Expr &x1, Expr &x2) :Logical(tok, x1, x2) {};
        virtual void jumping(int t, int f) {
            int label = t != 0 ? t : newlabel();
            expr1.jumping(label, 0);
            expr2.jumping(t, f);
            if (t == 0) emitlabel(label);
        } 
    };

    class And :public Logical {
    public:
        And(lexer::Token tok, Expr &x1, Expr &x2) : Logical(tok, x1, x2) {};
        virtual void jumping(int t, int f) {
            int label = f != 0 ? f : newlabel();
            expr1.jumping(0, label);
            expr2.jumping(t, f);
            if (f == 0) emitlabel(label);
        }
    };

    class Not :public Logical {
    public:
        Not(lexer::Token tok, Expr &x2) : Logical(tok, x2, x2) {};
        virtual void jumping(int t, int f) { expr2.jumping(f, t); }
        virtual std::string toString() { return op.toString() + " " + expr2.toString(); }
    };

    class Rel : public Logical {
    public:
        Rel(lexer::Token tok, Expr &x1, Expr &x2) : Logical(tok, x1, x2) {};
        symbols::Type check(symbols::Type &p1, symbols::Type &p2) {
            //Todo 检查数组类型
            if (p1 == (p2)) return symbols::Bool;
            else return symbols::Null;
        }
        virtual void jumping(int t, int f) {
            Expr &a = expr1.reduce();
            Expr &b = expr2.reduce();
            std::string test = a.toString() + " " + op.toString() + " " + b.toString();
            emitjumps(test, t, f);
        }
    };

    class Access : public Op {
    public:
        Id array;
        Expr& index;
        Access(Id a, Expr &i, symbols::Type p) : Op(lexer::Word("[]", lexer::Tag::INDEX), p), array(a), index(i) {};
        virtual Expr gen() { return Access(array, index.reduce(), type); }
        virtual void jumping(int t, int f) { emitjumps(reduce().toString(), t, f); }
        virtual std::string toString() {
            return array.toString() + " [ " + index.toString() + " ] ";
        }
    };

    class Stmt : public Node {
    public:
        Stmt() {  };
        virtual void gen(int b, int a) {}
        int after = 0;
        bool operator==(Stmt & rhs) {
            if (rhs.after == this->after && rhs.labels == this->labels && rhs.lexline == this->lexline)
                return true;
            else return false;
        }
    };
    Stmt 
        Null,Enclosing = Null;

    class If : public Stmt {
    public:
        Expr &expr; 
        Stmt &stmt;
        If(Expr &x, Stmt &s) : expr(x), stmt(s) {
            if (!(expr.type == symbols::Bool)) expr.error("boolean required in if");
        }
        virtual void gen(int b, int a) {
            int label = newlabel();
            expr.jumping(0, a);
            emitlabel(label);
            stmt.gen(label, a);
        }
    };

    class Else : public Stmt {
    public:
        Expr &expr;
        Stmt &stmt1, &stmt2;
        Else(Expr &x, Stmt &s1, Stmt &s2) {
            if (!(expr.type == symbols::Bool)) expr.error("boolean required in if");
        }
        virtual void gen(int b, int a) {
            int label1 = newlabel();
            int label2 = newlabel();
            expr.jumping(0, label2);
            emitlabel(label1);
            stmt1.gen(label1, a);
            emit("go to L" + a);
            emitlabel(label2);
            stmt2.gen(label2, a);
        }
    };

    class While : public Stmt {
    public:
        Expr & expr;
        Stmt & stmt;
        While() {};
        void init(Expr &x, Stmt &s) {
            expr = x;
            stmt = s;
            if (!(expr.type == symbols::Bool))
                expr.error("boolean requeired in while");
        }
        virtual void gen(int b, int a) {
            after = a;
            expr.jumping(0, a);
            int label = newlabel();
            emitlabel(label);
            stmt.gen(label, b);
            emit("goto L" + b);
        }
    };

    class Do : public Stmt {
    public:
        Expr & expr;
        Stmt & stmt;
        While() {};
        void init(Expr &x, Stmt &s) {
            expr = x;
            stmt = s;
            if (!(expr.type == symbols::Bool))
                expr.error("boolean requeired in while");
        }
        virtual void gen(int b, int a) {
            after = a;
            int label = newlabel();
            stmt.gen(label, b);
            emitlabel(label);
            expr.jumping(b, 0);
        }
    };

    class Set : public Stmt {
    public:
        Id id;
        Expr &expr;
        Set(Id i, Expr &x) : id(i), expr(x){
            if (check(id.type, expr.type) == symbols::Null) error("type error");
        }
        virtual symbols::Type check(symbols::Type p1, symbols::Type p2) {
            if (symbols::numeric(p1) && symbols::numeric(p2)) return p2;
            else if (p1 == symbols::Bool && p2 == symbols::Bool) return p2;
            else return symbols::Null;
        }
        virtual void gen(int b, int a) {
            emit(id.toString() + " = " + expr.gen().toString);
        }
    };

    class SetElem : public Stmt {
    public:
        Id array;
        Expr &index;
        Expr &expr;
        SetElem(Access &x, Expr &y) : array(x.array), index(x.index), expr(y){
            if (check(x.type, expr.type) == symbols::Null) error("type error");
        }

        virtual symbols::Type check(symbols::Type p1, symbols::Type p2) {
            if (p1 == p2) return p2;
            else if (symbols::numeric(p2) && symbols::numeric(p1)) return p2;
            else return symbols::Null;
        }

        virtual void gen(int b, int a) {
            std::string s1 = index.reduce().toString();
            std::string s2 = index.reduce().toString();
            emit(array.toString() + " [ " + s1 + " ] = " + s2);
        }

    };
    class Seq : public Stmt {
    public:
        Stmt &stmt1;
        Stmt &stmt2;
        Seq(Stmt &s1, Stmt &s2) :stmt1(s1), stmt2(s2) {};
        virtual void gen(int b, int a) {
            if (stmt1 == Null) stmt2.gen(b, a);
            else {
                int label = newlabel();
                stmt1.gen(b, label);
                emitlabel(label);
                stmt2.gen(label, a);
            }
        }
    };
    class Break : public Stmt {
    public:
        Stmt &stmt;
        Break() {
            if (Enclosing == Null) error("unenclosed break");
            stmt = Enclosing;
        }
        virtual void gen(int b, int a) {
            emit("goto L" + stmt.after);
        }
    };
}

1 thought on “Implementation of A Simple Compiler (2) Lexer

Leave a Reply

Your email address will not be published. Required fields are marked *