Implementation of A Simple Compiler (1) Lexer

Unfinished. Keep on working on it…

/*

Developer: Minghua Deng
Latest update: 2015/3/8 12:11

*/

#include<string>
#include<unordered_map>
#include<utility>
#include<iostream>
#include<cctype>
#include<exception>
namespace lexer {
    //词法分析器单元
    class Tag {
    public:
        enum {
            AND = 256, BASIC = 257, BREAK = 258, DO = 259, ELSE = 260,
            EQ = 261, FALSE = 262, GE = 263, ID = 264, IF = 265,
            INDEX = 266, LE = 267, MINUS = 268, NE = 269, NUM = 270,
            OR = 271, REAL = 272, TEMP = 273, TRUE = 274, WHILE = 275
        };
    };
    //枚举各个词法单元对应的的标识符
    class Token {
    public:
        int tag;
        Token(int t) :tag(t) {};
        std::string toString() { return "" + (char)tag; }
        bool operator== (Token &rhs) { 
        if (this->tag == rhs.tag) return true; 
        else return false; }
    };
    //Token类,tag标记了词法单元类型,重载了==运算符
    class Num :public Token {
    public:
        int value;
        Num(int v) : Token(Tag::NUM), value(v) {};
        std::string toString() { return "" + value; }
    };
    //Num类,整数词法单元,由Token派生
    class Word :public Token {
    public:
        std::string lexeme = "";
        Word(std::string s, int tag) :Token(tag), lexeme(s) {};
        std::string toString() { return lexeme; }
    };
    //Word类,用于保存标识符、保留字和复合词法单元,由Token派生
    const Word and = Word("&&", Tag::AND), or = Word("||", Tag::OR),
        eq = Word("==", Tag::EQ), ne = Word("!=", Tag::NE),
        le = Word("<=", Tag::LE), ge = Word(">=", Tag::GE),
        minus = Word("minus", Tag::MINUS),
        True = Word("true", Tag::TRUE),
        False = Word("false", Tag::FALSE),
        temp = Word("t", Tag::TEMP);
    //枚举了逻辑运算符以及一些特殊词法单元常量
    class Real :public Token {
    public:
        double value;
        Real(double v) :Token(Tag::REAL), value(v) {};
        std::string toString() { return "" + std::to_string(value); }
    };
    //Real类,用于保存实数类型
    class Lexer {
    public:
        int line = 1;
        //标记行数
        char peek = ' ';
        std::unordered_multimap<std::string, Word> words;
        //通过<string, word>对的map保存词素到词法单元的映射
        void reserve(Word w) { words.insert(std::make_pair(w.lexeme, w)); }
        Lexer() {
            reserve(Word("if", Tag::IF));
            reserve(Word("else", Tag::ELSE));
            reserve(Word("while", Tag::DO));
            reserve(Word("break", Tag::BREAK));
            reserve(True);
            reserve(False);
            reserve(symbols::Int);
            reserve(symbols::Char);
            reserve(symbols::Bool);
            reserve(symbols::Double);
            //预先加入一些保留字
        }
        void readch() { std::cin >> peek; }
        bool readch(char c) {
            readch();
            if (peek != c) return false;
            peek = ' ';
            return true;
        }
        //读字符辅助函数
        Token scan() {
            for (;; readch()) {
                if (peek == ' ' || peek == '\t') continue;
                else if (peek == '\n') line++;
                else break;
            }
            //处理blank
            switch (peek) {
            case '&':
                if (readch('&')) return and; else return Token('&');
            case '|':
                if (readch('|')) return or; else return Token('|');
            case '=':
                if (readch('=')) return eq; else return Token('=');
            case '!':
                if (readch('=')) return ne; else return Token('!');
            case '<':
                if (readch('=')) return le; else return Token('<');
            case '>':
                if (readch('=')) return ge; else return Token('>');
            }
            //处理复合运算符
            if (std::isdigit(peek)) {
                int v = 0;
                do {
                    v = 10 * v + peek - '0'; readch();
                } while (std::isdigit(peek));
                if (peek != '.') return Num(v);
                //处理整数
                double x = v;
                double d = 10;
                for (;;) {
                    readch();
                    if (!std::isdigit(peek)) break;
                    x = x + (peek - '0') / d;
                    d *= 10;
                }
                return Real(x);
                //处理浮点数
            }
            if (std::isalpha(peek)) {
                std::string s;
                do {
                    s += peek;
                    readch();
                } while (std::isalpha((peek)));
                auto w = words.find(s);
                if (w == words.end) return w->second;
                //若词素已经存在直接返回词法单元
                Word newWord(s, Tag::ID);
                words.insert(std::make_pair(s, newWord));
                return newWord;
                //词素不存在新建词素并返回词法单元
            }
            //处理关键字、标识符
            Token tok = Token(peek); peek = ' ';
            return tok;
            //处理其他特殊词素
        }
    };

}

namespace symbols {
    //符号表单元
    class Env {
    private:
        std::unordered_multimap<lexer::Token, inter::Id> table;
    protected:
        Env *prev;
    public:
        Env(Env *n) :prev(n) {};
        void put(lexer::Token w, inter::Id i) { table.insert(std::make_pair(w, i)); }
        inter::Id* get(lexer::Token w) {
            for (Env *e = this; e != NULL; e = e->prev) {
                auto found = (e->table).find(w);
                if (found != e->table.end) return &(found->second);
            }
            return NULL;
        }
    };
    //处理作用域,以链表方式储存并逐级向上查找词素
    class Type :public lexer::Word {
    public:
        int width = 0;
        Type(std::string s, int tag, int w) :lexer::Word(s, tag), width(w) {};
        bool operator==(Type const & const rhs) {
            if (rhs.lexeme == this->lexeme && rhs.tag == this->tag && rhs.width == this->width) return true;
            else return false;
        }
    };
    //Type类保存类型符,重载==用于比较,由word派生
    const Type
        Int("int", lexer::Tag::BASIC, 4),
        Double("double", lexer::Tag::BASIC, 16),
        Char("char", lexer::Tag::BASIC, 1),
        Bool("bool", lexer::Tag::BASIC, 1),
        Null("NULL", -1, 0);
    //枚举一些内置类型常量
    //Null用作空白类型标示
    bool numeric(Type p) {
        if (p == Char || p == Int || p == Double) return true;
        else return false;
    }
    //辅助函数,判断是否为数字
    Type max(Type p1, Type p2) {
        if (!numeric(p1) || !numeric(p2)) return Null;
        else if (p1 == Double || p2 == Double) return Double;
        else if (p1 == Int || p2 == Int) return Int;
        else return Char;
    }
    //数字类型的类型转换,向上转换规则 char -> int -> double
    class Array : public Type {
    public:
        Type of;
        int size = 1;
        Array(int sz, Type p) : Type("[]", lexer::Tag::INDEX, sz*p.width), size(sz), of(p) {};
        std::string toString() { return "[" + std::to_string(size) + "] " + of.toString(); }
    };
    //数组类型,为一般类型的复合,保存了类型及数组大小
}

namespace inter {
    //中间代码生成单元
    class Node {
    public:
        int lexline = 0;
        //保存节点对应行号
        Node(int i) :lexline(i) {};
        void error(std::string s) { throw(std::runtime_error(" near line " + std::to_string(lexline) + ": " + s)); }
        //报错辅助函数
        static int labels;
        //节点标记,自动分配
        int newlabel() { return ++labels; }
        void emitlabel(int i) { std::cout << "L" + std::to_string(i) + ":"; }
        void emit(std::string s) { std::cout << "\t" + s; }
        //输出节点对应代码
    };
    int Node::labels = 0;
    //基类,AST中的基本节点
    class Expr : public Node {
    public:
        lexer::Token op;
        symbols::Type type;
        Expr(lexer::Token tok, symbols::Type p) :op(tok), type(p) {};
        virtual Expr gen() { return *this; }
        virtual Expr reduce() { return *this; }
        //gen用于生成该表达式对应的地址
        //reduce用于化简表达式
        //gen和reduce在不同的Expr派生类中给出自己的实现
        virtual void jumping(int t, int f) { emitjumps(toString(), t, f); }
        void emitjumps(std::string test, int t, int f) {
            if (t != 0 && f != 0) {
                emit("if " + test + " goto L" + std::to_string(t));
                emit("goto L" + std::to_string(f));
            }
            else if (t != 0) emit("if " + test + " goto L" + std::to_string(t));
            else if (f != 0) emit("iffalse " + test + " goto L" + std::to_string(f));
            else;
        }
        //跳转代码,用于实现条件和循环
        std::string toString() {
            return op.toString();
        }
    };
    //Expr, 派生自Node,增加了运算符和运算类型
    class Id : public Expr {
    public: int offset;
            Id(lexer::Word id, symbols::Type p, int b) : Expr(id, p), offset(b) {};
    };
    //派生自Expr, 单个标识符,offset保存相对地址
    class Op : public Expr {
    public:
        Op(lexer::Token tok, symbols::Type p) :Expr(tok, p) {};
        virtual Expr reduce() {
            Expr x = gen();
            Temp t = Temp(type);
            emit(t.toString() + " = " + x.toString());
            return t;
        }
    };
    //派生自Expr,运算类型,给出reduce的另一实现
    class Arith : public Op {
    public:
        Expr expr1, expr2;
        Arith(lexer::Token tok, Expr &x1, Expr &x2) : expr1(x1), expr2(x2), Op(tok,symbols::Null) {
            type = symbols::max(expr1.type, expr2.type);
            if (type == symbols::Null) error("type error");
        }
        //构造Arithmetic节点,并完成类型转换
        virtual Expr gen() {
            return Arith(op, expr1.reduce(), expr2.reduce());
        //重载gen,对二目运算符两侧的操作数(表达式)进行进一步化简并构造Arith节点返回
        }
    };
    //派生自Op,二目运算符
    class Temp :public Expr {
    public:
        static int count;
        int number = 0;
        Temp(symbols::Type p) :Expr(lexer::temp, p), number(++count) {};
        std::string toString() { return "t" + number; }
    };
    int Temp::count = 0;
    //临时储存符,用于储存化简的中间结果并给出唯一标号
    class Unary : public Op {
    public:
        Expr expr;
        Unary(lexer::Token tok, Expr& x) : Op(tok, symbols::Null), expr(x) {
            type = symbols::max(symbols::Int, expr.type);
            if (type == symbols::Null) error("type error");
        }
        virtual Expr gen() { return Unary(op, expr.reduce()); }
        std::string toString() { return op.toString() + " " + expr.toString(); }
    };
    //派生自Op, 单目运算符
    class Constant : public Expr {
    public:
        Constant(lexer::Token tok, symbols::Type p) :Expr(tok, p) {};
        Constant(int i) :Expr(lexer::Num(i), symbols::Int) {};
        virtual void jumping(int t, int f) {
            if (*this == True && t != 0) emit("goto L" + std::to_string(t));
            else if (*this == False && f != 0) emit("goto L" + std::to_string(f));
        }
        bool operator==(Constant &rhs) {
            if (rhs.labels == this->labels && rhs.lexline == this->labels && rhs.op == this->op && rhs.type == this->type)
                return true;
            else return false;
        }
    };
    //Constant派生自Expr用于处理逻辑运算产生的布尔值节点
    Constant
        True(lexer::True, symbols::Bool),
        False(lexer::False, symbols::Bool);
    //枚举True和False节点用于比较
}

Leave a Reply

Your email address will not be published. Required fields are marked *