浏览代码

refactor: extract parser to separate crate

Signed-off-by: Matthias Seitz <matthias.seitz@outlook.de>
Matthias Seitz 3 年之前
父节点
当前提交
c79f10b729

+ 3 - 0
Cargo.toml

@@ -69,3 +69,6 @@ lto = true
 [features]
 default = ["llvm"]
 llvm = ["inkwell", "libc"]
+
+[workspace]
+members = ["solang-parser"]

+ 22 - 0
solang-parser/Cargo.toml

@@ -0,0 +1,22 @@
+[package]
+name = "solang-parser"
+version = "0.1.0"
+authors = ["Sean Young <sean@mess.org>"]
+homepage = "https://github.com/hyperledger-labs/solang"
+documentation = "https://solang.readthedocs.io/"
+license = "Apache-2.0"
+build = "build.rs"
+description = "Solang Solidity Parser"
+keywords = [ "solidity", "parser" ]
+edition = "2021"
+
+[build-dependencies]
+lalrpop = "0.19"
+
+[dependencies]
+lalrpop-util = "0.19"
+num-bigint = "0.4"
+num-traits = "0.2"
+num-rational = "0.4"
+phf = { version = "0.10", features = ["macros"] }
+unicode-xid = "0.2.0"

+ 8 - 0
solang-parser/build.rs

@@ -0,0 +1,8 @@
+
+/// Build-script entry point: configures LALRPOP and runs it.
+///
+/// Panics (and therefore fails the build) if `process()` returns an error.
+fn main() {
+    let mut config = lalrpop::Configuration::new();
+
+    // Use Cargo's directory conventions and ask LALRPOP to emit rerun directives.
+    config.use_cargo_dir_conventions();
+    config.emit_rerun_directives(true);
+
+    config.process().unwrap();
+}

+ 164 - 0
solang-parser/src/diagnostics.rs

@@ -0,0 +1,164 @@
+use crate::pt;
+use crate::pt::Loc;
+
+/// Severity of a diagnostic.
+///
+/// Variants are declared from least to most severe, so the derived ordering
+/// gives `Debug < Info < Warning < Error`.
+#[derive(Debug, Eq, Hash, PartialOrd, Ord, PartialEq)]
+pub enum Level {
+    Debug,
+    Info,
+    Warning,
+    Error,
+}
+
+impl Level {
+    /// Lower-case name of this level as a static string.
+    pub fn to_string(&self) -> &'static str {
+        match *self {
+            Self::Debug => "debug",
+            Self::Info => "info",
+            Self::Warning => "warning",
+            Self::Error => "error",
+        }
+    }
+}
+
+/// Category of a diagnostic, stored alongside its severity level.
+///
+/// `None` is used by the constructors for debug/info messages and for the
+/// `error_with_note(s)` helpers; `Warning` is used by every warning constructor.
+#[derive(Debug, Eq, Hash, PartialOrd, Ord, PartialEq)]
+pub enum ErrorType {
+    None,
+    ParserError,
+    SyntaxError,
+    DeclarationError,
+    TypeError,
+    Warning,
+}
+
+/// An additional message with its own source location, attached to a diagnostic.
+#[derive(Debug, Eq, Hash, PartialOrd, Ord, PartialEq)]
+pub struct Note {
+    // Source location the note refers to.
+    pub pos: pt::Loc,
+    // Text of the note.
+    pub message: String,
+}
+
+/// A single message (debug, info, warning or error) with an optional location
+/// and any number of attached notes.
+#[derive(Debug, Eq, Hash, PartialOrd, Ord, PartialEq)]
+pub struct Diagnostic {
+    // Severity of the message.
+    pub level: Level,
+    // Category of the message.
+    pub ty: ErrorType,
+    // Source location; optional, although every constructor in this file sets it.
+    pub pos: Option<pt::Loc>,
+    // Main message text.
+    pub message: String,
+    // Extra notes, each with its own location.
+    pub notes: Vec<Note>,
+}
+
+impl Diagnostic {
+    /// Shared constructor used by all public helpers below; always stores
+    /// the location as `Some(pos)`.
+    fn with_parts(
+        level: Level,
+        ty: ErrorType,
+        pos: Loc,
+        message: String,
+        notes: Vec<Note>,
+    ) -> Self {
+        Diagnostic {
+            level,
+            ty,
+            pos: Some(pos),
+            message,
+            notes,
+        }
+    }
+
+    /// Wrap a single note location and text into a one-element note list.
+    fn single_note(note_pos: Loc, note: String) -> Vec<Note> {
+        vec![Note {
+            pos: note_pos,
+            message: note,
+        }]
+    }
+
+    /// Debug-level message without a category.
+    pub fn debug(pos: Loc, message: String) -> Self {
+        Self::with_parts(Level::Debug, ErrorType::None, pos, message, Vec::new())
+    }
+
+    /// Info-level message without a category.
+    pub fn info(pos: Loc, message: String) -> Self {
+        Self::with_parts(Level::Info, ErrorType::None, pos, message, Vec::new())
+    }
+
+    /// Error categorised as `ErrorType::ParserError`.
+    pub fn parser_error(pos: Loc, message: String) -> Self {
+        Self::with_parts(
+            Level::Error,
+            ErrorType::ParserError,
+            pos,
+            message,
+            Vec::new(),
+        )
+    }
+
+    /// Error categorised as `ErrorType::SyntaxError`.
+    pub fn error(pos: Loc, message: String) -> Self {
+        Self::with_parts(
+            Level::Error,
+            ErrorType::SyntaxError,
+            pos,
+            message,
+            Vec::new(),
+        )
+    }
+
+    /// Error categorised as `ErrorType::DeclarationError`.
+    pub fn decl_error(pos: Loc, message: String) -> Self {
+        Self::with_parts(
+            Level::Error,
+            ErrorType::DeclarationError,
+            pos,
+            message,
+            Vec::new(),
+        )
+    }
+
+    /// Error categorised as `ErrorType::TypeError`.
+    pub fn type_error(pos: Loc, message: String) -> Self {
+        Self::with_parts(
+            Level::Error,
+            ErrorType::TypeError,
+            pos,
+            message,
+            Vec::new(),
+        )
+    }
+
+    /// Warning without notes.
+    pub fn warning(pos: Loc, message: String) -> Self {
+        Self::with_parts(
+            Level::Warning,
+            ErrorType::Warning,
+            pos,
+            message,
+            Vec::new(),
+        )
+    }
+
+    /// Warning carrying exactly one note.
+    pub fn warning_with_note(pos: Loc, message: String, note_pos: Loc, note: String) -> Self {
+        Self::with_parts(
+            Level::Warning,
+            ErrorType::Warning,
+            pos,
+            message,
+            Self::single_note(note_pos, note),
+        )
+    }
+
+    /// Warning carrying the given list of notes.
+    pub fn warning_with_notes(pos: Loc, message: String, notes: Vec<Note>) -> Self {
+        Self::with_parts(Level::Warning, ErrorType::Warning, pos, message, notes)
+    }
+
+    /// Error carrying exactly one note; the category is `ErrorType::None`.
+    pub fn error_with_note(pos: Loc, message: String, note_pos: Loc, note: String) -> Self {
+        Self::with_parts(
+            Level::Error,
+            ErrorType::None,
+            pos,
+            message,
+            Self::single_note(note_pos, note),
+        )
+    }
+
+    /// Error carrying the given list of notes; the category is `ErrorType::None`.
+    pub fn error_with_notes(pos: Loc, message: String, notes: Vec<Note>) -> Self {
+        Self::with_parts(Level::Error, ErrorType::None, pos, message, notes)
+    }
+}

+ 77 - 0
solang-parser/src/doc.rs

@@ -0,0 +1,77 @@
+// Parse the tags and their values out of doc comments
+use crate::lexer::CommentType;
+use crate::pt::DocComment;
+
+/// Convert comments to individual lines, stripping the comment decoration.
+///
+/// Each input entry is `(start, kind, text)`. The result holds one
+/// `(offset, text)` pair per retained line:
+///
+/// * a line comment yields its trimmed text, paired with `start` unchanged;
+/// * a block comment is split into lines; leading whitespace and `*`
+///   characters are skipped and trailing whitespace is trimmed. Lines with
+///   nothing else on them are dropped. The offset is `start` plus the byte
+///   position of the first retained character within the comment text.
+fn to_lines<'a>(comments: &[(usize, CommentType, &'a str)]) -> Vec<(usize, &'a str)> {
+    let mut res = Vec::new();
+
+    for (start, ty, comment) in comments.iter() {
+        match ty {
+            CommentType::Line => res.push((*start, comment.trim())),
+            CommentType::Block => {
+                let mut start = *start;
+
+                // `split_inclusive` keeps the line terminator in each piece, so
+                // `s.len()` advances `start` past the `\n` (or `\r\n`) as well.
+                // With `lines()` the terminator was not counted and the offset
+                // of every line after the first drifted.
+                for s in comment.split_inclusive('\n') {
+                    if let Some((i, _)) = s
+                        .char_indices()
+                        .find(|(_, ch)| !ch.is_whitespace() && *ch != '*')
+                    {
+                        // trim_end also removes the terminator kept by split_inclusive
+                        res.push((start + i, s[i..].trim_end()))
+                    }
+
+                    start += s.len();
+                }
+            }
+        }
+    }
+
+    res
+}
+
+/// Parse the tags out of a sequence of doc comments.
+///
+/// The comments are first flattened into lines with `to_lines`. Then:
+///
+/// * a line starting with `@` opens a new tag: the name runs up to the first
+///   whitespace character and the trimmed remainder is the value;
+/// * any other non-empty line is appended, after a single space, to the value
+///   of the most recent tag;
+/// * if no tag has been seen yet, the line opens an implicit `notice` tag.
+///
+/// `offset` is always the offset reported by `to_lines` for the line that
+/// opened the tag, so explicit tags and the implicit `notice` tag use the
+/// same coordinate system.
+pub fn tags(lines: &[(usize, CommentType, &str)]) -> Vec<DocComment> {
+    let mut tags: Vec<DocComment> = Vec::new();
+
+    for (start_offset, line) in to_lines(lines) {
+        if let Some(rest) = line.strip_prefix('@') {
+            // `find` returns a char boundary, so splitting here cannot panic
+            // even when the tag name contains multi-byte characters.
+            let name_len = rest.find(char::is_whitespace).unwrap_or(rest.len());
+            let (name, value) = rest.split_at(name_len);
+
+            tags.push(DocComment {
+                // the `@` is the first byte of the line, i.e. at `start_offset`
+                offset: start_offset,
+                tag: name.to_owned(),
+                value: value.trim().to_owned(),
+            });
+        } else if let Some(tag) = tags.last_mut() {
+            // continuation of the previous tag's value
+            let line = line.trim();
+            if !line.is_empty() {
+                tag.value.push(' ');
+                tag.value.push_str(line);
+            }
+        } else {
+            // text before any explicit tag is treated as a notice
+            tags.push(DocComment {
+                offset: start_offset,
+                tag: String::from("notice"),
+                value: line.trim().to_owned(),
+            });
+        }
+    }
+
+    tags
+}

+ 1504 - 0
solang-parser/src/lexer.rs

@@ -0,0 +1,1504 @@
+//
+// Solidity custom lexer. Solidity needs a custom lexer for two reasons:
+//  - comments and doc comments
+//  - pragma value is [^;]+
+//
+use phf::phf_map;
+use std::fmt;
+use std::iter::Peekable;
+use std::str::CharIndices;
+use unicode_xid::UnicodeXID;
+
+use crate::pt::Loc;
+
+/// Result alias for a lexed item: a token sitting between two locations on
+/// success, or an error.
+pub type Spanned<Token, Loc, Error> = Result<(Loc, Token, Loc), Error>;
+
+/// Kind of doc comment: `Line` is displayed with a `///` prefix and `Block`
+/// inside `/** ... */` (see the `Display` impl for `Token`).
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub enum CommentType {
+    Line,
+    Block,
+}
+
+/// Tokens produced by the lexer.
+///
+/// Variants carrying `&'input str` borrow their text from the lexer input;
+/// the remaining variants are keywords, type names, operators and punctuation.
+#[derive(Copy, Clone, PartialEq, Debug)]
+pub enum Token<'input> {
+    // Identifiers, literals and doc comments (borrowed from the input)
+    Identifier(&'input str),
+    StringLiteral(&'input str),
+    AddressLiteral(&'input str),
+    HexLiteral(&'input str),
+    // (base, exponent); the exponent is empty when absent
+    Number(&'input str, &'input str),
+    // (integer part, fractional part, exponent); the exponent is empty when absent
+    RationalNumber(&'input str, &'input str, &'input str),
+    HexNumber(&'input str),
+    DocComment(CommentType, &'input str),
+    Divide,
+    Contract,
+    Library,
+    Interface,
+    Function,
+    Pragma,
+    Import,
+
+    Struct,
+    Event,
+    Enum,
+
+    Memory,
+    Storage,
+    Calldata,
+
+    Public,
+    Private,
+    Internal,
+    External,
+
+    Constant,
+
+    New,
+    Delete,
+
+    Pure,
+    View,
+    Payable,
+
+    Do,
+    Continue,
+    Break,
+
+    Throw,
+    Emit,
+    Return,
+    Returns,
+
+    // Sized types; the payload is the number that follows the type name
+    // (e.g. `uint8` is `Uint(8)`, `bytes4` is `Bytes(4)`)
+    Uint(u16),
+    Int(u16),
+    Bytes(u8),
+    DynamicBytes,
+    Bool,
+    Address,
+    String,
+
+    Semicolon,
+    Comma,
+    OpenParenthesis,
+    CloseParenthesis,
+    OpenCurlyBrace,
+    CloseCurlyBrace,
+
+    BitwiseOr,
+    BitwiseOrAssign,
+    Or,
+
+    BitwiseXor,
+    BitwiseXorAssign,
+
+    BitwiseAnd,
+    BitwiseAndAssign,
+    And,
+
+    AddAssign,
+    Increment,
+    Add,
+
+    SubtractAssign,
+    Decrement,
+    Subtract,
+
+    MulAssign,
+    Mul,
+    Power,
+    DivideAssign,
+    ModuloAssign,
+    Modulo,
+
+    Equal,
+    Assign,
+    ColonAssign,
+
+    NotEqual,
+    Not,
+
+    True,
+    False,
+    Else,
+    Anonymous,
+    For,
+    While,
+    If,
+
+    ShiftRight,
+    ShiftRightAssign,
+    Less,
+    LessEqual,
+
+    ShiftLeft,
+    ShiftLeftAssign,
+    More,
+    MoreEqual,
+
+    Constructor,
+    Indexed,
+
+    Member,
+    Colon,
+    OpenBracket,
+    CloseBracket,
+    Complement,
+    Question,
+
+    Mapping,
+    Arrow,
+
+    Try,
+    Catch,
+
+    Receive,
+    Fallback,
+
+    // Time and ether unit keywords
+    Seconds,
+    Minutes,
+    Hours,
+    Days,
+    Weeks,
+    Wei,
+    Szabo,
+    Finney,
+    Ether,
+
+    This,
+    As,
+    Is,
+    Abstract,
+    Virtual,
+    Override,
+    Using,
+    Modifier,
+    Immutable,
+    Unchecked,
+    Assembly,
+    Let,
+}
+
+/// Renders a token roughly as it would appear in source.
+///
+/// Literal-carrying tokens print their borrowed text (string literals are
+/// wrapped in double quotes, doc comments get their `///` or `/** */`
+/// decoration back); keywords and operators print their fixed spelling.
+impl<'input> fmt::Display for Token<'input> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Token::DocComment(CommentType::Line, s) => write!(f, "///{}", s),
+            Token::DocComment(CommentType::Block, s) => write!(f, "/**{}\n*/", s),
+            Token::Identifier(id) => write!(f, "{}", id),
+            Token::StringLiteral(s) => write!(f, "\"{}\"", s),
+            Token::HexLiteral(hex) => write!(f, "{}", hex),
+            Token::AddressLiteral(address) => write!(f, "{}", address),
+            // an empty exponent means the number had no `e` part
+            Token::Number(base, exp) if exp.is_empty() => write!(f, "{}", base),
+            Token::Number(base, exp) => write!(f, "{}e{}", base, exp),
+            Token::RationalNumber(significand, mantissa, exp) if exp.is_empty() => {
+                write!(f, "{}.{}", significand, mantissa)
+            }
+            Token::RationalNumber(significand, mantissa, exp) => {
+                write!(f, "{}.{}e{}", significand, mantissa, exp)
+            }
+            Token::HexNumber(n) => write!(f, "{}", n),
+            Token::Uint(w) => write!(f, "uint{}", w),
+            Token::Int(w) => write!(f, "int{}", w),
+            Token::Bytes(w) => write!(f, "bytes{}", w),
+            Token::DynamicBytes => write!(f, "bytes"),
+            Token::Semicolon => write!(f, ";"),
+            Token::Comma => write!(f, ","),
+            Token::OpenParenthesis => write!(f, "("),
+            Token::CloseParenthesis => write!(f, ")"),
+            Token::OpenCurlyBrace => write!(f, "{{"),
+            Token::CloseCurlyBrace => write!(f, "}}"),
+            Token::BitwiseOr => write!(f, "|"),
+            Token::BitwiseOrAssign => write!(f, "|="),
+            Token::Or => write!(f, "||"),
+            Token::BitwiseXor => write!(f, "^"),
+            Token::BitwiseXorAssign => write!(f, "^="),
+            Token::BitwiseAnd => write!(f, "&"),
+            Token::BitwiseAndAssign => write!(f, "&="),
+            Token::And => write!(f, "&&"),
+            Token::AddAssign => write!(f, "+="),
+            Token::Increment => write!(f, "++"),
+            Token::Add => write!(f, "+"),
+            Token::SubtractAssign => write!(f, "-="),
+            Token::Decrement => write!(f, "--"),
+            Token::Subtract => write!(f, "-"),
+            Token::MulAssign => write!(f, "*="),
+            Token::Mul => write!(f, "*"),
+            Token::Power => write!(f, "**"),
+            Token::Divide => write!(f, "/"),
+            Token::DivideAssign => write!(f, "/="),
+            Token::ModuloAssign => write!(f, "%="),
+            Token::Modulo => write!(f, "%"),
+            Token::Equal => write!(f, "=="),
+            Token::Assign => write!(f, "="),
+            Token::ColonAssign => write!(f, ":="),
+            Token::NotEqual => write!(f, "!="),
+            Token::Not => write!(f, "!"),
+            Token::ShiftLeft => write!(f, "<<"),
+            Token::ShiftLeftAssign => write!(f, "<<="),
+            Token::More => write!(f, ">"),
+            Token::MoreEqual => write!(f, ">="),
+            Token::Member => write!(f, "."),
+            Token::Colon => write!(f, ":"),
+            Token::OpenBracket => write!(f, "["),
+            Token::CloseBracket => write!(f, "]"),
+            Token::Complement => write!(f, "~"),
+            Token::Question => write!(f, "?"),
+            // right shifts previously printed the left-shift spelling
+            Token::ShiftRightAssign => write!(f, ">>="),
+            Token::ShiftRight => write!(f, ">>"),
+            Token::Less => write!(f, "<"),
+            Token::LessEqual => write!(f, "<="),
+            Token::Bool => write!(f, "bool"),
+            Token::Address => write!(f, "address"),
+            Token::String => write!(f, "string"),
+            Token::Contract => write!(f, "contract"),
+            Token::Library => write!(f, "library"),
+            Token::Interface => write!(f, "interface"),
+            Token::Function => write!(f, "function"),
+            Token::Pragma => write!(f, "pragma"),
+            Token::Import => write!(f, "import"),
+            Token::Struct => write!(f, "struct"),
+            Token::Event => write!(f, "event"),
+            Token::Enum => write!(f, "enum"),
+            Token::Memory => write!(f, "memory"),
+            Token::Storage => write!(f, "storage"),
+            Token::Calldata => write!(f, "calldata"),
+            Token::Public => write!(f, "public"),
+            Token::Private => write!(f, "private"),
+            Token::Internal => write!(f, "internal"),
+            Token::External => write!(f, "external"),
+            Token::Constant => write!(f, "constant"),
+            Token::New => write!(f, "new"),
+            Token::Delete => write!(f, "delete"),
+            Token::Pure => write!(f, "pure"),
+            Token::View => write!(f, "view"),
+            Token::Payable => write!(f, "payable"),
+            Token::Do => write!(f, "do"),
+            Token::Continue => write!(f, "continue"),
+            Token::Break => write!(f, "break"),
+            Token::Throw => write!(f, "throw"),
+            Token::Emit => write!(f, "emit"),
+            Token::Return => write!(f, "return"),
+            Token::Returns => write!(f, "returns"),
+            Token::True => write!(f, "true"),
+            Token::False => write!(f, "false"),
+            Token::Else => write!(f, "else"),
+            Token::Anonymous => write!(f, "anonymous"),
+            Token::For => write!(f, "for"),
+            Token::While => write!(f, "while"),
+            Token::If => write!(f, "if"),
+            Token::Constructor => write!(f, "constructor"),
+            Token::Indexed => write!(f, "indexed"),
+            Token::Mapping => write!(f, "mapping"),
+            Token::Arrow => write!(f, "=>"),
+            Token::Try => write!(f, "try"),
+            Token::Catch => write!(f, "catch"),
+            Token::Receive => write!(f, "receive"),
+            Token::Fallback => write!(f, "fallback"),
+            Token::Seconds => write!(f, "seconds"),
+            Token::Minutes => write!(f, "minutes"),
+            Token::Hours => write!(f, "hours"),
+            Token::Days => write!(f, "days"),
+            Token::Weeks => write!(f, "weeks"),
+            Token::Wei => write!(f, "wei"),
+            Token::Szabo => write!(f, "szabo"),
+            Token::Finney => write!(f, "finney"),
+            Token::Ether => write!(f, "ether"),
+            Token::This => write!(f, "this"),
+            Token::As => write!(f, "as"),
+            Token::Is => write!(f, "is"),
+            Token::Abstract => write!(f, "abstract"),
+            Token::Virtual => write!(f, "virtual"),
+            Token::Override => write!(f, "override"),
+            Token::Using => write!(f, "using"),
+            Token::Modifier => write!(f, "modifier"),
+            Token::Immutable => write!(f, "immutable"),
+            Token::Unchecked => write!(f, "unchecked"),
+            Token::Assembly => write!(f, "assembly"),
+            Token::Let => write!(f, "let"),
+        }
+    }
+}
+
+/// Hand-written lexer over a borrowed input string.
+pub struct Lexer<'input> {
+    // Full input text; tokens are returned as slices of it.
+    input: &'input str,
+    // Peekable iterator over (byte offset, char) pairs of `input`.
+    chars: Peekable<CharIndices<'input>>,
+    // Starts as [None, None]; NOTE(review): presumably the two most recently
+    // produced tokens -- the code that updates it is outside this view, confirm.
+    last_tokens: [Option<Token<'input>>; 2],
+}
+
+/// Errors the lexer can report.
+///
+/// The leading `usize` fields are byte offsets into the input: a `(start, end)`
+/// pair for most variants, and a single position for
+/// `InvalidCharacterInHexLiteral` (see `LexicalError::loc`).
+#[derive(Debug, PartialEq)]
+pub enum LexicalError {
+    EndOfFileInComment(usize, usize),
+    EndOfFileInString(usize, usize),
+    EndofFileInHex(usize, usize),
+    MissingNumber(usize, usize),
+    // position of the offending character, and the character itself
+    InvalidCharacterInHexLiteral(usize, char),
+    // start, end, and the text of the unrecognised token
+    UnrecognisedToken(usize, usize, String),
+    MissingExponent(usize, usize),
+    DoublePoints(usize, usize),
+    UnrecognisedDecimal(usize, usize),
+    // start, end, and the text found where `from` was expected
+    ExpectedFrom(usize, usize, String),
+}
+
+impl fmt::Display for LexicalError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            LexicalError::EndOfFileInComment(_, _) => write!(f, "end of file found in comment"),
+            LexicalError::EndOfFileInString(_, _) => {
+                write!(f, "end of file found in string literal")
+            }
+            LexicalError::EndofFileInHex(_, _) => {
+                write!(f, "end of file found in hex literal string")
+            }
+            LexicalError::MissingNumber(_, _) => write!(f, "missing number"),
+            LexicalError::InvalidCharacterInHexLiteral(_, ch) => {
+                write!(f, "invalid character ‘{}’ in hex literal string", ch)
+            }
+            LexicalError::UnrecognisedToken(_, _, t) => write!(f, "unrecognised token ‘{}’", t),
+            LexicalError::ExpectedFrom(_, _, t) => write!(f, "‘{}’ found where ‘from’ expected", t),
+            LexicalError::MissingExponent(_, _) => write!(f, "missing number"),
+            LexicalError::DoublePoints(_, _) => write!(f, "found two dots in number"),
+            LexicalError::UnrecognisedDecimal(_, _) => {
+                write!(f, "expected number after decimal point")
+            }
+        }
+    }
+}
+
+impl LexicalError {
+    /// Build the `Loc` for this error in file `file_no`.
+    ///
+    /// Variants carrying a `(start, end)` pair map to `Loc(file_no, start, end)`;
+    /// `InvalidCharacterInHexLiteral` carries one position, used for both ends.
+    pub fn loc(&self, file_no: usize) -> Loc {
+        let (begin, finish) = match self {
+            LexicalError::EndOfFileInComment(start, end)
+            | LexicalError::EndOfFileInString(start, end)
+            | LexicalError::EndofFileInHex(start, end)
+            | LexicalError::MissingNumber(start, end)
+            | LexicalError::MissingExponent(start, end)
+            | LexicalError::DoublePoints(start, end)
+            | LexicalError::UnrecognisedDecimal(start, end)
+            | LexicalError::UnrecognisedToken(start, end, _)
+            | LexicalError::ExpectedFrom(start, end, _) => (*start, *end),
+            LexicalError::InvalidCharacterInHexLiteral(pos, _) => (*pos, *pos),
+        };
+
+        Loc(file_no, begin, finish)
+    }
+}
+
+/// Compile-time map from keyword spelling to its token.
+///
+/// Aliases map to the same token as their explicit form: `byte` is
+/// `Bytes(1)`, and `int` / `uint` are the 256-bit variants.
+static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
+    "address" => Token::Address,
+    "anonymous" => Token::Anonymous,
+    "bool" => Token::Bool,
+    "break" => Token::Break,
+    "bytes1" => Token::Bytes(1),
+    "bytes2" => Token::Bytes(2),
+    "bytes3" => Token::Bytes(3),
+    "bytes4" => Token::Bytes(4),
+    "bytes5" => Token::Bytes(5),
+    "bytes6" => Token::Bytes(6),
+    "bytes7" => Token::Bytes(7),
+    "bytes8" => Token::Bytes(8),
+    "bytes9" => Token::Bytes(9),
+    "bytes10" => Token::Bytes(10),
+    "bytes11" => Token::Bytes(11),
+    "bytes12" => Token::Bytes(12),
+    "bytes13" => Token::Bytes(13),
+    "bytes14" => Token::Bytes(14),
+    "bytes15" => Token::Bytes(15),
+    "bytes16" => Token::Bytes(16),
+    "bytes17" => Token::Bytes(17),
+    "bytes18" => Token::Bytes(18),
+    "bytes19" => Token::Bytes(19),
+    "bytes20" => Token::Bytes(20),
+    "bytes21" => Token::Bytes(21),
+    "bytes22" => Token::Bytes(22),
+    "bytes23" => Token::Bytes(23),
+    "bytes24" => Token::Bytes(24),
+    "bytes25" => Token::Bytes(25),
+    "bytes26" => Token::Bytes(26),
+    "bytes27" => Token::Bytes(27),
+    "bytes28" => Token::Bytes(28),
+    "bytes29" => Token::Bytes(29),
+    "bytes30" => Token::Bytes(30),
+    "bytes31" => Token::Bytes(31),
+    "bytes32" => Token::Bytes(32),
+    "bytes" => Token::DynamicBytes,
+    "byte" => Token::Bytes(1),
+    "calldata" => Token::Calldata,
+    "constant" => Token::Constant,
+    "constructor" => Token::Constructor,
+    "continue" => Token::Continue,
+    "contract" => Token::Contract,
+    "delete" => Token::Delete,
+    "do" => Token::Do,
+    "else" => Token::Else,
+    "emit" => Token::Emit,
+    "enum" => Token::Enum,
+    "event" => Token::Event,
+    "external" => Token::External,
+    "false" => Token::False,
+    "for" => Token::For,
+    "function" => Token::Function,
+    "if" => Token::If,
+    "import" => Token::Import,
+    "indexed" => Token::Indexed,
+    "int8" => Token::Int(8),
+    "int16" => Token::Int(16),
+    "int24" => Token::Int(24),
+    "int32" => Token::Int(32),
+    "int40" => Token::Int(40),
+    "int48" => Token::Int(48),
+    "int56" => Token::Int(56),
+    "int64" => Token::Int(64),
+    "int72" => Token::Int(72),
+    "int80" => Token::Int(80),
+    "int88" => Token::Int(88),
+    "int96" => Token::Int(96),
+    "int104" => Token::Int(104),
+    "int112" => Token::Int(112),
+    "int120" => Token::Int(120),
+    "int128" => Token::Int(128),
+    "int136" => Token::Int(136),
+    "int144" => Token::Int(144),
+    "int152" => Token::Int(152),
+    "int160" => Token::Int(160),
+    "int168" => Token::Int(168),
+    "int176" => Token::Int(176),
+    "int184" => Token::Int(184),
+    "int192" => Token::Int(192),
+    "int200" => Token::Int(200),
+    "int208" => Token::Int(208),
+    "int216" => Token::Int(216),
+    "int224" => Token::Int(224),
+    "int232" => Token::Int(232),
+    "int240" => Token::Int(240),
+    "int248" => Token::Int(248),
+    "int256" => Token::Int(256),
+    "interface" => Token::Interface,
+    "internal" => Token::Internal,
+    "int" => Token::Int(256),
+    "library" => Token::Library,
+    "mapping" => Token::Mapping,
+    "memory" => Token::Memory,
+    "new" => Token::New,
+    "payable" => Token::Payable,
+    "pragma" => Token::Pragma,
+    "private" => Token::Private,
+    "public" => Token::Public,
+    "pure" => Token::Pure,
+    "returns" => Token::Returns,
+    "return" => Token::Return,
+    "storage" => Token::Storage,
+    "string" => Token::String,
+    "struct" => Token::Struct,
+    "throw" => Token::Throw,
+    "true" => Token::True,
+    "uint8" => Token::Uint(8),
+    "uint16" => Token::Uint(16),
+    "uint24" => Token::Uint(24),
+    "uint32" => Token::Uint(32),
+    "uint40" => Token::Uint(40),
+    "uint48" => Token::Uint(48),
+    "uint56" => Token::Uint(56),
+    "uint64" => Token::Uint(64),
+    "uint72" => Token::Uint(72),
+    "uint80" => Token::Uint(80),
+    "uint88" => Token::Uint(88),
+    "uint96" => Token::Uint(96),
+    "uint104" => Token::Uint(104),
+    "uint112" => Token::Uint(112),
+    "uint120" => Token::Uint(120),
+    "uint128" => Token::Uint(128),
+    "uint136" => Token::Uint(136),
+    "uint144" => Token::Uint(144),
+    "uint152" => Token::Uint(152),
+    "uint160" => Token::Uint(160),
+    "uint168" => Token::Uint(168),
+    "uint176" => Token::Uint(176),
+    "uint184" => Token::Uint(184),
+    "uint192" => Token::Uint(192),
+    "uint200" => Token::Uint(200),
+    "uint208" => Token::Uint(208),
+    "uint216" => Token::Uint(216),
+    "uint224" => Token::Uint(224),
+    "uint232" => Token::Uint(232),
+    "uint240" => Token::Uint(240),
+    "uint248" => Token::Uint(248),
+    "uint256" => Token::Uint(256),
+    "uint" => Token::Uint(256),
+    "view" => Token::View,
+    "while" => Token::While,
+    "try" => Token::Try,
+    "catch" => Token::Catch,
+    "receive" => Token::Receive,
+    "fallback" => Token::Fallback,
+    "seconds" => Token::Seconds,
+    "minutes" => Token::Minutes,
+    "hours" => Token::Hours,
+    "days" => Token::Days,
+    "weeks" => Token::Weeks,
+    "wei" => Token::Wei,
+    "szabo" => Token::Szabo,
+    "finney" => Token::Finney,
+    "ether" => Token::Ether,
+    "this" => Token::This,
+    "as" => Token::As,
+    "is" => Token::Is,
+    "abstract" => Token::Abstract,
+    "virtual" => Token::Virtual,
+    "override" => Token::Override,
+    "using" => Token::Using,
+    "modifier" => Token::Modifier,
+    "immutable" => Token::Immutable,
+    "unchecked" => Token::Unchecked,
+    "assembly" => Token::Assembly,
+    "let" => Token::Let,
+};
+
+impl<'input> Lexer<'input> {
+    /// Create a lexer over `input`, positioned at its first character and
+    /// with both `last_tokens` slots empty.
+    pub fn new(input: &'input str) -> Self {
+        let chars = input.char_indices().peekable();
+
+        Self {
+            input,
+            chars,
+            last_tokens: [None, None],
+        }
+    }
+
+    /// Lex a numeric literal and return `(start, token, end + 1)`.
+    ///
+    /// Produces one of:
+    /// * `Token::HexNumber` for `0x` followed by hex digits / underscores;
+    /// * `Token::RationalNumber(integer, fraction, exponent)` when a decimal
+    ///   point is involved;
+    /// * `Token::Number(base, exponent)` otherwise.
+    ///
+    /// An absent exponent is returned as an empty slice.
+    ///
+    /// `ch` is compared against `'0'` and `'.'` to pick the branch.
+    /// NOTE(review): presumably `ch` is the character the caller has already
+    /// consumed and `start`/`end` describe its position -- the call site is
+    /// outside this view, confirm.
+    ///
+    /// Errors: `MissingNumber` / `EndofFileInHex` for a bare `0x`,
+    /// `DoublePoints` for a second `.`, `UnrecognisedDecimal` when no digit
+    /// follows the `.`, and `MissingExponent` when nothing follows `e`.
+    fn parse_number(
+        &mut self,
+        start: usize,
+        end: usize,
+        ch: char,
+    ) -> Result<(usize, Token<'input>, usize), LexicalError> {
+        let mut is_rational = false;
+        if ch == '0' {
+            if let Some((_, 'x')) = self.chars.peek() {
+                // hex number
+                self.chars.next();
+
+                // at least one hex digit must follow the `x`
+                let mut end = match self.chars.next() {
+                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
+                    Some((_, _)) => {
+                        return Err(LexicalError::MissingNumber(start, start + 1));
+                    }
+                    None => {
+                        return Err(LexicalError::EndofFileInHex(start, self.input.len()));
+                    }
+                };
+
+                // consume the remaining hex digits; `_` is accepted as a separator
+                while let Some((i, ch)) = self.chars.peek() {
+                    if !ch.is_ascii_hexdigit() && *ch != '_' {
+                        break;
+                    }
+                    end = *i;
+                    self.chars.next();
+                }
+
+                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
+            }
+        }
+
+        let mut start = start;
+        if ch == '.' {
+            // literal begins with a dot: step `start` back by one
+            // NOTE(review): presumably onto the dot itself -- confirm with the caller
+            is_rational = true;
+            start -= 1;
+        }
+
+        // consume the run of decimal digits / underscores
+        let mut end = end;
+        while let Some((i, ch)) = self.chars.peek() {
+            if !ch.is_ascii_digit() && *ch != '_' {
+                break;
+            }
+            end = *i;
+            self.chars.next();
+        }
+        let mut rational_end = end;
+        let mut end_before_rational = end;
+        let mut rational_start = end;
+        if is_rational {
+            // leading-dot form: `start..=start - 1` makes the integer part an
+            // empty slice, and the fraction starts right after `start`
+            end_before_rational = start - 1;
+            rational_start = start + 1;
+        }
+
+        if let Some((i, '.')) = self.chars.peek() {
+            if is_rational {
+                return Err(LexicalError::DoublePoints(start, self.input.len()));
+            }
+            rational_start = *i + 1;
+            rational_end = *i + 1;
+            let mut has_number = false;
+            is_rational = true;
+            self.chars.next();
+            // fraction digits; underscores are not accepted here
+            while let Some((i, ch)) = self.chars.peek() {
+                if *ch == '.' {
+                    return Err(LexicalError::DoublePoints(start, self.input.len()));
+                }
+                if !ch.is_ascii_digit() {
+                    break;
+                }
+                has_number = true;
+                rational_end = *i;
+                end = *i;
+                self.chars.next();
+            }
+            if !has_number {
+                return Err(LexicalError::UnrecognisedDecimal(start, self.input.len()));
+            }
+        }
+
+        // `old_end` marks the end of the base; with no exponent,
+        // `exp_start == end + 1` makes the exponent slice below empty
+        let old_end = end;
+        let mut exp_start = end + 1;
+
+        if let Some((i, 'e')) = self.chars.peek() {
+            exp_start = *i + 1;
+            self.chars.next();
+            // exponent characters: digits, `_` and `-` are all accepted
+            while let Some((i, ch)) = self.chars.peek() {
+                if !ch.is_ascii_digit() && *ch != '_' && *ch != '-' {
+                    break;
+                }
+                end = *i;
+                self.chars.next();
+            }
+
+            // nothing was consumed after the `e`
+            if exp_start > end {
+                return Err(LexicalError::MissingExponent(start, self.input.len()));
+            }
+        }
+
+        if is_rational {
+            let significand = &self.input[start..=end_before_rational];
+            let mantissa = &self.input[rational_start..=rational_end];
+
+            if mantissa.is_empty() {
+                return Err(LexicalError::UnrecognisedDecimal(start, self.input.len()));
+            }
+            let exp = &self.input[exp_start..=end];
+            return Ok((
+                start,
+                Token::RationalNumber(significand, mantissa, exp),
+                end + 1,
+            ));
+        }
+
+        let base = &self.input[start..=old_end];
+        let exp = &self.input[exp_start..=end];
+
+        Ok((start, Token::Number(base, exp), end + 1))
+    }
+
+    /// Lex the remainder of a string literal up to the closing `quote_char`.
+    ///
+    /// `token_start` is reported as the start of the token, while the
+    /// returned `StringLiteral` slice begins at `string_start` and stops just
+    /// before the closing quote. A character preceded by an unescaped
+    /// backslash never terminates the string. Reaching the end of the input
+    /// first yields `EndOfFileInString(token_start, input length)`.
+    fn string(
+        &mut self,
+        token_start: usize,
+        string_start: usize,
+        quote_char: char,
+    ) -> Result<(usize, Token<'input>, usize), LexicalError> {
+        let mut escaped = false;
+
+        // byte offset of the closing quote
+        let close = loop {
+            match self.chars.next() {
+                None => {
+                    return Err(LexicalError::EndOfFileInString(
+                        token_start,
+                        self.input.len(),
+                    ));
+                }
+                Some((idx, c)) => {
+                    if escaped {
+                        // this character was escaped; it cannot end the string
+                        escaped = false;
+                    } else if c == quote_char {
+                        break idx;
+                    } else {
+                        escaped = c == '\\';
+                    }
+                }
+            }
+        };
+
+        let literal = &self.input[string_start..close];
+
+        Ok((token_start, Token::StringLiteral(literal), close + 1))
+    }
+
+    /// Scan the next token from the input.
+    ///
+    /// Whitespace and ordinary (non-doc) comments are skipped. Returns `None`
+    /// once the input is exhausted, `Some(Ok((start, token, end)))` for a
+    /// recognised token together with its byte offsets, and `Some(Err(..))`
+    /// when the input cannot be lexed.
+    fn next(&mut self) -> Option<Result<(usize, Token<'input>, usize), LexicalError>> {
+        loop {
+            match self.chars.next() {
+                // identifiers, keywords and the prefixed literals
+                // unicode"..", hex".." and address".."
+                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
+                    let end;
+
+                    // consume the remaining identifier characters
+                    loop {
+                        if let Some((i, ch)) = self.chars.peek() {
+                            if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
+                                end = *i;
+                                break;
+                            }
+                            self.chars.next();
+                        } else {
+                            end = self.input.len();
+                            break;
+                        }
+                    }
+
+                    let id = &self.input[start..end];
+
+                    if id == "unicode" {
+                        match self.chars.peek() {
+                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
+                                let quote_char = *quote_char;
+
+                                self.chars.next();
+
+                                // the string contents begin after `unicode` and
+                                // the opening quote, i.e. 8 bytes in
+                                return Some(self.string(start, start + 8, quote_char));
+                            }
+                            _ => (),
+                        }
+                    }
+
+                    if id == "hex" {
+                        match self.chars.peek() {
+                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
+                                let quote_char = *quote_char;
+
+                                self.chars.next();
+
+                                for (i, ch) in &mut self.chars {
+                                    if ch == quote_char {
+                                        // the token text includes the `hex`
+                                        // prefix and both quotes
+                                        return Some(Ok((
+                                            start,
+                                            Token::HexLiteral(&self.input[start..=i]),
+                                            i + 1,
+                                        )));
+                                    }
+
+                                    if !ch.is_ascii_hexdigit() && ch != '_' {
+                                        // Eat up the remainder of the string
+                                        for (_, ch) in &mut self.chars {
+                                            if ch == quote_char {
+                                                break;
+                                            }
+                                        }
+
+                                        return Some(Err(
+                                            LexicalError::InvalidCharacterInHexLiteral(i, ch),
+                                        ));
+                                    }
+                                }
+
+                                return Some(Err(LexicalError::EndOfFileInString(
+                                    start,
+                                    self.input.len(),
+                                )));
+                            }
+                            _ => (),
+                        }
+                    }
+
+                    if id == "address" {
+                        match self.chars.peek() {
+                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
+                                let quote_char = *quote_char;
+
+                                self.chars.next();
+
+                                for (i, ch) in &mut self.chars {
+                                    if ch == quote_char {
+                                        // the token text includes the `address`
+                                        // prefix and both quotes
+                                        return Some(Ok((
+                                            start,
+                                            Token::AddressLiteral(&self.input[start..=i]),
+                                            i + 1,
+                                        )));
+                                    }
+                                }
+
+                                return Some(Err(LexicalError::EndOfFileInString(
+                                    start,
+                                    self.input.len(),
+                                )));
+                            }
+                            _ => (),
+                        }
+                    }
+
+                    // anything that is not a keyword is a plain identifier
+                    return if let Some(w) = KEYWORDS.get(id) {
+                        Some(Ok((start, *w, end)))
+                    } else {
+                        Some(Ok((start, Token::Identifier(id), end)))
+                    };
+                }
+                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
+                    return Some(self.string(start, start + 1, quote_char));
+                }
+                Some((start, '/')) => {
+                    match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            return Some(Ok((start, Token::DivideAssign, start + 2)));
+                        }
+                        Some((_, '/')) => {
+                            // line comment
+                            self.chars.next();
+
+                            // set if the comment is already terminated by the
+                            // character directly after `//`
+                            let mut newline = false;
+
+                            let doc_comment_start = match self.chars.next() {
+                                Some((i, '/')) => match self.chars.peek() {
+                                    // ///(/)+ is still a line comment
+                                    Some((_, '/')) => None,
+                                    _ => Some(i + 1),
+                                },
+                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
+                                    newline = true;
+                                    None
+                                }
+                                _ => None,
+                            };
+
+                            // offset of the last character of the comment
+                            let mut last = start + 3;
+
+                            if !newline {
+                                for (i, ch) in &mut self.chars {
+                                    if ch == '\n' || ch == '\r' {
+                                        break;
+                                    }
+                                    last = i;
+                                }
+                            }
+
+                            // NOTE(review): a doc comment consisting of a single
+                            // character (e.g. `///a`) has last == doc_start and is
+                            // therefore dropped here; confirm whether that is
+                            // intended.
+                            if let Some(doc_start) = doc_comment_start {
+                                if last > doc_start {
+                                    return Some(Ok((
+                                        start + 3,
+                                        Token::DocComment(
+                                            CommentType::Line,
+                                            &self.input[doc_start..=last],
+                                        ),
+                                        last + 1,
+                                    )));
+                                }
+                            }
+                        }
+                        Some((_, '*')) => {
+                            // multiline comment
+                            self.chars.next();
+
+                            // true when the previous character was a `*`, so that
+                            // a following `/` closes the comment
+                            let mut seen_star = false;
+
+                            let doc_comment_start = match self.chars.next() {
+                                Some((i, '*')) => {
+                                    // This `*` may already be the first half of
+                                    // the closing `*/`, as in the empty comment
+                                    // `/**/`; without recording it the comment
+                                    // would run on to the next `*/` or to the end
+                                    // of the file.
+                                    seen_star = true;
+
+                                    match self.chars.peek() {
+                                        // /**(*)+ is still a plain block comment
+                                        Some((_, '*')) => None,
+                                        _ => Some(i + 1),
+                                    }
+                                }
+                                _ => None,
+                            };
+
+                            // offset of the `*` of the closing `*/` once the loop
+                            // below has finished
+                            let mut last = start + 3;
+
+                            loop {
+                                if let Some((i, ch)) = self.chars.next() {
+                                    if seen_star && ch == '/' {
+                                        break;
+                                    }
+                                    seen_star = ch == '*';
+                                    last = i;
+                                } else {
+                                    return Some(Err(LexicalError::EndOfFileInComment(
+                                        start,
+                                        self.input.len(),
+                                    )));
+                                }
+                            }
+
+                            // only non-empty doc comments produce a token
+                            if let Some(doc_start) = doc_comment_start {
+                                if last > doc_start {
+                                    return Some(Ok((
+                                        start + 3,
+                                        Token::DocComment(
+                                            CommentType::Block,
+                                            &self.input[doc_start..last],
+                                        ),
+                                        last,
+                                    )));
+                                }
+                            }
+                        }
+                        _ => {
+                            return Some(Ok((start, Token::Divide, start + 1)));
+                        }
+                    }
+                }
+                Some((start, ch)) if ch.is_ascii_digit() => {
+                    return Some(self.parse_number(start, start, ch))
+                }
+                Some((i, ';')) => return Some(Ok((i, Token::Semicolon, i + 1))),
+                Some((i, ',')) => return Some(Ok((i, Token::Comma, i + 1))),
+                Some((i, '(')) => return Some(Ok((i, Token::OpenParenthesis, i + 1))),
+                Some((i, ')')) => return Some(Ok((i, Token::CloseParenthesis, i + 1))),
+                Some((i, '{')) => return Some(Ok((i, Token::OpenCurlyBrace, i + 1))),
+                Some((i, '}')) => return Some(Ok((i, Token::CloseCurlyBrace, i + 1))),
+                Some((i, '~')) => return Some(Ok((i, Token::Complement, i + 1))),
+                // operators: peek one character ahead to pick the longest match
+                Some((i, '=')) => match self.chars.peek() {
+                    Some((_, '=')) => {
+                        self.chars.next();
+                        return Some(Ok((i, Token::Equal, i + 2)));
+                    }
+                    Some((_, '>')) => {
+                        self.chars.next();
+                        return Some(Ok((i, Token::Arrow, i + 2)));
+                    }
+                    _ => {
+                        return Some(Ok((i, Token::Assign, i + 1)));
+                    }
+                },
+                Some((i, '!')) => {
+                    if let Some((_, '=')) = self.chars.peek() {
+                        self.chars.next();
+                        return Some(Ok((i, Token::NotEqual, i + 2)));
+                    } else {
+                        return Some(Ok((i, Token::Not, i + 1)));
+                    }
+                }
+                Some((i, '|')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::BitwiseOrAssign, i + 2)))
+                        }
+                        Some((_, '|')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::Or, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::BitwiseOr, i + 1))),
+                    };
+                }
+                Some((i, '&')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::BitwiseAndAssign, i + 2)))
+                        }
+                        Some((_, '&')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::And, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::BitwiseAnd, i + 1))),
+                    };
+                }
+                Some((i, '^')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::BitwiseXorAssign, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::BitwiseXor, i + 1))),
+                    };
+                }
+                Some((i, '+')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::AddAssign, i + 2)))
+                        }
+                        Some((_, '+')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::Increment, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::Add, i + 1))),
+                    };
+                }
+                Some((i, '-')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::SubtractAssign, i + 2)))
+                        }
+                        Some((_, '-')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::Decrement, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::Subtract, i + 1))),
+                    };
+                }
+                Some((i, '*')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::MulAssign, i + 2)))
+                        }
+                        Some((_, '*')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::Power, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::Mul, i + 1))),
+                    };
+                }
+                Some((i, '%')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::ModuloAssign, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::Modulo, i + 1))),
+                    };
+                }
+                Some((i, '<')) => {
+                    return match self.chars.peek() {
+                        Some((_, '<')) => {
+                            self.chars.next();
+                            if let Some((_, '=')) = self.chars.peek() {
+                                self.chars.next();
+                                Some(Ok((i, Token::ShiftLeftAssign, i + 3)))
+                            } else {
+                                Some(Ok((i, Token::ShiftLeft, i + 2)))
+                            }
+                        }
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::LessEqual, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::Less, i + 1))),
+                    };
+                }
+                Some((i, '>')) => {
+                    return match self.chars.peek() {
+                        Some((_, '>')) => {
+                            self.chars.next();
+                            if let Some((_, '=')) = self.chars.peek() {
+                                self.chars.next();
+                                Some(Ok((i, Token::ShiftRightAssign, i + 3)))
+                            } else {
+                                Some(Ok((i, Token::ShiftRight, i + 2)))
+                            }
+                        }
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::MoreEqual, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::More, i + 1))),
+                    };
+                }
+                Some((i, '.')) => {
+                    // a dot directly followed by a digit starts a number
+                    // such as `.0008`, otherwise it is member access
+                    if let Some((_, a)) = self.chars.peek() {
+                        if a.is_ascii_digit() {
+                            return Some(self.parse_number(i + 1, i + 1, '.'));
+                        }
+                    }
+                    return Some(Ok((i, Token::Member, i + 1)));
+                }
+                Some((i, '[')) => return Some(Ok((i, Token::OpenBracket, i + 1))),
+                Some((i, ']')) => return Some(Ok((i, Token::CloseBracket, i + 1))),
+                Some((i, ':')) => {
+                    return match self.chars.peek() {
+                        Some((_, '=')) => {
+                            self.chars.next();
+                            Some(Ok((i, Token::ColonAssign, i + 2)))
+                        }
+                        _ => Some(Ok((i, Token::Colon, i + 1))),
+                    };
+                }
+                Some((i, '?')) => return Some(Ok((i, Token::Question, i + 1))),
+                // whitespace separates tokens; go round the loop again
+                Some((_, ch)) if ch.is_whitespace() => (),
+                Some((start, _)) => {
+                    // anything else is an error; report everything up to the
+                    // next whitespace (or end of input) as the offending text
+                    let mut end;
+
+                    loop {
+                        if let Some((i, ch)) = self.chars.next() {
+                            end = i;
+
+                            if ch.is_whitespace() {
+                                break;
+                            }
+                        } else {
+                            end = self.input.len();
+                            break;
+                        }
+                    }
+
+                    return Some(Err(LexicalError::UnrecognisedToken(
+                        start,
+                        end,
+                        self.input[start..end].to_owned(),
+                    )));
+                }
+                None => return None, // End of file
+            }
+        }
+    }
+
+    /// Lex the value of a pragma, e.g. the `>=0.4.22 <0.7.0` in
+    /// `pragma solidity >=0.4.22 <0.7.0;`.
+    ///
+    /// Like solc, everything up to the next semicolon is taken as the value,
+    /// with whitespace trimmed on the left and on the right. If there is no
+    /// value before the semicolon (or before the end of the input), the regular
+    /// lexer is used for the next token instead.
+    fn pragma_value(&mut self) -> Option<Result<(usize, Token<'input>, usize), LexicalError>> {
+        // (offset of the first byte, offset just past the last byte) of the
+        // non-whitespace text seen so far
+        let mut span: Option<(usize, usize)> = None;
+
+        while let Some((pos, ch)) = self.chars.peek().copied() {
+            if ch == ';' {
+                // the semicolon is not part of the value; leave it in place
+                break;
+            }
+
+            self.chars.next();
+
+            if ch.is_whitespace() {
+                continue;
+            }
+
+            // the value extends to the byte _after_ this character
+            let after = self
+                .chars
+                .peek()
+                .map_or(self.input.len(), |(next_pos, _)| *next_pos);
+
+            span = Some(match span {
+                Some((first, _)) => (first, after),
+                None => (pos, after),
+            });
+        }
+
+        match span {
+            Some((first, after)) => Some(Ok((
+                first,
+                Token::StringLiteral(&self.input[first..after]),
+                after,
+            ))),
+            None => self.next(),
+        }
+    }
+}
+
+impl<'input> Iterator for Lexer<'input> {
+    type Item = Spanned<Token<'input>, usize, LexicalError>;
+
+    /// Produce the next token, remembering the two most recent tokens.
+    ///
+    /// When the previous two tokens were `pragma` followed by an identifier,
+    /// what follows is a pragma value, which needs its own lexing rules.
+    fn next(&mut self) -> Option<Self::Item> {
+        let after_pragma_name = matches!(
+            self.last_tokens,
+            [Some(Token::Pragma), Some(Token::Identifier(_))]
+        );
+
+        let item = if after_pragma_name {
+            self.pragma_value()
+        } else {
+            self.next()
+        };
+
+        // errors and end of input are recorded as "no token"
+        let latest = if let Some(Ok((_, tok, _))) = &item {
+            Some(*tok)
+        } else {
+            None
+        };
+
+        self.last_tokens = [self.last_tokens[1], latest];
+
+        item
+    }
+}
+
+#[test]
+fn lexertest() {
+    /// Run the lexer over `src` and collect every token or error it yields.
+    fn lex(src: &str) -> Vec<Result<(usize, Token<'_>, usize), LexicalError>> {
+        Lexer::new(src).collect()
+    }
+
+    // keywords, sized types and identifiers
+    assert_eq!(lex("bool"), vec![Ok((0, Token::Bool, 4))]);
+
+    assert_eq!(lex("uint8"), vec![Ok((0, Token::Uint(8), 5))]);
+
+    assert_eq!(lex("hex"), vec![Ok((0, Token::Identifier("hex"), 3))]);
+
+    // hex literal followed by an ordinary block comment
+    assert_eq!(
+        lex("hex\"cafe_dead\" /* adad*** */"),
+        vec![Ok((0, Token::HexLiteral("hex\"cafe_dead\""), 14))]
+    );
+
+    // numbers
+    assert_eq!(
+        lex("// foo bar\n0x00fead0_12 00090 0_0"),
+        vec![
+            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
+            Ok((24, Token::Number("00090", ""), 29)),
+            Ok((30, Token::Number("0_0", ""), 33)),
+        ]
+    );
+
+    assert_eq!(
+        lex("// foo bar\n0x00fead0_12 9.0008 0_0"),
+        vec![
+            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
+            Ok((24, Token::RationalNumber("9", "0008", ""), 30)),
+            Ok((31, Token::Number("0_0", ""), 34)),
+        ]
+    );
+
+    assert_eq!(
+        lex("// foo bar\n0x00fead0_12 .0008 0.9e2"),
+        vec![
+            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
+            Ok((24, Token::RationalNumber("", "0008", ""), 29)),
+            Ok((30, Token::RationalNumber("0", "9", "2"), 35)),
+        ]
+    );
+
+    assert_eq!(
+        lex("// foo bar\n0x00fead0_12 .0008 0.9e-2"),
+        vec![
+            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
+            Ok((24, Token::RationalNumber("", "0008", ""), 29)),
+            Ok((30, Token::RationalNumber("0", "9", "-2"), 36)),
+        ]
+    );
+
+    // string literal
+    assert_eq!(lex("\"foo\""), vec![Ok((0, Token::StringLiteral("foo"), 5))]);
+
+    // pragma values
+    assert_eq!(
+        lex("pragma solidity >=0.5.0 <0.7.0;"),
+        vec![
+            Ok((0, Token::Pragma, 6)),
+            Ok((7, Token::Identifier("solidity"), 15)),
+            Ok((16, Token::StringLiteral(">=0.5.0 <0.7.0"), 30)),
+            Ok((30, Token::Semicolon, 31)),
+        ]
+    );
+
+    assert_eq!(
+        lex("pragma solidity \t>=0.5.0 <0.7.0 \n ;"),
+        vec![
+            Ok((0, Token::Pragma, 6)),
+            Ok((7, Token::Identifier("solidity"), 15)),
+            Ok((17, Token::StringLiteral(">=0.5.0 <0.7.0"), 31)),
+            Ok((34, Token::Semicolon, 35)),
+        ]
+    );
+
+    assert_eq!(
+        lex("pragma solidity 赤;"),
+        vec![
+            Ok((0, Token::Pragma, 6)),
+            Ok((7, Token::Identifier("solidity"), 15)),
+            Ok((16, Token::StringLiteral("赤"), 19)),
+            Ok((19, Token::Semicolon, 20)),
+        ]
+    );
+
+    // multi-character operators
+    assert_eq!(
+        lex(">>= >> >= >"),
+        vec![
+            Ok((0, Token::ShiftRightAssign, 3)),
+            Ok((4, Token::ShiftRight, 6)),
+            Ok((7, Token::MoreEqual, 9)),
+            Ok((10, Token::More, 11)),
+        ]
+    );
+
+    assert_eq!(
+        lex("<<= << <= <"),
+        vec![
+            Ok((0, Token::ShiftLeftAssign, 3)),
+            Ok((4, Token::ShiftLeft, 6)),
+            Ok((7, Token::LessEqual, 9)),
+            Ok((10, Token::Less, 11)),
+        ]
+    );
+
+    assert_eq!(
+        lex("-16 -- - -="),
+        vec![
+            Ok((0, Token::Subtract, 1)),
+            Ok((1, Token::Number("16", ""), 3)),
+            Ok((4, Token::Decrement, 6)),
+            Ok((7, Token::Subtract, 8)),
+            Ok((9, Token::SubtractAssign, 11)),
+        ]
+    );
+
+    assert_eq!(
+        lex("-4 "),
+        vec![
+            Ok((0, Token::Subtract, 1)),
+            Ok((1, Token::Number("4", ""), 2)),
+        ]
+    );
+
+    // lexical errors
+    assert_eq!(
+        lex(r#"hex"abcdefg""#),
+        vec![Err(LexicalError::InvalidCharacterInHexLiteral(10, 'g'))]
+    );
+
+    assert_eq!(
+        lex(r#" € "#),
+        vec![Err(LexicalError::UnrecognisedToken(1, 4, "€".to_owned()))]
+    );
+
+    assert_eq!(
+        lex(r#"€"#),
+        vec![Err(LexicalError::UnrecognisedToken(0, 3, "€".to_owned()))]
+    );
+
+    // pragma value running to the end of the input
+    assert_eq!(
+        lex(r#"pragma foo bar"#),
+        vec![
+            Ok((0, Token::Pragma, 6)),
+            Ok((7, Token::Identifier("foo"), 10)),
+            Ok((11, Token::StringLiteral("bar"), 14)),
+        ]
+    );
+
+    // doc comments
+    assert_eq!(
+        lex(r#"/// foo"#),
+        vec![Ok((3, Token::DocComment(CommentType::Line, " foo"), 7))]
+    );
+
+    assert_eq!(
+        lex("/// jadajadadjada\n// bar"),
+        vec![Ok((
+            3,
+            Token::DocComment(CommentType::Line, " jadajadadjada"),
+            17
+        ))]
+    );
+
+    assert_eq!(
+        lex(r#"/** foo */"#),
+        vec![Ok((3, Token::DocComment(CommentType::Block, " foo "), 8))]
+    );
+
+    assert_eq!(
+        lex("/** jadajadadjada */\n/* bar */"),
+        vec![Ok((
+            3,
+            Token::DocComment(CommentType::Block, " jadajadadjada "),
+            18
+        ))]
+    );
+
+    // comments made of repeated stars or slashes are not doc comments
+    assert_eq!(Lexer::new("/************/").next(), None);
+
+    assert_eq!(
+        Lexer::new("/**").next(),
+        Some(Err(LexicalError::EndOfFileInComment(0, 3)))
+    );
+
+    assert_eq!(Lexer::new("//////////////").next(), None);
+
+    // some unicode tests
+    assert_eq!(
+        lex(">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス"),
+        vec![
+            Ok((0, Token::MoreEqual, 2)),
+            Ok((5, Token::Member, 6)),
+            Ok((7, Token::Identifier("très"), 12)),
+            Ok((15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63)),
+            Ok((65, Token::Identifier("カラス"), 74)),
+        ]
+    );
+
+    assert_eq!(
+        lex(r#"unicode"€""#),
+        vec![Ok((0, Token::StringLiteral("€"), 12))]
+    );
+
+    assert_eq!(
+        lex(r#"unicode "€""#),
+        vec![
+            Ok((0, Token::Identifier("unicode"), 7)),
+            Ok((8, Token::StringLiteral("€"), 13)),
+        ]
+    );
+
+    // scientific notation
+    assert_eq!(lex(r#" 1e0 "#), vec![Ok((1, Token::Number("1", "0"), 4))]);
+
+    assert_eq!(
+        lex(r#" -9e0123"#),
+        vec![
+            Ok((1, Token::Subtract, 2)),
+            Ok((2, Token::Number("9", "0123"), 8)),
+        ]
+    );
+
+    assert_eq!(
+        lex(r#" -9e"#),
+        vec![
+            Ok((1, Token::Subtract, 2)),
+            Err(LexicalError::MissingExponent(2, 4)),
+        ]
+    );
+
+    assert_eq!(
+        lex(r#"9ea"#),
+        vec![
+            Err(LexicalError::MissingExponent(0, 3)),
+            Ok((2, Token::Identifier("a"), 3)),
+        ]
+    );
+}

+ 174 - 0
solang-parser/src/lib.rs

@@ -0,0 +1,174 @@
+//! Solidity file parser
+
+mod doc;
+pub mod lexer;
+pub mod pt;
+pub mod diagnostics;
+pub use diagnostics::Diagnostic;
+
+/// The Solidity grammar parser.
+///
+/// This module is not written by hand: its contents are the `solidity.rs`
+/// file which the build script (LALRPOP) writes into `OUT_DIR`. Clippy is
+/// switched off here because the code is generated.
+#[allow(clippy::all)]
+pub mod solidity {
+    include!(concat!(env!("OUT_DIR"), "/solidity.rs"));
+}
+
+use lalrpop_util::ParseError;
+
+/// Parse Solidity file content
+///
+/// `src` is lexed and parsed as a whole source unit. `file_no` is handed to
+/// the parser and used as the file number of the locations in reported
+/// diagnostics.
+///
+/// # Errors
+///
+/// If parsing fails, the parser error is converted into a single
+/// [`Diagnostic`], which is returned in a one-element vector.
+pub fn parse(src: &str, file_no: usize) -> Result<pt::SourceUnit, Vec<Diagnostic>> {
+    // parse phase
+    let lex = lexer::Lexer::new(src);
+
+    solidity::SourceUnitParser::new()
+        .parse(src, file_no, lex)
+        .map_err(|e| {
+            vec![match e {
+                ParseError::InvalidToken { location } => Diagnostic::parser_error(
+                    pt::Loc(file_no, location, location),
+                    "invalid token".to_string(),
+                ),
+                ParseError::UnrecognizedToken {
+                    token: (l, token, r),
+                    expected,
+                } => Diagnostic::parser_error(
+                    pt::Loc(file_no, l, r),
+                    format!(
+                        "unrecognised token `{}', expected {}",
+                        token,
+                        expected.join(", ")
+                    ),
+                ),
+                ParseError::User { error } => {
+                    Diagnostic::parser_error(error.loc(file_no), error.to_string())
+                }
+                // the triple is (start, token, end): the message must show the
+                // token itself, not its start offset
+                ParseError::ExtraToken {
+                    token: (l, token, r),
+                } => Diagnostic::parser_error(
+                    pt::Loc(file_no, l, r),
+                    format!("extra token `{}' encountered", token),
+                ),
+                ParseError::UnrecognizedEOF { location, expected } => Diagnostic::parser_error(
+                    pt::Loc(file_no, location, location),
+                    format!("unexpected end of file, expecting {}", expected.join(", ")),
+                ),
+            }]
+        })
+}
+
+
+/// Move the value inside an `Option`, if any, onto the heap.
+///
+/// `Some(value)` becomes `Some(Box::new(value))`; `None` stays `None`.
+pub fn box_option<T>(o: Option<T>) -> Option<Box<T>> {
+    Some(Box::new(o?))
+}
+
+#[cfg(test)]
+mod test {
+    use super::lexer;
+    use super::pt::*;
+    use super::solidity;
+
+    /// An identifier in file 0 spanning the bytes `start..end`.
+    fn ident(start: usize, end: usize, name: &str) -> Identifier {
+        Identifier {
+            loc: Loc(0, start, end),
+            name: name.to_string(),
+        }
+    }
+
+    /// A struct field whose location runs from the start of its type to the
+    /// end of its name.
+    fn field(ty_loc: Loc, ty: Type, name: Identifier) -> VariableDeclaration {
+        VariableDeclaration {
+            loc: Loc(0, ty_loc.1, name.loc.2),
+            ty: Expression::Type(ty_loc, ty),
+            storage: None,
+            name,
+        }
+    }
+
+    /// A contract variable without attributes or initializer, whose location
+    /// runs from the start of its type to the end of its name.
+    fn variable(ty_loc: Loc, ty: Type, name: Identifier) -> ContractPart {
+        ContractPart::VariableDefinition(Box::new(VariableDefinition {
+            doc: vec![],
+            loc: Loc(0, ty_loc.1, name.loc.2),
+            ty: Expression::Type(ty_loc, ty),
+            attrs: vec![],
+            name,
+            initializer: None,
+        }))
+    }
+
+    /// Parses a small contract and compares the whole parse tree, including
+    /// every byte offset, with the expected one.
+    #[test]
+    fn parse_test() {
+        let src = "contract foo {
+                    struct Jurisdiction {
+                        bool exists;
+                        uint keyIdx;
+                        bytes2 country;
+                        bytes32 region;
+                    }
+                    string __abba_$;
+                    int64 $thing_102;
+                }";
+
+        let tokens = lexer::Lexer::new(src);
+
+        let actual = solidity::SourceUnitParser::new()
+            .parse(src, 0, tokens)
+            .unwrap();
+
+        let jurisdiction = ContractPart::StructDefinition(Box::new(StructDefinition {
+            doc: vec![],
+            name: ident(42, 54, "Jurisdiction"),
+            loc: Loc(0, 35, 232),
+            fields: vec![
+                field(Loc(0, 81, 85), Type::Bool, ident(86, 92, "exists")),
+                field(Loc(0, 118, 122), Type::Uint(256), ident(123, 129, "keyIdx")),
+                field(Loc(0, 155, 161), Type::Bytes(2), ident(162, 169, "country")),
+                field(Loc(0, 195, 202), Type::Bytes(32), ident(203, 209, "region")),
+            ],
+        }));
+
+        let contract = ContractDefinition {
+            doc: vec![],
+            loc: Loc(0, 0, 13),
+            ty: ContractTy::Contract(Loc(0, 0, 8)),
+            name: ident(9, 12, "foo"),
+            base: Vec::new(),
+            parts: vec![
+                jurisdiction,
+                variable(Loc(0, 253, 259), Type::String, ident(260, 268, "__abba_$")),
+                variable(Loc(0, 290, 295), Type::Int(64), ident(296, 306, "$thing_102")),
+            ],
+        };
+
+        let expected = SourceUnit(vec![SourceUnitPart::ContractDefinition(Box::new(contract))]);
+
+        assert_eq!(actual, expected);
+    }
+}

+ 570 - 0
solang-parser/src/pt.rs

@@ -0,0 +1,570 @@
+use crate::lexer::CommentType;
+use num_bigint::BigInt;
+use num_rational::BigRational;
+use std::fmt;
+
+/// A span of source text: file number, start offset, end offset (in bytes).
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Clone, Copy)]
+pub struct Loc(pub usize, pub usize, pub usize);
+
+impl Loc {
+    /// Returns the zero-length location sitting at the start offset of this span,
+    /// in the same file.
+    pub fn begin(&self) -> Self {
+        let Loc(file_no, start, _) = *self;
+        Loc(file_no, start, start)
+    }
+
+    /// Returns the zero-length location sitting at the end offset of this span,
+    /// in the same file.
+    pub fn end(&self) -> Self {
+        let Loc(file_no, _, end) = *self;
+        Loc(file_no, end, end)
+    }
+}
+
+/// An identifier together with the location it was parsed from.
+#[derive(Debug, PartialEq, Clone)]
+pub struct Identifier {
+    /// Location of the identifier in the source.
+    pub loc: Loc,
+    /// The identifier text.
+    pub name: String,
+}
+
+/// One entry extracted from the doc comments preceding a definition.
+// NOTE(review): these are produced by `doc::tags`, which is not visible here; the
+// field descriptions below are inferred from the field names — confirm.
+#[derive(Debug, PartialEq, Clone)]
+pub struct DocComment {
+    /// Presumably the source offset the entry was found at.
+    pub offset: usize,
+    /// Presumably the tag name of the entry.
+    pub tag: String,
+    /// Presumably the text belonging to the tag.
+    pub value: String,
+}
+
+/// Root of the parse tree: the sequence of top-level parts of one source file.
+#[derive(Debug, PartialEq)]
+pub struct SourceUnit(pub Vec<SourceUnitPart>);
+
+/// A single top-level item of a source unit.
+#[derive(Debug, PartialEq)]
+pub enum SourceUnitPart {
+    ContractDefinition(Box<ContractDefinition>),
+    /// `pragma <identifier> <value>;` preceded by its doc comments.
+    PragmaDirective(Vec<DocComment>, Identifier, StringLiteral),
+    /// `import ...;` preceded by its doc comments.
+    ImportDirective(Vec<DocComment>, Import),
+    EnumDefinition(Box<EnumDefinition>),
+    StructDefinition(Box<StructDefinition>),
+    EventDefinition(Box<EventDefinition>),
+    FunctionDefinition(Box<FunctionDefinition>),
+    VariableDefinition(Box<VariableDefinition>),
+    /// A lone `;` at file level.
+    StraySemicolon(Loc),
+}
+
+/// The shape of an import directive. The `StringLiteral` is the string that names
+/// what is imported.
+#[derive(Debug, PartialEq)]
+pub enum Import {
+    /// `import "s";`
+    Plain(StringLiteral),
+    /// `import "s" as id;` or `import * as id from "s";`
+    GlobalSymbol(StringLiteral, Identifier),
+    /// `import { a, b as c } from "s";` — each entry is a name and its optional alias.
+    Rename(StringLiteral, Vec<(Identifier, Option<Identifier>)>),
+}
+
+/// A type as written in the source.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Type {
+    /// `address`
+    Address,
+    /// `address payable`
+    AddressPayable,
+    /// Bare `payable` (the grammar notes this is only used as a cast in solc).
+    Payable,
+    /// `bool`
+    Bool,
+    /// `string`
+    String,
+    /// Signed integer; the number is presumably the bit width — confirm against the lexer.
+    Int(u16),
+    /// Unsigned integer; the number is presumably the bit width — confirm against the lexer.
+    Uint(u16),
+    /// Fixed-size bytes; the number is presumably the length in bytes — confirm against the lexer.
+    Bytes(u8),
+    Rational,
+    /// `bytes`
+    DynamicBytes,
+    /// `mapping(key => value)`: location of the whole mapping type, key, value.
+    Mapping(Loc, Box<Expression>, Box<Expression>),
+    /// `function(params) attributes [returns (returns) trailing_attributes]`
+    Function {
+        params: Vec<(Loc, Option<Parameter>)>,
+        /// Attributes written between the parameter list and `returns`.
+        attributes: Vec<FunctionAttribute>,
+        /// Empty when there is no `returns` clause.
+        returns: Vec<(Loc, Option<Parameter>)>,
+        /// Attributes written after the `returns (...)` list.
+        trailing_attributes: Vec<FunctionAttribute>,
+    },
+}
+
+/// A data-location keyword (`memory`, `storage` or `calldata`) and where it appeared.
+#[derive(Debug, Clone, PartialEq)]
+pub enum StorageLocation {
+    Memory(Loc),
+    Storage(Loc),
+    Calldata(Loc),
+}
+
+impl StorageLocation {
+    /// Borrows the source location carried by this storage-location keyword.
+    pub fn loc(&self) -> &Loc {
+        match self {
+            Self::Memory(loc) | Self::Storage(loc) | Self::Calldata(loc) => loc,
+        }
+    }
+}
+
+impl fmt::Display for StorageLocation {
+    /// Writes the keyword for this storage location.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            Self::Memory(_) => "memory",
+            Self::Storage(_) => "storage",
+            Self::Calldata(_) => "calldata",
+        };
+        f.write_str(keyword)
+    }
+}
+
+/// A `type [storage-location] name` declaration, as used for struct fields and
+/// local variable definitions.
+#[derive(Debug, PartialEq, Clone)]
+pub struct VariableDeclaration {
+    /// Spans from the start of the type to the end of the name.
+    pub loc: Loc,
+    /// The declared type, held as an expression.
+    pub ty: Expression,
+    /// The optional `memory`/`storage`/`calldata` keyword.
+    pub storage: Option<StorageLocation>,
+    pub name: Identifier,
+}
+
+/// A `struct Name { ... }` definition.
+#[derive(Debug, PartialEq)]
+pub struct StructDefinition {
+    /// Doc comments preceding the definition.
+    pub doc: Vec<DocComment>,
+    /// Spans from the `struct` keyword to the closing `}`.
+    pub loc: Loc,
+    pub name: Identifier,
+    /// The struct's fields, each written as `type [storage-location] name;`.
+    pub fields: Vec<VariableDeclaration>,
+}
+
+/// A single item inside a contract body.
+#[derive(Debug, PartialEq)]
+pub enum ContractPart {
+    StructDefinition(Box<StructDefinition>),
+    EventDefinition(Box<EventDefinition>),
+    EnumDefinition(Box<EnumDefinition>),
+    VariableDefinition(Box<VariableDefinition>),
+    /// Functions, modifiers and constructors all use this variant.
+    FunctionDefinition(Box<FunctionDefinition>),
+    /// A lone `;` in the contract body.
+    StraySemicolon(Loc),
+    Using(Box<Using>),
+}
+
+/// A `using` declaration inside a contract.
+// NOTE(review): the grammar rule for `Using` is outside this view; presumably
+// `ty` is `None` for the `using L for *` form — confirm.
+#[derive(Debug, PartialEq)]
+pub struct Using {
+    pub loc: Loc,
+    /// The library named by the declaration.
+    pub library: Identifier,
+    /// The type the library is attached to, if one is given.
+    pub ty: Option<Expression>,
+}
+
+/// The kind of a contract-like definition; each variant carries the location of
+/// the keyword(s) that introduced it.
+#[derive(Debug, PartialEq, Clone)]
+pub enum ContractTy {
+    /// `abstract contract`
+    Abstract(Loc),
+    /// `contract`
+    Contract(Loc),
+    /// `interface`
+    Interface(Loc),
+    /// `library`
+    Library(Loc),
+}
+
+impl fmt::Display for ContractTy {
+    /// Writes the keyword(s) that introduce this kind of contract.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            Self::Abstract(_) => "abstract contract",
+            Self::Contract(_) => "contract",
+            Self::Interface(_) => "interface",
+            Self::Library(_) => "library",
+        };
+        f.write_str(keyword)
+    }
+}
+
+/// A name with an optional parenthesised argument list, as used in a contract's
+/// `is` list and in function attributes.
+#[derive(Debug, PartialEq, Clone)]
+pub struct Base {
+    /// Spans the name and, when present, the argument list.
+    pub loc: Loc,
+    pub name: Identifier,
+    /// `None` when no parentheses were written; `Some(vec![])` for empty parentheses.
+    pub args: Option<Vec<Expression>>,
+}
+
+/// A contract, abstract contract, interface or library definition.
+#[derive(Debug, PartialEq)]
+pub struct ContractDefinition {
+    /// Doc comments preceding the definition.
+    pub doc: Vec<DocComment>,
+    /// Spans from the contract keyword to the end of the base list; the body in
+    /// braces is not included.
+    pub loc: Loc,
+    pub ty: ContractTy,
+    pub name: Identifier,
+    /// Entries of the `is` list; empty when there is none.
+    pub base: Vec<Base>,
+    /// The items in the contract body.
+    pub parts: Vec<ContractPart>,
+}
+
+/// One parameter of an event: `type [indexed] [name]`.
+#[derive(Debug, PartialEq)]
+pub struct EventParameter {
+    pub ty: Expression,
+    /// Spans the whole parameter.
+    pub loc: Loc,
+    /// Whether the `indexed` keyword was present.
+    pub indexed: bool,
+    /// Event parameters may be unnamed.
+    pub name: Option<Identifier>,
+}
+
+/// An `event Name(...) [anonymous];` definition.
+#[derive(Debug, PartialEq)]
+pub struct EventDefinition {
+    /// Doc comments preceding the definition.
+    pub doc: Vec<DocComment>,
+    /// Spans from the `event` keyword through the terminating `;`.
+    pub loc: Loc,
+    pub name: Identifier,
+    pub fields: Vec<EventParameter>,
+    /// Whether the `anonymous` keyword was present.
+    pub anonymous: bool,
+}
+
+/// An `enum Name { ... }` definition.
+#[derive(Debug, PartialEq)]
+pub struct EnumDefinition {
+    /// Doc comments preceding the definition.
+    pub doc: Vec<DocComment>,
+    /// Spans from the `enum` keyword to the closing `}`.
+    pub loc: Loc,
+    pub name: Identifier,
+    /// The enum's values, in source order.
+    pub values: Vec<Identifier>,
+}
+
+/// An attribute written between the type and the name of a variable definition.
+#[derive(Debug, PartialEq, Clone)]
+pub enum VariableAttribute {
+    Visibility(Visibility),
+    /// `constant`, with the keyword's location.
+    Constant(Loc),
+    /// `immutable`, with the keyword's location.
+    Immutable(Loc),
+    /// `override`, with the keyword's location.
+    Override(Loc),
+}
+
+/// A variable definition at file or contract level:
+/// `type attributes name [= initializer];`.
+#[derive(Debug, PartialEq)]
+pub struct VariableDefinition {
+    /// Doc comments preceding the definition.
+    pub doc: Vec<DocComment>,
+    /// Spans from the start of the type to the end of the name or initializer;
+    /// the terminating `;` is not included.
+    pub loc: Loc,
+    pub ty: Expression,
+    /// Always empty when the type is a function type (the grammar takes no
+    /// variable attributes in that form).
+    pub attrs: Vec<VariableAttribute>,
+    pub name: Identifier,
+    pub initializer: Option<Expression>,
+}
+
+/// A single string literal token and its location.
+#[derive(Debug, PartialEq, Clone)]
+pub struct StringLiteral {
+    pub loc: Loc,
+    pub string: String,
+}
+
+/// A single hex literal token and its location.
+#[derive(Debug, PartialEq, Clone)]
+pub struct HexLiteral {
+    pub loc: Loc,
+    pub hex: String,
+}
+
+/// A `name: expr` argument, as used in named function calls.
+#[derive(Debug, PartialEq, Clone)]
+pub struct NamedArgument {
+    /// Spans from the name to the end of the expression.
+    pub loc: Loc,
+    pub name: Identifier,
+    pub expr: Expression,
+}
+
+/// A time or ether denomination attached to an expression via `Expression::Unit`.
+/// Each variant carries a `Loc` (presumably that of the unit keyword — the grammar
+/// rule producing it is outside this view).
+#[derive(Debug, PartialEq, Clone)]
+pub enum Unit {
+    Seconds(Loc),
+    Minutes(Loc),
+    Hours(Loc),
+    Days(Loc),
+    Weeks(Loc),
+    Wei(Loc),
+    Szabo(Loc),
+    Finney(Loc),
+    Ether(Loc),
+}
+
+/// An expression. Types are expressions too (see `Expression::Type`).
+///
+/// For the unary prefix and binary operators the grammar stores the location of
+/// the operator token only, not of the whole expression. Use [`Expression::loc`]
+/// to read the stored location of any variant.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Expression {
+    PostIncrement(Loc, Box<Expression>),
+    PostDecrement(Loc, Box<Expression>),
+    /// `new <call>`; the operand is the call expression.
+    New(Loc, Box<Expression>),
+    /// `e[i]` or `e[]`; the index is `None` when the brackets are empty.
+    ArraySubscript(Loc, Box<Expression>, Option<Box<Expression>>),
+    MemberAccess(Loc, Box<Expression>, Identifier),
+    /// `f(a, b)`
+    FunctionCall(Loc, Box<Expression>, Vec<Expression>),
+    /// `f { ... }`: an expression followed by a block statement.
+    FunctionCallBlock(Loc, Box<Expression>, Box<Statement>),
+    /// `f({name: value, ...})`
+    NamedFunctionCall(Loc, Box<Expression>, Vec<NamedArgument>),
+    Not(Loc, Box<Expression>),
+    Complement(Loc, Box<Expression>),
+    Delete(Loc, Box<Expression>),
+    PreIncrement(Loc, Box<Expression>),
+    PreDecrement(Loc, Box<Expression>),
+    UnaryPlus(Loc, Box<Expression>),
+    UnaryMinus(Loc, Box<Expression>),
+    Power(Loc, Box<Expression>, Box<Expression>),
+    Multiply(Loc, Box<Expression>, Box<Expression>),
+    Divide(Loc, Box<Expression>, Box<Expression>),
+    Modulo(Loc, Box<Expression>, Box<Expression>),
+    Add(Loc, Box<Expression>, Box<Expression>),
+    Subtract(Loc, Box<Expression>, Box<Expression>),
+    ShiftLeft(Loc, Box<Expression>, Box<Expression>),
+    ShiftRight(Loc, Box<Expression>, Box<Expression>),
+    BitwiseAnd(Loc, Box<Expression>, Box<Expression>),
+    BitwiseXor(Loc, Box<Expression>, Box<Expression>),
+    BitwiseOr(Loc, Box<Expression>, Box<Expression>),
+    Less(Loc, Box<Expression>, Box<Expression>),
+    More(Loc, Box<Expression>, Box<Expression>),
+    LessEqual(Loc, Box<Expression>, Box<Expression>),
+    MoreEqual(Loc, Box<Expression>, Box<Expression>),
+    Equal(Loc, Box<Expression>, Box<Expression>),
+    NotEqual(Loc, Box<Expression>, Box<Expression>),
+    And(Loc, Box<Expression>, Box<Expression>),
+    Or(Loc, Box<Expression>, Box<Expression>),
+    /// `cond ? a : b`, stored as condition, then-value, else-value.
+    Ternary(Loc, Box<Expression>, Box<Expression>, Box<Expression>),
+    Assign(Loc, Box<Expression>, Box<Expression>),
+    AssignOr(Loc, Box<Expression>, Box<Expression>),
+    AssignAnd(Loc, Box<Expression>, Box<Expression>),
+    AssignXor(Loc, Box<Expression>, Box<Expression>),
+    AssignShiftLeft(Loc, Box<Expression>, Box<Expression>),
+    AssignShiftRight(Loc, Box<Expression>, Box<Expression>),
+    AssignAdd(Loc, Box<Expression>, Box<Expression>),
+    AssignSubtract(Loc, Box<Expression>, Box<Expression>),
+    AssignMultiply(Loc, Box<Expression>, Box<Expression>),
+    AssignDivide(Loc, Box<Expression>, Box<Expression>),
+    AssignModulo(Loc, Box<Expression>, Box<Expression>),
+    BoolLiteral(Loc, bool),
+    NumberLiteral(Loc, BigInt),
+    RationalNumberLiteral(Loc, BigRational),
+    HexNumberLiteral(Loc, String),
+    /// One or more adjacent string literals (the grammar requires at least one).
+    StringLiteral(Vec<StringLiteral>),
+    Type(Loc, Type),
+    /// One or more adjacent hex literals (the grammar requires at least one).
+    HexLiteral(Vec<HexLiteral>),
+    AddressLiteral(Loc, String),
+    Variable(Identifier),
+    List(Loc, Vec<(Loc, Option<Parameter>)>),
+    ArrayLiteral(Loc, Vec<Expression>),
+    /// An expression followed by a unit denomination.
+    Unit(Loc, Box<Expression>, Unit),
+    This(Loc),
+}
+
+impl Expression {
+    /// Returns the location stored for this expression.
+    ///
+    /// For string and hex literal sequences this is the location of the first
+    /// literal only.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a `StringLiteral` or `HexLiteral` expression holds an empty
+    /// vector. The grammar only builds them from one or more literals.
+    pub fn loc(&self) -> Loc {
+        match self {
+            // Variants whose first field is the location.
+            Self::PostIncrement(loc, ..)
+            | Self::PostDecrement(loc, ..)
+            | Self::New(loc, ..)
+            | Self::ArraySubscript(loc, ..)
+            | Self::MemberAccess(loc, ..)
+            | Self::FunctionCall(loc, ..)
+            | Self::FunctionCallBlock(loc, ..)
+            | Self::NamedFunctionCall(loc, ..)
+            | Self::Not(loc, ..)
+            | Self::Complement(loc, ..)
+            | Self::Delete(loc, ..)
+            | Self::PreIncrement(loc, ..)
+            | Self::PreDecrement(loc, ..)
+            | Self::UnaryPlus(loc, ..)
+            | Self::UnaryMinus(loc, ..)
+            | Self::Power(loc, ..)
+            | Self::Multiply(loc, ..)
+            | Self::Divide(loc, ..)
+            | Self::Modulo(loc, ..)
+            | Self::Add(loc, ..)
+            | Self::Subtract(loc, ..)
+            | Self::ShiftLeft(loc, ..)
+            | Self::ShiftRight(loc, ..)
+            | Self::BitwiseAnd(loc, ..)
+            | Self::BitwiseXor(loc, ..)
+            | Self::BitwiseOr(loc, ..)
+            | Self::Less(loc, ..)
+            | Self::More(loc, ..)
+            | Self::LessEqual(loc, ..)
+            | Self::MoreEqual(loc, ..)
+            | Self::Equal(loc, ..)
+            | Self::NotEqual(loc, ..)
+            | Self::And(loc, ..)
+            | Self::Or(loc, ..)
+            | Self::Ternary(loc, ..)
+            | Self::Assign(loc, ..)
+            | Self::AssignOr(loc, ..)
+            | Self::AssignAnd(loc, ..)
+            | Self::AssignXor(loc, ..)
+            | Self::AssignShiftLeft(loc, ..)
+            | Self::AssignShiftRight(loc, ..)
+            | Self::AssignAdd(loc, ..)
+            | Self::AssignSubtract(loc, ..)
+            | Self::AssignMultiply(loc, ..)
+            | Self::AssignDivide(loc, ..)
+            | Self::AssignModulo(loc, ..)
+            | Self::BoolLiteral(loc, ..)
+            | Self::NumberLiteral(loc, ..)
+            | Self::RationalNumberLiteral(loc, ..)
+            | Self::HexNumberLiteral(loc, ..)
+            | Self::ArrayLiteral(loc, ..)
+            | Self::List(loc, ..)
+            | Self::Type(loc, ..)
+            | Self::Unit(loc, ..)
+            | Self::AddressLiteral(loc, ..)
+            | Self::This(loc)
+            // A variable's location lives inside its identifier.
+            | Self::Variable(Identifier { loc, .. }) => *loc,
+            // Literal sequences report their first element's location.
+            Self::StringLiteral(literals) => literals[0].loc,
+            Self::HexLiteral(literals) => literals[0].loc,
+        }
+    }
+}
+
+/// A parameter: a type with an optional storage location and an optional name.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Parameter {
+    pub loc: Loc,
+    /// The parameter's type, held as an expression.
+    pub ty: Expression,
+    pub storage: Option<StorageLocation>,
+    pub name: Option<Identifier>,
+}
+
+/// A state-mutability attribute. Each variant carries a `Loc`, which
+/// `Mutability::loc` returns.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Mutability {
+    Pure(Loc),
+    View(Loc),
+    /// Displayed as `view`, the same as `View`.
+    Constant(Loc),
+    Payable(Loc),
+}
+
+impl fmt::Display for Mutability {
+    /// Writes the mutability keyword; `Constant` is written as `view`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            Self::Pure(_) => "pure",
+            Self::View(_) | Self::Constant(_) => "view",
+            Self::Payable(_) => "payable",
+        };
+        f.write_str(keyword)
+    }
+}
+
+impl Mutability {
+    /// Returns the source location carried by this mutability attribute.
+    pub fn loc(&self) -> Loc {
+        match *self {
+            Self::Pure(loc) | Self::View(loc) | Self::Constant(loc) | Self::Payable(loc) => loc,
+        }
+    }
+}
+
+/// A visibility specifier with the location of its keyword.
+///
+/// The grammar always stores `Some(loc)`; `None` is presumably for visibility
+/// that was not written in the source — confirm against the callers.
+#[derive(Debug, PartialEq, Clone)]
+pub enum Visibility {
+    External(Option<Loc>),
+    Public(Option<Loc>),
+    Internal(Option<Loc>),
+    Private(Option<Loc>),
+}
+
+impl fmt::Display for Visibility {
+    /// Writes the visibility keyword.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            Self::External(_) => "external",
+            Self::Public(_) => "public",
+            Self::Internal(_) => "internal",
+            Self::Private(_) => "private",
+        };
+        f.write_str(keyword)
+    }
+}
+
+impl Visibility {
+    /// Returns the location of the visibility keyword, if one is recorded.
+    pub fn loc(&self) -> Option<Loc> {
+        match *self {
+            Self::External(loc) | Self::Public(loc) | Self::Internal(loc) | Self::Private(loc) => {
+                loc
+            }
+        }
+    }
+}
+
+/// An attribute attached to a function definition or function type.
+#[derive(Debug, Clone, PartialEq)]
+pub enum FunctionAttribute {
+    Mutability(Mutability),
+    Visibility(Visibility),
+    Virtual(Loc),
+    /// `override`, with a list of identifiers (presumably the overridden bases — confirm).
+    Override(Loc, Vec<Identifier>),
+    /// A name with optional arguments; as the variant name says, it may be either a
+    /// base constructor call or a modifier invocation.
+    BaseOrModifier(Loc, Base),
+}
+
+/// The kind of a function-like definition.
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub enum FunctionTy {
+    Constructor,
+    Function,
+    Fallback,
+    Receive,
+    Modifier,
+}
+
+impl fmt::Display for FunctionTy {
+    /// Writes the lowercase keyword naming this kind of function.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let keyword = match self {
+            Self::Constructor => "constructor",
+            Self::Function => "function",
+            Self::Fallback => "fallback",
+            Self::Receive => "receive",
+            Self::Modifier => "modifier",
+        };
+        f.write_str(keyword)
+    }
+}
+
+/// A function, constructor, fallback, receive or modifier definition.
+// NOTE(review): the grammar rules that build this struct are outside this view;
+// hedged field notes below should be confirmed against them.
+#[derive(Debug, PartialEq)]
+pub struct FunctionDefinition {
+    /// Doc comments preceding the definition.
+    pub doc: Vec<DocComment>,
+    pub loc: Loc,
+    pub ty: FunctionTy,
+    /// Optional because not every kind of function has a name.
+    pub name: Option<Identifier>,
+    pub name_loc: Loc,
+    pub params: Vec<(Loc, Option<Parameter>)>,
+    pub attributes: Vec<FunctionAttribute>,
+    /// Presumably set when `return` was written where `returns` was meant — confirm.
+    pub return_not_returns: Option<Loc>,
+    pub returns: Vec<(Loc, Option<Parameter>)>,
+    /// `None` when the definition has no body.
+    pub body: Option<Statement>,
+}
+
+/// A statement. Every variant carries a `Loc`, which `Statement::loc` returns.
+// NOTE(review): the statement grammar rules are outside this view; the hedged
+// notes on tuple-field roles below are inferred from the types — confirm.
+#[derive(Debug, PartialEq, Clone)]
+#[allow(clippy::large_enum_variant, clippy::type_complexity)]
+pub enum Statement {
+    /// A `{ ... }` block; `unchecked` presumably marks an `unchecked { ... }` block.
+    Block {
+        loc: Loc,
+        unchecked: bool,
+        statements: Vec<Statement>,
+    },
+    Assembly {
+        loc: Loc,
+        assembly: Vec<AssemblyStatement>,
+    },
+    Args(Loc, Vec<NamedArgument>),
+    /// Presumably condition, then-branch, optional else-branch.
+    If(Loc, Expression, Box<Statement>, Option<Box<Statement>>),
+    /// Presumably condition, body.
+    While(Loc, Expression, Box<Statement>),
+    Expression(Loc, Expression),
+    /// A declaration with an optional initializer.
+    VariableDefinition(Loc, VariableDeclaration, Option<Expression>),
+    /// Presumably init, condition, next, body — all optional.
+    For(
+        Loc,
+        Option<Box<Statement>>,
+        Option<Box<Expression>>,
+        Option<Box<Statement>>,
+        Option<Box<Statement>>,
+    ),
+    /// Presumably body, condition.
+    DoWhile(Loc, Box<Statement>, Expression),
+    Continue(Loc),
+    Break(Loc),
+    Return(Loc, Option<Expression>),
+    Emit(Loc, Expression),
+    /// Presumably: tried expression, optional returns list with success block,
+    /// optional named catch clause, and the final catch clause.
+    Try(
+        Loc,
+        Expression,
+        Option<(Vec<(Loc, Option<Parameter>)>, Box<Statement>)>,
+        Option<Box<(Identifier, Parameter, Statement)>>,
+        Box<(Option<Parameter>, Statement)>,
+    ),
+    DocComment(Loc, CommentType, String),
+}
+
+/// A statement inside an assembly block (see `Statement::Assembly`).
+#[derive(Debug, PartialEq, Clone)]
+pub enum AssemblyStatement {
+    /// An assignment: location plus two expressions (presumably target, value).
+    Assign(Loc, AssemblyExpression, AssemblyExpression),
+    /// A `let` assignment: location plus two expressions (presumably target, value).
+    LetAssign(Loc, AssemblyExpression, AssemblyExpression),
+    Expression(AssemblyExpression),
+}
+
+/// An expression inside an assembly block.
+#[derive(Debug, PartialEq, Clone)]
+pub enum AssemblyExpression {
+    BoolLiteral(Loc, bool),
+    NumberLiteral(Loc, BigInt),
+    HexNumberLiteral(Loc, String),
+    StringLiteral(StringLiteral),
+    Variable(Identifier),
+    Assign(Loc, Box<AssemblyExpression>, Box<AssemblyExpression>),
+    LetAssign(Loc, Box<AssemblyExpression>, Box<AssemblyExpression>),
+    /// Presumably a call: callee expression and arguments.
+    Function(Loc, Box<AssemblyExpression>, Vec<AssemblyExpression>),
+    Member(Loc, Box<AssemblyExpression>, Identifier),
+    Subscript(Loc, Box<AssemblyExpression>, Box<AssemblyExpression>),
+}
+
+impl Statement {
+    /// Returns the source location stored in this statement.
+    pub fn loc(&self) -> Loc {
+        match self {
+            // Struct-like variants.
+            Self::Block { loc, .. } | Self::Assembly { loc, .. } => *loc,
+            // Tuple variants: the location is always the first field.
+            Self::Args(loc, ..)
+            | Self::If(loc, ..)
+            | Self::While(loc, ..)
+            | Self::Expression(loc, ..)
+            | Self::VariableDefinition(loc, ..)
+            | Self::For(loc, ..)
+            | Self::DoWhile(loc, ..)
+            | Self::Continue(loc)
+            | Self::Break(loc)
+            | Self::Return(loc, ..)
+            | Self::Emit(loc, ..)
+            | Self::Try(loc, ..)
+            | Self::DocComment(loc, ..) => *loc,
+        }
+    }
+}

+ 966 - 0
solang-parser/src/solidity.lalrpop

@@ -0,0 +1,966 @@
+use std::str::FromStr;
+use num_bigint::{BigUint, BigInt};
+use num_traits::Pow;
+use num_traits::Zero;
+use std::ops::Mul;
+use lalrpop_util::ParseError;
+use super::pt::*;
+use super::box_option;
+use super::lexer::{Token, LexicalError, CommentType};
+use super::doc::tags;
+use num_rational::BigRational;
+
+grammar<'input>(input: &'input str, file_no: usize);
+
+// Entry point of the grammar: a source unit is zero or more top-level parts.
+pub SourceUnit: SourceUnit = {
+    SourceUnitPart* => SourceUnit(<>)
+}
+
+// One top-level item. Pragma and import rules already yield a SourceUnitPart;
+// the other definitions are wrapped in the matching variant.
+SourceUnitPart: SourceUnitPart = {
+    ContractDefinition => SourceUnitPart::ContractDefinition(<>),
+    PragmaDirective => <>,
+    ImportDirective => <>,
+    EnumDefinition => SourceUnitPart::EnumDefinition(<>),
+    StructDefinition => SourceUnitPart::StructDefinition(<>),
+    EventDefinition => SourceUnitPart::EventDefinition(<>),
+    FunctionDefinition => SourceUnitPart::FunctionDefinition(<>),
+    VariableDefinition => SourceUnitPart::VariableDefinition(<>),
+    // A stray `;` at file level is kept in the tree with its location.
+    <l:@L> ";" <r:@R> => SourceUnitPart::StraySemicolon(Loc(file_no, l, r)),
+}
+
+// Import directives in their four forms. `from` is matched as an ordinary
+// identifier and checked in the action code; any other identifier in that
+// position is reported as a `LexicalError::ExpectedFrom` user error.
+ImportDirective: SourceUnitPart = {
+    // import "s";
+    <doc:DocComments> "import" <s:StringLiteral> ";" => SourceUnitPart::ImportDirective(doc, Import::Plain(s)),
+    // import "s" as id;
+    <doc:DocComments> "import" <s:StringLiteral> "as" <id:Identifier> ";" =>  SourceUnitPart::ImportDirective(doc, Import::GlobalSymbol(s, id)),
+    // import * as id from "s";
+    <doc:DocComments> "import" "*" "as" <id:Identifier> <from:Identifier> <s:StringLiteral> ";" =>? {
+        if from.name != "from" {
+            // Loc is (file_no, start, end): pass the identifier's start and end
+            // offsets, not the file number and start.
+            // NOTE(review): assumes ExpectedFrom takes (start, end, text) — confirm against the lexer.
+            Err(ParseError::User { error: LexicalError::ExpectedFrom(from.loc.1, from.loc.2, from.name)})
+        } else {
+            Ok(SourceUnitPart::ImportDirective(doc, Import::GlobalSymbol(s, id)))
+        }
+    },
+    // import { a, b as c } from "s";
+    <doc:DocComments> "import" "{" <rename:CommaOne<ImportRename>> "}" <from:Identifier> <s:StringLiteral> ";" =>? {
+        if from.name != "from" {
+            // Same offsets as above: start and end of the offending identifier.
+            Err(ParseError::User { error:LexicalError::ExpectedFrom(from.loc.1, from.loc.2, from.name)})
+        } else {
+            Ok(SourceUnitPart::ImportDirective(doc, Import::Rename(s, rename)))
+        }
+    }
+}
+
+// One entry of an `import { ... }` list: `name` or `name as alias`.
+ImportRename: (Identifier, Option<Identifier>) = {
+    <Identifier> => (<>, None),
+    <from:Identifier> "as" <to:Identifier> => (from, Some(to)),
+}
+
+// `pragma <identifier> <value>;` with its preceding doc comments.
+PragmaDirective: SourceUnitPart = {
+    // The lexer does special parsing for String literal; it isn't really a string literal
+    <doc:DocComments> "pragma" <i:Identifier> <s:StringLiteral> ";" => SourceUnitPart::PragmaDirective(doc, i, s)
+}
+
+// Zero or more doc comment tokens, converted to DocComment entries by `doc::tags`.
+DocComments: Vec<DocComment> = {
+    SingleDocComment* => tags(&<>)
+}
+
+// One doc comment token, paired with the offset at which it starts.
+SingleDocComment: (usize, CommentType, &'input str) = {
+    <l:@L> <c:DocComment> => (l, c.0, c.1)
+}
+
+// Any type: either a function type or one of the non-function types.
+Type: Type = {
+    NoFunctionType,
+    FunctionType
+}
+
+// Every built-in type except function types, which are kept in a separate rule.
+NoFunctionType: Type = {
+    "bool" => Type::Bool,
+    "address" => Type::Address,
+    "address" "payable" => Type::AddressPayable,
+    // payable is only used as a cast in solc
+    "payable" => Type::Payable,
+    "string" => Type::String,
+    "bytes" => Type::DynamicBytes,
+    Uint => Type::Uint(<>),
+    Int => Type::Int(<>),
+    Bytes => Type::Bytes(<>),
+    // The mapping's Loc spans from `mapping` through the closing `)`.
+    <l:@L> "mapping" "(" <k:Precedence0> "=>" <v:Precedence0> ")" <r:@R> => {
+        Type::Mapping(Loc(file_no, l, r), Box::new(k), Box::new(v))
+    },
+}
+
+// A function type: `function (params) attributes [returns (params) attributes]`.
+// Attributes written after the returns list are kept apart as trailing attributes.
+FunctionType: Type = {
+    "function" <params:ParameterList>
+    <attributes:FunctionTypeAttribute*>
+    <returns:("returns" <ParameterList> <FunctionTypeAttribute*>)?> => {
+        // With no `returns` clause both lists are empty.
+        let (returns, trailing_attributes) = returns.unwrap_or_default();
+
+        Type::Function {
+            params,
+            attributes,
+            returns,
+            trailing_attributes,
+        }
+    }
+}
+
+// The attributes a function type accepts: mutability and visibility only.
+FunctionTypeAttribute: FunctionAttribute = {
+    Mutability => FunctionAttribute::Mutability(<>),
+    Visibility => FunctionAttribute::Visibility(<>),
+}
+
+// One array dimension: `[]` yields None, `[expr]` yields the expression.
+ArrayDimension: Option<Expression> = {
+    "[" "]" => None,
+    "[" <Expression> "]" => Some(<>)
+}
+
+// A data-location keyword together with the location of the keyword.
+StorageLocation: StorageLocation = {
+    <l:@L> "memory" <r:@R> => StorageLocation::Memory(Loc(file_no, l, r)),
+    <l:@L> "storage" <r:@R> => StorageLocation::Storage(Loc(file_no, l, r)),
+    <l:@L> "calldata" <r:@R> => StorageLocation::Calldata(Loc(file_no, l, r)),
+}
+
+// An identifier token; its text is copied into an owned String.
+Identifier: Identifier = {
+    <l:@L> <n:identifier> <r:@R> => Identifier{loc: Loc(file_no, l, r), name: n.to_string()}
+}
+
+// `type [storage-location] name`; the Loc spans from the type to the end of the name.
+VariableDeclaration: VariableDeclaration = {
+    <l:@L> <ty:Precedence0> <storage:StorageLocation?> <name:Identifier> <r:@R> => VariableDeclaration {
+        loc: Loc(file_no, l, r), ty, storage, name
+    },
+}
+
+// `struct Name { field; ... }`. The Loc starts at `struct`, after any doc comments,
+// and ends after the closing brace.
+StructDefinition: Box<StructDefinition> = {
+    <doc:DocComments> <l:@L> "struct" <name:Identifier> "{" <fields:(<VariableDeclaration> ";")*> "}" <r:@R> => {
+        Box::new(StructDefinition{loc: Loc(file_no, l, r), doc, name, fields})
+    }
+}
+
+// The keyword(s) introducing a contract-like definition, with their location.
+ContractTy: ContractTy = {
+    <l:@L> "abstract" "contract" <r:@R> => ContractTy::Abstract(Loc(file_no, l, r)),
+    <l:@L> "contract" <r:@R> => ContractTy::Contract(Loc(file_no, l, r)),
+    <l:@L> "interface" <r:@R> => ContractTy::Interface(Loc(file_no, l, r)),
+    <l:@L> "library" <r:@R> => ContractTy::Library(Loc(file_no, l, r)),
+}
+
+// One item of a contract body. Functions, modifiers and constructors are all
+// stored as ContractPart::FunctionDefinition.
+ContractPart: ContractPart = {
+    StructDefinition => ContractPart::StructDefinition(<>),
+    EventDefinition => ContractPart::EventDefinition(<>),
+    EnumDefinition => ContractPart::EnumDefinition(<>),
+    VariableDefinition => ContractPart::VariableDefinition(<>),
+    FunctionDefinition => ContractPart::FunctionDefinition(<>),
+    ModifierDefinition => ContractPart::FunctionDefinition(<>),
+    ConstructorDefinition => ContractPart::FunctionDefinition(<>),
+    // A stray `;` in the body is kept in the tree with its location.
+    <l:@L> ";" <r:@R> => ContractPart::StraySemicolon(Loc(file_no, l, r)),
+    Using => ContractPart::Using(<>),
+}
+
+// Optional `is A, B(...)` list; an absent list yields an empty vector.
+Bases: Vec<Base> = {
+    => Vec::new(),
+    "is" <CommaOne<Base>> => <>,
+}
+
+// A base name with an optional parenthesised argument list; `args` is None when
+// no parentheses were written.
+Base: Base = {
+    <l:@L> <name:Identifier> <args:("(" <Comma<Expression>> ")")?> <r:@R> => Base {
+        loc: Loc(file_no, l, r),
+        name,
+        args
+    }
+}
+
+// A contract-like definition. The recorded Loc ends before the opening `{`, so
+// it spans the kind keyword, the name and the base list but not the body.
+ContractDefinition: Box<ContractDefinition> = {
+    <doc:DocComments> <l:@L> <ty:ContractTy> <name:Identifier> <base:Bases> <r:@R>
+    "{" <parts:(<ContractPart>)*> "}" => {
+        Box::new(ContractDefinition{doc, loc: Loc(file_no, l, r), ty, name, base, parts})
+    }
+}
+
+// One event parameter: `type [indexed] [name]`.
+EventParameter: EventParameter = {
+    <l:@L> <ty:Precedence0> <i:"indexed"?> <name:Identifier?> <r:@R> => EventParameter{
+        loc: Loc(file_no, l, r), ty, indexed: i.is_some(), name
+    }
+}
+
+// `event Name(params) [anonymous];`. The Loc includes the terminating `;`.
+EventDefinition: Box<EventDefinition> = {
+    <doc:DocComments> <l:@L> "event" <name:Identifier> "(" <v:Comma<EventParameter>> ")" <a:"anonymous"?> ";" <r:@R> => {
+        Box::new(EventDefinition{
+            loc: Loc(file_no, l, r), doc, name, fields: v, anonymous: a.is_some()
+        })
+    },
+}
+
+// `enum Name { A, B, ... }`. The Loc spans from `enum` to the closing brace.
+EnumDefinition: Box<EnumDefinition> = {
+    <doc:DocComments> <l:@L> "enum" <name:Identifier> "{" <values:Comma<Identifier>> "}" <r:@R> => {
+        Box::new(EnumDefinition{loc: Loc(file_no, l, r), doc, name, values})
+    }
+}
+
+// A variable definition with an optional initializer. The Loc ends before the
+// terminating `;`. Two productions are needed: variable attributes are accepted
+// only when the type is not a function type.
+VariableDefinition: Box<VariableDefinition> = {
+    <doc:DocComments> <l:@L> <ty:NoFunctionTyPrecedence0> <attrs:VariableAttribute*> <name:Identifier> <e:("=" <Expression>)?> <r:@R>";" => {
+        Box::new(VariableDefinition{
+            doc, loc: Loc(file_no, l, r), ty, attrs, name, initializer: e,
+        })
+    },
+    // attributes cause shift-reduce errors with function type, since a function type contract variable can be declare as
+    // contract foo {
+    //     // f is a variable with internal visibility referencing an external function
+    //     function() external internal f;
+    // }
+    <doc:DocComments> <l:@L> <ty:FunctionTyPrecedence0> <name:Identifier> <e:("=" <Expression>)?> <r:@R>";" => {
+        Box::new(VariableDefinition{
+            doc, loc: Loc(file_no, l, r), ty, attrs: Vec::new(), name, initializer: e,
+        })
+    }
+}
+
+// A visibility keyword; the keyword's location is always recorded as Some(..).
+Visibility: Visibility = {
+    <l:@L> "public" <r:@R> => Visibility::Public(Some(Loc(file_no, l, r))),
+    <l:@L> "external" <r:@R> => Visibility::External(Some(Loc(file_no, l, r))),
+    <l:@L> "internal" <r:@R> => Visibility::Internal(Some(Loc(file_no, l, r))),
+    <l:@L> "private" <r:@R> => Visibility::Private(Some(Loc(file_no, l, r))),
+}
+
+// Attributes allowed on a variable definition: a visibility, or one of the
+// keywords `constant`, `immutable`, `override` with the keyword's location.
+VariableAttribute: VariableAttribute = {
+    Visibility => VariableAttribute::Visibility(<>),
+    <l:@L> "constant" <r:@R> => VariableAttribute::Constant(Loc(file_no, l, r)),
+    <l:@L> "immutable" <r:@R> => VariableAttribute::Immutable(Loc(file_no, l, r)),
+    <l:@L> "override" <r:@R> => VariableAttribute::Override(Loc(file_no, l, r))
+}
+
+// Any expression. The PrecedenceN rules form a ladder from Precedence15
+// (assignment) down to Precedence0; each rule falls through to the next.
+Expression: Expression = {
+    Precedence15,
+}
+
+// Assignment and compound assignment. The right operand recurses into
+// Precedence15, so chains group to the right. The recorded Loc covers the
+// operator token only.
+Precedence15: Expression = {
+    <l:Precedence14> <a:@L> "=" <b:@R> <r:Precedence15> => Expression::Assign(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "|=" <b:@R> <r:Precedence15> => Expression::AssignOr(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "^=" <b:@R> <r:Precedence15> => Expression::AssignXor(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "&=" <b:@R> <r:Precedence15> => Expression::AssignAnd(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "<<=" <b:@R> <r:Precedence15> => Expression::AssignShiftLeft(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> ">>=" <b:@R> <r:Precedence15> => Expression::AssignShiftRight(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "+=" <b:@R> <r:Precedence15> => Expression::AssignAdd(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "-=" <b:@R> <r:Precedence15> => Expression::AssignSubtract(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "*=" <b:@R> <r:Precedence15> => Expression::AssignMultiply(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "/=" <b:@R> <r:Precedence15> => Expression::AssignDivide(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence14> <a:@L> "%=" <b:@R> <r:Precedence15> => Expression::AssignModulo(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence14,
+}
+
+// Ternary conditional `c ? l : r`. The recorded Loc runs from the `?` to the
+// end of the `:`. The condition recurses into Precedence14 (left-recursive).
+Precedence14: Expression = {
+    <c:Precedence14> <a:@L> "?" <l:Precedence13> ":" <b:@R> <r:Precedence13> => Expression::Ternary(Loc(file_no, a, b), Box::new(c), Box::new(l), Box::new(r)),
+    Precedence13,
+}
+
+// Logical or, left-associative. The Loc covers the operator token only.
+Precedence13: Expression = {
+    <l:Precedence13> <a:@L> "||" <b:@R> <r:Precedence12> => Expression::Or(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence12,
+}
+
+// Logical and, left-associative. The Loc covers the operator token only.
+Precedence12: Expression = {
+    <l:Precedence12> <a:@L> "&&" <b:@R> <r:Precedence11> => Expression::And(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence11,
+}
+
+// Equality operators, left-associative. The Loc covers the operator token only.
+Precedence11: Expression = {
+    <l:Precedence11> <a:@L> "==" <b:@R> <r:Precedence10> => Expression::Equal(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence11> <a:@L> "!=" <b:@R> <r:Precedence10> => Expression::NotEqual(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence10,
+}
+
+// Relational operators, left-associative. The Loc covers the operator token only.
+Precedence10: Expression = {
+    <l:Precedence10> <a:@L> "<" <b:@R> <r:Precedence9> => Expression::Less(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence10> <a:@L> ">" <b:@R> <r:Precedence9> => Expression::More(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence10> <a:@L> "<=" <b:@R> <r:Precedence9> => Expression::LessEqual(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence10> <a:@L> ">=" <b:@R> <r:Precedence9> => Expression::MoreEqual(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence9,
+}
+
+// Bitwise or, left-associative. The Loc covers the operator token only.
+Precedence9: Expression = {
+    <l:Precedence9> <a:@L> "|" <b:@R> <r:Precedence8> => Expression::BitwiseOr(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence8,
+}
+
+// Bitwise xor, left-associative. The Loc covers the operator token only.
+Precedence8: Expression = {
+    <l:Precedence8> <a:@L> "^" <b:@R> <r:Precedence7> => Expression::BitwiseXor(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence7,
+}
+
+// Bitwise and, left-associative. The Loc covers the operator token only.
+Precedence7: Expression = {
+    <l:Precedence7> <a:@L> "&" <b:@R> <r:Precedence6> => Expression::BitwiseAnd(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence6,
+}
+
+// Shift operators, left-associative. The Loc covers the operator token only.
+Precedence6: Expression = {
+    <l:Precedence6> <a:@L> "<<" <b:@R> <r:Precedence5> => Expression::ShiftLeft(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence6> <a:@L> ">>" <b:@R> <r:Precedence5> => Expression::ShiftRight(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence5,
+}
+
+// Binary addition and subtraction, left-associative. The Loc covers the operator token only.
+Precedence5: Expression = {
+    <l:Precedence5> <a:@L> "+" <b:@R> <r:Precedence4> => Expression::Add(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence5> <a:@L> "-" <b:@R> <r:Precedence4> => Expression::Subtract(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence4,
+}
+
+// Multiplication, division and modulo, left-associative. The Loc covers the operator token only.
+Precedence4: Expression = {
+    <l:Precedence4> <a:@L> "*" <b:@R> <r:Precedence3> => Expression::Multiply(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence4> <a:@L> "/" <b:@R> <r:Precedence3> => Expression::Divide(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    <l:Precedence4> <a:@L> "%" <b:@R> <r:Precedence3> => Expression::Modulo(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence3,
+}
+
+// Exponentiation. The left operand recurses into Precedence3, so chains group
+// to the left. The Loc covers the operator token only.
+Precedence3: Expression = {
+    <l:Precedence3> <a:@L> "**" <b:@R> <r:Precedence2> => Expression::Power(Loc(file_no, a, b), Box::new(l), Box::new(r)),
+    Precedence2,
+}
+
+// Unary prefix operators and `new`. For the prefix operators the Loc covers the
+// operator token only; for `new` it spans `new` and the call that follows.
+Precedence2: Expression = {
+    <a:@L> "!" <b:@R> <e:Precedence2> => Expression::Not(Loc(file_no, a, b), Box::new(e)),
+    <a:@L> "~" <b:@R> <e:Precedence2> => Expression::Complement(Loc(file_no, a, b), Box::new(e)),
+    <a:@L> "delete" <b:@R> <e:Precedence2> => Expression::Delete(Loc(file_no, a, b), Box::new(e)),
+    <a:@L> "new" <call:FunctionCall> <b:@R> => Expression::New(Loc(file_no, a, b), Box::new(call)),
+    <a:@L> "++" <b:@R> <e:Precedence2> => Expression::PreIncrement(Loc(file_no, a, b), Box::new(e)),
+    <a:@L> "--" <b:@R> <e:Precedence2> => Expression::PreDecrement(Loc(file_no, a, b), Box::new(e)),
+    <a:@L> "+" <b:@R> <e:Precedence2> => Expression::UnaryPlus(Loc(file_no, a, b), Box::new(e)),
+    <a:@L> "-" <b:@R> <e:Precedence2> => Expression::UnaryMinus(Loc(file_no, a, b), Box::new(e)),
+    Precedence0,
+}
+
+// `name: expr`, one entry of a named-argument list.
+NamedArgument: NamedArgument = {
+    <l:@L> <name:Identifier> ":" <expr:Expression> <r:@R> => {
+        NamedArgument{ loc: Loc(file_no, l, r), name, expr }
+    }
+}
+
+// The three call forms. The callee is never a bare function type.
+FunctionCall: Expression = {
+    // f(a, b): the Loc spans the callee through the closing `)`.
+    <a:@L> <i:NoFunctionTyPrecedence0> "(" <v:Comma<Expression>> ")" <b:@R> => {
+        Expression::FunctionCall(Loc(file_no, a, b), Box::new(i), v)
+    },
+    // f { ... }: here the Loc covers the block only, not the callee.
+    <i:NoFunctionTyPrecedence0> <l:@L> <block:BlockStatement> <r:@R> => {
+        Expression::FunctionCallBlock(Loc(file_no, l, r), Box::new(i), Box::new(block))
+    },
+    // f({name: value, ...}): the Loc spans the callee through the closing `)`.
+    <a:@L> <i:NoFunctionTyPrecedence0> "(" "{" <v:Comma<NamedArgument>> "}" ")" <b:@R> => {
+        Expression::NamedFunctionCall(Loc(file_no, a, b), Box::new(i), v)
+    },
+}
+
+// The tightest-binding expressions. These are split into two rules so that other
+// productions (FunctionCall, for example) can refer to the variant that excludes bare
+// function types.
+Precedence0: Expression = {
+    NoFunctionTyPrecedence0,
+    FunctionTyPrecedence0
+}
+
+// A function type used in expression position, wrapped as Expression::Type.
+FunctionTyPrecedence0: Expression = {
+    <start:@L> <func_ty:FunctionType> <end:@R> =>
+        Expression::Type(Loc(file_no, start, end), func_ty),
+}
+
+// Primary and postfix expressions, excluding bare function types (those are handled by
+// FunctionTyPrecedence0): postfix `++`/`--`, calls, subscripts, member access, literals,
+// non-function types, identifiers, unit-suffixed values, parenthesised lists and `this`.
+NoFunctionTyPrecedence0: Expression = {
+    <a:@L> <e:Precedence0> "++" <b:@R> => Expression::PostIncrement(Loc(file_no, a, b), Box::new(e)),
+    <a:@L> <e:Precedence0> "--" <b:@R> => Expression::PostDecrement(Loc(file_no, a, b), Box::new(e)),
+    <FunctionCall> => <>,
+    <a:@L> <e:Precedence0> "[" <i:Expression?> "]" <b:@R> => Expression::ArraySubscript(Loc(file_no, a, b), Box::new(e), box_option(i)),
+    <a:@L> <e:Precedence0> "." <i:Identifier> <b:@R> => Expression::MemberAccess(Loc(file_no, a, b), Box::new(e), i),
+    // Solidity has ".address" members on external function types. Address is a keyword, so special casing needed
+    <a:@L> <e:Precedence0> "." <al:@L> "address" <b:@R> => {
+        Expression::MemberAccess(Loc(file_no, a, b), Box::new(e),
+            Identifier { loc: Loc(file_no, al, b), name: "address".to_string() })
+    },
+    <a:@L> "true" <b:@R> => Expression::BoolLiteral(Loc(file_no, a, b), true),
+    <a:@L> "false" <b:@R> => Expression::BoolLiteral(Loc(file_no, a, b), false),
+    // Adjacent string (or hex) literals are collected into a single expression.
+    <StringLiteral+> => Expression::StringLiteral(<>),
+    <HexLiteral+> => Expression::HexLiteral(<>),
+    <l:@L> <a:address> <r:@R> => {
+        // Skip the first 8 characters of the token text (presumably `address` plus the
+        // opening quote) and drop any remaining quote characters.
+        Expression::AddressLiteral(Loc(file_no, l, r), a.chars().skip(8).filter(|c|  *c != '"' && *c != '\'').collect() )
+    },
+    <l:@L> <ty:NoFunctionType> <r:@R> => Expression::Type(Loc(file_no, l, r), ty),
+    <a:@L> "[" <v:CommaOne<Expression>> "]" <b:@R> => {
+        Expression::ArrayLiteral(Loc(file_no, a, b), v)
+    },
+    <Identifier> => Expression::Variable(<>),
+    <l:@L> <e:Precedence0> <u:Unit> <r:@R> => Expression::Unit(Loc(file_no, l, r), Box::new(e), u),
+    <l:@L> <n:number> <r:@R> => {
+        // n is (digits, exponent); underscores are digit separators and are stripped.
+        let base: String = n.0.chars().filter(|v| *v != '_').collect();
+        let exp: String = n.1.chars().filter(|v| *v != '_').collect();
+
+        let n = if exp.is_empty() {
+            BigInt::from_str(&base).unwrap()
+        } else {
+            let base = BigInt::from_str(&base).unwrap();
+            let exp = BigInt::from_str("10").unwrap().pow(BigUint::from_str(&exp).unwrap());
+
+            base.mul(exp)
+        };
+
+        Expression::NumberLiteral(Loc(file_no, l, r), n)
+    },
+    <l:@L> <n:rational> <r:@R> => {
+        // n is (integer part, fractional digits, exponent). The exponent may be empty
+        // and may carry a leading `-`.
+        let mut significand: String = n.0.to_string();
+        let mantissa: String = n.1.to_string();
+        let exp: String = n.2.to_string();
+        let negative_exp = exp.starts_with('-');
+
+        // Every fractional digit divides the value by ten.
+        let denominator = BigInt::from_str("10").unwrap().pow(BigUint::from(mantissa.len() as u64));
+        // Index of the first non-zero fractional digit; usize::MAX when there is none.
+        let zero_index = mantissa.chars().position(|c| c != '0').unwrap_or(usize::MAX);
+        let n =  if exp.is_empty() {
+            if significand.is_empty() || significand == "0" {
+                if zero_index < usize::MAX {
+                    BigRational::new(BigInt::from_str(&mantissa[zero_index..]).unwrap(), denominator)
+                } else {
+                    BigRational::from(BigInt::zero())
+                }
+            } else {
+                significand.push_str(&mantissa);
+                BigRational::new(BigInt::from_str(&significand).unwrap(), denominator)
+            }
+        } else {
+            // A negative exponent scales the denominator, a positive one the numerator.
+            if significand.is_empty() || significand == "0" {
+                if zero_index < usize::MAX {
+                    if negative_exp {
+                        let exp_result = BigInt::from_str("10").unwrap().pow(BigUint::from_str(&exp[1..]).unwrap());
+                        BigRational::new(BigInt::from_str(&mantissa[zero_index..]).unwrap(), denominator.mul(exp_result))
+                    } else {
+                        let exp_result = BigInt::from_str("10").unwrap().pow(BigUint::from_str(&exp).unwrap());
+                        BigRational::new(BigInt::from_str(&mantissa[zero_index..]).unwrap().mul(exp_result), denominator)
+                    }
+                } else {
+                    BigRational::from(BigInt::zero())
+                }
+            } else {
+                significand.push_str(&mantissa);
+                if negative_exp {
+                    let exp_result = BigInt::from_str("10").unwrap().pow(BigUint::from_str(&exp[1..]).unwrap());
+                    BigRational::new(BigInt::from_str(&significand).unwrap(), denominator.mul(exp_result))
+                } else {
+                    let exp_result = BigInt::from_str("10").unwrap().pow(BigUint::from_str(&exp).unwrap());
+                    BigRational::new(BigInt::from_str(&significand).unwrap().mul(exp_result), denominator)
+                }
+            }
+        };
+
+        Expression::RationalNumberLiteral(Loc(file_no, l, r), n)
+    },
+    <l:@L> <n:hexnumber> <r:@R> => {
+        Expression::HexNumberLiteral(Loc(file_no, l, r), n.to_owned())
+    },
+    <l:@L> <a:ParameterList> <r:@R> => {
+        if a.len() == 1 {
+            if let Some(Parameter{ ty, storage: None, name: None, .. }) = &a[0].1 {
+                // this means "(" Expression ")"
+                return ty.clone();
+            }
+        }
+
+        Expression::List(Loc(file_no, l, r), a)
+    },
+    <@L> "this" <@R> => Expression::This(Loc(file_no, <>)),
+}
+
+// Time and ether denomination suffixes that may follow an expression. Each variant
+// carries the location of the suffix keyword itself.
+Unit: Unit = {
+    <start:@L> "seconds" <end:@R> => Unit::Seconds(Loc(file_no, start, end)),
+    <start:@L> "minutes" <end:@R> => Unit::Minutes(Loc(file_no, start, end)),
+    <start:@L> "hours" <end:@R> => Unit::Hours(Loc(file_no, start, end)),
+    <start:@L> "days" <end:@R> => Unit::Days(Loc(file_no, start, end)),
+    <start:@L> "weeks" <end:@R> => Unit::Weeks(Loc(file_no, start, end)),
+    <start:@L> "wei" <end:@R> => Unit::Wei(Loc(file_no, start, end)),
+    <start:@L> "szabo" <end:@R> => Unit::Szabo(Loc(file_no, start, end)),
+    <start:@L> "finney" <end:@R> => Unit::Finney(Loc(file_no, start, end)),
+    <start:@L> "ether" <end:@R> => Unit::Ether(Loc(file_no, start, end)),
+}
+
+// One string token, copied into an owned String together with its location.
+StringLiteral: StringLiteral = {
+    <start:@L> <text:string> <end:@R> =>
+        StringLiteral{ loc: Loc(file_no, start, end), string: text.to_string() },
+}
+
+// One hex string token. The stored text is the token with its first 4 characters skipped
+// (presumably `hex` plus the opening quote) and with underscores and any remaining quote
+// characters removed.
+HexLiteral: HexLiteral = {
+    <l:@L> <s:hexstring> <r:@R> => {
+        HexLiteral{ loc: Loc(file_no, l, r), hex: s.chars().skip(4).filter(|c| *c != '_' && *c != '"' && *c != '\'').collect() }
+    }
+}
+
+// Parameter lists are used for function arguments, for return values and for
+// destructuring statements. Destructuring allows parameters to be left out, so
+// parameters are optional everywhere; as a bonus this makes it possible to generate
+// error messages about missing parameters/returns of functions.
+//
+// A single parameter is a type expression, optionally followed by a storage location
+// and a name.
+Parameter: Parameter = {
+    <start:@L> <ty:Expression> <storage:StorageLocation?> <name:Identifier?> <end:@R> =>
+        Parameter{ loc: Loc(file_no, start, end), ty, storage, name },
+}
+
+// A possibly absent parameter, paired with the location of the slot it occupies.
+OptParameter: (Loc, Option<Parameter>) = {
+    <start:@L> <param:Parameter?> <end:@R> => (Loc(file_no, start, end), param),
+}
+
+// A parenthesised parameter list: empty, exactly one (mandatory) parameter, or two or
+// more comma-separated slots, each of which may be empty.
+ParameterList: Vec<(Loc, Option<Parameter>)> = {
+    "(" ")" => Vec::new(),
+    "(" <start:@L> <only:Parameter> <end:@R> ")" => vec![(Loc(file_no, start, end), Some(only))],
+    "(" <slots:CommaTwo<OptParameter>> ")" => slots,
+}
+
+// State mutability keywords, each recorded with the location of the keyword.
+Mutability: Mutability = {
+    <start:@L> "pure" <end:@R> => Mutability::Pure(Loc(file_no, start, end)),
+    <start:@L> "constant" <end:@R> => Mutability::Constant(Loc(file_no, start, end)),
+    <start:@L> "view" <end:@R> => Mutability::View(Loc(file_no, start, end)),
+    <start:@L> "payable" <end:@R> => Mutability::Payable(Loc(file_no, start, end)),
+}
+
+// Anything that may follow a function's parameter list: mutability, visibility,
+// `virtual`, `override` (with or without a list of identifiers) and base/modifier
+// invocations.
+FunctionAttribute: FunctionAttribute = {
+    Mutability => FunctionAttribute::Mutability(<>),
+    Visibility => FunctionAttribute::Visibility(<>),
+    <start:@L> "virtual" <end:@R> =>
+        FunctionAttribute::Virtual(Loc(file_no, start, end)),
+    // A bare `override` carries an empty list.
+    <start:@L> "override" <end:@R> =>
+        FunctionAttribute::Override(Loc(file_no, start, end), Vec::new()),
+    <start:@L> "override" "(" <bases:CommaOne<Identifier>> ")" <end:@R> =>
+        FunctionAttribute::Override(Loc(file_no, start, end), bases),
+    <start:@L> <base:Base> <end:@R> =>
+        FunctionAttribute::BaseOrModifier(Loc(file_no, start, end), base),
+}
+
+// The keyword-introduced function kinds accepted by ConstructorDefinition.
+// `modifier` and `function` are not listed here: they have dedicated productions
+// (ModifierDefinition and FunctionDefinition) that match their keywords directly.
+FunctionTy: FunctionTy = {
+    "fallback" => FunctionTy::Fallback,
+    "receive" => FunctionTy::Receive,
+    "constructor" => FunctionTy::Constructor,
+}
+
+// A function body: either a block (Some) or a bare `;` meaning there is no body (None).
+// The locations captured in the `;` alternative are not used by the action.
+BlockStatementOrSemiColon: Option<Statement> = {
+    <@L> <@R> ";" => None,
+    BlockStatement => Some(<>),
+}
+
+// The keyword introducing a return list. `returns` yields None. The keyword `return` is
+// accepted in the same position and yields the keyword's location; the definition
+// productions store it in the `return_not_returns` field (presumably so that a later
+// stage can report the mistake).
+returns: Option<Loc> = {
+    "returns" => None,
+    <l:@L> "return" <r:@R> => Some(Loc(file_no, l, r)),
+}
+
+// Modifiers can't have attributes or return values, but they are parsed anyway so that
+// nice error messages can be given. The parameter list is optional.
+ModifierDefinition: Box<FunctionDefinition> = {
+    <doc:DocComments> <start:@L> "modifier" <name_start:@L> <name:Identifier> <name_end:@R> <params:ParameterList?>
+    <attributes:FunctionAttribute*>
+    <returns:(returns ParameterList)?> <end:@R> <body:BlockStatementOrSemiColon> => {
+        // A missing returns clause becomes "no misspelt keyword, empty list".
+        let (return_not_returns, returns) = match returns {
+            Some(clause) => clause,
+            None => (None, Vec::new()),
+        };
+
+        Box::new(FunctionDefinition{
+            doc,
+            loc: Loc(file_no, start, end),
+            ty: FunctionTy::Modifier,
+            name: Some(name),
+            name_loc: Loc(file_no, name_start, name_end),
+            // A missing parameter list is treated as an empty one.
+            params: params.unwrap_or_default(),
+            attributes,
+            return_not_returns,
+            returns,
+            body,
+        })
+    },
+}
+
+// Definitions introduced by one of the FunctionTy keywords (`fallback`, `receive`,
+// `constructor`). A name and a returns clause are accepted by the grammar here; the
+// location excludes the body.
+ConstructorDefinition: Box<FunctionDefinition> = {
+    <doc:DocComments> <start:@L> <ty:FunctionTy> <name_start:@L> <name:Identifier?> <name_end:@R> <params:ParameterList>
+    <attributes:FunctionAttribute*>
+    <returns:(returns ParameterList)?> <end:@R> <body:BlockStatementOrSemiColon> => {
+        // A missing returns clause becomes "no misspelt keyword, empty list".
+        let (return_not_returns, returns) = match returns {
+            Some(clause) => clause,
+            None => (None, Vec::new()),
+        };
+
+        Box::new(FunctionDefinition{
+            doc,
+            loc: Loc(file_no, start, end),
+            ty,
+            name,
+            name_loc: Loc(file_no, name_start, name_end),
+            params,
+            attributes,
+            return_not_returns,
+            returns,
+            body,
+        })
+    },
+}
+
+// A named `function` definition, plus the old-style unnamed form. The location covers
+// the header up to the end of the returns clause and excludes the body.
+FunctionDefinition: Box<FunctionDefinition> = {
+    <doc:DocComments> <start:@L> "function" <name_start:@L> <name:Identifier> <name_end:@R> <params:ParameterList>
+    <attributes:FunctionAttribute*>
+    <returns:(returns ParameterList)?> <end:@R> <body:BlockStatementOrSemiColon> => {
+        // A missing returns clause becomes "no misspelt keyword, empty list".
+        let (return_not_returns, returns) = match returns {
+            Some(clause) => clause,
+            None => (None, Vec::new()),
+        };
+
+        Box::new(FunctionDefinition{
+            doc,
+            loc: Loc(file_no, start, end),
+            ty: FunctionTy::Function,
+            name: Some(name),
+            name_loc: Loc(file_no, name_start, name_end),
+            params,
+            attributes,
+            return_not_returns,
+            returns,
+            body,
+        })
+    },
+    // Old-style fallback function without a name. Sema will give a nice error message
+    // with some instructions on how to update the syntax.
+    <doc:DocComments> <start:@L> <func_ty:FunctionType> <end:@R> <body:BlockStatementOrSemiColon> => {
+        // There is no name, so the name location reuses the span of the whole header.
+        let header = Loc(file_no, start, end);
+
+        match func_ty {
+            // The trailing attributes are dropped, but this production only exists
+            // for generating an error message.
+            Type::Function { params, attributes, returns, .. }  => {
+                Box::new(FunctionDefinition{
+                    doc,
+                    loc: header,
+                    ty: FunctionTy::Function,
+                    name: None,
+                    name_loc: Loc(file_no, start, end),
+                    params,
+                    attributes,
+                    return_not_returns: None,
+                    returns,
+                    body,
+                })
+            },
+            _ => unreachable!(),
+        }
+    }
+}
+
+// `using Library for Type;` and `using Library for *;`. The wildcard form is stored with
+// `ty: None`. The location excludes the trailing semicolon.
+Using: Box<Using> = {
+    <start:@L> "using" <library:Identifier> "for" <target:Precedence0> <end:@R> ";" => {
+        let loc = Loc(file_no, start, end);
+        Box::new(Using { loc, library, ty: Some(target) })
+    },
+    <start:@L> "using" <library:Identifier> "for" "*" <end:@R> ";" => {
+        let loc = Loc(file_no, start, end);
+        Box::new(Using { loc, library, ty: None })
+    },
+}
+
+// A braced block. It holds either ordinary statements (a checked block) or a non-empty
+// comma-separated list of `name: value` arguments.
+BlockStatement: Statement = {
+    <start:@L> "{" <statements:Statement*> "}" <end:@R> =>
+        Statement::Block { loc: Loc(file_no, start, end), unchecked: false, statements },
+    <start:@L> "{" <args:CommaOne<NamedArgument>> "}" <end:@R> =>
+        Statement::Args(Loc(file_no, start, end), args),
+}
+
+// Statements that end in an `if` without a matching `else`. Splitting statements into
+// open and closed forms is the usual way to resolve the dangling-else ambiguity.
+OpenStatement: Statement = {
+    <start:@L> "if" "(" <cond:Expression> ")" <then_branch:Statement> <end:@R> =>
+        Statement::If(Loc(file_no, start, end), cond, Box::new(then_branch), None),
+    <start:@L> "if" "(" <cond:Expression> ")" <then_branch:ClosedStatement> "else" <else_branch:OpenStatement> <end:@R> =>
+        Statement::If(Loc(file_no, start, end), cond, Box::new(then_branch), Some(Box::new(else_branch))),
+    <start:@L> "while" "(" <cond:Expression> ")" <body:OpenStatement> <end:@R> =>
+        Statement::While(Loc(file_no, start, end), cond, Box::new(body)),
+}
+
+// Statements in which every `if` has its `else`, plus everything that is not an `if`.
+// A `for` loop may have a closed statement as its body or just `;` (no body).
+ClosedStatement: Statement = {
+    NonIfStatement,
+    <start:@L> "if" "(" <cond:Expression> ")" <then_branch:ClosedStatement> "else" <else_branch:ClosedStatement> <end:@R> =>
+        Statement::If(Loc(file_no, start, end), cond, Box::new(then_branch), Some(Box::new(else_branch))),
+    <start:@L> "while" "(" <cond:Expression> ")" <body:ClosedStatement> <end:@R> =>
+        Statement::While(Loc(file_no, start, end), cond, Box::new(body)),
+    <start:@L> "for" "(" <init:SimpleStatement?> ";" <cond:Expression?> ";" <next:SimpleStatement?> ")" <body:ClosedStatement> <end:@R> =>
+        Statement::For(Loc(file_no, start, end), box_option(init), box_option(cond), box_option(next), Some(Box::new(body))),
+    <start:@L> "for" "(" <init:SimpleStatement?> ";" <cond:Expression?> ";" <next:SimpleStatement?> ")" ";" <end:@R> =>
+        Statement::For(Loc(file_no, start, end), box_option(init), box_option(cond), box_option(next), None),
+}
+
+// Any statement. Doc comments appearing in statement position are kept as statements.
+Statement: Statement = {
+    OpenStatement,
+    ClosedStatement,
+    <start:@L> <comment:DocComment> <end:@R> => {
+        // The token carries the comment type and the comment text.
+        let (kind, text) = comment;
+        Statement::DocComment(Loc(file_no, start, end), kind, text.to_string())
+    },
+}
+
+// A variable declaration with an optional initialiser, or a bare expression. The
+// terminating `;` is supplied by the productions that use this rule.
+SimpleStatement: Statement = {
+    <start:@L>  <decl:VariableDeclaration> <init:("=" <Expression>)?> <end:@R> =>
+        Statement::VariableDefinition(Loc(file_no, start, end), decl, init),
+    <start:@L> <expr:Expression> <end:@R> =>
+        Statement::Expression(Loc(file_no, start, end), expr),
+}
+
+// A named catch clause: `catch Name(parameter) { ... }`.
+CatchError: (Identifier, Parameter, Statement) = {
+    "catch" <name:Identifier> "(" <param:Parameter> ")" <handler:BlockStatement> =>
+        (name, param, handler),
+}
+
+// The optional `returns (...) { ... }` part of a try statement: the returned values and
+// the block that follows them.
+TryReturns: (Vec<(Loc, Option<Parameter>)>, Box<Statement>) = {
+    "returns" <values:ParameterList> <on_success:BlockStatement> =>
+        (values, Box::new(on_success)),
+}
+
+// The expression a `try` may guard: a function call, optionally preceded by `new`.
+TryExpression: Expression = {
+    <start:@L> "new" <ctor:FunctionCall> <end:@R> =>
+        Expression::New(Loc(file_no, start, end), Box::new(ctor)),
+    FunctionCall
+}
+
+// Every statement form that does not begin with `if`, `while` or `for`. Where a
+// statement ends in `;`, the recorded location stops before the semicolon.
+NonIfStatement: Statement = {
+    BlockStatement => <>,
+    <start:@L> "unchecked" "{" <statements:Statement*> "}" <end:@R> =>
+        Statement::Block { loc: Loc(file_no, start, end), unchecked: true, statements },
+    <start:@L> "assembly" "{" <assembly:AssemblyStatement*> "}" <end:@R> =>
+        Statement::Assembly { loc: Loc(file_no, start, end), assembly },
+    <SimpleStatement> ";" => <>,
+    <start:@L>"do" <body:Statement> "while" "(" <cond:Expression> ")" <end:@R> ";" =>
+        Statement::DoWhile(Loc(file_no, start, end), Box::new(body), cond),
+    <start:@L> "continue" <end:@R> ";" =>
+        Statement::Continue(Loc(file_no, start, end)),
+    <start:@L> "break" <end:@R> ";" =>
+        Statement::Break(Loc(file_no, start, end)),
+    <start:@L> "return" <end:@R> ";" =>
+        Statement::Return(Loc(file_no, start, end), None),
+    <start:@L> "return" <value:Expression> <end:@R> ";" =>
+        Statement::Return(Loc(file_no, start, end), Some(value)),
+    // try <expr> [returns (...) {...}] [catch Name(param) {...}] catch [(param)] {...}
+    <start:@L> "try" <guarded:TryExpression> <on_success:TryReturns?> <named_catch:CatchError?>
+        "catch" <catch_param:("(" <Parameter> ")")?> <catch_block:BlockStatement> <end:@R> => {
+            let final_catch = Box::new((catch_param, catch_block));
+
+            Statement::Try(Loc(file_no, start, end), guarded, on_success,
+                box_option(named_catch),
+                final_catch)
+    },
+    <start:@L> "emit" <event_call:FunctionCall> <end:@R> ";" =>
+        Statement::Emit(Loc(file_no, start, end), event_call),
+}
+
+// Statements inside an `assembly { ... }` block: assignment with `:=`, `let` assignment,
+// or a bare expression.
+AssemblyStatement: AssemblyStatement = {
+    <start:@L> <target:AssemblyExpression0> ":=" <value:AssemblyExpression2> <end:@R> =>
+        AssemblyStatement::Assign(Loc(file_no, start, end), target, value),
+    <start:@L> "let" <target:AssemblyExpression0> ":=" <value:AssemblyExpression2> <end:@R> =>
+        AssemblyStatement::LetAssign(Loc(file_no, start, end), target, value),
+    <AssemblyExpression1> => AssemblyStatement::Expression(<>),
+}
+
+// An assembly expression, optionally wrapped in one pair of parentheses. The
+// parentheses are not recorded in the result.
+AssemblyExpression2: AssemblyExpression = {
+    "(" <AssemblyExpression1> ")" => <>,
+    AssemblyExpression1
+}
+
+// Assembly function calls. The second alternative covers callees that are tokenised as
+// a type rather than an identifier: the callee's source text is sliced out of the input
+// and turned into an ordinary identifier.
+AssemblyExpression1: AssemblyExpression = {
+    <start:@L> <callee:AssemblyExpression0> "(" <args:Comma<AssemblyExpression2>> ")" <end:@R> =>
+        AssemblyExpression::Function(Loc(file_no, start, end), Box::new(callee), args),
+    <start:@L> <ty:NoFunctionType> <name_end:@R> "(" <args:Comma<AssemblyExpression2>> ")" <end:@R> => {
+        let name = String::from(&input[start..name_end]);
+        let callee = AssemblyExpression::Variable(Identifier { loc: Loc(file_no, start, name_end), name });
+
+        AssemblyExpression::Function(Loc(file_no, start, end), Box::new(callee), args)
+    },
+    AssemblyExpression0
+}
+
+// Primary assembly expressions: literals, identifiers, subscripts and member access.
+AssemblyExpression0: AssemblyExpression = {
+    <start:@L> "true" <end:@R> => AssemblyExpression::BoolLiteral(Loc(file_no, start, end), true),
+    <start:@L> "false" <end:@R> => AssemblyExpression::BoolLiteral(Loc(file_no, start, end), false),
+    <start:@L> <num:number> <end:@R> => {
+        // The token is (digits, exponent); underscores are digit separators.
+        let digits: String = num.0.chars().filter(|c| *c != '_').collect();
+        let exponent: String = num.1.chars().filter(|c| *c != '_').collect();
+
+        let mut value = BigInt::from_str(&digits).unwrap();
+        if !exponent.is_empty() {
+            // Scale by ten to the power of the exponent.
+            let scale = BigInt::from_str("10").unwrap().pow(BigUint::from_str(&exponent).unwrap());
+            value = value.mul(scale);
+        }
+
+        AssemblyExpression::NumberLiteral(Loc(file_no, start, end), value)
+    },
+    <start:@L> <hex:hexnumber> <end:@R> =>
+        AssemblyExpression::HexNumberLiteral(Loc(file_no, start, end), hex.to_owned()),
+    <StringLiteral> => AssemblyExpression::StringLiteral(<>),
+    <Identifier> => AssemblyExpression::Variable(<>),
+    <start:@L> <array:AssemblyExpression0> "[" <index:AssemblyExpression1> "]" <end:@R> =>
+        AssemblyExpression::Subscript(Loc(file_no, start, end), Box::new(array), Box::new(index)),
+    <start:@L> <object:AssemblyExpression0> "." <member:Identifier> <end:@R> =>
+        AssemblyExpression::Member(Loc(file_no, start, end), Box::new(object), member),
+}
+
+// Zero or more T separated by commas. The empty alternative yields an empty Vec;
+// otherwise this defers to CommaOne.
+Comma<T>: Vec<T> = {
+    => Vec::new(),
+    CommaOne<T> => <>,
+};
+
+// One or more T separated by commas, returned in source order.
+CommaOne<T>: Vec<T> = {
+    <first:T> <rest:("," <T>)*>  => {
+        let mut items = Vec::with_capacity(rest.len() + 1);
+        items.push(first);
+        items.extend(rest);
+        items
+    }
+};
+
+// Two or more T separated by commas (at least one comma is required), returned in
+// source order.
+CommaTwo<T>: Vec<T> = {
+    <first:T> <rest:("," <T>)+>  => {
+        let mut items = Vec::with_capacity(rest.len() + 1);
+        items.push(first);
+        items.extend(rest);
+        items
+    }
+};
+
+// Binding to the external lexer. Positions are byte offsets (usize) and lexing failures
+// are reported as LexicalError. Each entry in the enum maps a terminal name used in the
+// grammar above to the Token variant that the lexer produces for it.
+extern {
+    type Location = usize;
+    type Error = LexicalError;
+
+    enum Token<'input> {
+        // Terminals that carry data borrowed from the input.
+        identifier => Token::Identifier(<&'input str>),
+        string => Token::StringLiteral(<&'input str>),
+        hexstring => Token::HexLiteral(<&'input str>),
+        address => Token::AddressLiteral(<&'input str>),
+        number => Token::Number(<&'input str>, <&'input str>),
+        rational => Token::RationalNumber(<&'input str>, <&'input str>, <&'input str>),
+        hexnumber => Token::HexNumber(<&'input str>),
+        // Punctuation and operators.
+        ";" => Token::Semicolon,
+        "{" => Token::OpenCurlyBrace,
+        "}" => Token::CloseCurlyBrace,
+        "(" => Token::OpenParenthesis,
+        ")" => Token::CloseParenthesis,
+        "=" => Token::Assign,
+        "==" => Token::Equal,
+        "=>" => Token::Arrow,
+        "|=" => Token::BitwiseOrAssign,
+        "^=" => Token::BitwiseXorAssign,
+        "&=" => Token::BitwiseAndAssign,
+        "<<=" => Token::ShiftLeftAssign,
+        ">>=" => Token::ShiftRightAssign,
+        "+=" => Token::AddAssign,
+        "-=" => Token::SubtractAssign,
+        "*=" => Token::MulAssign,
+        "/=" => Token::DivideAssign,
+        "%=" => Token::ModuloAssign,
+        "?" => Token::Question,
+        ":" => Token::Colon,
+        ":=" => Token::ColonAssign,
+        "||" => Token::Or,
+        "&&" => Token::And,
+        "!=" => Token::NotEqual,
+        "<" => Token::Less,
+        "<=" => Token::LessEqual,
+        ">" => Token::More,
+        ">=" => Token::MoreEqual,
+        "|" => Token::BitwiseOr,
+        "&" => Token::BitwiseAnd,
+        "^" => Token::BitwiseXor,
+        "<<" => Token::ShiftLeft,
+        ">>" => Token::ShiftRight,
+        "+" => Token::Add,
+        "-" => Token::Subtract,
+        "*" => Token::Mul,
+        "/" => Token::Divide,
+        "%" => Token::Modulo,
+        "**" => Token::Power,
+        "!" => Token::Not,
+        "~" => Token::Complement,
+        "++" => Token::Increment,
+        "--" => Token::Decrement,
+        "[" => Token::OpenBracket,
+        "]" => Token::CloseBracket,
+        "." => Token::Member,
+        "," => Token::Comma,
+        // Doc comments and sized primitive types; the payloads are a comment type plus
+        // text, and an integer size respectively.
+        DocComment => Token::DocComment(<CommentType>, <&'input str>),
+        Uint => Token::Uint(<u16>),
+        Int => Token::Int(<u16>),
+        Bytes => Token::Bytes(<u8>),
+        // Keywords.
+        "struct" => Token::Struct,
+        "memory" => Token::Memory,
+        "calldata" => Token::Calldata,
+        "storage" => Token::Storage,
+        "import" => Token::Import,
+        "contract" => Token::Contract,
+        "pragma" => Token::Pragma,
+        "bool" => Token::Bool,
+        "address" => Token::Address,
+        "string" => Token::String,
+        "bytes" => Token::DynamicBytes,
+        "delete" => Token::Delete,
+        "new" => Token::New,
+        "interface" => Token::Interface,
+        "library" => Token::Library,
+        "event" => Token::Event,
+        "enum" => Token::Enum,
+        "public" => Token::Public,
+        "private" => Token::Private,
+        "external" => Token::External,
+        "internal" => Token::Internal,
+        "constant" => Token::Constant,
+        "true" => Token::True,
+        "false" => Token::False,
+        "pure" => Token::Pure,
+        "view" => Token::View,
+        "payable" => Token::Payable,
+        "constructor" => Token::Constructor,
+        "function" => Token::Function,
+        "returns" => Token::Returns,
+        "return" => Token::Return,
+        "if" => Token::If,
+        "for" => Token::For,
+        "while" => Token::While,
+        "else" => Token::Else,
+        "do" => Token::Do,
+        "continue" => Token::Continue,
+        "break" => Token::Break,
+        "throw" => Token::Throw,
+        "emit" => Token::Emit,
+        "anonymous" => Token::Anonymous,
+        "indexed" => Token::Indexed,
+        "mapping" => Token::Mapping,
+        "try" => Token::Try,
+        "catch" => Token::Catch,
+        "receive" => Token::Receive,
+        "fallback" => Token::Fallback,
+        "seconds" => Token::Seconds,
+        "minutes" => Token::Minutes,
+        "hours" => Token::Hours,
+        "days" => Token::Days,
+        "weeks" => Token::Weeks,
+        "wei" => Token::Wei,
+        "szabo" => Token::Szabo,
+        "finney" => Token::Finney,
+        "ether" => Token::Ether,
+        "this" => Token::This,
+        "as" => Token::As,
+        "is" => Token::Is,
+        "abstract" => Token::Abstract,
+        "virtual" => Token::Virtual,
+        "override" => Token::Override,
+        "using" => Token::Using,
+        "modifier" => Token::Modifier,
+        "immutable" => Token::Immutable,
+        "unchecked" => Token::Unchecked,
+        "assembly" => Token::Assembly,
+        "let" => Token::Let,
+    }
+}

+ 44 - 173
src/sema/diagnostics.rs

@@ -3,179 +3,50 @@ use crate::file_resolver::FileResolver;
 use crate::parser::pt::Loc;
 use serde::Serialize;
 
-impl Level {
-    pub fn to_string(&self) -> &'static str {
-        match self {
-            Level::Debug => "debug",
-            Level::Info => "info",
-            Level::Warning => "warning",
-            Level::Error => "error",
-        }
-    }
-}
-
-impl Diagnostic {
-    pub fn debug(pos: Loc, message: String) -> Self {
-        Diagnostic {
-            level: Level::Debug,
-            ty: ErrorType::None,
-            pos: Some(pos),
-            message,
-            notes: Vec::new(),
-        }
-    }
-
-    pub fn info(pos: Loc, message: String) -> Self {
-        Diagnostic {
-            level: Level::Info,
-            ty: ErrorType::None,
-            pos: Some(pos),
-            message,
-            notes: Vec::new(),
-        }
-    }
-
-    pub fn parser_error(pos: Loc, message: String) -> Self {
-        Diagnostic {
-            level: Level::Error,
-            ty: ErrorType::ParserError,
-            pos: Some(pos),
-            message,
-            notes: Vec::new(),
-        }
-    }
-
-    pub fn error(pos: Loc, message: String) -> Self {
-        Diagnostic {
-            level: Level::Error,
-            ty: ErrorType::SyntaxError,
-            pos: Some(pos),
-            message,
-            notes: Vec::new(),
-        }
-    }
-
-    pub fn decl_error(pos: Loc, message: String) -> Self {
-        Diagnostic {
-            level: Level::Error,
-            ty: ErrorType::DeclarationError,
-            pos: Some(pos),
-            message,
-            notes: Vec::new(),
-        }
-    }
-
-    pub fn type_error(pos: Loc, message: String) -> Self {
-        Diagnostic {
-            level: Level::Error,
-            ty: ErrorType::TypeError,
-            pos: Some(pos),
-            message,
-            notes: Vec::new(),
-        }
-    }
-
-    pub fn warning(pos: Loc, message: String) -> Self {
-        Diagnostic {
-            level: Level::Warning,
-            ty: ErrorType::Warning,
-            pos: Some(pos),
-            message,
-            notes: Vec::new(),
-        }
-    }
-
-    pub fn warning_with_note(pos: Loc, message: String, note_pos: Loc, note: String) -> Self {
-        Diagnostic {
-            level: Level::Warning,
-            ty: ErrorType::Warning,
-            pos: Some(pos),
-            message,
-            notes: vec![Note {
-                pos: note_pos,
-                message: note,
-            }],
-        }
-    }
-
-    pub fn warning_with_notes(pos: Loc, message: String, notes: Vec<Note>) -> Self {
-        Diagnostic {
-            level: Level::Warning,
-            ty: ErrorType::Warning,
-            pos: Some(pos),
-            message,
-            notes,
-        }
+fn formatted_message(diagnostic: &Diagnostic, ns: &Namespace, cache: &FileResolver) -> String {
+    let mut s = if let Some(pos) = diagnostic.pos {
+        let loc = ns.files[pos.0].loc_to_string(&pos);
+
+        let (full_line, beg_line_no, beg_offset, type_size) =
+            cache.get_line_and_offset_from_loc(&ns.files[pos.0], &pos);
+
+        format!(
+            "{}: {}: {}\nLine {}:\n\t{}\n\t{:-<7$}{:^<8$}",
+            loc,
+            diagnostic.level.to_string(),
+            diagnostic.message,
+            beg_line_no + 1,
+            full_line,
+            "",
+            "",
+            beg_offset,
+            type_size
+        )
+    } else {
+        format!("solang: {}: {}", diagnostic.level.to_string(), diagnostic.message)
+    };
+
+    for note in &diagnostic.notes {
+        let loc = ns.files[note.pos.0].loc_to_string(&note.pos);
+
+        let (full_line, beg_line_no, beg_offset, type_size) =
+            cache.get_line_and_offset_from_loc(&ns.files[note.pos.0], &note.pos);
+
+        s.push_str(&format!(
+            "\n\t{}: {}: {}\n\tLine {}:\n\t\t{}\n\t\t{:-<7$}{:^<8$}",
+            loc,
+            "note",
+            note.message,
+            beg_line_no + 1,
+            full_line,
+            "",
+            "",
+            beg_offset,
+            type_size
+        ));
     }
 
-    pub fn error_with_note(pos: Loc, message: String, note_pos: Loc, note: String) -> Self {
-        Diagnostic {
-            level: Level::Error,
-            ty: ErrorType::None,
-            pos: Some(pos),
-            message,
-            notes: vec![Note {
-                pos: note_pos,
-                message: note,
-            }],
-        }
-    }
-
-    pub fn error_with_notes(pos: Loc, message: String, notes: Vec<Note>) -> Self {
-        Diagnostic {
-            level: Level::Error,
-            ty: ErrorType::None,
-            pos: Some(pos),
-            message,
-            notes,
-        }
-    }
-
-    fn formatted_message(&self, ns: &Namespace, cache: &FileResolver) -> String {
-        let mut s = if let Some(pos) = self.pos {
-            let loc = ns.files[pos.0].loc_to_string(&pos);
-
-            let (full_line, beg_line_no, beg_offset, type_size) =
-                cache.get_line_and_offset_from_loc(&ns.files[pos.0], &pos);
-
-            format!(
-                "{}: {}: {}\nLine {}:\n\t{}\n\t{:-<7$}{:^<8$}",
-                loc,
-                self.level.to_string(),
-                self.message,
-                beg_line_no + 1,
-                full_line,
-                "",
-                "",
-                beg_offset,
-                type_size
-            )
-        } else {
-            format!("solang: {}: {}", self.level.to_string(), self.message)
-        };
-
-        for note in &self.notes {
-            let loc = ns.files[note.pos.0].loc_to_string(&note.pos);
-
-            let (full_line, beg_line_no, beg_offset, type_size) =
-                cache.get_line_and_offset_from_loc(&ns.files[note.pos.0], &note.pos);
-
-            s.push_str(&format!(
-                "\n\t{}: {}: {}\n\tLine {}:\n\t\t{}\n\t\t{:-<7$}{:^<8$}",
-                loc,
-                "note",
-                note.message,
-                beg_line_no + 1,
-                full_line,
-                "",
-                "",
-                beg_offset,
-                type_size
-            ));
-        }
-
-        s
-    }
+    s
 }
 
 pub fn print_messages(cache: &FileResolver, ns: &Namespace, debug: bool) {
@@ -184,7 +55,7 @@ pub fn print_messages(cache: &FileResolver, ns: &Namespace, debug: bool) {
             continue;
         }
 
-        eprintln!("{}", msg.formatted_message(ns, cache));
+        eprintln!("{}", formatted_message(msg, ns, cache));
     }
 }
 
@@ -232,7 +103,7 @@ pub fn message_as_json(ns: &Namespace, cache: &FileResolver) -> Vec<OutputJson>
             component: "general".to_owned(),
             severity: msg.level.to_string().to_owned(),
             message: msg.message.to_owned(),
-            formattedMessage: msg.formatted_message(ns, cache),
+            formattedMessage: formatted_message(msg, ns, cache),
         });
     }