@@ -1,1504 +0,0 @@
-//
-// Solidity custom lexer. Solidity needs a custom lexer for two reasons:
-// - comments and doc comments
-// - the pragma value is [^;]+, i.e. everything up to the next semicolon
-//
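The second point is the unusual one: after `pragma` and an identifier, everything up to the semicolon is swallowed into a single `StringLiteral` rather than being lexed as operators and numbers. A minimal sketch of the resulting tokens, mirroring the `lexertest` cases at the bottom of this file:

    let tokens: Vec<_> = Lexer::new("pragma solidity >=0.5.0 <0.7.0;").collect();
    // Pragma, Identifier("solidity"), StringLiteral(">=0.5.0 <0.7.0"), Semicolon
    assert_eq!(tokens[2], Ok((16, Token::StringLiteral(">=0.5.0 <0.7.0"), 30)));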
-use phf::phf_map;
-use std::fmt;
-use std::iter::Peekable;
-use std::str::CharIndices;
-use unicode_xid::UnicodeXID;
-
-use super::pt::Loc;
-
-pub type Spanned<Token, Loc, Error> = Result<(Loc, Token, Loc), Error>;
-
-#[derive(Copy, Clone, PartialEq, Debug)]
-pub enum CommentType {
-    Line,
-    Block,
-}
-
-#[derive(Copy, Clone, PartialEq, Debug)]
-pub enum Token<'input> {
-    Identifier(&'input str),
-    StringLiteral(&'input str),
-    AddressLiteral(&'input str),
-    HexLiteral(&'input str),
-    Number(&'input str, &'input str),
-    RationalNumber(&'input str, &'input str, &'input str),
-    HexNumber(&'input str),
-    DocComment(CommentType, &'input str),
-    Divide,
-    Contract,
-    Library,
-    Interface,
-    Function,
-    Pragma,
-    Import,
-
-    Struct,
-    Event,
-    Enum,
-
-    Memory,
-    Storage,
-    Calldata,
-
-    Public,
-    Private,
-    Internal,
-    External,
-
-    Constant,
-
-    New,
-    Delete,
-
-    Pure,
-    View,
-    Payable,
-
-    Do,
-    Continue,
-    Break,
-
-    Throw,
-    Emit,
-    Return,
-    Returns,
-
-    Uint(u16),
-    Int(u16),
-    Bytes(u8),
-    DynamicBytes,
-    Bool,
-    Address,
-    String,
-
-    Semicolon,
-    Comma,
-    OpenParenthesis,
-    CloseParenthesis,
-    OpenCurlyBrace,
-    CloseCurlyBrace,
-
-    BitwiseOr,
-    BitwiseOrAssign,
-    Or,
-
-    BitwiseXor,
-    BitwiseXorAssign,
-
-    BitwiseAnd,
-    BitwiseAndAssign,
-    And,
-
-    AddAssign,
-    Increment,
-    Add,
-
-    SubtractAssign,
-    Decrement,
-    Subtract,
-
-    MulAssign,
-    Mul,
-    Power,
-    DivideAssign,
-    ModuloAssign,
-    Modulo,
-
-    Equal,
-    Assign,
-    ColonAssign,
-
-    NotEqual,
-    Not,
-
-    True,
-    False,
-    Else,
-    Anonymous,
-    For,
-    While,
-    If,
-
-    ShiftRight,
-    ShiftRightAssign,
-    Less,
-    LessEqual,
-
-    ShiftLeft,
-    ShiftLeftAssign,
-    More,
-    MoreEqual,
-
-    Constructor,
-    Indexed,
-
-    Member,
-    Colon,
-    OpenBracket,
-    CloseBracket,
-    Complement,
-    Question,
-
-    Mapping,
-    Arrow,
-
-    Try,
-    Catch,
-
-    Receive,
-    Fallback,
-
-    Seconds,
-    Minutes,
-    Hours,
-    Days,
-    Weeks,
-    Wei,
-    Szabo,
-    Finney,
-    Ether,
-
-    This,
-    As,
-    Is,
-    Abstract,
-    Virtual,
-    Override,
-    Using,
-    Modifier,
-    Immutable,
-    Unchecked,
-    Assembly,
-    Let,
-}
-
-impl<'input> fmt::Display for Token<'input> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Token::DocComment(CommentType::Line, s) => write!(f, "///{}", s),
-            Token::DocComment(CommentType::Block, s) => write!(f, "/**{}\n*/", s),
-            Token::Identifier(id) => write!(f, "{}", id),
-            Token::StringLiteral(s) => write!(f, "\"{}\"", s),
-            Token::HexLiteral(hex) => write!(f, "{}", hex),
-            Token::AddressLiteral(address) => write!(f, "{}", address),
-            Token::Number(base, exp) if exp.is_empty() => write!(f, "{}", base),
-            Token::Number(base, exp) => write!(f, "{}e{}", base, exp),
-            Token::RationalNumber(significand, mantissa, exp) if exp.is_empty() => {
-                write!(f, "{}.{}", significand, mantissa)
-            }
-            Token::RationalNumber(significand, mantissa, exp) => {
-                write!(f, "{}.{}e{}", significand, mantissa, exp)
-            }
-            Token::HexNumber(n) => write!(f, "{}", n),
-            Token::Uint(w) => write!(f, "uint{}", w),
-            Token::Int(w) => write!(f, "int{}", w),
-            Token::Bytes(w) => write!(f, "bytes{}", w),
-            Token::DynamicBytes => write!(f, "bytes"),
-            Token::Semicolon => write!(f, ";"),
-            Token::Comma => write!(f, ","),
-            Token::OpenParenthesis => write!(f, "("),
-            Token::CloseParenthesis => write!(f, ")"),
-            Token::OpenCurlyBrace => write!(f, "{{"),
-            Token::CloseCurlyBrace => write!(f, "}}"),
-            Token::BitwiseOr => write!(f, "|"),
-            Token::BitwiseOrAssign => write!(f, "|="),
-            Token::Or => write!(f, "||"),
-            Token::BitwiseXor => write!(f, "^"),
-            Token::BitwiseXorAssign => write!(f, "^="),
-            Token::BitwiseAnd => write!(f, "&"),
-            Token::BitwiseAndAssign => write!(f, "&="),
-            Token::And => write!(f, "&&"),
-            Token::AddAssign => write!(f, "+="),
-            Token::Increment => write!(f, "++"),
-            Token::Add => write!(f, "+"),
-            Token::SubtractAssign => write!(f, "-="),
-            Token::Decrement => write!(f, "--"),
-            Token::Subtract => write!(f, "-"),
-            Token::MulAssign => write!(f, "*="),
-            Token::Mul => write!(f, "*"),
-            Token::Power => write!(f, "**"),
-            Token::Divide => write!(f, "/"),
-            Token::DivideAssign => write!(f, "/="),
-            Token::ModuloAssign => write!(f, "%="),
-            Token::Modulo => write!(f, "%"),
-            Token::Equal => write!(f, "=="),
-            Token::Assign => write!(f, "="),
-            Token::ColonAssign => write!(f, ":="),
-            Token::NotEqual => write!(f, "!="),
-            Token::Not => write!(f, "!"),
-            Token::ShiftLeft => write!(f, "<<"),
-            Token::ShiftLeftAssign => write!(f, "<<="),
-            Token::More => write!(f, ">"),
-            Token::MoreEqual => write!(f, ">="),
-            Token::Member => write!(f, "."),
-            Token::Colon => write!(f, ":"),
-            Token::OpenBracket => write!(f, "["),
-            Token::CloseBracket => write!(f, "]"),
-            Token::Complement => write!(f, "~"),
-            Token::Question => write!(f, "?"),
-            Token::ShiftRightAssign => write!(f, ">>="),
-            Token::ShiftRight => write!(f, ">>"),
-            Token::Less => write!(f, "<"),
-            Token::LessEqual => write!(f, "<="),
-            Token::Bool => write!(f, "bool"),
-            Token::Address => write!(f, "address"),
-            Token::String => write!(f, "string"),
-            Token::Contract => write!(f, "contract"),
-            Token::Library => write!(f, "library"),
-            Token::Interface => write!(f, "interface"),
-            Token::Function => write!(f, "function"),
-            Token::Pragma => write!(f, "pragma"),
-            Token::Import => write!(f, "import"),
-            Token::Struct => write!(f, "struct"),
-            Token::Event => write!(f, "event"),
-            Token::Enum => write!(f, "enum"),
-            Token::Memory => write!(f, "memory"),
-            Token::Storage => write!(f, "storage"),
-            Token::Calldata => write!(f, "calldata"),
-            Token::Public => write!(f, "public"),
-            Token::Private => write!(f, "private"),
-            Token::Internal => write!(f, "internal"),
-            Token::External => write!(f, "external"),
-            Token::Constant => write!(f, "constant"),
-            Token::New => write!(f, "new"),
-            Token::Delete => write!(f, "delete"),
-            Token::Pure => write!(f, "pure"),
-            Token::View => write!(f, "view"),
-            Token::Payable => write!(f, "payable"),
-            Token::Do => write!(f, "do"),
-            Token::Continue => write!(f, "continue"),
-            Token::Break => write!(f, "break"),
-            Token::Throw => write!(f, "throw"),
-            Token::Emit => write!(f, "emit"),
-            Token::Return => write!(f, "return"),
-            Token::Returns => write!(f, "returns"),
-            Token::True => write!(f, "true"),
-            Token::False => write!(f, "false"),
-            Token::Else => write!(f, "else"),
-            Token::Anonymous => write!(f, "anonymous"),
-            Token::For => write!(f, "for"),
-            Token::While => write!(f, "while"),
-            Token::If => write!(f, "if"),
-            Token::Constructor => write!(f, "constructor"),
-            Token::Indexed => write!(f, "indexed"),
-            Token::Mapping => write!(f, "mapping"),
-            Token::Arrow => write!(f, "=>"),
-            Token::Try => write!(f, "try"),
-            Token::Catch => write!(f, "catch"),
-            Token::Receive => write!(f, "receive"),
-            Token::Fallback => write!(f, "fallback"),
-            Token::Seconds => write!(f, "seconds"),
-            Token::Minutes => write!(f, "minutes"),
-            Token::Hours => write!(f, "hours"),
-            Token::Days => write!(f, "days"),
-            Token::Weeks => write!(f, "weeks"),
-            Token::Wei => write!(f, "wei"),
-            Token::Szabo => write!(f, "szabo"),
-            Token::Finney => write!(f, "finney"),
-            Token::Ether => write!(f, "ether"),
-            Token::This => write!(f, "this"),
-            Token::As => write!(f, "as"),
-            Token::Is => write!(f, "is"),
-            Token::Abstract => write!(f, "abstract"),
-            Token::Virtual => write!(f, "virtual"),
-            Token::Override => write!(f, "override"),
-            Token::Using => write!(f, "using"),
-            Token::Modifier => write!(f, "modifier"),
-            Token::Immutable => write!(f, "immutable"),
-            Token::Unchecked => write!(f, "unchecked"),
-            Token::Assembly => write!(f, "assembly"),
-            Token::Let => write!(f, "let"),
-        }
-    }
-}
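Since `Display` renders each token exactly as it appears in source, diagnostics can quote tokens verbatim. A couple of illustrative checks (not part of the original test suite):

    assert_eq!(Token::Uint(256).to_string(), "uint256");
    assert_eq!(Token::Arrow.to_string(), "=>");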
-
-pub struct Lexer<'input> {
-    input: &'input str,
-    chars: Peekable<CharIndices<'input>>,
-    last_tokens: [Option<Token<'input>>; 2],
-}
-
-#[derive(Debug, PartialEq)]
-pub enum LexicalError {
-    EndOfFileInComment(usize, usize),
-    EndOfFileInString(usize, usize),
-    EndofFileInHex(usize, usize),
-    MissingNumber(usize, usize),
-    InvalidCharacterInHexLiteral(usize, char),
-    UnrecognisedToken(usize, usize, String),
-    MissingExponent(usize, usize),
-    DoublePoints(usize, usize),
-    UnrecognisedDecimal(usize, usize),
-    ExpectedFrom(usize, usize, String),
-}
-
-impl fmt::Display for LexicalError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            LexicalError::EndOfFileInComment(_, _) => write!(f, "end of file found in comment"),
-            LexicalError::EndOfFileInString(_, _) => {
-                write!(f, "end of file found in string literal")
-            }
-            LexicalError::EndofFileInHex(_, _) => {
-                write!(f, "end of file found in hex literal string")
-            }
-            LexicalError::MissingNumber(_, _) => write!(f, "missing number"),
-            LexicalError::InvalidCharacterInHexLiteral(_, ch) => {
-                write!(f, "invalid character ‘{}’ in hex literal string", ch)
-            }
-            LexicalError::UnrecognisedToken(_, _, t) => write!(f, "unrecognised token ‘{}’", t),
-            LexicalError::ExpectedFrom(_, _, t) => write!(f, "‘{}’ found where ‘from’ expected", t),
-            LexicalError::MissingExponent(_, _) => write!(f, "missing exponent"),
-            LexicalError::DoublePoints(_, _) => write!(f, "found two dots in number"),
-            LexicalError::UnrecognisedDecimal(_, _) => {
-                write!(f, "expected number after decimal point")
-            }
-        }
-    }
-}
-
-impl LexicalError {
-    pub fn loc(&self, file_no: usize) -> Loc {
-        match self {
-            LexicalError::EndOfFileInComment(start, end) => Loc(file_no, *start, *end),
-            LexicalError::EndOfFileInString(start, end) => Loc(file_no, *start, *end),
-            LexicalError::EndofFileInHex(start, end) => Loc(file_no, *start, *end),
-            LexicalError::MissingNumber(start, end) => Loc(file_no, *start, *end),
-            LexicalError::InvalidCharacterInHexLiteral(pos, _) => Loc(file_no, *pos, *pos),
-            LexicalError::UnrecognisedToken(start, end, _) => Loc(file_no, *start, *end),
-            LexicalError::ExpectedFrom(start, end, _) => Loc(file_no, *start, *end),
-            LexicalError::MissingExponent(start, end) => Loc(file_no, *start, *end),
-            LexicalError::DoublePoints(start, end) => Loc(file_no, *start, *end),
-            LexicalError::UnrecognisedDecimal(start, end) => Loc(file_no, *start, *end),
-        }
-    }
-}
-
-static KEYWORDS: phf::Map<&'static str, Token> = phf_map! {
-    "address" => Token::Address,
-    "anonymous" => Token::Anonymous,
-    "bool" => Token::Bool,
-    "break" => Token::Break,
-    "bytes1" => Token::Bytes(1),
-    "bytes2" => Token::Bytes(2),
-    "bytes3" => Token::Bytes(3),
-    "bytes4" => Token::Bytes(4),
-    "bytes5" => Token::Bytes(5),
-    "bytes6" => Token::Bytes(6),
-    "bytes7" => Token::Bytes(7),
-    "bytes8" => Token::Bytes(8),
-    "bytes9" => Token::Bytes(9),
-    "bytes10" => Token::Bytes(10),
-    "bytes11" => Token::Bytes(11),
-    "bytes12" => Token::Bytes(12),
-    "bytes13" => Token::Bytes(13),
-    "bytes14" => Token::Bytes(14),
-    "bytes15" => Token::Bytes(15),
-    "bytes16" => Token::Bytes(16),
-    "bytes17" => Token::Bytes(17),
-    "bytes18" => Token::Bytes(18),
-    "bytes19" => Token::Bytes(19),
-    "bytes20" => Token::Bytes(20),
-    "bytes21" => Token::Bytes(21),
-    "bytes22" => Token::Bytes(22),
-    "bytes23" => Token::Bytes(23),
-    "bytes24" => Token::Bytes(24),
-    "bytes25" => Token::Bytes(25),
-    "bytes26" => Token::Bytes(26),
-    "bytes27" => Token::Bytes(27),
-    "bytes28" => Token::Bytes(28),
-    "bytes29" => Token::Bytes(29),
-    "bytes30" => Token::Bytes(30),
-    "bytes31" => Token::Bytes(31),
-    "bytes32" => Token::Bytes(32),
-    "bytes" => Token::DynamicBytes,
-    "byte" => Token::Bytes(1),
-    "calldata" => Token::Calldata,
-    "constant" => Token::Constant,
-    "constructor" => Token::Constructor,
-    "continue" => Token::Continue,
-    "contract" => Token::Contract,
-    "delete" => Token::Delete,
-    "do" => Token::Do,
-    "else" => Token::Else,
-    "emit" => Token::Emit,
-    "enum" => Token::Enum,
-    "event" => Token::Event,
-    "external" => Token::External,
-    "false" => Token::False,
-    "for" => Token::For,
-    "function" => Token::Function,
-    "if" => Token::If,
-    "import" => Token::Import,
-    "indexed" => Token::Indexed,
-    "int8" => Token::Int(8),
-    "int16" => Token::Int(16),
-    "int24" => Token::Int(24),
-    "int32" => Token::Int(32),
-    "int40" => Token::Int(40),
-    "int48" => Token::Int(48),
-    "int56" => Token::Int(56),
-    "int64" => Token::Int(64),
-    "int72" => Token::Int(72),
-    "int80" => Token::Int(80),
-    "int88" => Token::Int(88),
-    "int96" => Token::Int(96),
-    "int104" => Token::Int(104),
-    "int112" => Token::Int(112),
-    "int120" => Token::Int(120),
-    "int128" => Token::Int(128),
-    "int136" => Token::Int(136),
-    "int144" => Token::Int(144),
-    "int152" => Token::Int(152),
-    "int160" => Token::Int(160),
-    "int168" => Token::Int(168),
-    "int176" => Token::Int(176),
-    "int184" => Token::Int(184),
-    "int192" => Token::Int(192),
-    "int200" => Token::Int(200),
-    "int208" => Token::Int(208),
-    "int216" => Token::Int(216),
-    "int224" => Token::Int(224),
-    "int232" => Token::Int(232),
-    "int240" => Token::Int(240),
-    "int248" => Token::Int(248),
-    "int256" => Token::Int(256),
-    "interface" => Token::Interface,
-    "internal" => Token::Internal,
-    "int" => Token::Int(256),
-    "library" => Token::Library,
-    "mapping" => Token::Mapping,
-    "memory" => Token::Memory,
-    "new" => Token::New,
-    "payable" => Token::Payable,
-    "pragma" => Token::Pragma,
-    "private" => Token::Private,
-    "public" => Token::Public,
-    "pure" => Token::Pure,
-    "returns" => Token::Returns,
-    "return" => Token::Return,
-    "storage" => Token::Storage,
-    "string" => Token::String,
-    "struct" => Token::Struct,
-    "throw" => Token::Throw,
-    "true" => Token::True,
-    "uint8" => Token::Uint(8),
-    "uint16" => Token::Uint(16),
-    "uint24" => Token::Uint(24),
-    "uint32" => Token::Uint(32),
-    "uint40" => Token::Uint(40),
-    "uint48" => Token::Uint(48),
-    "uint56" => Token::Uint(56),
-    "uint64" => Token::Uint(64),
-    "uint72" => Token::Uint(72),
-    "uint80" => Token::Uint(80),
-    "uint88" => Token::Uint(88),
-    "uint96" => Token::Uint(96),
-    "uint104" => Token::Uint(104),
-    "uint112" => Token::Uint(112),
-    "uint120" => Token::Uint(120),
-    "uint128" => Token::Uint(128),
-    "uint136" => Token::Uint(136),
-    "uint144" => Token::Uint(144),
-    "uint152" => Token::Uint(152),
-    "uint160" => Token::Uint(160),
-    "uint168" => Token::Uint(168),
-    "uint176" => Token::Uint(176),
-    "uint184" => Token::Uint(184),
-    "uint192" => Token::Uint(192),
-    "uint200" => Token::Uint(200),
-    "uint208" => Token::Uint(208),
-    "uint216" => Token::Uint(216),
-    "uint224" => Token::Uint(224),
-    "uint232" => Token::Uint(232),
-    "uint240" => Token::Uint(240),
-    "uint248" => Token::Uint(248),
-    "uint256" => Token::Uint(256),
-    "uint" => Token::Uint(256),
-    "view" => Token::View,
-    "while" => Token::While,
-    "try" => Token::Try,
-    "catch" => Token::Catch,
-    "receive" => Token::Receive,
-    "fallback" => Token::Fallback,
-    "seconds" => Token::Seconds,
-    "minutes" => Token::Minutes,
-    "hours" => Token::Hours,
-    "days" => Token::Days,
-    "weeks" => Token::Weeks,
-    "wei" => Token::Wei,
-    "szabo" => Token::Szabo,
-    "finney" => Token::Finney,
-    "ether" => Token::Ether,
-    "this" => Token::This,
-    "as" => Token::As,
-    "is" => Token::Is,
-    "abstract" => Token::Abstract,
-    "virtual" => Token::Virtual,
-    "override" => Token::Override,
-    "using" => Token::Using,
-    "modifier" => Token::Modifier,
-    "immutable" => Token::Immutable,
-    "unchecked" => Token::Unchecked,
-    "assembly" => Token::Assembly,
-    "let" => Token::Let,
-};
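`KEYWORDS` is a `phf` perfect-hash map built at compile time, so keyword recognition needs no runtime table construction. The identifier path in `next` below boils down to this lookup (a sketch only, with a hypothetical helper name):

    fn keyword_or_identifier<'input>(id: &'input str) -> Token<'input> {
        // Keywords take precedence; anything unknown is a plain identifier.
        match KEYWORDS.get(id) {
            Some(tok) => *tok,
            None => Token::Identifier(id),
        }
    }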
-
-impl<'input> Lexer<'input> {
-    pub fn new(input: &'input str) -> Self {
-        Lexer {
-            input,
-            chars: input.char_indices().peekable(),
-            last_tokens: [None, None],
-        }
-    }
-
-    fn parse_number(
-        &mut self,
-        start: usize,
-        end: usize,
-        ch: char,
-    ) -> Result<(usize, Token<'input>, usize), LexicalError> {
-        let mut is_rational = false;
-        if ch == '0' {
-            if let Some((_, 'x')) = self.chars.peek() {
-                // hex number
-                self.chars.next();
-
-                let mut end = match self.chars.next() {
-                    Some((end, ch)) if ch.is_ascii_hexdigit() => end,
-                    Some((_, _)) => {
-                        return Err(LexicalError::MissingNumber(start, start + 1));
-                    }
-                    None => {
-                        return Err(LexicalError::EndofFileInHex(start, self.input.len()));
-                    }
-                };
-
-                while let Some((i, ch)) = self.chars.peek() {
-                    if !ch.is_ascii_hexdigit() && *ch != '_' {
-                        break;
-                    }
-                    end = *i;
-                    self.chars.next();
-                }
-
-                return Ok((start, Token::HexNumber(&self.input[start..=end]), end + 1));
-            }
-        }
-
-        let mut start = start;
-        if ch == '.' {
-            is_rational = true;
-            start -= 1;
-        }
-
-        let mut end = end;
-        while let Some((i, ch)) = self.chars.peek() {
-            if !ch.is_ascii_digit() && *ch != '_' {
-                break;
-            }
-            end = *i;
-            self.chars.next();
-        }
-        let mut rational_end = end;
-        let mut end_before_rational = end;
-        let mut rational_start = end;
-        if is_rational {
-            end_before_rational = start - 1;
-            rational_start = start + 1;
-        }
-
-        if let Some((i, '.')) = self.chars.peek() {
-            if is_rational {
-                return Err(LexicalError::DoublePoints(start, self.input.len()));
-            }
-            rational_start = *i + 1;
-            rational_end = *i + 1;
-            let mut has_number = false;
-            is_rational = true;
-            self.chars.next();
-            while let Some((i, ch)) = self.chars.peek() {
-                if *ch == '.' {
-                    return Err(LexicalError::DoublePoints(start, self.input.len()));
-                }
-                if !ch.is_ascii_digit() {
-                    break;
-                }
-                has_number = true;
-                rational_end = *i;
-                end = *i;
-                self.chars.next();
-            }
-            if !has_number {
-                return Err(LexicalError::UnrecognisedDecimal(start, self.input.len()));
-            }
-        }
-
-        let old_end = end;
-        let mut exp_start = end + 1;
-
-        if let Some((i, 'e')) = self.chars.peek() {
-            exp_start = *i + 1;
-            self.chars.next();
-            while let Some((i, ch)) = self.chars.peek() {
-                if !ch.is_ascii_digit() && *ch != '_' && *ch != '-' {
-                    break;
-                }
-                end = *i;
-                self.chars.next();
-            }
-
-            if exp_start > end {
-                return Err(LexicalError::MissingExponent(start, self.input.len()));
-            }
-        }
-
-        if is_rational {
-            let significand = &self.input[start..=end_before_rational];
-            let mantissa = &self.input[rational_start..=rational_end];
-
-            if mantissa.is_empty() {
-                return Err(LexicalError::UnrecognisedDecimal(start, self.input.len()));
-            }
-            let exp = &self.input[exp_start..=end];
-            return Ok((
-                start,
-                Token::RationalNumber(significand, mantissa, exp),
-                end + 1,
-            ));
-        }
-
-        let base = &self.input[start..=old_end];
-        let exp = &self.input[exp_start..=end];
-
-        Ok((start, Token::Number(base, exp), end + 1))
-    }
-
-    fn string(
-        &mut self,
-        token_start: usize,
-        string_start: usize,
-        quote_char: char,
-    ) -> Result<(usize, Token<'input>, usize), LexicalError> {
-        let mut end;
-
-        let mut last_was_escape = false;
-
-        loop {
-            if let Some((i, ch)) = self.chars.next() {
-                end = i;
-                if !last_was_escape {
-                    if ch == quote_char {
-                        break;
-                    }
-                    last_was_escape = ch == '\\';
-                } else {
-                    last_was_escape = false;
-                }
-            } else {
-                return Err(LexicalError::EndOfFileInString(
-                    token_start,
-                    self.input.len(),
-                ));
-            }
-        }
-
-        Ok((
-            token_start,
-            Token::StringLiteral(&self.input[string_start..end]),
-            end + 1,
-        ))
-    }
-
-    fn next(&mut self) -> Option<Result<(usize, Token<'input>, usize), LexicalError>> {
-        loop {
-            match self.chars.next() {
-                Some((start, ch)) if ch == '_' || ch == '$' || UnicodeXID::is_xid_start(ch) => {
-                    let end;
-
-                    loop {
-                        if let Some((i, ch)) = self.chars.peek() {
-                            if !UnicodeXID::is_xid_continue(*ch) && *ch != '$' {
-                                end = *i;
-                                break;
-                            }
-                            self.chars.next();
-                        } else {
-                            end = self.input.len();
-                            break;
-                        }
-                    }
-
-                    let id = &self.input[start..end];
-
-                    if id == "unicode" {
-                        match self.chars.peek() {
-                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
-                                let quote_char = *quote_char;
-
-                                self.chars.next();
-
-                                return Some(self.string(start, start + 8, quote_char));
-                            }
-                            _ => (),
-                        }
-                    }
-
-                    if id == "hex" {
-                        match self.chars.peek() {
-                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
-                                let quote_char = *quote_char;
-
-                                self.chars.next();
-
-                                for (i, ch) in &mut self.chars {
-                                    if ch == quote_char {
-                                        return Some(Ok((
-                                            start,
-                                            Token::HexLiteral(&self.input[start..=i]),
-                                            i + 1,
-                                        )));
-                                    }
-
-                                    if !ch.is_ascii_hexdigit() && ch != '_' {
-                                        // Eat up the remainder of the string
-                                        for (_, ch) in &mut self.chars {
-                                            if ch == quote_char {
-                                                break;
-                                            }
-                                        }
-
-                                        return Some(Err(
-                                            LexicalError::InvalidCharacterInHexLiteral(i, ch),
-                                        ));
-                                    }
-                                }
-
-                                return Some(Err(LexicalError::EndOfFileInString(
-                                    start,
-                                    self.input.len(),
-                                )));
-                            }
-                            _ => (),
-                        }
-                    }
-
-                    if id == "address" {
-                        match self.chars.peek() {
-                            Some((_, quote_char @ '"')) | Some((_, quote_char @ '\'')) => {
-                                let quote_char = *quote_char;
-
-                                self.chars.next();
-
-                                for (i, ch) in &mut self.chars {
-                                    if ch == quote_char {
-                                        return Some(Ok((
-                                            start,
-                                            Token::AddressLiteral(&self.input[start..=i]),
-                                            i + 1,
-                                        )));
-                                    }
-                                }
-
-                                return Some(Err(LexicalError::EndOfFileInString(
-                                    start,
-                                    self.input.len(),
-                                )));
-                            }
-                            _ => (),
-                        }
-                    }
-
-                    return if let Some(w) = KEYWORDS.get(id) {
-                        Some(Ok((start, *w, end)))
-                    } else {
-                        Some(Ok((start, Token::Identifier(id), end)))
-                    };
-                }
-                Some((start, quote_char @ '"')) | Some((start, quote_char @ '\'')) => {
-                    return Some(self.string(start, start + 1, quote_char));
-                }
-                Some((start, '/')) => {
-                    match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            return Some(Ok((start, Token::DivideAssign, start + 2)));
-                        }
-                        Some((_, '/')) => {
-                            // line comment
-                            self.chars.next();
-
-                            let mut newline = false;
-
-                            let doc_comment_start = match self.chars.next() {
-                                Some((i, '/')) => match self.chars.peek() {
-                                    // ///(/)+ is still a line comment
-                                    Some((_, '/')) => None,
-                                    _ => Some(i + 1),
-                                },
-                                Some((_, ch)) if ch == '\n' || ch == '\r' => {
-                                    newline = true;
-                                    None
-                                }
-                                _ => None,
-                            };
-
-                            let mut last = start + 3;
-
-                            if !newline {
-                                for (i, ch) in &mut self.chars {
-                                    if ch == '\n' || ch == '\r' {
-                                        break;
-                                    }
-                                    last = i;
-                                }
-                            }
-
-                            if let Some(doc_start) = doc_comment_start {
-                                if last > doc_start {
-                                    return Some(Ok((
-                                        start + 3,
-                                        Token::DocComment(
-                                            CommentType::Line,
-                                            &self.input[doc_start..=last],
-                                        ),
-                                        last + 1,
-                                    )));
-                                }
-                            }
-                        }
-                        Some((_, '*')) => {
-                            // multiline comment
-                            self.chars.next();
-
-                            let doc_comment_start = match self.chars.next() {
-                                Some((i, '*')) => match self.chars.peek() {
-                                    Some((_, '*')) => None,
-                                    _ => Some(i + 1),
-                                },
-                                _ => None,
-                            };
-
-                            let mut last = start + 3;
-                            let mut seen_star = false;
-
-                            loop {
-                                if let Some((i, ch)) = self.chars.next() {
-                                    if seen_star && ch == '/' {
-                                        break;
-                                    }
-                                    seen_star = ch == '*';
-                                    last = i;
-                                } else {
-                                    return Some(Err(LexicalError::EndOfFileInComment(
-                                        start,
-                                        self.input.len(),
-                                    )));
-                                }
-                            }
-
-                            if let Some(doc_start) = doc_comment_start {
-                                if last > doc_start {
-                                    return Some(Ok((
-                                        start + 3,
-                                        Token::DocComment(
-                                            CommentType::Block,
-                                            &self.input[doc_start..last],
-                                        ),
-                                        last,
-                                    )));
-                                }
-                            }
-                        }
-                        _ => {
-                            return Some(Ok((start, Token::Divide, start + 1)));
-                        }
-                    }
-                }
-                Some((start, ch)) if ch.is_ascii_digit() => {
-                    return Some(self.parse_number(start, start, ch))
-                }
-                Some((i, ';')) => return Some(Ok((i, Token::Semicolon, i + 1))),
-                Some((i, ',')) => return Some(Ok((i, Token::Comma, i + 1))),
-                Some((i, '(')) => return Some(Ok((i, Token::OpenParenthesis, i + 1))),
-                Some((i, ')')) => return Some(Ok((i, Token::CloseParenthesis, i + 1))),
-                Some((i, '{')) => return Some(Ok((i, Token::OpenCurlyBrace, i + 1))),
-                Some((i, '}')) => return Some(Ok((i, Token::CloseCurlyBrace, i + 1))),
-                Some((i, '~')) => return Some(Ok((i, Token::Complement, i + 1))),
-                Some((i, '=')) => match self.chars.peek() {
-                    Some((_, '=')) => {
-                        self.chars.next();
-                        return Some(Ok((i, Token::Equal, i + 2)));
-                    }
-                    Some((_, '>')) => {
-                        self.chars.next();
-                        return Some(Ok((i, Token::Arrow, i + 2)));
-                    }
-                    _ => {
-                        return Some(Ok((i, Token::Assign, i + 1)));
-                    }
-                },
-                Some((i, '!')) => {
-                    if let Some((_, '=')) = self.chars.peek() {
-                        self.chars.next();
-                        return Some(Ok((i, Token::NotEqual, i + 2)));
-                    } else {
-                        return Some(Ok((i, Token::Not, i + 1)));
-                    }
-                }
-                Some((i, '|')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::BitwiseOrAssign, i + 2)))
-                        }
-                        Some((_, '|')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::Or, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::BitwiseOr, i + 1))),
-                    };
-                }
-                Some((i, '&')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::BitwiseAndAssign, i + 2)))
-                        }
-                        Some((_, '&')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::And, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::BitwiseAnd, i + 1))),
-                    };
-                }
-                Some((i, '^')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::BitwiseXorAssign, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::BitwiseXor, i + 1))),
-                    };
-                }
-                Some((i, '+')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::AddAssign, i + 2)))
-                        }
-                        Some((_, '+')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::Increment, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::Add, i + 1))),
-                    };
-                }
-                Some((i, '-')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::SubtractAssign, i + 2)))
-                        }
-                        Some((_, '-')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::Decrement, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::Subtract, i + 1))),
-                    };
-                }
-                Some((i, '*')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::MulAssign, i + 2)))
-                        }
-                        Some((_, '*')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::Power, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::Mul, i + 1))),
-                    };
-                }
-                Some((i, '%')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::ModuloAssign, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::Modulo, i + 1))),
-                    };
-                }
-                Some((i, '<')) => {
-                    return match self.chars.peek() {
-                        Some((_, '<')) => {
-                            self.chars.next();
-                            if let Some((_, '=')) = self.chars.peek() {
-                                self.chars.next();
-                                Some(Ok((i, Token::ShiftLeftAssign, i + 3)))
-                            } else {
-                                Some(Ok((i, Token::ShiftLeft, i + 2)))
-                            }
-                        }
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::LessEqual, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::Less, i + 1))),
-                    };
-                }
-                Some((i, '>')) => {
-                    return match self.chars.peek() {
-                        Some((_, '>')) => {
-                            self.chars.next();
-                            if let Some((_, '=')) = self.chars.peek() {
-                                self.chars.next();
-                                Some(Ok((i, Token::ShiftRightAssign, i + 3)))
-                            } else {
-                                Some(Ok((i, Token::ShiftRight, i + 2)))
-                            }
-                        }
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::MoreEqual, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::More, i + 1))),
-                    };
-                }
-                Some((i, '.')) => {
-                    if let Some((_, a)) = self.chars.peek() {
-                        if a.is_ascii_digit() {
-                            return Some(self.parse_number(i + 1, i + 1, '.'));
-                        }
-                    }
-                    return Some(Ok((i, Token::Member, i + 1)));
-                }
-                Some((i, '[')) => return Some(Ok((i, Token::OpenBracket, i + 1))),
-                Some((i, ']')) => return Some(Ok((i, Token::CloseBracket, i + 1))),
-                Some((i, ':')) => {
-                    return match self.chars.peek() {
-                        Some((_, '=')) => {
-                            self.chars.next();
-                            Some(Ok((i, Token::ColonAssign, i + 2)))
-                        }
-                        _ => Some(Ok((i, Token::Colon, i + 1))),
-                    };
-                }
-                Some((i, '?')) => return Some(Ok((i, Token::Question, i + 1))),
-                Some((_, ch)) if ch.is_whitespace() => (),
-                Some((start, _)) => {
-                    let mut end;
-
-                    loop {
-                        if let Some((i, ch)) = self.chars.next() {
-                            end = i;
-
-                            if ch.is_whitespace() {
-                                break;
-                            }
-                        } else {
-                            end = self.input.len();
-                            break;
-                        }
-                    }
-
-                    return Some(Err(LexicalError::UnrecognisedToken(
-                        start,
-                        end,
-                        self.input[start..end].to_owned(),
-                    )));
-                }
-                None => return None, // End of file
-            }
-        }
-    }
-
-    /// The next token is a pragma value. Return it.
-    fn pragma_value(&mut self) -> Option<Result<(usize, Token<'input>, usize), LexicalError>> {
-        // special parser for pragma solidity >=0.4.22 <0.7.0;
-        let mut start = None;
-        let mut end = 0;
-
-        // solc will include anything up to the next semicolon, with whitespace
-        // trimmed on the left and right
-        loop {
-            match self.chars.peek() {
-                Some((_, ';')) | None => {
-                    return if let Some(start) = start {
-                        Some(Ok((
-                            start,
-                            Token::StringLiteral(&self.input[start..end]),
-                            end,
-                        )))
-                    } else {
-                        self.next()
-                    };
-                }
-                Some((_, ch)) if ch.is_whitespace() => {
-                    self.chars.next();
-                }
-                Some((i, _)) => {
-                    if start.is_none() {
-                        start = Some(*i);
-                    }
-                    self.chars.next();
-
-                    // end should point to the byte _after_ the character
-                    end = match self.chars.peek() {
-                        Some((i, _)) => *i,
-                        None => self.input.len(),
-                    }
-                }
-            }
-        }
-    }
-}
-
-impl<'input> Iterator for Lexer<'input> {
-    type Item = Spanned<Token<'input>, usize, LexicalError>;
-
-    /// Return the next token
-    fn next(&mut self) -> Option<Self::Item> {
-        // The lexer tracks the last two tokens; if they were `pragma` followed by
-        // an identifier, the next token is a pragma value and needs the special
-        // [^;]+ parsing rather than normal lexing.
-        let token = if let [Some(Token::Pragma), Some(Token::Identifier(_))] = self.last_tokens {
-            self.pragma_value()
-        } else {
-            self.next()
-        };
-
-        self.last_tokens = [
-            self.last_tokens[1],
-            match token {
-                Some(Ok((_, n, _))) => Some(n),
-                _ => None,
-            },
-        ];
-
-        token
-    }
-}
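Because `Iterator` is implemented, the lexer composes with ordinary iterator adaptors, and every item carries the byte offsets needed for spans. A small usage sketch, with spans matching the behaviour exercised in `lexertest` below:

    let mut lex = Lexer::new("uint8 x;");
    assert_eq!(lex.next(), Some(Ok((0, Token::Uint(8), 5))));
    assert_eq!(lex.next(), Some(Ok((6, Token::Identifier("x"), 7))));
    assert_eq!(lex.next(), Some(Ok((7, Token::Semicolon, 8))));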
-
-#[test]
-fn lexertest() {
-    let tokens = Lexer::new("bool").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(tokens, vec!(Ok((0, Token::Bool, 4))));
-
-    let tokens = Lexer::new("uint8").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(tokens, vec!(Ok((0, Token::Uint(8), 5))));
-
-    let tokens = Lexer::new("hex").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(tokens, vec!(Ok((0, Token::Identifier("hex"), 3))));
-
-    let tokens = Lexer::new("hex\"cafe_dead\" /* adad*** */")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Ok((0, Token::HexLiteral("hex\"cafe_dead\""), 14)))
-    );
-
-    let tokens = Lexer::new("// foo bar\n0x00fead0_12 00090 0_0")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
-            Ok((24, Token::Number("00090", ""), 29)),
-            Ok((30, Token::Number("0_0", ""), 33))
-        )
-    );
-
-    let tokens = Lexer::new("// foo bar\n0x00fead0_12 9.0008 0_0")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
-            Ok((24, Token::RationalNumber("9", "0008", ""), 30)),
-            Ok((31, Token::Number("0_0", ""), 34))
-        )
-    );
-
-    let tokens = Lexer::new("// foo bar\n0x00fead0_12 .0008 0.9e2")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
-            Ok((24, Token::RationalNumber("", "0008", ""), 29)),
-            Ok((30, Token::RationalNumber("0", "9", "2"), 35))
-        )
-    );
-
-    let tokens = Lexer::new("// foo bar\n0x00fead0_12 .0008 0.9e-2")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((11, Token::HexNumber("0x00fead0_12"), 23)),
-            Ok((24, Token::RationalNumber("", "0008", ""), 29)),
-            Ok((30, Token::RationalNumber("0", "9", "-2"), 36))
-        )
-    );
-
-    let tokens =
-        Lexer::new("\"foo\"").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(tokens, vec!(Ok((0, Token::StringLiteral("foo"), 5)),));
-
-    let tokens = Lexer::new("pragma solidity >=0.5.0 <0.7.0;")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::Pragma, 6)),
-            Ok((7, Token::Identifier("solidity"), 15)),
-            Ok((16, Token::StringLiteral(">=0.5.0 <0.7.0"), 30)),
-            Ok((30, Token::Semicolon, 31)),
-        )
-    );
-
-    let tokens = Lexer::new("pragma solidity \t>=0.5.0 <0.7.0 \n ;")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::Pragma, 6)),
-            Ok((7, Token::Identifier("solidity"), 15)),
-            Ok((17, Token::StringLiteral(">=0.5.0 <0.7.0"), 31)),
-            Ok((34, Token::Semicolon, 35)),
-        )
-    );
-
-    let tokens = Lexer::new("pragma solidity 赤;")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::Pragma, 6)),
-            Ok((7, Token::Identifier("solidity"), 15)),
-            Ok((16, Token::StringLiteral("赤"), 19)),
-            Ok((19, Token::Semicolon, 20))
-        )
-    );
-
-    let tokens =
-        Lexer::new(">>= >> >= >").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::ShiftRightAssign, 3)),
-            Ok((4, Token::ShiftRight, 6)),
-            Ok((7, Token::MoreEqual, 9)),
-            Ok((10, Token::More, 11)),
-        )
-    );
-
-    let tokens =
-        Lexer::new("<<= << <= <").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::ShiftLeftAssign, 3)),
-            Ok((4, Token::ShiftLeft, 6)),
-            Ok((7, Token::LessEqual, 9)),
-            Ok((10, Token::Less, 11)),
-        )
-    );
-
-    let tokens =
-        Lexer::new("-16 -- - -=").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::Subtract, 1)),
-            Ok((1, Token::Number("16", ""), 3)),
-            Ok((4, Token::Decrement, 6)),
-            Ok((7, Token::Subtract, 8)),
-            Ok((9, Token::SubtractAssign, 11)),
-        )
-    );
-
-    let tokens = Lexer::new("-4 ").collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::Subtract, 1)),
-            Ok((1, Token::Number("4", ""), 2)),
-        )
-    );
-
-    let tokens =
-        Lexer::new(r#"hex"abcdefg""#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Err(LexicalError::InvalidCharacterInHexLiteral(10, 'g')))
-    );
-
-    let tokens = Lexer::new(r#" € "#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Err(LexicalError::UnrecognisedToken(1, 4, "€".to_owned())))
-    );
-
-    let tokens = Lexer::new(r#"€"#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Err(LexicalError::UnrecognisedToken(0, 3, "€".to_owned())))
-    );
-
-    let tokens = Lexer::new(r#"pragma foo bar"#)
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::Pragma, 6)),
-            Ok((7, Token::Identifier("foo"), 10)),
-            Ok((11, Token::StringLiteral("bar"), 14)),
-        )
-    );
-
-    let tokens =
-        Lexer::new(r#"/// foo"#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Ok((3, Token::DocComment(CommentType::Line, " foo"), 7)))
-    );
-
-    let tokens = Lexer::new("/// jadajadadjada\n// bar")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Ok((
-            3,
-            Token::DocComment(CommentType::Line, " jadajadadjada"),
-            17
-        )))
-    );
-
-    let tokens =
-        Lexer::new(r#"/** foo */"#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Ok((3, Token::DocComment(CommentType::Block, " foo "), 8)))
-    );
-
-    let tokens = Lexer::new("/** jadajadadjada */\n/* bar */")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(Ok((
-            3,
-            Token::DocComment(CommentType::Block, " jadajadadjada "),
-            18
-        )))
-    );
-
-    let tokens = Lexer::new("/************/").next();
-    assert_eq!(tokens, None);
-
-    let tokens = Lexer::new("/**").next();
-    assert_eq!(tokens, Some(Err(LexicalError::EndOfFileInComment(0, 3))));
-
-    let tokens = Lexer::new("//////////////").next();
-    assert_eq!(tokens, None);
-
-    // some unicode tests
-    let tokens = Lexer::new(">=\u{a0} . très\u{2028}αβγδεζηθικλμνξοπρστυφχψω\u{85}カラス")
-        .collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::MoreEqual, 2)),
-            Ok((5, Token::Member, 6)),
-            Ok((7, Token::Identifier("très"), 12)),
-            Ok((15, Token::Identifier("αβγδεζηθικλμνξοπρστυφχψω"), 63)),
-            Ok((65, Token::Identifier("カラス"), 74))
-        )
-    );
-
-    let tokens =
-        Lexer::new(r#"unicode"€""#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(tokens, vec!(Ok((0, Token::StringLiteral("€"), 12)),));
-
-    let tokens =
-        Lexer::new(r#"unicode "€""#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((0, Token::Identifier("unicode"), 7)),
-            Ok((8, Token::StringLiteral("€"), 13)),
-        )
-    );
-
-    // scientific notation
-    let tokens =
-        Lexer::new(r#" 1e0 "#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(tokens, vec!(Ok((1, Token::Number("1", "0"), 4)),));
-
-    let tokens =
-        Lexer::new(r#" -9e0123"#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((1, Token::Subtract, 2)),
-            Ok((2, Token::Number("9", "0123"), 8)),
-        )
-    );
-
-    let tokens =
-        Lexer::new(r#" -9e"#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Ok((1, Token::Subtract, 2)),
-            Err(LexicalError::MissingExponent(2, 4))
-        )
-    );
-
-    let tokens = Lexer::new(r#"9ea"#).collect::<Vec<Result<(usize, Token, usize), LexicalError>>>();
-
-    assert_eq!(
-        tokens,
-        vec!(
-            Err(LexicalError::MissingExponent(0, 3)),
-            Ok((2, Token::Identifier("a"), 3))
-        )
-    );
-}
|