فهرست منبع

feat(parser): DocComments grouping & type annotation (#621)

* feat(parser): DocComments grouping & type annotation
* preserve newlines in doc comments
* bump solang-parser version

Signed-off-by: Alexey Shekhirin <a.shekhirin@gmail.com>
Alexey Shekhirin 3 سال پیش
والد
کامیت
2fcd989451
7 فایلهای تغییر یافته به همراه 181 افزوده شده و 73 حذف شده
  1. 1 1
      Cargo.toml
  2. 2 2
      solang-parser/Cargo.toml
  3. 67 33
      solang-parser/src/doc.rs
  4. 89 32
      solang-parser/src/lib.rs
  5. 20 3
      solang-parser/src/pt.rs
  6. 1 1
      src/sema/dotgraphviz.rs
  7. 1 1
      src/sema/tags.rs

+ 1 - 1
Cargo.toml

@@ -43,7 +43,7 @@ funty = "=1.1.0"
 itertools = "0.10"
 num-rational = "0.4"
 indexmap = "1.7"
-solang-parser = { path = "solang-parser", version = "0.1" }
+solang-parser = { path = "solang-parser", version = "0.1.1" }
 
 [dev-dependencies]
 parity-scale-codec-derive = "2.0.0"

+ 2 - 2
solang-parser/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "solang-parser"
-version = "0.1.0"
+version = "0.1.1"
 authors = ["Sean Young <sean@mess.org>"]
 homepage = "https://github.com/hyperledger-labs/solang"
 documentation = "https://solang.readthedocs.io/"
@@ -19,4 +19,4 @@ num-bigint = "0.4"
 num-traits = "0.2"
 num-rational = "0.4"
 phf = { version = "0.10", features = ["macros"] }
-unicode-xid = "0.2.0"
+unicode-xid = "0.2.0"

+ 67 - 33
solang-parser/src/doc.rs

@@ -1,14 +1,18 @@
 // Parse the fields f
 use crate::lexer::CommentType;
-use crate::pt::DocComment;
+use crate::pt::{DocComment, SingleDocComment};
 
 /// Convert the comment to lines, stripping surrounding whitespace and leading `*` characters
-fn to_lines<'a>(comments: &[(usize, CommentType, &'a str)]) -> Vec<(usize, &'a str)> {
+fn to_lines<'a>(
+    comments: &[(usize, CommentType, &'a str)],
+) -> Vec<(CommentType, Vec<(usize, &'a str)>)> {
     let mut res = Vec::new();
 
     for (start, ty, comment) in comments.iter() {
+        let mut grouped_comments = Vec::new();
+
         match ty {
-            CommentType::Line => res.push((*start, comment.trim())),
+            CommentType::Line => grouped_comments.push((*start, comment.trim())),
             CommentType::Block => {
                 let mut start = *start;
 
@@ -17,13 +21,15 @@ fn to_lines<'a>(comments: &[(usize, CommentType, &'a str)]) -> Vec<(usize, &'a s
                         .char_indices()
                         .find(|(_, ch)| !ch.is_whitespace() && *ch != '*')
                     {
-                        res.push((start + i, s[i..].trim_end()))
+                        grouped_comments.push((start + i, s[i..].trim_end()));
                     }
 
                     start += s.len();
                 }
             }
         }
+
+        res.push((*ty, grouped_comments));
     }
 
     res
@@ -34,42 +40,70 @@ pub fn tags(lines: &[(usize, CommentType, &str)]) -> Vec<DocComment> {
     // first extract the tags
     let mut tags = Vec::new();
 
-    for (start_offset, line) in to_lines(lines).into_iter() {
-        let mut chars = line.char_indices().peekable();
+    let lines = to_lines(lines);
+    for (ty, comment_lines) in lines {
+        let mut single_tags = Vec::new();
+
+        for (start_offset, line) in comment_lines {
+            let mut chars = line.char_indices().peekable();
+
+            if let Some((_, '@')) = chars.peek() {
+                // step over @
+                let (tag_start, _) = chars.next().unwrap();
+                let mut tag_end = tag_start;
+
+                while let Some((offset, c)) = chars.peek() {
+                    if c.is_whitespace() {
+                        break;
+                    }
 
-        if let Some((_, '@')) = chars.peek() {
-            // step over @
-            let (tag_start, _) = chars.next().unwrap();
-            let mut tag_end = tag_start;
+                    tag_end = *offset;
 
-            while let Some((offset, c)) = chars.peek() {
-                if c.is_whitespace() {
-                    break;
+                    chars.next();
                 }
 
-                tag_end = *offset;
+                // tag value
+                single_tags.push(SingleDocComment {
+                    offset: tag_start,
+                    tag: line[tag_start + 1..tag_end + 1].to_owned(),
+                    value: line[tag_end + 1..].trim().to_owned(),
+                });
+            } else if !single_tags.is_empty() || !tags.is_empty() {
+                let line = line.trim();
+                if !line.is_empty() {
+                    let single_doc_comment = if let Some(single_tag) = single_tags.last_mut() {
+                        Some(single_tag)
+                    } else if let Some(tag) = tags.last_mut() {
+                        match tag {
+                            DocComment::Line { comment } => Some(comment),
+                            DocComment::Block { comments } => comments.last_mut(),
+                        }
+                    } else {
+                        None
+                    };
 
-                chars.next();
+                    if let Some(comment) = single_doc_comment {
+                        comment.value.push('\n');
+                        comment.value.push_str(line);
+                    }
+                }
+            } else {
+                single_tags.push(SingleDocComment {
+                    offset: start_offset,
+                    tag: String::from("notice"),
+                    value: line.trim().to_owned(),
+                });
             }
+        }
 
-            // tag value
-            tags.push(DocComment {
-                offset: tag_start,
-                tag: line[tag_start + 1..tag_end + 1].to_owned(),
-                value: line[tag_end + 1..].trim().to_owned(),
-            });
-        } else if let Some(tag) = tags.last_mut() {
-            let line = line.trim();
-            if !line.is_empty() {
-                tag.value.push(' ');
-                tag.value.push_str(line.trim());
-            }
-        } else {
-            tags.push(DocComment {
-                offset: start_offset,
-                tag: String::from("notice"),
-                value: line.trim().to_owned(),
-            });
+        match ty {
+            CommentType::Line if !single_tags.is_empty() => tags.push(DocComment::Line {
+                comment: single_tags[0].to_owned(),
+            }),
+            CommentType::Block => tags.push(DocComment::Block {
+                comments: single_tags,
+            }),
+            _ => {}
         }
     }
 

+ 89 - 32
solang-parser/src/lib.rs

@@ -1,18 +1,19 @@
 //! Solidity file parser
 
+use lalrpop_util::ParseError;
+
+pub use diagnostics::Diagnostic;
+
 pub mod diagnostics;
 mod doc;
 pub mod lexer;
 pub mod pt;
-pub use diagnostics::Diagnostic;
 
 #[allow(clippy::all)]
 pub mod solidity {
     include!(concat!(env!("OUT_DIR"), "/solidity.rs"));
 }
 
-use lalrpop_util::ParseError;
-
 /// Parse Solidity file content
 pub fn parse(src: &str, file_no: usize) -> Result<pt::SourceUnit, Vec<Diagnostic>> {
     // parse phase
@@ -68,7 +69,19 @@ mod test {
 
     #[test]
     fn parse_test() {
-        let src = "contract foo {
+        let src = "/// @title Foo
+                /// @description Foo
+                /// Bar
+                contract foo {
+                    /**
+                    @title Jurisdiction
+                    */
+                    /// @author Anon
+                    /**
+                    @description Data for
+                    jurisdiction
+                    @dev It's a struct
+                    */
                     struct Jurisdiction {
                         bool exists;
                         uint keyIdx;
@@ -81,62 +94,106 @@ mod test {
 
         let lex = lexer::Lexer::new(src);
 
-        let e = solidity::SourceUnitParser::new()
+        let actual_parse_tree = solidity::SourceUnitParser::new()
             .parse(src, 0, lex)
             .unwrap();
 
-        let a = SourceUnit(vec![SourceUnitPart::ContractDefinition(Box::new(
+        let expected_parse_tree = SourceUnit(vec![SourceUnitPart::ContractDefinition(Box::new(
             ContractDefinition {
-                doc: vec![],
-                loc: Loc(0, 0, 13),
-                ty: ContractTy::Contract(Loc(0, 0, 8)),
+                doc: vec![
+                    DocComment::Line {
+                        comment: SingleDocComment {
+                            offset: 0,
+                            tag: "title".to_string(),
+                            value: "Foo".to_string(),
+                        },
+                    },
+                    DocComment::Line {
+                        comment: SingleDocComment {
+                            offset: 0,
+                            tag: "description".to_string(),
+                            value: "Foo\nBar".to_string(),
+                        },
+                    },
+                ],
+                loc: Loc(0, 92, 105),
+                ty: ContractTy::Contract(Loc(0, 92, 100)),
                 name: Identifier {
-                    loc: Loc(0, 9, 12),
+                    loc: Loc(0, 101, 104),
                     name: "foo".to_string(),
                 },
                 base: Vec::new(),
                 parts: vec![
                     ContractPart::StructDefinition(Box::new(StructDefinition {
-                        doc: vec![],
+                        doc: vec![
+                            DocComment::Block {
+                                comments: vec![SingleDocComment {
+                                    offset: 0,
+                                    tag: "title".to_string(),
+                                    value: "Jurisdiction".to_string(),
+                                }],
+                            },
+                            DocComment::Line {
+                                comment: SingleDocComment {
+                                    offset: 0,
+                                    tag: "author".to_string(),
+                                    value: "Anon".to_string(),
+                                },
+                            },
+                            DocComment::Block {
+                                comments: vec![
+                                    SingleDocComment {
+                                        offset: 0,
+                                        tag: "description".to_string(),
+                                        value: "Data for\njurisdiction".to_string(),
+                                    },
+                                    SingleDocComment {
+                                        offset: 0,
+                                        tag: "dev".to_string(),
+                                        value: "It's a struct".to_string(),
+                                    },
+                                ],
+                            },
+                        ],
                         name: Identifier {
-                            loc: Loc(0, 42, 54),
+                            loc: Loc(0, 419, 431),
                             name: "Jurisdiction".to_string(),
                         },
-                        loc: Loc(0, 35, 232),
+                        loc: Loc(0, 412, 609),
                         fields: vec![
                             VariableDeclaration {
-                                loc: Loc(0, 81, 92),
-                                ty: Expression::Type(Loc(0, 81, 85), Type::Bool),
+                                loc: Loc(0, 458, 469),
+                                ty: Expression::Type(Loc(0, 458, 462), Type::Bool),
                                 storage: None,
                                 name: Identifier {
-                                    loc: Loc(0, 86, 92),
+                                    loc: Loc(0, 463, 469),
                                     name: "exists".to_string(),
                                 },
                             },
                             VariableDeclaration {
-                                loc: Loc(0, 118, 129),
-                                ty: Expression::Type(Loc(0, 118, 122), Type::Uint(256)),
+                                loc: Loc(0, 495, 506),
+                                ty: Expression::Type(Loc(0, 495, 499), Type::Uint(256)),
                                 storage: None,
                                 name: Identifier {
-                                    loc: Loc(0, 123, 129),
+                                    loc: Loc(0, 500, 506),
                                     name: "keyIdx".to_string(),
                                 },
                             },
                             VariableDeclaration {
-                                loc: Loc(0, 155, 169),
-                                ty: Expression::Type(Loc(0, 155, 161), Type::Bytes(2)),
+                                loc: Loc(0, 532, 546),
+                                ty: Expression::Type(Loc(0, 532, 538), Type::Bytes(2)),
                                 storage: None,
                                 name: Identifier {
-                                    loc: Loc(0, 162, 169),
+                                    loc: Loc(0, 539, 546),
                                     name: "country".to_string(),
                                 },
                             },
                             VariableDeclaration {
-                                loc: Loc(0, 195, 209),
-                                ty: Expression::Type(Loc(0, 195, 202), Type::Bytes(32)),
+                                loc: Loc(0, 572, 586),
+                                ty: Expression::Type(Loc(0, 572, 579), Type::Bytes(32)),
                                 storage: None,
                                 name: Identifier {
-                                    loc: Loc(0, 203, 209),
+                                    loc: Loc(0, 580, 586),
                                     name: "region".to_string(),
                                 },
                             },
@@ -144,30 +201,30 @@ mod test {
                     })),
                     ContractPart::VariableDefinition(Box::new(VariableDefinition {
                         doc: vec![],
-                        ty: Expression::Type(Loc(0, 253, 259), Type::String),
+                        ty: Expression::Type(Loc(0, 630, 636), Type::String),
                         attrs: vec![],
                         name: Identifier {
-                            loc: Loc(0, 260, 268),
+                            loc: Loc(0, 637, 645),
                             name: "__abba_$".to_string(),
                         },
-                        loc: Loc(0, 253, 268),
+                        loc: Loc(0, 630, 645),
                         initializer: None,
                     })),
                     ContractPart::VariableDefinition(Box::new(VariableDefinition {
                         doc: vec![],
-                        ty: Expression::Type(Loc(0, 290, 295), Type::Int(64)),
+                        ty: Expression::Type(Loc(0, 667, 672), Type::Int(64)),
                         attrs: vec![],
                         name: Identifier {
-                            loc: Loc(0, 296, 306),
+                            loc: Loc(0, 673, 683),
                             name: "$thing_102".to_string(),
                         },
-                        loc: Loc(0, 290, 306),
+                        loc: Loc(0, 667, 683),
                         initializer: None,
                     })),
                 ],
             },
         ))]);
 
-        assert_eq!(e, a);
+        assert_eq!(actual_parse_tree, expected_parse_tree);
     }
 }

+ 20 - 3
solang-parser/src/pt.rs

@@ -1,7 +1,9 @@
-use crate::lexer::CommentType;
+use std::fmt;
+
 use num_bigint::BigInt;
 use num_rational::BigRational;
-use std::fmt;
+
+use crate::lexer::CommentType;
 
 #[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Hash, Clone, Copy)]
 /// file no, start offset, end offset (in bytes)
@@ -24,7 +26,22 @@ pub struct Identifier {
 }
 
 #[derive(Debug, PartialEq, Clone)]
-pub struct DocComment {
+pub enum DocComment {
+    Line { comment: SingleDocComment },
+    Block { comments: Vec<SingleDocComment> },
+}
+
+impl DocComment {
+    pub fn comments(&self) -> Vec<&SingleDocComment> {
+        match self {
+            DocComment::Line { comment } => vec![comment],
+            DocComment::Block { comments } => comments.iter().collect(),
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Clone)]
+pub struct SingleDocComment {
     pub offset: usize,
     pub tag: String,
     pub value: String,

+ 1 - 1
src/sema/dotgraphviz.rs

@@ -99,7 +99,7 @@ impl Dot {
         if !tags.is_empty() {
             let labels = tags
                 .iter()
-                .map(|tag| format!("{}: {}", tag.tag, tag.value))
+                .map(|tag| format!("{}: {}", tag.tag, tag.value.replace('\n', " ")))
                 .collect();
 
             self.add_node(

+ 1 - 1
src/sema/tags.rs

@@ -13,7 +13,7 @@ pub fn resolve_tags(
 ) -> Vec<Tag> {
     let mut res: Vec<Tag> = Vec::new();
 
-    for c in doc.iter() {
+    for c in doc.iter().flat_map(pt::DocComment::comments) {
         match c.tag.as_str() {
             "notice" | "author" | "title" | "dev" => {
                 // fold fields with the same name