
feat: error handling enhancement/replace build mode with sbpf-assembler

Claire xyz 3 months ago
parent
commit
2a47adbf56

+ 1 - 0
crates/assembler/Cargo.toml

@@ -10,3 +10,4 @@ num-traits = { workspace = true }
 thiserror = { workspace = true }
 anyhow = { workspace = true }
 codespan-reporting = { workspace = true }
+termcolor = "1.4"

+ 50 - 4
crates/assembler/src/errors.rs

@@ -2,6 +2,7 @@ use crate::define_compile_errors;
 use std::ops::Range;
 
 define_compile_errors! {
+    // Lexical errors
     InvalidNumber {
         error = "Invalid number '{number}'",
         label = "Invalid number",
@@ -21,20 +22,65 @@ define_compile_errors! {
         error = "Unterminated string literal",
         label = "Unterminated string literal",
         fields = { span: Range<usize> }
-    }
+    },
+    // Syntactic errors
+    InvalidGlobalDecl {
+        error = "Invalid global declaration",
+        label = "Expected <identifier> for entry label",
+        fields = { span: Range<usize> }
+    },
+    InvalidExternDecl {
+        error = "Invalid extern declaration",
+        label = "Invalid extern declaration",
+        fields = { span: Range<usize> }
+    },
+    InvalidRodataDecl {
+        error = "Invalid rodata declaration",
+        label = "Invalid rodata declaration",
+        fields = { span: Range<usize> }
+    },
+    InvalidEquDecl {
+        error = "Invalid equ declaration",
+        label = "Invalid equ declaration",
+        fields = { span: Range<usize> }
+    },
+    InvalidDirective {
+        error = "Invalid directive '{directive}'",
+        label = "Invalid directive",
+        fields = { directive: String, span: Range<usize> }
+    },
+    InvalidInstruction {
+        error = "Invalid '{instruction}' instruction",
+        label = "Invalid instruction",
+        fields = { instruction: String, span: Range<usize> }
+    },
+    UnexpectedToken {
+        error = "Unexpected token '{token}'",
+        label = "Unexpected token",
+        fields = { token: String, span: Range<usize> }
+    },
+
+    // Semantic errors
+    UndefinedLabel {
+        error = "Undefined label '{label}'",
+        label = "Undefined label",
+        fields = { label: String, span: Range<usize> }
+    },
 }
 
+
 use codespan_reporting::diagnostic::{Diagnostic, Label};
 
 pub trait AsDiagnostic {
-    fn as_diagnostic(&self) -> Diagnostic<usize>;
+    // currently only supports reporting from a single source file
+    fn to_diagnostic(&self) -> Diagnostic<()>;
 }
 
 impl AsDiagnostic for CompileError {
-    fn as_diagnostic(&self) -> Diagnostic<usize> {
+    fn to_diagnostic(&self) -> Diagnostic<()> {
         Diagnostic::error()
             .with_message(self.to_string())
-            .with_labels(vec![Label::primary(0, self.span().start..self.span().end).with_message(self.label())])
+            .with_labels(vec![Label::primary((), self.span().start..self.span().end).with_message(self.label())])
     }
 }
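
A minimal usage sketch of the new variants and the to_diagnostic conversion (not part of this commit; the custom_label field comes from the macro change in macros.rs below):

    use crate::errors::{AsDiagnostic, CompileError};

    // custom_label = None falls back to the variant's default label,
    // here "Expected <identifier> for entry label".
    let err = CompileError::InvalidGlobalDecl { span: 10..17, custom_label: None };
    let diagnostic = err.to_diagnostic();
    assert_eq!(diagnostic.message, "Invalid global declaration");
    assert_eq!(diagnostic.labels.len(), 1);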
 

+ 31 - 11
crates/assembler/src/lexer.rs

@@ -53,19 +53,20 @@ pub enum Token {
     RightBracket(Range<usize>),
     Comma(Range<usize>),
     Colon(Range<usize>),
+
+    Newline(Range<usize>),
 }
 
-pub fn tokenize(source: &str) -> Result<Vec<Token>, CompileError> {
+pub fn tokenize(source: &str) -> Result<Vec<Token>, Vec<CompileError>> {
     let mut tokens = Vec::new();
+    let mut errors = Vec::new();
     let mut byte_offset = 0;
 
     for line in source.lines() {
-
         if line.is_empty() {
             byte_offset += 1;
             continue;
         }
-
         let mut chars = line.char_indices().peekable();
         while let Some((start_idx, c)) = chars.peek() {
             let token_start = byte_offset + start_idx;
@@ -87,9 +88,17 @@ pub fn tokenize(source: &str) -> Result<Vec<Token>, CompileError> {
                     }
                     let span = token_start..token_start + number.len();
                     if is_addr {
-                        tokens.push(Token::ImmediateValue(ImmediateValue::Addr(i64::from_str_radix(&number, 16).map_err(|_| CompileError::InvalidNumber { number, span: span.clone() })?), span.clone())); 
+                        if let Ok(value) = i64::from_str_radix(&number, 16) {
+                            tokens.push(Token::ImmediateValue(ImmediateValue::Addr(value), span.clone()));
+                        } else {
+                            errors.push(CompileError::InvalidNumber { number, span: span.clone(), custom_label: None });
+                        }
                     } else {
-                        tokens.push(Token::ImmediateValue(ImmediateValue::Int(number.parse::<i64>().map_err(|_| CompileError::InvalidNumber { number, span: span.clone() })?), span.clone()));
+                        if let Ok(value) = number.parse::<i64>() {
+                            tokens.push(Token::ImmediateValue(ImmediateValue::Int(value), span.clone()));
+                        } else {
+                            errors.push(CompileError::InvalidNumber { number, span: span.clone(), custom_label: None });
+                        }
                     }      
                 }
 
@@ -107,7 +116,12 @@ pub fn tokenize(source: &str) -> Result<Vec<Token>, CompileError> {
                         let label_name = identifier.trim_end_matches(':').to_string();
                         tokens.push(Token::Label(label_name, span));
                     } else if identifier.starts_with('r') && identifier[1..].chars().all(|c| c.is_ascii_digit()) {
-                        tokens.push(Token::Register(identifier[1..].parse::<u8>().map_err(|_| CompileError::InvalidRegister { register: identifier, span: span.clone() })?, span.clone()));
+                        // TODO: a label name could also be just "r", which is currently rejected as an invalid register
+                        if let Ok(value) = identifier[1..].parse::<u8>() {
+                            tokens.push(Token::Register(value, span.clone()));
+                        } else {
+                            errors.push(CompileError::InvalidRegister { register: identifier, span: span.clone(), custom_label: None });
+                        }
                     } else if let Ok(opcode) = Opcode::from_str(&identifier) {
                         tokens.push(Token::Opcode(opcode, span));
                     } else {
@@ -146,7 +160,7 @@ pub fn tokenize(source: &str) -> Result<Vec<Token>, CompileError> {
                             tokens.push(Token::StringLiteral(string_literal, span));
                             break;
                         } else if *c == '\n' {
-                            return Err(CompileError::UnterminatedStringLiteral { span: token_start..token_start + 1 });
+                            errors.push(CompileError::UnterminatedStringLiteral { span: token_start..token_start + 1, custom_label: None });
                         }
                         string_literal.push(chars.next().unwrap().1);
                     }
@@ -178,16 +192,22 @@ pub fn tokenize(source: &str) -> Result<Vec<Token>, CompileError> {
                         break;
                     } else {
                         let span = token_start..token_start + 1;
-                        return Err(CompileError::UnexpectedCharacter { character: '/', span });
+                        errors.push(CompileError::UnexpectedCharacter { character: '/', span, custom_label: None });
                     }
                 }
                 _ => {
                     let span = token_start..token_start + 1;
-                    return Err(CompileError::UnexpectedCharacter { character: *c, span });
+                    errors.push(CompileError::UnexpectedCharacter { character: *c, span, custom_label: None });
                 }
             }
         }
-        byte_offset += line.len() + 1;
+        byte_offset += line.len();
+        tokens.push(Token::Newline(byte_offset..byte_offset + 1));
+        byte_offset += 1;
+    }
+    if errors.is_empty() {
+        Ok(tokens)
+    } else {
+        Err(errors)
     }
-    Ok(tokens)
 }
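
With the new return type the lexer keeps going after a bad token and reports everything at once; a hypothetical test (not part of this commit), based on the register branch above:

    #[test]
    fn tokenize_collects_multiple_errors() {
        // r256 and r300 both overflow u8, so two InvalidRegister errors are expected
        let errors = tokenize("mov64 r256, r300").unwrap_err();
        assert_eq!(errors.len(), 2);
    }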

+ 25 - 3
crates/assembler/src/lib.rs

@@ -5,6 +5,10 @@ extern crate anyhow;
 use std::path::Path;
 use anyhow::{Error, Result};
 use codespan_reporting::files::SimpleFile;
+use codespan_reporting::term;
+use codespan_reporting::term::Config;
+use termcolor::{ColorChoice, StandardStream};
+use crate::errors::AsDiagnostic;
 
 // Tokenizer and parser
 pub mod parser;
@@ -14,6 +18,7 @@ pub mod opcode;
 // Error handling and diagnostics
 pub mod macros;
 pub mod errors;
+pub mod messages;
 
 // Intermediate Representation
 pub mod astnode;
@@ -45,15 +50,32 @@ pub fn assemble(src: &str, deploy: &str) -> Result<()> {
     let source_code = std::fs::read_to_string(src)?;
     let file = SimpleFile::new(src.to_string(), source_code.clone());
 
+    // TODO: ideally we should collect the lexer errors here and print them together
+    // with the parser's errors all at once
     let tokens = match tokenize(&source_code) {
         Ok(tokens) => tokens,
-        Err(e) => return Err(Error::msg(format!("Tokenizer error: {}", e))),
+        Err(errors) => {
+            for error in errors {
+                let writer = StandardStream::stderr(ColorChoice::Auto);
+                let config = Config::default();
+                let diagnostic = error.to_diagnostic();
+                term::emit(&mut writer.lock(), &config, &file, &diagnostic)?;
+            }
+            return Err(Error::msg("Compilation failed"));
+        }
     };
-
     let mut parser = Parser::new(tokens, &file);
     let parse_result = match parser.parse() {
         Ok(program) => program,
-        Err(e) => return Err(Error::msg(format!("Parser error: {}", e))),
+        Err(errors) => {
+            for error in errors {
+                let writer = StandardStream::stderr(ColorChoice::Auto);
+                let config = Config::default();
+                let diagnostic = error.to_diagnostic();
+                term::emit(&mut writer.lock(), &config, &file, &diagnostic)?;
+            }
+            return Err(Error::msg("Compilation failed"));
+        }
     };
 
     let program = Program::from_parse_result(parse_result);
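
The two reporting loops in assemble() are identical; one way to deduplicate them would be a small helper along these lines (a sketch, not part of this commit; emit_errors is a hypothetical name, and it reuses the imports added above plus crate::errors::CompileError):

    fn emit_errors(file: &SimpleFile<String, String>, errors: &[CompileError]) -> Result<()> {
        let writer = StandardStream::stderr(ColorChoice::Auto);
        let config = Config::default();
        for error in errors {
            term::emit(&mut writer.lock(), &config, file, &error.to_diagnostic())?;
        }
        Ok(())
    }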

+ 17 - 3
crates/assembler/src/macros.rs

@@ -13,15 +13,15 @@ macro_rules! define_compile_errors {
         pub enum CompileError {
             $(
                 #[error($error_msg)]
-                $variant { $( $field_name: $field_ty ),* }
+                $variant { $( $field_name: $field_ty ),*, custom_label: Option<String> }
             ),*
         }
 
         impl CompileError {
-            pub fn label(&self) -> &'static str {
+            pub fn label(&self) -> &str {
                 match self {
                     $(
-                        Self::$variant { .. } => $label_msg,
+                        Self::$variant { custom_label, .. } => custom_label.as_deref().unwrap_or($label_msg),
                     )*
                 }
             }
@@ -37,3 +37,17 @@ macro_rules! define_compile_errors {
     };
 }
 
+#[macro_export]
+macro_rules! bug {
+    ($($arg:tt)*) => {{
+        eprintln!(
+            "\n{}\n{}",
+            "Thanks for abusing the compiler <3 you've hunted a bug!",
+            format!("Please file a bug report at: {}", "https://github.com/blueshift-gg/sbpf/issues")
+        );
+
+        panic!("{}", format!("Internal error: {}\n", format!($($arg)*)));
+    }};
+}
+
+
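
For a single entry, the updated macro now expands to roughly the following (illustrative only, not the literal expansion):

    #[error("Invalid equ declaration")]
    InvalidEquDecl { span: Range<usize>, custom_label: Option<String> },

    // and the corresponding arm of label():
    Self::InvalidEquDecl { custom_label, .. } =>
        custom_label.as_deref().unwrap_or("Invalid equ declaration"),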

+ 17 - 0
crates/assembler/src/messages.rs

@@ -0,0 +1,17 @@
+
+/* A central place to store all the label messages to avoid duplication
+ * and make it easier to update or translate. We could move error messages here too
+ */
+
+// We could give more fine-grained error messages by pointing out the exact token that caused
+// the error with a dedicated error-pattern parser; for now we just list the expected patterns
+pub const EXPECTS_LABEL_DIR_STR: &str = "expects <label>, <directive>, <string literal>";
+pub const EXPECTS_IDEN: &str = "expects <identifier>";
+pub const EXPECTS_IDEN_COM_IMM: &str = "expects <identifier>, <immediate value>";
+pub const EXPECTS_MORE_OPERAND: &str = "expects more operands";
+pub const EXPECTS_REG_COM_IMM: &str = "expects <register>, <immediate value>";
+pub const EXPECTS_REG_COM_REG: &str = "expects <register>, <register>";
+pub const EXPECTS_REG_COM_IMM_OR_IDEN: &str = "expects <register>, <immediate value>/<identifier>";
+pub const EXPECTS_REG_COM_IMM_COM_IMM_OR_IDEN: &str = "expects <register>, <immediate value>, <immediate value>/<identifier>";
+pub const EXPECTS_REG_COM_LB_REG_BIOP_IMM_RB: &str = "expects <register>, [<register> <binary operator> <immediate value>]";
+pub const EXPECTS_LB_REG_BIOP_IMM_RB_COM_REG: &str = "expects [<register> <binary operator> <immediate value>], <register>";

+ 3 - 0
crates/assembler/src/opcode.rs

@@ -608,4 +608,7 @@ impl Opcode {
             Opcode::Exit => "exit",
         }
     }
+    pub fn to_string(&self) -> String {
+        self.to_str().to_string()
+    }
 }
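
An inherent to_string shadows the blanket ToString impl and triggers the clippy::inherent_to_string lint; an alternative (a sketch, not part of this commit) is to implement Display and get to_string for free:

    use std::fmt;

    impl fmt::Display for Opcode {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str(self.to_str())
        }
    }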

+ 208 - 85
crates/assembler/src/parser.rs

@@ -4,10 +4,12 @@ use crate::lexer::{Token, ImmediateValue};
 use crate::section::{CodeSection, DataSection};
 use crate::astnode::{ASTNode, Directive, GlobalDecl, EquDecl, ExternDecl, RodataDecl, Label, Instruction, ROData};
 use crate::dynsym::{DynamicSymbolMap, RelDynMap, RelocationType};
-use crate::debuginfo::span_to_line_number;
 use codespan_reporting::files::SimpleFile;
 use num_traits::FromPrimitive;
 use std::collections::HashMap;
+use crate::errors::CompileError;
+use crate::messages::*;
+use crate::bug;
 
 pub struct Parser<> {
     tokens: Vec<Token>,
@@ -38,42 +40,45 @@ pub struct ParseResult {
 
     pub relocation_data: RelDynMap,
 
-    // TODO: this should determine by if there's any dynamic symbol
+    // TODO: this can be removed; whether the program is dynamic should just be
+    // determined by whether there are any dynamic symbols
     pub prog_is_static: bool,
 }
 
+// for now, we only return one error per parse for simpler error handling
 pub trait Parse {
-    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])>
+    fn parse(tokens: &[Token]) -> Result<(Self, &[Token]), CompileError>
         where Self: Sized;
 }
 
-// can maybe be removed
 pub trait ParseInstruction {
-    fn parse_instruction<'a>(tokens: &'a [Token], const_map: &HashMap<String, ImmediateValue>) -> Option<(Self, &'a [Token])>
+    fn parse_instruction<'a>(tokens: &'a [Token], const_map: &HashMap<String, ImmediateValue>) -> Result<(Self, &'a [Token]), CompileError>
         where Self: Sized;
 }
 
 impl Parse for GlobalDecl {
-    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+    fn parse(tokens: &[Token]) -> Result<(Self, &[Token]), CompileError> {
+        let Token::Directive(_, span) = &tokens[0] else { bug!("GlobalDecl not a valid directive") };
         if tokens.len() < 2 {
-            return None;
+            return Err(CompileError::InvalidGlobalDecl { span: span.clone(), custom_label: None });
         }
         match &tokens[1] {
-            Token::Identifier(name, span) => Some((
+            Token::Identifier(name, span) => Ok((
                 GlobalDecl {
                     entry_label: name.clone(), 
                     span: span.clone()
                 },
                 &tokens[2..])),
-            _ => None,
+            _ => Err(CompileError::InvalidGlobalDecl { span: span.clone(), custom_label: None }),
         }
     }
 }
 
 impl Parse for EquDecl {
-    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+    fn parse(tokens: &[Token]) -> Result<(Self, &[Token]), CompileError> {
+        let Token::Directive(_, span) = &tokens[0] else { bug!("EquDecl not a valid directive") };
         if tokens.len() < 3 {
-            return None;
+            return Err(CompileError::InvalidEquDecl { span: span.clone(), custom_label: Some(EXPECTS_MORE_OPERAND.to_string()) });
         }
         match (
             &tokens[1],
@@ -85,25 +90,25 @@ impl Parse for EquDecl {
                 Token::Comma(_),
                 Token::ImmediateValue(_value, _)
             ) => {
-                Some((
+                Ok((
                     EquDecl {
                         name: name.clone(),
-                        // TODO: infer the number type from the value
                         value: tokens[3].clone(),
                         span: span.clone()
                     },
                     &tokens[4..]
                 ))
             }
-            _ => None,
+            _ => Err(CompileError::InvalidEquDecl { span: span.clone(), custom_label: Some(EXPECTS_IDEN_COM_IMM.to_string()) }),
         }
     }
 }
 
 impl Parse for ExternDecl {
-    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+    fn parse(tokens: &[Token]) -> Result<(Self, &[Token]), CompileError> {
+        let Token::Directive(_, span) = &tokens[0] else { bug!("ExternDecl not a valid directive") };
         if tokens.len() < 2 {
-            return None;
+            return Err(CompileError::InvalidExternDecl { span: span.clone(), custom_label: Some(EXPECTS_MORE_OPERAND.to_string()) });
         }
         let mut args = Vec::new();
         let mut i = 1;
@@ -120,10 +125,9 @@ impl Parse for ExternDecl {
         }
         //
         if args.is_empty() {
-            None
+            Err(CompileError::InvalidExternDecl { span: span.clone(), custom_label: Some(EXPECTS_IDEN.to_string()) })
         } else {
-            let Token::Directive(_, span) = &tokens[0] else { unreachable!() };
-            Some((
+            Ok((
                 ExternDecl { 
                     args, 
                     span: span.clone()
@@ -135,9 +139,10 @@ impl Parse for ExternDecl {
 }
 
 impl Parse for ROData {
-    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+    fn parse(tokens: &[Token]) -> Result<(Self, &[Token]), CompileError> {
+        let Token::Directive(_, span) = &tokens[0] else { bug!("ROData not a valid directive") };
         if tokens.len() < 3 {
-            return None;
+            return Err(CompileError::InvalidRodataDecl { span: span.clone(), custom_label: Some(EXPECTS_MORE_OPERAND.to_string()) });
         }
 
         let mut args = Vec::new();
@@ -153,7 +158,7 @@ impl Parse for ROData {
             ) => {
                 args.push(tokens[1].clone());
                 args.push(tokens[2].clone());
-                Some((
+                Ok((
                     ROData {
                         name: name.clone(),
                         args,
@@ -162,13 +167,13 @@ impl Parse for ROData {
                     &tokens[3..]
                 ))
             }
-            _ => None,
+            _ => Err(CompileError::InvalidRodataDecl { span: span.clone(), custom_label: Some(EXPECTS_LABEL_DIR_STR.to_string()) }),
         }
     }
 }
 
 impl ParseInstruction for Instruction {
-    fn parse_instruction<'a>(tokens: &'a [Token], const_map: &HashMap<String, ImmediateValue>) -> Option<(Self, &'a [Token])> {
+    fn parse_instruction<'a>(tokens: &'a [Token], const_map: &HashMap<String, ImmediateValue>) -> Result<(Self, &'a [Token]), CompileError> {
         let next_token_num;
         match &tokens[0] {
             Token::Opcode(opcode, span) => {
@@ -177,7 +182,11 @@ impl ParseInstruction for Instruction {
                 match opcode {
                     Opcode::Lddw => {
                         if tokens.len() < 4 {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_MORE_OPERAND.to_string()),
+                            });
                         }
                         let (value, advance_token_num) = inline_and_fold_constant(tokens, const_map, 3);
                         if let Some(value) = value {
@@ -195,7 +204,11 @@ impl ParseInstruction for Instruction {
                                     operands.push(Token::ImmediateValue(value, span.clone()));
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_REG_COM_IMM_OR_IDEN.to_string()),
+                                    });
                                 }
                             }
                             next_token_num = advance_token_num;
@@ -213,9 +226,12 @@ impl ParseInstruction for Instruction {
                                     operands.push(tokens[1].clone());
                                     operands.push(tokens[3].clone());
                                 }
-                                // external error: invalid syntax with opcode: lddw
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_REG_COM_IMM_OR_IDEN.to_string()),
+                                    });
                                 }
                             }
                             next_token_num = 4;
@@ -223,7 +239,11 @@ impl ParseInstruction for Instruction {
                     }
                     Opcode::Ldxw | Opcode::Ldxh | Opcode::Ldxb | Opcode::Ldxdw => {
                         if tokens.len() < 8 {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_MORE_OPERAND.to_string()),
+                            });
                         }
                         let (value, advance_token_num) = inline_and_fold_constant(tokens, const_map, 6);
                         if let Some(value) = value {
@@ -250,18 +270,30 @@ impl ParseInstruction for Instruction {
                                     operands.push(Token::ImmediateValue(value, span.clone()));                                    
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_REG_COM_LB_REG_BIOP_IMM_RB.to_string()),
+                                    });
                                 }
                             }
                             next_token_num = advance_token_num + 1;
                         } else {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_REG_COM_LB_REG_BIOP_IMM_RB.to_string()),
+                            });
                         }
                     }
                     Opcode::Stw | Opcode::Sth | Opcode::Stb | Opcode::Stdw
                     | Opcode::Stxb | Opcode::Stxh | Opcode::Stxw | Opcode::Stxdw => {
                         if tokens.len() < 8 {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_MORE_OPERAND.to_string()),
+                            });
                         }
                         let (value, advance_token_num) = inline_and_fold_constant(tokens, const_map, 4);
                         if let Some(value) = value {
@@ -288,12 +320,20 @@ impl ParseInstruction for Instruction {
                                     operands.push(tokens[advance_token_num + 2].clone());
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_LB_REG_BIOP_IMM_RB_COM_REG.to_string()),
+                                    });
                                 }
                             }
                             next_token_num = advance_token_num + 3;
                         } else {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_LB_REG_BIOP_IMM_RB_COM_REG.to_string()),
+                            });
                         }
                     }
                     Opcode::Add32 | Opcode::Sub32 | Opcode::Mul32 
@@ -309,7 +349,11 @@ impl ParseInstruction for Instruction {
                     | Opcode::Lmul64 | Opcode::Uhmul64 | Opcode::Udiv64 
                     | Opcode::Urem64 | Opcode::Sdiv64 | Opcode::Srem64 => {
                         if tokens.len() < 4 {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_MORE_OPERAND.to_string()),
+                            });
                         }
                         let (value, advance_token_num) = inline_and_fold_constant(tokens, const_map, 3);
                         if let Some(value) = value {
@@ -328,7 +372,11 @@ impl ParseInstruction for Instruction {
                                     operands.push(Token::ImmediateValue(value, span.clone()));
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_REG_COM_IMM.to_string()),
+                                    });
                                 }
                             } 
                             next_token_num = advance_token_num;
@@ -348,7 +396,11 @@ impl ParseInstruction for Instruction {
                                     operands.push(tokens[3].clone());
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_REG_COM_REG.to_string()),
+                                    });
                                 }
                             }                           
                             next_token_num = 4;
@@ -359,7 +411,11 @@ impl ParseInstruction for Instruction {
                     | Opcode::Jne | Opcode::Jsgt | Opcode::Jsge
                     | Opcode::Jslt | Opcode::Jsle => {
                         if tokens.len() < 6 {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_MORE_OPERAND.to_string()),
+                            });
                         }
                         let (value, advance_token_num) = inline_and_fold_constant(tokens, const_map, 3);
                         if let Some(value) = value {
@@ -383,7 +439,11 @@ impl ParseInstruction for Instruction {
                                     operands.push(tokens[advance_token_num + 1].clone());
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_REG_COM_IMM_COM_IMM_OR_IDEN.to_string()),
+                                    });
                                 }
                             }
                             next_token_num = advance_token_num + 2;
@@ -402,13 +462,18 @@ impl ParseInstruction for Instruction {
                                     Token::Comma(_),
                                     Token::Identifier(_, _)
                                 ) => {
+                                    // TODO: turn the "Invalid opcode conversion" expect below into a bug!()
                                     opcode = FromPrimitive::from_u8((opcode as u8) + 2).expect("Invalid opcode conversion"); 
                                     operands.push(tokens[1].clone());
                                     operands.push(tokens[3].clone());
                                     operands.push(tokens[5].clone());
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_REG_COM_IMM_COM_IMM_OR_IDEN.to_string()),
+                                    });
                                 }
                             }
                             next_token_num = 6;
@@ -416,7 +481,11 @@ impl ParseInstruction for Instruction {
                     }
                     Opcode::Ja => {
                         if tokens.len() < 2 {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_MORE_OPERAND.to_string()),
+                            });
                         }
                         let (value, advance_token_num) = inline_and_fold_constant(tokens, const_map, 1);
                         if let Some(value) = value {
@@ -428,7 +497,11 @@ impl ParseInstruction for Instruction {
                                     operands.push(tokens[1].clone());
                                 }
                                 _ => {
-                                    return None;
+                                    return Err(CompileError::InvalidInstruction {
+                                        instruction: opcode.to_string(),
+                                        span: span.clone(),
+                                        custom_label: Some(EXPECTS_IDEN.to_string()),
+                                    });
                                 }
                             }
                             next_token_num = 2;
@@ -436,14 +509,22 @@ impl ParseInstruction for Instruction {
                     }
                     Opcode::Call => {
                         if tokens.len() < 2 {
-                            return None;
+                            return Err(CompileError::InvalidInstruction {
+                                instruction: opcode.to_string(),
+                                span: span.clone(),
+                                custom_label: Some(EXPECTS_MORE_OPERAND.to_string()),
+                            });
                         }
                         match &tokens[1] {
                             Token::Identifier(_, _) => {
                                 operands.push(tokens[1].clone());
                             }
                             _ => {
-                                return None;
+                                return Err(CompileError::InvalidInstruction {
+                                    instruction: opcode.to_string(),
+                                    span: span.clone(),
+                                    custom_label: Some(EXPECTS_IDEN.to_string()),
+                                });
                             }
                         }
                         next_token_num = 2;
@@ -451,12 +532,11 @@ impl ParseInstruction for Instruction {
                     Opcode::Exit => {
                         next_token_num = 1;
                     }
-                    // internal error: invalid opcode
                     _ => {
-                        return None;
+                        bug!("invalid opcode: {}", opcode.to_str());
                     }
                 }
-                Some((
+                Ok((
                     Instruction {
                         opcode,
                         operands,
@@ -465,7 +545,9 @@ impl ParseInstruction for Instruction {
                     &tokens[next_token_num..]
                 ))
             }
-            _ => None,
+            _ => {
+                bug!("invalid instruction");
+            }
         }
         
     }
@@ -534,32 +616,50 @@ impl Parser {
         }
     }
 
-    pub fn parse(&mut self) -> Result<ParseResult, String> {
+    pub fn parse(&mut self) -> Result<ParseResult, Vec<CompileError>> {
         let mut nodes = Vec::new();
         let mut rodata_nodes = Vec::new();
         let mut rodata_phase = false;
 
+        let mut errors = Vec::new();
+
         let mut tokens = self.tokens.as_slice();
 
+        // TODO: when a parse error occurs, we should probably just skip to the next line,
+        // as long as we can still report the case where a user puts two instructions on the same line;
+        // for now we just continue from the next token
+
+        // TODO: it would be nice to build a token iterator that can
+        // 1. peek at the next several tokens (for detecting patterns)
+        // 2. jump to the next line
+        // 3. continue to the next token
         while !tokens.is_empty() {
             match &tokens[0] {
                 Token::Directive(name, span) => {
                     match name.as_str() {
                         "global" | "globl" => {
-                            if let Some((node, rest)) = GlobalDecl::parse(tokens) {
+                            match GlobalDecl::parse(tokens) {
+                                Ok((node, rest)) => {
                                 self.m_entry_label = Some(node.get_entry_label());
                                 nodes.push(ASTNode::GlobalDecl { global_decl: node });
                                 tokens = rest;
-                            } else {
-                                return Err("Invalid global declaration".to_string());
+                                }
+                                Err(e) => {
+                                    errors.push(e);
+                                    tokens = &tokens[1..];
+                                }
                             }
                         }
                         "extern" => {
-                            if let Some((node, rest)) = ExternDecl::parse(tokens) {
+                            match ExternDecl::parse(tokens) {
+                                Ok((node, rest)) => {
                                 nodes.push(ASTNode::ExternDecl { extern_decl: node });
                                 tokens = rest;
-                            } else {
-                                return Err("Invalid extern declaration".to_string());
+                                }
+                                Err(e) => {
+                                    errors.push(e);
+                                    tokens = &tokens[1..];
+                                }
                             }
                         }
                         "rodata" => {
@@ -568,12 +668,16 @@ impl Parser {
                             tokens = &tokens[1..];
                         }
                         "equ" => {
-                            if let Some((node, rest)) = EquDecl::parse(tokens) {
+                            match EquDecl::parse(tokens) {
+                                Ok((node, rest)) => {
                                 self.m_const_map.insert(node.get_name(), node.get_val());
                                 nodes.push(ASTNode::EquDecl { equ_decl: node });
                                 tokens = rest;
-                            } else {
-                                return Err("Invalid equ declaration".to_string());
+                                }
+                                Err(e) => {
+                                    errors.push(e);
+                                    tokens = &tokens[1..];
+                                }
                             }
                         }
                         "section" => {
@@ -581,19 +685,24 @@ impl Parser {
                             tokens = &tokens[1..];
                         }
                         _ => {
-                            return Err(format!("Invalid directive: {}", name));
+                            errors.push(CompileError::InvalidDirective { directive: name.clone(), span: span.clone(), custom_label: None });
+                            tokens = &tokens[1..];
                         }
                     }
                 }
                 Token::Label(name, span) => {
                     if rodata_phase {
-                        if let Some((rodata, rest)) = ROData::parse(tokens) {
+                        match ROData::parse(tokens) {
+                            Ok((rodata, rest)) => {
                             self.m_label_offsets.insert(name.clone(), self.m_accum_offset + self.m_rodata_size);
                             self.m_rodata_size += rodata.get_size();
                             rodata_nodes.push(ASTNode::ROData { rodata, offset: self.m_accum_offset });
                             tokens = rest;
-                        } else {
-                            return Err("Invalid rodata declaration".to_string());
+                            }
+                            Err(e) => {
+                                errors.push(e);
+                                tokens = &tokens[1..];
+                            }
                         }
                     } else {
                         self.m_label_offsets.insert(name.clone(), self.m_accum_offset);
@@ -601,30 +710,38 @@ impl Parser {
                         tokens = &tokens[1..];
                     }
                 }
-                Token::Opcode(_opcode, span) => {
-                    if let Some((inst, rest)) = Instruction::parse_instruction(tokens, &self.m_const_map) {
-                        if inst.needs_relocation() {
-                            self.m_prog_is_static = false;
-                            let (reloc_type, label) = inst.get_relocation_info();
-                            self.m_rel_dyns.add_rel_dyn(self.m_accum_offset, reloc_type, label.clone());
-                            if reloc_type == RelocationType::RSbfSyscall {
-                                self.m_dynamic_symbols.add_call_target(label.clone(), self.m_accum_offset);
+                Token::Opcode(_, _) => {
+                    match Instruction::parse_instruction(tokens, &self.m_const_map) {
+                        Ok((inst, rest)) => {
+                            if inst.needs_relocation() {
+                                self.m_prog_is_static = false;
+                                let (reloc_type, label) = inst.get_relocation_info();
+                                self.m_rel_dyns.add_rel_dyn(self.m_accum_offset, reloc_type, label.clone());
+                                if reloc_type == RelocationType::RSbfSyscall {
+                                    self.m_dynamic_symbols.add_call_target(label.clone(), self.m_accum_offset);
+                                }
                             }
+                            let offset = self.m_accum_offset;
+                            self.m_accum_offset += inst.get_size();
+                            nodes.push(ASTNode::Instruction { instruction: inst, offset });
+                            tokens = rest;
+                        }
+                        Err(e) => {
+                            errors.push(e);
+                            tokens = &tokens[1..];
                         }
-                        let offset = self.m_accum_offset;
-                        self.m_accum_offset += inst.get_size();
-                        nodes.push(ASTNode::Instruction { instruction: inst, offset });
-                        tokens = rest;
-                    } else {
-                        return Err(format!("Invalid instruction at line {}", span_to_line_number(span.clone(), self.m_file.as_ref().unwrap())));
                     }
                 }
                 _ => {
-                    return Err(format!("Unexpected token: {:?}", tokens[0]));
+                    tokens = &tokens[1..];
                 }
             }
         }
 
+        if !errors.is_empty() {
+            return Err(errors);
+        }
+
         // Second pass to resolve labels
         for node in &mut nodes {
             match node {
@@ -656,6 +773,8 @@ impl Parser {
                                 // Replace label with immediate value
                                 let last_idx = operands.len() - 1;
                                 operands[last_idx] = Token::ImmediateValue(ImmediateValue::Addr(abs_offset), span.clone());
+                            } else {
+                                errors.push(CompileError::UndefinedLabel { label: name.clone(), span: span.clone(), custom_label: None });
                             }
                         }
                     }
@@ -670,13 +789,17 @@ impl Parser {
                 self.m_dynamic_symbols.add_entry_point(entry_label.clone(), *offset);
             }
         }
-        
-        Ok(ParseResult {
-            code_section: CodeSection::new(nodes, self.m_accum_offset, self.m_file.as_ref().unwrap()),
-            data_section: DataSection::new(rodata_nodes, self.m_rodata_size),
-            dynamic_symbols: DynamicSymbolMap::copy(&self.m_dynamic_symbols),
-            relocation_data: RelDynMap::copy(&self.m_rel_dyns),
-            prog_is_static: self.m_prog_is_static,
-        })
+
+        if !errors.is_empty() {
+            Err(errors)
+        } else {
+            Ok(ParseResult {
+                code_section: CodeSection::new(nodes, self.m_accum_offset, self.m_file.as_ref().unwrap()),
+                data_section: DataSection::new(rodata_nodes, self.m_rodata_size),
+                dynamic_symbols: DynamicSymbolMap::copy(&self.m_dynamic_symbols),
+                relocation_data: RelDynMap::copy(&self.m_rel_dyns),
+                prog_is_static: self.m_prog_is_static,
+            })
+        }
     }
 }
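
The token-iterator TODO near the top of Parser::parse could take roughly this shape (a sketch; TokenCursor and its API are assumptions, not part of this commit):

    struct TokenCursor<'a> {
        tokens: &'a [Token],
    }

    impl<'a> TokenCursor<'a> {
        // Peek at up to n upcoming tokens without consuming them (for pattern detection).
        fn peek(&self, n: usize) -> &'a [Token] {
            let tokens = self.tokens;
            &tokens[..n.min(tokens.len())]
        }

        // Consume a single token.
        fn bump(&mut self) {
            if let Some(rest) = self.tokens.get(1..) {
                self.tokens = rest;
            }
        }

        // Error recovery: skip everything up to and including the next Newline token.
        fn skip_to_next_line(&mut self) {
            while let Some((tok, rest)) = self.tokens.split_first() {
                self.tokens = rest;
                if matches!(tok, Token::Newline(_)) {
                    break;
                }
            }
        }
    }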

+ 10 - 108
src/commands/build.rs

@@ -1,122 +1,25 @@
-use anyhow::{Error, Result};
-use dirs::home_dir;
+use sbpf_assembler::assemble;
+
 use ed25519_dalek::SigningKey;
 use rand::rngs::OsRng;
 use std::fs;
-use std::fs::create_dir_all;
-use std::io;
+
+use anyhow::Result;
 use std::path::Path;
-use std::process::Command;
 use std::time::Instant;
-
-use crate::commands::common::{SolanaConfig, DEFAULT_LINKER};
+use std::fs::create_dir_all;
 
 pub fn build() -> Result<()> {
-    // Construct the path to the config file
-    let home_dir = home_dir().expect("❌ Could not find $HOME directory");
-    // Solana Config path
-    let config_path = home_dir.join(".config/solana/install/config.yml");
-
-    if !Path::new(&config_path).exists() {
-        return Err(Error::msg("❌ Solana config not found. Please install the Solana CLI:\n\nhttps://docs.anza.xyz/cli/install"));
-    }
-
-    // Read the file contents
-    let config_content = fs::read_to_string(config_path)?;
-
-    // Parse the YAML file
-    let solana_config: SolanaConfig = serde_yaml::from_str(&config_content)?;
-
-    // Solana SDK and toolchain paths
-    let platform_tools = [solana_config.active_release_dir.clone(), "/bin/platform-tools-sdk/sbf/dependencies/platform-tools".to_owned()].concat();
-    let llvm_dir = [platform_tools.clone(), "/llvm".to_owned()].concat();
-    let clang = [llvm_dir.clone(), "/bin/clang".to_owned()].concat();
-    let ld = [llvm_dir.clone(), "/bin/ld.lld".to_owned()].concat();
-
-    // Check for platform tools
-    if !Path::new(&llvm_dir).exists() {
-        return Err(Error::msg(format!("❌ Solana platform-tools not found. To manually install, please download the latest release here: \n\nhttps://github.com/anza-xyz/platform-tools/releases\n\nThen unzip to this directory and try again:\n\n{}", &platform_tools)));
-    }
-
-    // Set src/out directory and compiler flags
+    // Set src/out directory
     let src = "src";
-    let out = ".sbpf";
     let deploy = "deploy";
-    let arch = "-target";
-    let arch_target = "sbf";
 
     // Create necessary directories
-    create_dir_all(out)?;
     create_dir_all(deploy)?;
 
     // Function to compile assembly
-    fn compile_assembly(
-        clang: &str,
-        arch: &str,
-        arch_target: &str,
-        out: &str,
-        src: &str,
-        filename: &str,
-    ) -> Result<()> {
-        let output_file = format!("{}/{}.o", out, filename);
-        let input_file = format!("{}/{}/{}.s", src, filename, filename);
-        let status = Command::new(clang)
-            .args([
-                arch,
-                arch_target,
-                "-c",
-                "-o",
-                &output_file,
-                &input_file,
-            ])
-            .status()?;
-
-        if !status.success() {
-            eprintln!("Failed to compile assembly for {}", filename);
-            return Err(Error::new(io::Error::new(
-                io::ErrorKind::Other,
-                "Compilation failed",
-            )));
-        }
-        Ok(())
-    }
-
-    // Function to build shared object
-    fn build_shared_object(ld: &str, filename: &str) -> Result<()> {
-        let default_linker = ".sbpf/linker.ld".to_string();
-        let output_file = format!("deploy/{}.so", filename);
-        let input_file = format!(".sbpf/{}.o", filename);
-        let mut linker_file = format!("src/{}.ld", filename);
-        // Check if a custom linker file exists
-        if !Path::new(&linker_file).exists() {
-            if !Path::new(&default_linker).exists() {
-                fs::create_dir(".sbpf").unwrap_or(());
-                fs::write(&default_linker, DEFAULT_LINKER)?;
-            }
-            linker_file = default_linker;
-        };
-
-        let status = Command::new(ld)
-            .arg("-shared")
-            .arg("-z")
-            .arg("notext")
-            .arg("--image-base")
-            .arg("0x100000000")
-            .arg("-T")
-            .arg(linker_file)
-            .arg("-o")
-            .arg(&output_file)
-            .arg(&input_file)
-            .status()?;
-
-        if !status.success() {
-            eprintln!("Failed to build shared object for {}", filename);
-            return Err(Error::new(io::Error::new(
-                io::ErrorKind::Other,
-                "Linking failed",
-            )));
-        }
-        Ok(())
+    fn compile_assembly(src: &str, deploy: &str) -> Result<()> {
+        assemble(src, deploy)
     }
 
     // Function to check if keypair file exists.
@@ -162,10 +65,9 @@ pub fn build() -> Result<()> {
             if let Some(subdir) = path.file_name().and_then(|name| name.to_str()) {
                 let asm_file = format!("{}/{}/{}.s", src, subdir, subdir);
                 if Path::new(&asm_file).exists() {
-                    println!("🔄 Building \"{}\"", subdir);
+                    println!("⚡️ Light building \"{}\"", subdir);
                     let start = Instant::now();
-                    compile_assembly(&clang, arch, arch_target, out, src, subdir)?;
-                    build_shared_object(&ld, subdir)?;
+                    compile_assembly(&asm_file, deploy)?;
                     let duration = start.elapsed();
                     println!(
                         "✅ \"{}\" built successfully in {}ms!",

+ 0 - 83
src/commands/light_build.rs

@@ -1,83 +0,0 @@
-use sbpf_assembler::assemble;
-
-use ed25519_dalek::SigningKey;
-use rand::rngs::OsRng;
-use std::fs;
-
-use anyhow::Result;
-use std::path::Path;
-use std::time::Instant;
-use std::fs::create_dir_all;
-
-pub fn light_build() -> Result<()> {
-    // Set src/out directory
-    let src = "src";
-    let deploy = "deploy";
-
-    // Create necessary directories
-    create_dir_all(deploy)?;
-
-    // Function to compile assembly
-    fn compile_assembly(src: &str, deploy: &str) -> Result<()> {
-        assemble(src, deploy)
-    }
-
-    // Function to check if keypair file exists.
-    fn has_keypair_file(dir: &Path) -> bool {
-        if dir.exists() && dir.is_dir() {
-            match fs::read_dir(dir) {
-                Ok(entries) => entries.filter_map(Result::ok).any(|entry| {
-                    entry
-                        .path()
-                        .file_name()
-                        .and_then(|name| name.to_str())
-                        .map(|name| name.ends_with("-keypair.json"))
-                        .unwrap_or(false)
-                }),
-                Err(_) => false,
-            }
-        } else {
-            false
-        }
-    }
-
-    // Check if keypair file exists. If not, create one.
-    let deploy_path = Path::new(deploy);
-    if !has_keypair_file(deploy_path) {
-        let project_path = std::env::current_dir()?;
-        let project_name = project_path
-            .file_name()
-            .and_then(|n| n.to_str())
-            .unwrap_or("program");
-        let mut rng = OsRng;
-        fs::write(
-            deploy_path.join(format!("{}-keypair.json", project_name)),
-            serde_json::json!(SigningKey::generate(&mut rng).to_keypair_bytes()[..]).to_string(),
-        )?;
-    }
-
-    // Processing directories
-    let src_path = Path::new(src);
-    for entry in src_path.read_dir()? {
-        let entry = entry?;
-        let path = entry.path();
-        if path.is_dir() {
-            if let Some(subdir) = path.file_name().and_then(|name| name.to_str()) {
-                let asm_file = format!("{}/{}/{}.s", src, subdir, subdir);
-                if Path::new(&asm_file).exists() {
-                    println!("⚡️ Light building \"{}\"", subdir);
-                    let start = Instant::now();
-                    compile_assembly(&asm_file, deploy)?;
-                    let duration = start.elapsed();
-                    println!(
-                        "✅ \"{}\" built successfully in {}ms!",
-                        subdir,
-                        duration.as_micros() as f64 / 1000.0
-                    );
-                }
-            }
-        }
-    }
-
-    Ok(())
-}

+ 0 - 3
src/commands/mod.rs

@@ -4,9 +4,6 @@ pub use init::*;
 pub mod build;
 pub use build::*;
 
-pub mod light_build;
-pub use light_build::*;
-
 pub mod deploy;
 pub use deploy::*;
 

+ 1 - 4
src/main.rs

@@ -1,7 +1,7 @@
 pub mod commands;
 use anyhow::Error;
 use clap::{Args, Parser, Subcommand};
-use commands::{build, light_build, clean, deploy, init, test};
+use commands::{build, clean, deploy, init, test};
 
 #[derive(Parser)]
 #[command(version, about, long_about = None)]
@@ -17,8 +17,6 @@ enum Commands {
     Init(InitArgs),
     #[command(about = "Compile into a Solana program executable")]
     Build,
-    #[command(about = "Compile without any platform tools")]
-    LightBuild,
     #[command(about = "Build and deploy the program")]
     Deploy(DeployArgs),
     #[command(about = "Test deployed program")]
@@ -52,7 +50,6 @@ fn main() -> Result<(), Error> {
     match &cli.command {
         Commands::Init(args) => init(args.name.clone(), args.ts_tests),
         Commands::Build => build(),
-        Commands::LightBuild => light_build(),
         Commands::Deploy(args) => deploy(args.name.clone(), args.url.clone()),
         Commands::Test => test(),
         // use arg to specify if use light build