Prechádzať zdrojové kódy

add grammar for sbpf

bidhan-a pred 1 týždňom
rodič
commit
b41d928a2a
2 zmenené súbory, 182 pridaní a 0 odobraní
  1. 2 0
      crates/assembler/Cargo.toml
  2. 180 0
      crates/assembler/src/sbpf.pest

+ 2 - 0
crates/assembler/Cargo.toml

@@ -22,6 +22,8 @@ anyhow = { workspace = true }
 sbpf-common = { workspace = true }
 phf = "0.13.1"
 phf_macros = "0.13.1"
+pest = "2.7"
+pest_derive = "2.7"
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 wasm-bindgen = { version = "0.2.92", features = ["serde-serialize"] }

+ 180 - 0
crates/assembler/src/sbpf.pest

@@ -0,0 +1,180 @@
+// Implicit trivia: pest skips these automatically between the tokens of every
+// non-atomic rule. NEWLINE is not part of WHITESPACE; the rules below match it
+// explicitly as the statement terminator.
+WHITESPACE = _{
+    " "
+  | "\t"
+  | "\r"
+}
+// A comment opens with `;`, `#` or `//` and runs up to (not including) the
+// end of the line.
+COMMENT = _{
+    (";" | "#" | "//") ~ (!NEWLINE ~ ANY)*
+}
+
+// Numeric literals: hexadecimal with a `0x` prefix, or decimal with an
+// optional leading minus sign. All three rules are atomic, so no implicit
+// whitespace is allowed inside a literal.
+hex_number = @{
+    "0x" ~ ASCII_HEX_DIGIT+
+}
+decimal_number = @{
+    "-"? ~ ASCII_DIGIT+
+}
+// hex_number is tried first; otherwise `0x10` would be read as decimal `0`.
+number = @{
+    hex_number
+  | decimal_number
+}
+
+// String literals.
+// string_content is every character up to (not including) the next double
+// quote. There is no escape-sequence handling, so a string cannot contain `"`.
+string_content = @{
+    (!"\"" ~ ANY)*
+}
+// Compound-atomic: nothing is skipped implicitly between the quotes, while
+// string_content is still emitted as an inner pair.
+string_literal = ${
+    "\"" ~ string_content ~ "\""
+}
+
+// Registers (r0 to r10). "10" is tried before a single digit so that `r10`
+// is consumed whole rather than as `r1` followed by a stray `0`.
+register = @{ "r" ~ ("10" | ASCII_DIGIT) }
+
+// identifier is used when defining a name.
+// A name is rejected only when the whole word is `r` followed by nothing but
+// digits, i.e. when it looks like a register. The trailing negative lookahead
+// is a word-boundary check: names that merely start like a register
+// (`r1_tmp`, `r2d2`) are ordinary identifiers instead of being refused on
+// their first two characters.
+identifier = @{
+    !("r" ~ ASCII_DIGIT+ ~ !(ASCII_ALPHANUMERIC | "_"))
+  ~ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
+}
+// symbol is used when referencing a previously defined name
+symbol = @{ identifier }
+
+// Numeric labels (can be referenced with f or b)
+numeric_label     = @{ ASCII_DIGIT+ }
+numeric_label_ref = @{ numeric_label ~ ("f" | "b") }
+
+// A label definition: `name:` or `1:`, optionally followed on the same line by
+// a directive or an instruction. A bare label ends at a newline or, like
+// `instruction`, at the end of input, so a label on the last line of a file
+// that has no trailing newline still parses. `&EOI` is a lookahead and
+// consumes nothing, leaving EOI for `program` to match.
+label = { (identifier | numeric_label) ~ ":" ~ ((directive_inner | instruction) | NEWLINE | &EOI) }
+
+// Expressions
+// An expression is a flat chain of terms joined by binary operators; the
+// grammar itself encodes no operator precedence. Parentheses nest a full
+// expression inside a term.
+bin_op = {
+    "+"
+  | "-"
+  | "*"
+  | "/"
+}
+term = {
+    ("(" ~ expression ~ ")")
+  | number
+  | symbol
+}
+expression = {
+    term ~ (bin_op ~ term)*
+}
+
+// Operand.
+// operand_expr covers the `symbol + number` form. It is listed first in
+// `operand` so that a bare `symbol` alternative does not match the prefix and
+// leave `+ number` unconsumed.
+operand_expr = {
+    symbol ~ "+" ~ number
+}
+operand = {
+    operand_expr
+  | number
+  | symbol
+}
+
+// Memory offset.
+memory_offset = { number | symbol }
+// Memory reference: a base register followed by one or more offsets inside
+// square brackets. An offset is either introduced by `+`, or is a negative
+// decimal literal written directly after the previous item (`[r10-8]`).
+// `&"-"` is a zero-width lookahead: the minus sign is left in place and
+// becomes part of the number, so `[r10-8]` yields exactly the same pairs as
+// `[r10+-8]`. Because `number` is atomic, the sign must touch its digits
+// (`[r10 -8]` is accepted, `[r10 - 8]` is not).
+memory_ref    = { "[" ~ register ~ (("+" | &"-") ~ memory_offset)+ ~ "]" }
+
+// Jump target
+// Tried in order: a numeric label reference (`1f`, `2b`), a number with an
+// optional explicit sign, then a symbol. The label reference must come first
+// because its digits would otherwise be taken by the number alternative.
+jump_target = {
+    numeric_label_ref
+  | (("+" | "-")? ~ number)
+  | symbol
+}
+
+// ============
+// DIRECTIVES
+// ============
+
+// Global
+// NOTE(review): only the literal names `entrypoint` and `e` are accepted after
+// `.globl`; any other symbol name is a parse error. Confirm this is intended.
+globl_symbol = {
+    "entrypoint"
+  | "e"
+}
+directive_globl = {
+    ".globl" ~ globl_symbol
+}
+
+// External
+// One or more symbols; they are separated by implicit whitespace only.
+directive_extern = {
+    ".extern" ~ symbol+
+}
+
+// Constant
+// `.equ NAME, <expression>`
+directive_equ = {
+    ".equ" ~ identifier ~ "," ~ expression
+}
+
+// Sections
+// The three section-switching directives recognised by the grammar.
+directive_section = { ".text" | ".data" | ".rodata" }
+
+// Data directives
+// `.ascii` takes one string literal; the others take a comma-separated list
+// of one or more numbers.
+directive_ascii = {
+    ".ascii" ~ string_literal
+}
+directive_byte = {
+    ".byte" ~ number ~ ("," ~ number)*
+}
+directive_word = {
+    ".word" ~ number ~ ("," ~ number)*
+}
+directive_long = {
+    ".long" ~ number ~ ("," ~ number)*
+}
+directive_quad = {
+    ".quad" ~ number ~ ("," ~ number)*
+}
+
+// Every directive form, without its line terminator. `label` reuses this rule
+// so that a directive can follow a label on the same line.
+directive_inner = {
+    directive_globl
+  | directive_extern
+  | directive_equ
+  | directive_section
+  | directive_ascii
+  | directive_byte
+  | directive_word
+  | directive_long
+  | directive_quad
+}
+// A stand-alone directive line. Like `instruction`, it ends at a newline or at
+// the end of input, so a directive on the last line of a file that has no
+// trailing newline still parses. `&EOI` is a lookahead and consumes nothing.
+directive       = { directive_inner ~ (NEWLINE | &EOI) }
+
+// ================
+// INSTRUCTIONS
+// ================
+
+// Arithmetic Operations (64-bit)
+// NOTE(review): `neg64` is listed with the binary operations, so this grammar
+// only accepts it in the two-operand `op dst, src` shape. Confirm that a
+// single-operand `neg64 rN` is not expected to parse.
+arith_64_op = {
+    "add64" | "sub64" | "mul64" | "div64" | "sdiv64"
+  | "mod64" | "smod64" | "neg64"
+  | "or64" | "and64" | "xor64"
+  | "mov64"
+  | "lsh64" | "rsh64" | "arsh64"
+}
+// Immediate form: destination register, then a number or symbol operand.
+instr_arith64_imm = {
+    arith_64_op ~ register ~ "," ~ operand
+}
+// Register form: destination register, then source register.
+instr_arith64_reg = {
+    arith_64_op ~ register ~ "," ~ register
+}
+
+// Arithmetic Operations (32-bit)
+// NOTE(review): unlike the 64-bit list there is no `neg32` mnemonic here.
+// Confirm whether that omission is intentional.
+arith_32_op = {
+    "add32" | "sub32" | "mul32" | "div32" | "sdiv32"
+  | "mod32" | "smod32"
+  | "or32" | "and32" | "xor32"
+  | "mov32"
+  | "lsh32" | "rsh32" | "arsh32"
+}
+// Immediate form: destination register, then a number or symbol operand.
+instr_arith32_imm = {
+    arith_32_op ~ register ~ "," ~ operand
+}
+// Register form: destination register, then source register.
+instr_arith32_reg = {
+    arith_32_op ~ register ~ "," ~ register
+}
+
+// Memory Load Operations
+// The `ldx*` mnemonics load from a memory reference into a register.
+load_op = {
+    "ldxb"
+  | "ldxh"
+  | "ldxw"
+  | "ldxdw"
+}
+instr_load = {
+    load_op ~ register ~ "," ~ memory_ref
+}
+// `lddw` takes a register and a number/symbol operand, not a memory reference.
+instr_lddw = {
+    "lddw" ~ register ~ "," ~ operand
+}
+
+// Memory store Operations
+// Both instruction forms share one mnemonic list; the grammar does not tie the
+// `st*` spellings to the immediate form or the `stx*` spellings to the
+// register form.
+store_op = {
+    "stb" | "sth" | "stw" | "stdw"
+  | "stxb" | "stxh" | "stxw" | "stxdw"
+}
+// Store a number or symbol operand to memory.
+instr_store_imm = {
+    store_op ~ memory_ref ~ "," ~ operand
+}
+// Store a register to memory.
+instr_store_reg = {
+    store_op ~ memory_ref ~ "," ~ register
+}
+
+// Control Flow Operations (Jumps)
+// Conditional jump mnemonics.
+jump_op = {
+    "jeq" | "jne"
+  | "jgt" | "jge" | "jlt" | "jle"
+  | "jsgt" | "jsge" | "jslt" | "jsle"
+  | "jset"
+}
+// Conditional jump comparing a register with a number or symbol operand.
+instr_jump_imm = {
+    jump_op ~ register ~ "," ~ operand ~ "," ~ jump_target
+}
+// Conditional jump comparing two registers.
+instr_jump_reg = {
+    jump_op ~ register ~ "," ~ register ~ "," ~ jump_target
+}
+// Unconditional jump.
+instr_jump_uncond = {
+    "ja" ~ jump_target
+}
+
+// Byte Swap Operations
+// `be` / `le` take a register and a number or symbol operand.
+endian_op = {
+    "be"
+  | "le"
+}
+instr_endian = {
+    endian_op ~ register ~ "," ~ operand
+}
+
+// Function Call Operations
+// `call` targets a symbol, `callx` targets a register, `exit` has no operands.
+instr_call = {
+    "call" ~ symbol
+}
+instr_callx = {
+    "callx" ~ register
+}
+instr_exit = {
+    "exit"
+}
+
+// All instructions
+// One instruction followed by a newline or by the end of input (`&EOI` is a
+// lookahead and consumes nothing). The alternatives are tried in order:
+// `instr_callx` precedes `instr_call` because `call` is a prefix of `callx`,
+// and each `_imm` form precedes its `_reg` form.
+instruction = {
+    (
+        instr_lddw
+      | instr_callx
+      | instr_call
+      | instr_exit
+      | instr_arith64_imm
+      | instr_arith64_reg
+      | instr_arith32_imm
+      | instr_arith32_reg
+      | instr_load
+      | instr_store_imm
+      | instr_store_reg
+      | instr_jump_imm
+      | instr_jump_reg
+      | instr_jump_uncond
+      | instr_endian
+    ) ~ (NEWLINE | &EOI)
+}
+
+// One source line: an empty line, a label (optionally followed by a directive
+// or an instruction), a directive, or an instruction. `label` is tried before
+// `directive` and `instruction`.
+statement = { NEWLINE | label | directive | instruction }
+
+// Top-level rule: the whole input must be a sequence of statements.
+program = {
+    SOI ~ statement* ~ EOI
+}