Przeglądaj źródła

feat: light_build mode and assembler

Claire xyz 4 miesięcy temu
rodzic
commit
8ca753aa1d

+ 15 - 0
Cargo.toml

@@ -15,3 +15,18 @@ thiserror = "2.0.12"
 serde = { version = "1.0", features = ["derive"] }
 serde_yaml = "0.9.34-deprecated"
 dirs = "4.0" # For easily getting the home directory
+
+sbpf-assembler = { workspace = true }
+
+[workspace]
+members = ["crates/assembler"]
+
+[workspace.package]
+version = "0.1.1"
+
+[workspace.dependencies]
+ num-derive    = "0.4"
+ num-traits    = "0.2"
+ anyhow = "1.0.86"
+
+ sbpf-assembler = { path = "crates/assembler" }

+ 9 - 0
crates/assembler/Cargo.toml

@@ -0,0 +1,9 @@
+[package]
+name                   = "sbpf-assembler"
+description            = "SBPF Assembler"
+version.workspace      = true
+
+[dependencies]
+num-derive = { workspace = true }
+num-traits = { workspace = true }
+anyhow = { workspace = true }

+ 303 - 0
crates/assembler/src/astnode.rs

@@ -0,0 +1,303 @@
+use crate::opcode::Opcode;
+use crate::lexer::{Token, ImmediateValue};
+use crate::dynsym::RelocationType;
+use crate::debuginfo::{DebugInfo, RegisterHint, RegisterType};
+use std::collections::HashMap;
+
+/// A node of the assembler's syntax tree.
+///
+/// The declaration/label variants exist only at the AST level. `Instruction`
+/// and `ROData` additionally carry an `offset` and are the only variants for
+/// which `bytecode_with_debug_map` returns bytes.
+#[derive(Debug, Clone)]
+pub enum ASTNode {
+    // only present in the AST
+    Directive(Directive),
+    GlobalDecl(GlobalDecl),
+    EquDecl(EquDecl),
+    ExternDecl(ExternDecl),
+    RodataDecl(RodataDecl),
+    Label(Label),
+    // present in the bytecode
+    Instruction {
+        instruction: Instruction,
+        // Used as the key of the debug-map entry emitted for this instruction.
+        offset: u64,
+    },
+    ROData {
+        rodata: ROData,
+        // NOTE(review): presumably the position of this data in the output;
+        // it is not read by the encoder in this file - confirm against callers.
+        offset: u64,
+    },
+}
+
+// TODO (original author's note): remove this type.
+/// A raw directive: its name, its argument tokens and the line number
+/// recorded for it.
+#[derive(Debug, Clone)]
+pub struct Directive {
+    pub name: String,
+    pub args: Vec<Token>,
+    pub line_number: usize,
+}
+
+/// A global declaration naming the program's entry label.
+#[derive(Debug, Clone)]
+pub struct GlobalDecl {
+    pub entry_label: String,
+    pub line_number: usize,
+}
+
+impl GlobalDecl {
+    /// Returns an owned copy of the entry label.
+    pub fn get_entry_label(&self) -> String {
+        self.entry_label.to_owned()
+    }
+}
+
+/// An equ declaration binding `name` to the token stored in `value`.
+#[derive(Debug, Clone)]
+pub struct EquDecl {
+    pub name: String,
+    pub value: Token,
+    pub line_number: usize,
+}
+
+impl EquDecl {
+    /// Returns an owned copy of the declared name.
+    pub fn get_name(&self) -> String {
+        self.name.to_owned()
+    }
+
+    /// Returns a copy of the immediate held in `value`.
+    ///
+    /// # Panics
+    ///
+    /// Panics when `value` is not a `Token::ImmediateValue`.
+    pub fn get_val(&self) -> ImmediateValue {
+        if let Token::ImmediateValue(val, _) = &self.value {
+            return val.clone();
+        }
+        panic!("Invalid Equ declaration")
+    }
+}
+
+/// An extern declaration with its argument tokens and recorded line number.
+#[derive(Debug, Clone)]
+pub struct ExternDecl {
+    pub args: Vec<Token>,
+    pub line_number: usize,
+}
+
+/// A rodata declaration; only the line number is recorded.
+#[derive(Debug, Clone)]
+pub struct RodataDecl {
+    pub line_number: usize,
+}
+
+/// A label definition: its name and the line number recorded for it.
+#[derive(Debug, Clone)]
+pub struct Label {
+    pub name: String,
+    pub line_number: usize,
+}
+
+/// A single assembly instruction: its opcode, operand tokens and the line
+/// number recorded for it.
+#[derive(Debug, Clone)]
+pub struct Instruction {
+    pub opcode: Opcode,
+    pub operands: Vec<Token>,
+    pub line_number: usize,
+}
+
+impl Instruction {
+    /// Size of the encoded instruction in bytes: 16 for `Lddw`, 8 otherwise.
+    pub fn get_size(&self) -> u64 {
+        match self.opcode {
+            Opcode::Lddw => 16,
+            _ => 8,
+        }
+    }
+
+    /// Returns `true` for every `Call`, and for an `Lddw` whose second
+    /// operand is an identifier.
+    ///
+    /// An `Lddw` with fewer than two operands yields `false` instead of
+    /// panicking on an out-of-bounds index.
+    pub fn needs_relocation(&self) -> bool {
+        match self.opcode {
+            Opcode::Call => true,
+            Opcode::Lddw => matches!(self.operands.get(1), Some(Token::Identifier(_, _))),
+            _ => false,
+        }
+    }
+
+    /// Returns the relocation type and the referenced symbol name.
+    ///
+    /// `Lddw` reads its second operand and yields `RSbf64Relative`; every
+    /// other opcode reads its first operand and yields `RSbfSyscall`.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Expected label operand" when the relevant operand is
+    /// missing or is not a `Token::Identifier`.
+    pub fn get_relocation_info(&self) -> (RelocationType, String) {
+        let (index, rel_type) = match self.opcode {
+            Opcode::Lddw => (1, RelocationType::RSbf64Relative),
+            _ => (0, RelocationType::RSbfSyscall),
+        };
+        // `.get` keeps a short operand list on the descriptive panic below
+        // rather than a bare index-out-of-bounds panic.
+        match self.operands.get(index) {
+            Some(Token::Identifier(name, _)) => (rel_type, name.clone()),
+            _ => panic!("Expected label operand"),
+        }
+    }
+}
+
+/// A named read-only data entry and the tokens that make up its contents.
+#[derive(Debug, Clone)]
+pub struct ROData {
+    pub name: String,
+    pub args: Vec<Token>,
+    pub line_number: usize,
+}
+
+impl ROData {
+    /// Total byte length of all string-literal arguments; other argument
+    /// kinds contribute nothing.
+    pub fn get_size(&self) -> u64 {
+        self.args
+            .iter()
+            .map(|arg| match arg {
+                Token::StringLiteral(text, _) => text.len() as u64,
+                _ => 0,
+            })
+            .sum()
+    }
+}
+
+impl ASTNode {
+    /// Encodes this node and returns its bytes together with a debug map.
+    ///
+    /// * `Instruction` nodes produce 8 bytes (16 for `Lddw`) and a map with a
+    ///   single entry keyed by the node's `offset`.
+    /// * `ROData` nodes produce the concatenated bytes of their string-literal
+    ///   arguments and an empty map.
+    /// * Every other node returns `None`.
+    pub fn bytecode_with_debug_map(&self) -> Option<(Vec<u8>, HashMap<u64, DebugInfo>)> {
+        match self {
+            ASTNode::Instruction { instruction: Instruction { opcode, operands, line_number }, offset } => {
+                // Encoded length: 16 bytes for lddw, 8 for everything else.
+                let target_len = if *opcode == Opcode::Lddw { 16 } else { 8 };
+                let mut bytes = Vec::with_capacity(target_len);
+                let mut debug_map = HashMap::new();
+                let mut debug_info = DebugInfo::new(*line_number);
+                bytes.push(opcode.to_bytecode()); // 1 byte opcode
+
+                if *opcode == Opcode::Call {
+                    // currently hardcoded to call sol_log_
+                    bytes.extend_from_slice(&[0x10, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF]);
+                } else {
+                    match &operands[..] {
+                        [Token::ImmediateValue(imm, _)] => {
+                            // 1 byte of zeros (no register)
+                            bytes.push(0);
+
+                            let raw = match imm {
+                                ImmediateValue::Int(val) | ImmediateValue::Addr(val) => *val,
+                            };
+                            if *opcode == Opcode::Ja {
+                                // 2 bytes immediate value in little-endian for 'ja'
+                                bytes.extend_from_slice(&(raw as i16).to_le_bytes());
+                            } else {
+                                // 4 bytes immediate value in little-endian.
+                                // NOTE(review): this places the immediate in bytes
+                                // 2..6 of the instruction; confirm that is the
+                                // intended layout for opcodes reaching this arm.
+                                bytes.extend_from_slice(&(raw as i32).to_le_bytes());
+                            }
+                        }
+
+                        [Token::Register(reg, _), Token::ImmediateValue(imm, _)] => {
+                            // 1 byte register number
+                            bytes.push(*reg);
+
+                            // 2 bytes of zeros (offset/reserved)
+                            bytes.extend_from_slice(&[0, 0]);
+
+                            // An address immediate marks the register as holding an address.
+                            if let ImmediateValue::Addr(_) = imm {
+                                debug_info.register_hint = RegisterHint {
+                                    register: *reg as usize,
+                                    register_type: RegisterType::Addr,
+                                };
+                            }
+
+                            // 4 bytes immediate value in little-endian.
+                            // NOTE(review): for lddw only the low 32 bits are
+                            // written here; the second 8 bytes stay zero padding.
+                            // Confirm wider values are handled elsewhere.
+                            let imm32 = match imm {
+                                ImmediateValue::Int(val) | ImmediateValue::Addr(val) => *val as i32,
+                            };
+                            bytes.extend_from_slice(&imm32.to_le_bytes());
+                        }
+
+                        [Token::Register(reg, _), Token::ImmediateValue(imm, _), Token::ImmediateValue(off, _)] => {
+                            // 1 byte register number
+                            bytes.push(*reg);
+
+                            // 2 bytes of offset in little-endian
+                            let offset16 = match off {
+                                ImmediateValue::Int(val) | ImmediateValue::Addr(val) => *val as u16,
+                            };
+                            bytes.extend_from_slice(&offset16.to_le_bytes());
+
+                            // An address immediate marks the register as holding an address.
+                            if let ImmediateValue::Addr(_) = imm {
+                                debug_info.register_hint = RegisterHint {
+                                    register: *reg as usize,
+                                    register_type: RegisterType::Addr,
+                                };
+                            }
+
+                            // 4 bytes immediate value in little-endian
+                            let imm32 = match imm {
+                                ImmediateValue::Int(val) | ImmediateValue::Addr(val) => *val as i32,
+                            };
+                            bytes.extend_from_slice(&imm32.to_le_bytes());
+                        }
+
+                        [Token::Register(dst, _), Token::Register(src, _)] => {
+                            // Combine src and dst into a single byte (src in high nibble, dst in low nibble)
+                            bytes.push((*src << 4) | *dst);
+                        }
+
+                        [Token::Register(dst, _), Token::Register(reg, _), Token::ImmediateValue(off, _)] => {
+                            // Combine base register and destination register into a single byte
+                            bytes.push((*reg << 4) | *dst);
+
+                            // TODO : should only be an int
+                            // Add the offset as a 16-bit value in little-endian
+                            let offset16 = match off {
+                                ImmediateValue::Int(val) | ImmediateValue::Addr(val) => *val as u16,
+                            };
+                            bytes.extend_from_slice(&offset16.to_le_bytes());
+                        }
+
+                        // Any other operand shape encodes as the opcode followed by zero padding.
+                        _ => {}
+                    }
+                }
+
+                // Zero-pad to the full instruction width; never truncates.
+                if bytes.len() < target_len {
+                    bytes.resize(target_len, 0);
+                }
+
+                debug_map.insert(*offset, debug_info);
+
+                Some((bytes, debug_map))
+            }
+            ASTNode::ROData { rodata: ROData { args, .. }, .. } => {
+                let mut bytes = Vec::new();
+                // No debug entries are recorded for read-only data.
+                let debug_map = HashMap::<u64, DebugInfo>::new();
+                for arg in args {
+                    if let Token::StringLiteral(s, _) = arg {
+                        // Raw string bytes only; no terminator is appended.
+                        bytes.extend_from_slice(s.as_bytes());
+                    }
+                }
+                Some((bytes, debug_map))
+            }
+            _ => None,
+        }
+    }
+
+    // Keep the old bytecode method for backward compatibility
+    /// Same as `bytecode_with_debug_map`, discarding the debug map.
+    pub fn bytecode(&self) -> Option<Vec<u8>> {
+        self.bytecode_with_debug_map().map(|(bytes, _)| bytes)
+    }
+}
+
+/// Parses an expression of the form `<base> + <offset>`.
+///
+/// The input must contain exactly one `+`. Both sides are trimmed, the left
+/// side is returned as-is and the right side must parse as an `i32`.
+/// Returns `None` for any other shape.
+fn parse_expression(expr: &str) -> Option<(String, i32)> {
+    let mut parts = expr.split('+').map(str::trim);
+    match (parts.next(), parts.next(), parts.next()) {
+        // Exactly two parts: base register text and a numeric offset.
+        (Some(base_reg), Some(offset), None) => offset
+            .parse::<i32>()
+            .ok()
+            .map(|offset| (base_reg.to_string(), offset)),
+        _ => None,
+    }
+}

+ 45 - 0
crates/assembler/src/debuginfo.rs

@@ -0,0 +1,45 @@
+/// Kind of value a register hint describes.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum RegisterType {
+    Int,
+    Addr,
+    Null,
+}
+
+impl RegisterType {
+    /// Lower-case name of the variant: `"int"`, `"addr"` or `"null"`.
+    pub fn to_string(&self) -> &'static str {
+        match *self {
+            Self::Addr => "addr",
+            Self::Int => "int",
+            Self::Null => "null",
+        }
+    }
+}
+
+/// Associates a register index with a `RegisterType`.
+#[derive(Debug, Clone)]
+pub struct RegisterHint {
+    pub register: usize,
+    pub register_type: RegisterType,
+}
+
+impl Default for RegisterHint {
+    /// Register 0 with type `RegisterType::Null`.
+    fn default() -> Self {
+        RegisterHint {
+            register_type: RegisterType::Null,
+            register: 0,
+        }
+    }
+}
+
+/// Debug record for one encoded item: a line number plus a register hint.
+#[derive(Debug, Clone)]
+pub struct DebugInfo {
+    pub line_number: usize,
+    pub register_hint: RegisterHint,
+}
+
+impl DebugInfo {
+    /// Creates a record for `line_number` with the default register hint.
+    pub fn new(line_number: usize) -> Self {
+        let register_hint = RegisterHint::default();
+        DebugInfo {
+            line_number,
+            register_hint,
+        }
+    }
+}
+
+

+ 161 - 0
crates/assembler/src/dynsym.rs

@@ -0,0 +1,161 @@
+use std::collections::HashMap;
+
+/// One dynamic symbol table entry; `bytecode` serializes it to 24 bytes.
+#[derive(Debug)]
+pub struct DynamicSymbol {
+    name: u32,      // index into .dynstr section
+    info: u8,       // symbol binding and type
+    other: u8,      // symbol visibility
+    shndx: u16,     // section index
+    value: u64,     // symbol value
+    size: u64,      // symbol size
+}
+
+impl DynamicSymbol {
+    /// Builds an entry from its raw field values.
+    pub fn new(name: u32, info: u8, other: u8, shndx: u16, value: u64, size: u64) -> Self {
+        DynamicSymbol { name, info, other, shndx, value, size }
+    }
+
+    /// Serializes the fields in declaration order, little-endian.
+    pub fn bytecode(&self) -> Vec<u8> {
+        // 4 + 1 + 1 + 2 + 8 + 8 bytes
+        let mut out = Vec::with_capacity(24);
+        out.extend_from_slice(&self.name.to_le_bytes());
+        out.extend_from_slice(&[self.info, self.other]);
+        out.extend_from_slice(&self.shndx.to_le_bytes());
+        for quad in &[self.value, self.size] {
+            out.extend_from_slice(&quad.to_le_bytes());
+        }
+        out
+    }
+
+    /// Returns the stored name index.
+    pub fn get_name(&self) -> u32 {
+        self.name
+    }
+}
+
+/// Role under which a symbol was recorded in a `DynamicSymbolMap`.
+#[derive(Debug, Clone, PartialEq)]
+pub enum SymbolKind {
+    EntryPoint,
+    CallTarget,
+}
+
+/// Maps a symbol name to every `(kind, offset)` pair recorded for it, in
+/// insertion order per name.
+#[derive(Debug, Clone, Default)]
+pub struct DynamicSymbolMap {
+    symbols: HashMap<String, Vec<(SymbolKind, u64)>>,
+}
+
+impl DynamicSymbolMap {
+    /// Creates an empty map.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Returns a deep copy of the map (equivalent to `clone`).
+    pub fn copy(&self) -> Self {
+        self.clone()
+    }
+
+    /// Appends a `(kind, offset)` record under `name`.
+    pub fn add_symbol(&mut self, name: String, kind: SymbolKind, offset: u64) {
+        self.symbols
+            .entry(name)
+            .or_default()
+            .push((kind, offset));
+    }
+
+    /// Records `name` as an entry point at `offset`.
+    pub fn add_entry_point(&mut self, name: String, offset: u64) {
+        self.add_symbol(name, SymbolKind::EntryPoint, offset);
+    }
+
+    /// Records `name` as a call target at `offset`.
+    pub fn add_call_target(&mut self, name: String, offset: u64) {
+        self.add_symbol(name, SymbolKind::CallTarget, offset);
+    }
+
+    /// Names recorded as entry points, each with its first such offset.
+    /// The order of the returned vector is unspecified.
+    pub fn get_entry_points(&self) -> Vec<(String, u64)> {
+        self.get_symbols_by_kind(SymbolKind::EntryPoint)
+    }
+
+    /// Names recorded as call targets, each with its first such offset.
+    /// The order of the returned vector is unspecified.
+    pub fn get_call_targets(&self) -> Vec<(String, u64)> {
+        self.get_symbols_by_kind(SymbolKind::CallTarget)
+    }
+
+    /// For every name with at least one record of `kind`, yields the name and
+    /// the offset of the first such record.
+    fn get_symbols_by_kind(&self, kind: SymbolKind) -> Vec<(String, u64)> {
+        self.symbols
+            .iter()
+            .filter_map(|(name, records)| {
+                // Single scan: the first matching record decides both
+                // inclusion and the reported offset.
+                records
+                    .iter()
+                    .find(|(k, _)| *k == kind)
+                    .map(|(_, offset)| (name.clone(), *offset))
+            })
+            .collect()
+    }
+
+    /// All records stored under `name`, if any.
+    pub fn get_symbol(&self, name: &str) -> Option<&Vec<(SymbolKind, u64)>> {
+        self.symbols.get(name)
+    }
+
+    /// Borrow of the underlying name -> records map.
+    pub fn get_symbols(&self) -> &HashMap<String, Vec<(SymbolKind, u64)>> {
+        &self.symbols
+    }
+}
+
+/// Relocation kinds produced by the assembler.
+///
+/// The discriminants (0x08 and 0x0a) are the same two numeric values that
+/// `RelDyn::bytecode` special-cases when serializing an entry.
+#[derive(Debug, Clone, Copy, PartialEq)]
+#[repr(u64)]
+pub enum RelocationType {
+    RSbf64Relative = 0x08,
+    RSbfSyscall = 0x0a,
+}
+
+/// One dynamic relocation entry.
+#[derive(Debug, Clone, PartialEq)]
+pub struct RelDyn {
+    offset: u64,
+    rel_type: u64,
+    dynstr_offset: u64,
+}
+
+impl RelDyn {
+    /// Builds an entry from its raw field values.
+    pub fn new(offset: u64, rel_type: u64, dynstr_offset: u64) -> Self {
+        RelDyn { offset, rel_type, dynstr_offset }
+    }
+
+    /// Serializes the entry, little-endian.
+    ///
+    /// The 8-byte `offset` always comes first. Type `0x08` then appends the
+    /// type as 8 bytes; type `0x0a` appends the type and `dynstr_offset`, each
+    /// truncated to 4 bytes. Any other type appends nothing.
+    pub fn bytecode(&self) -> Vec<u8> {
+        let mut out = self.offset.to_le_bytes().to_vec();
+
+        match self.rel_type {
+            // 8 bytes rel_type
+            0x08 => out.extend_from_slice(&self.rel_type.to_le_bytes()),
+            0x0a => {
+                // 4 bytes rel_type
+                out.extend_from_slice(&(self.rel_type as u32).to_le_bytes());
+                // 4 bytes dynstr_offset
+                out.extend_from_slice(&(self.dynstr_offset as u32).to_le_bytes());
+            }
+            _ => {}
+        }
+
+        out
+    }
+}
+
+/// Relocations grouped by offset; several relocations may share an offset.
+#[derive(Debug, Clone, Default)]
+pub struct RelDynMap {
+    rel_dyns: HashMap<u64, Vec<(RelocationType, String)>>,
+}
+
+impl RelDynMap {
+    /// Creates an empty map.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Appends a `(rel_type, name)` relocation at `offset`.
+    pub fn add_rel_dyn(&mut self, offset: u64, rel_type: RelocationType, name: String) {
+        self.rel_dyns.entry(offset).or_default().push((rel_type, name));
+    }
+
+    /// Flattens the map into `(offset, type, name)` triples.
+    ///
+    /// Triples sharing an offset keep their insertion order; the order across
+    /// different offsets is unspecified.
+    pub fn get_rel_dyns(&self) -> Vec<(u64, RelocationType, String)> {
+        self.rel_dyns
+            .iter()
+            .flat_map(|(offset, entries)| {
+                entries
+                    .iter()
+                    .map(move |(rel_type, name)| (*offset, *rel_type, name.clone()))
+            })
+            .collect()
+    }
+
+    /// Returns a deep copy of the map (equivalent to `clone`).
+    pub fn copy(&self) -> Self {
+        self.clone()
+    }
+}

+ 210 - 0
crates/assembler/src/header.rs

@@ -0,0 +1,210 @@
+/// The fields of a 64-bit ELF file header, in the order `ElfHeader::bytecode`
+/// serializes them.
+#[derive(Debug)]
+pub struct ElfHeader {
+    pub e_ident: [u8; 16],      // ELF identification bytes = [127, 69, 76, 70, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+    pub e_type: u16,            // Object file type = 3 (ET_DYN)
+    pub e_machine: u16,         // Machine architecture = 247 (BPF)
+    pub e_version: u32,         // Object file version = 1
+    pub e_entry: u64,           // Entry point address
+    pub e_phoff: u64,           // Program header offset
+    pub e_shoff: u64,           // Section header offset
+    pub e_flags: u32,           // Processor-specific flags
+    pub e_ehsize: u16,          // ELF header size = 64
+    pub e_phentsize: u16,       // Size of program header entry = 56
+    pub e_phnum: u16,           // Number of program header entries
+    pub e_shentsize: u16,       // Size of section header entry = 64
+    pub e_shnum: u16,           // Number of section header entries
+    pub e_shstrndx: u16,        // Section name string table index
+}
+
+/// The fields of a 64-bit ELF program header, in the order
+/// `ProgramHeader::bytecode` serializes them.
+#[derive(Debug)]
+pub struct ProgramHeader {
+    pub p_type: u32,      // Type of segment
+    pub p_flags: u32,     // Segment attributes
+    pub p_offset: u64,    // Offset in file
+    pub p_vaddr: u64,     // Virtual address in memory
+    pub p_paddr: u64,     // Physical address (reserved)
+    pub p_filesz: u64,    // Size of segment in file
+    pub p_memsz: u64,     // Size of segment in memory
+    pub p_align: u64,     // Alignment of segment
+}
+
+impl ElfHeader {
+    /// Identification bytes used by every header built with `new`.
+    const SOLANA_IDENT: [u8; 16] = [
+        0x7f, 0x45, 0x4c, 0x46,  // EI_MAG0..EI_MAG3 ("\x7FELF")
+        0x02,                     // EI_CLASS (64-bit)
+        0x01,                     // EI_DATA (little endian)
+        0x01,                     // EI_VERSION
+        0x00,                     // EI_OSABI
+        0x00,                     // EI_ABIVERSION
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  // EI_PAD
+    ];
+    const SOLANA_TYPE: u16 = 3;      // ET_DYN
+    const SOLANA_MACHINE: u16 = 247;  // BPF
+    const SOLANA_VERSION: u32 = 1;    // EV_CURRENT
+    const ELF64_HEADER_SIZE: u16 = 64;
+    const PROGRAM_HEADER_SIZE: u16 = 56;
+    const SECTION_HEADER_SIZE: u16 = 64;
+
+    /// Builds a header with the fixed identification, type, machine, version
+    /// and size fields filled in. The program headers are placed directly
+    /// after the ELF header (`e_phoff` = 64); every count, index, flag and
+    /// remaining offset starts at zero.
+    pub fn new() -> Self {
+        let header_size = Self::ELF64_HEADER_SIZE;
+        ElfHeader {
+            e_ident: Self::SOLANA_IDENT,
+            e_type: Self::SOLANA_TYPE,
+            e_machine: Self::SOLANA_MACHINE,
+            e_version: Self::SOLANA_VERSION,
+            e_entry: 0,
+            e_phoff: header_size as u64,
+            e_shoff: 0,
+            e_flags: 0,
+            e_ehsize: header_size,
+            e_phentsize: Self::PROGRAM_HEADER_SIZE,
+            e_phnum: 0,
+            e_shentsize: Self::SECTION_HEADER_SIZE,
+            e_shnum: 0,
+            e_shstrndx: 0,
+        }
+    }
+
+    /// Serializes the header: `e_ident` followed by every other field in
+    /// declaration order, little-endian (64 bytes in total).
+    pub fn bytecode(&self) -> Vec<u8> {
+        let mut out = Vec::with_capacity(Self::ELF64_HEADER_SIZE as usize);
+
+        // e_ident (16 bytes)
+        out.extend_from_slice(&self.e_ident);
+
+        // e_type, e_machine
+        for half in &[self.e_type, self.e_machine] {
+            out.extend_from_slice(&half.to_le_bytes());
+        }
+        out.extend_from_slice(&self.e_version.to_le_bytes());
+        // e_entry, e_phoff, e_shoff
+        for quad in &[self.e_entry, self.e_phoff, self.e_shoff] {
+            out.extend_from_slice(&quad.to_le_bytes());
+        }
+        out.extend_from_slice(&self.e_flags.to_le_bytes());
+        // sizes, counts and the string-table index
+        for half in &[
+            self.e_ehsize,
+            self.e_phentsize,
+            self.e_phnum,
+            self.e_shentsize,
+            self.e_shnum,
+            self.e_shstrndx,
+        ] {
+            out.extend_from_slice(&half.to_le_bytes());
+        }
+
+        out
+    }
+}
+
+impl ProgramHeader {
+
+    const PT_LOAD: u32 = 1;      // Loadable segment
+    const PT_DYNAMIC: u32 = 2;   // Dynamic linking information
+
+    const PF_X: u32 = 1;         // Executable
+    const PF_W: u32 = 2;         // Writable
+    const PF_R: u32 = 4;         // Readable
+
+    const PAGE_SIZE: u64 = 4096;          // Standard page size
+
+    /// Builds a `PT_LOAD` header for `size` bytes at `offset`.
+    ///
+    /// The segment is always readable and additionally executable when
+    /// `executable` is set. The virtual and physical addresses both equal
+    /// `offset`, file and memory sizes both equal `size`, and the alignment
+    /// is the 4096-byte page size.
+    pub fn new_load(offset: u64, size: u64, executable: bool) -> Self {
+        let mut p_flags = Self::PF_R;
+        if executable {
+            p_flags |= Self::PF_X;
+        }
+
+        Self {
+            p_type: Self::PT_LOAD,
+            p_flags,
+            p_offset: offset,
+            p_vaddr: offset,
+            p_paddr: offset,
+            p_filesz: size,
+            p_memsz: size,
+            p_align: Self::PAGE_SIZE,
+        }
+    }
+
+    /// Builds a readable and writable `PT_DYNAMIC` header for `size` bytes at
+    /// `offset`, with addresses equal to `offset` and 8-byte alignment.
+    pub fn new_dynamic(offset: u64, size: u64) -> Self {
+        Self {
+            p_type: Self::PT_DYNAMIC,
+            p_flags: Self::PF_R | Self::PF_W,
+            p_offset: offset,
+            p_vaddr: offset,
+            p_paddr: offset,
+            p_filesz: size,
+            p_memsz: size,
+            p_align: 8,
+        }
+    }
+
+    /// Serializes all fields in declaration order, little-endian (56 bytes).
+    pub fn bytecode(&self) -> Vec<u8> {
+        let mut out = Vec::with_capacity(56); // Size of program header is 56 bytes
+
+        out.extend_from_slice(&self.p_type.to_le_bytes());
+        out.extend_from_slice(&self.p_flags.to_le_bytes());
+        for quad in &[
+            self.p_offset,
+            self.p_vaddr,
+            self.p_paddr,
+            self.p_filesz,
+            self.p_memsz,
+            self.p_align,
+        ] {
+            out.extend_from_slice(&quad.to_le_bytes());
+        }
+
+        out
+    }
+}
+/// The fields of a 64-bit ELF section header, in serialization order.
+#[derive(Debug)]
+pub struct SectionHeader {
+    sh_name: u32,      // Section name (string table index)
+    sh_type: u32,      // Section type
+    sh_flags: u64,     // Section flags
+    sh_addr: u64,      // Section virtual addr at execution
+    sh_offset: u64,    // Section file offset
+    sh_size: u64,      // Section size in bytes
+    sh_link: u32,      // Link to another section
+    sh_info: u32,      // Additional section info
+    sh_addralign: u64, // Section alignment
+    sh_entsize: u64,   // Entry size if section holds table
+}
+
+impl SectionHeader {
+    // Section types
+    pub const SHT_NULL: u32 = 0;          // Section header table entry unused
+    pub const SHT_PROGBITS: u32 = 1;      // Program data
+    pub const SHT_STRTAB: u32 = 3;        // String table
+    pub const SHT_NOBITS: u32 = 8;        // Program space with no data (bss)
+    pub const SHT_DYNAMIC: u32 = 6;      // Dynamic section
+    pub const SHT_DYNSYM: u32 = 11;      // Dynamic symbol table
+    pub const SHT_REL: u32 = 9;          // Relocation table
+
+    // Section flags
+    pub const SHF_WRITE: u64 = 0x1;       // Writable
+    pub const SHF_ALLOC: u64 = 0x2;       // Occupies memory during execution
+    pub const SHF_EXECINSTR: u64 = 0x4;   // Executable
+
+    /// Builds a section header; each argument is stored unchanged in the
+    /// `sh_*` field of the corresponding name.
+    pub fn new(name_offset: u32, sh_type: u32, flags: u64, addr: u64, offset: u64, size: u64, link: u32, info: u32, addralign: u64, entsize: u64) -> Self {
+        SectionHeader {
+            sh_name: name_offset,
+            sh_type,
+            sh_flags: flags,
+            sh_addr: addr,
+            sh_offset: offset,
+            sh_size: size,
+            sh_link: link,
+            sh_info: info,
+            sh_addralign: addralign,
+            sh_entsize: entsize,
+        }
+    }
+
+    /// Serializes all fields in declaration order, little-endian (64 bytes).
+    pub fn bytecode(&self) -> Vec<u8> {
+        let mut out = Vec::with_capacity(64); // Size of section header is 64 bytes
+
+        // sh_name, sh_type
+        for word in &[self.sh_name, self.sh_type] {
+            out.extend_from_slice(&word.to_le_bytes());
+        }
+        // sh_flags, sh_addr, sh_offset, sh_size
+        for quad in &[self.sh_flags, self.sh_addr, self.sh_offset, self.sh_size] {
+            out.extend_from_slice(&quad.to_le_bytes());
+        }
+        // sh_link, sh_info
+        for word in &[self.sh_link, self.sh_info] {
+            out.extend_from_slice(&word.to_le_bytes());
+        }
+        // sh_addralign, sh_entsize
+        for quad in &[self.sh_addralign, self.sh_entsize] {
+            out.extend_from_slice(&quad.to_le_bytes());
+        }
+
+        out
+    }
+}

+ 90 - 0
crates/assembler/src/instruction_verifier.rs

@@ -0,0 +1,90 @@
+use crate::opcode::Opcode;
+use crate::lexer::Token;
+
+/// Checks that `operands` has a shape accepted for `opcode`.
+///
+/// Returns `Ok(())` when the operand count and operand kinds are accepted,
+/// otherwise an `Err` carrying a human-readable message. Opcodes that are not
+/// listed here are rejected as unsupported.
+pub fn verify_instruction(opcode: &Opcode, operands: &[Token]) -> Result<(), String> {
+    let count = operands.len();
+    match opcode {
+        // 32-bit and 64-bit ALU operations: `reg, reg` or `reg, imm`.
+        Opcode::Add32 | Opcode::Sub32 | Opcode::Mul32 | Opcode::Div32 | Opcode::Or32
+        | Opcode::And32 | Opcode::Lsh32 | Opcode::Rsh32 | Opcode::Mod32 | Opcode::Xor32
+        | Opcode::Mov32 | Opcode::Arsh32 | Opcode::Lmul32 | Opcode::Udiv32 | Opcode::Urem32
+        | Opcode::Sdiv32 | Opcode::Srem32 | Opcode::Neg32
+        | Opcode::Add64 | Opcode::Sub64 | Opcode::Mul64 | Opcode::Div64 | Opcode::Or64
+        | Opcode::And64 | Opcode::Lsh64 | Opcode::Rsh64 | Opcode::Mod64 | Opcode::Xor64
+        | Opcode::Mov64 | Opcode::Arsh64 | Opcode::Lmul64 | Opcode::Uhmul64 | Opcode::Udiv64
+        | Opcode::Urem64 | Opcode::Sdiv64 | Opcode::Srem64 => match operands {
+            [Token::Register(..), Token::Register(..)]
+            | [Token::Register(..), Token::ImmediateValue(..)] => Ok(()),
+            [_, _] => Err(format!("Invalid operands for {:?}", opcode)),
+            _ => Err(format!("Expected 2 operands for {:?}, got {}", opcode, count)),
+        },
+        // Conditional jumps: `reg, reg|imm, label`.
+        Opcode::Jeq | Opcode::Jgt | Opcode::Jge | Opcode::Jlt | Opcode::Jle | Opcode::Jset
+        | Opcode::Jne | Opcode::Jsgt | Opcode::Jsge | Opcode::Jslt | Opcode::Jsle => match operands {
+            [Token::Register(..), Token::Register(..), Token::Label(..)]
+            | [Token::Register(..), Token::ImmediateValue(..), Token::Label(..)] => Ok(()),
+            [_, _, _] => Err(format!("Invalid operands for {:?}", opcode)),
+            _ => Err(format!("Expected 3 operands for {:?}, got {}", opcode, count)),
+        },
+        // Unconditional jump: a label or an immediate.
+        Opcode::Ja => match operands {
+            [Token::Label(..)] | [Token::ImmediateValue(..)] => Ok(()),
+            [_] => Err(format!("Invalid operand for {:?}", opcode)),
+            _ => Err(format!("Expected 1 operand for {:?}, got {}", opcode, count)),
+        },
+        Opcode::Exit => {
+            if operands.is_empty() {
+                Ok(())
+            } else {
+                Err(format!("Expected no operands for {:?}, got {}", opcode, count))
+            }
+        }
+        // Call: exactly one label.
+        Opcode::Call => match operands {
+            [Token::Label(..)] => Ok(()),
+            [_] => Err(format!("Invalid operand for {:?}", opcode)),
+            _ => Err(format!("Expected 1 operand for {:?}, got {}", opcode, count)),
+        },
+        // lddw: `reg, imm` or `reg, label`.
+        Opcode::Lddw => match operands {
+            [Token::Register(..), Token::ImmediateValue(..)]
+            | [Token::Register(..), Token::Label(..)] => Ok(()),
+            [_, _] => Err(format!("Invalid operands for {:?}", opcode)),
+            _ => Err(format!("Expected 2 operands for {:?}, got {}", opcode, count)),
+        },
+        // store operations - deprecated
+        Opcode::Stb | Opcode::Sth | Opcode::Stw | Opcode::Stdw => {
+            Err(format!("{} is deprecated", opcode.to_str()))
+        }
+        // negate operations - takes register
+        Opcode::Neg64 => match operands {
+            [Token::Register(..)] => Ok(()),
+            // Wrong count and wrong kind share the same usage message.
+            _ => Err(format!("{} reg", opcode.to_str())),
+        },
+        // le be
+        Opcode::Le | Opcode::Be => Err(format!("Unsure how to handle {}", opcode.to_str())),
+        _ => Err(format!("Unsupported opcode: {:?}", opcode)),
+    }
+}

+ 171 - 0
crates/assembler/src/lexer.rs

@@ -0,0 +1,171 @@
+use crate::opcode::Opcode;
+
+/// Binary operator recognised by the lexer.
+#[derive(Debug, Clone)]
+pub enum Op {
+    /// Emitted by `tokenize` for a `+` character.
+    Add,
+    /// Emitted by `tokenize` for a `-` character.
+    Sub,
+}
+
+/// A numeric literal read from the source text.
+#[derive(Debug, Clone, PartialEq)]
+pub enum ImmediateValue {
+    /// Literal written as plain decimal digits.
+    Int(i64),
+    /// Literal written with a `0x` prefix and parsed as base 16.
+    Addr(i64),
+}
+
+/// `+` for immediates; defined only when both operands are `Int`.
+impl std::ops::Add for ImmediateValue {
+    type Output = ImmediateValue;
+
+    /// Returns the sum of two `Int` values.
+    ///
+    /// # Panics
+    ///
+    /// Panics when either operand is not `ImmediateValue::Int`.
+    fn add(self, other: Self) -> ImmediateValue {
+        if let (ImmediateValue::Int(lhs), ImmediateValue::Int(rhs)) = (self, other) {
+            return ImmediateValue::Int(lhs + rhs);
+        }
+        panic!("Invalid addition of ImmediateValue")
+    }
+}
+
+/// `-` for immediates; defined only when both operands are `Int`.
+impl std::ops::Sub for ImmediateValue {
+    type Output = ImmediateValue;
+
+    /// Returns the difference of two `Int` values.
+    ///
+    /// # Panics
+    ///
+    /// Panics when either operand is not `ImmediateValue::Int`.
+    fn sub(self, other: Self) -> ImmediateValue {
+        if let (ImmediateValue::Int(lhs), ImmediateValue::Int(rhs)) = (self, other) {
+            return ImmediateValue::Int(lhs - rhs);
+        }
+        panic!("Invalid subtraction of ImmediateValue")
+    }
+}
+
+/// A lexical token produced by `tokenize`.
+///
+/// The trailing `usize` of every variant is the 1-based source line number
+/// the token was read from.
+#[derive(Debug, Clone)]
+pub enum Token {
+    /// Name following a leading `.` (the dot itself is not stored).
+    Directive(String, usize),
+    /// Identifier that ended with `:`; trailing colons are stripped.
+    Label(String, usize),
+    /// Any other word that is not a label, register or known opcode.
+    Identifier(String, usize),
+    /// Word accepted by `Opcode::from_str`.
+    Opcode(Opcode, usize),
+    /// `r` followed by decimal digits; the payload is the parsed number.
+    Register(u8, usize),
+    /// Decimal or `0x`-prefixed numeric literal.
+    ImmediateValue(ImmediateValue, usize),
+    /// `+` or `-`.
+    BinaryOp(Op, usize),
+    /// Text between a pair of double quotes (quotes not included).
+    StringLiteral(String, usize),
+
+    /// `[`
+    LeftBracket(usize),
+    /// `]`
+    RightBracket(usize),
+    /// `,`
+    Comma(usize),
+    /// NOTE(review): `tokenize` in this file never emits this variant; a `:`
+    /// is only accepted as part of an identifier (forming a `Label`).
+    Colon(usize),
+}
+
+/// Splits assembly source text into a flat list of [`Token`]s.
+///
+/// The input is processed line by line and every token records the 1-based
+/// line number it came from. Text after `//` or `#` is discarded as a comment
+/// unless the marker sits inside a double-quoted string literal.
+///
+/// Recognised tokens:
+/// * decimal literals (`Int`) and `0x`-prefixed hexadecimal literals (`Addr`,
+///   upper- or lower-case digits),
+/// * words: `name:` becomes a `Label`, `r<digits>` a `Register`, known
+///   mnemonics an `Opcode`, everything else an `Identifier`,
+/// * `.name` directives, `"..."` string literals, and the punctuation
+///   `+ - [ ] ,`.
+///
+/// # Errors
+///
+/// Returns a message naming the offending line when a numeric literal or a
+/// register number cannot be parsed, when a string literal has no closing
+/// quote, or when an unexpected character is encountered.
+pub fn tokenize(source: &str) -> Result<Vec<Token>, String> {
+    let mut tokens = Vec::new();
+
+    for (index, raw_line) in source.lines().enumerate() {
+        let line_number = index + 1;
+        let line = strip_comment(raw_line).trim();
+        let mut chars = line.chars().peekable();
+
+        while let Some(&c) = chars.peek() {
+            match c {
+                c if c.is_ascii_digit() => {
+                    let mut number = String::new();
+                    let mut is_addr = false;
+                    while let Some(&c) = chars.peek() {
+                        if is_addr {
+                            // After the `0x` prefix only hex digits belong to the literal.
+                            if !c.is_ascii_hexdigit() {
+                                break;
+                            }
+                            number.push(c);
+                            chars.next();
+                        } else if c.is_ascii_digit() {
+                            number.push(c);
+                            chars.next();
+                        } else if number == "0" && c == 'x' {
+                            // Drop the "0x" prefix and switch to base 16.
+                            chars.next();
+                            is_addr = true;
+                            number.clear();
+                        } else {
+                            break;
+                        }
+                    }
+                    let parsed = if is_addr {
+                        i64::from_str_radix(&number, 16).map(ImmediateValue::Addr)
+                    } else {
+                        number.parse::<i64>().map(ImmediateValue::Int)
+                    };
+                    let value = parsed
+                        .map_err(|_| format!("Invalid number on line {}", line_number))?;
+                    tokens.push(Token::ImmediateValue(value, line_number));
+                }
+
+                // TODO: add address and syscall tokens
+                c if c.is_alphanumeric() || c == '_' => {
+                    let mut identifier = String::new();
+                    while let Some(&c) = chars.peek() {
+                        if c.is_alphanumeric() || c == '_' || c == ':' {
+                            identifier.push(c);
+                            chars.next();
+                        } else {
+                            break;
+                        }
+                    }
+                    if identifier.ends_with(':') {
+                        let label_name = identifier.trim_end_matches(':').to_string();
+                        tokens.push(Token::Label(label_name, line_number));
+                    } else if let Some(digits) = identifier
+                        .strip_prefix('r')
+                        // A bare "r" is an ordinary identifier, not a register.
+                        .filter(|d| !d.is_empty() && d.chars().all(|c| c.is_ascii_digit()))
+                    {
+                        let register = digits.parse::<u8>().map_err(|_| {
+                            format!("Invalid register '{}' on line {}", identifier, line_number)
+                        })?;
+                        tokens.push(Token::Register(register, line_number));
+                    } else if let Ok(opcode) = Opcode::from_str(&identifier) {
+                        tokens.push(Token::Opcode(opcode, line_number));
+                    } else {
+                        tokens.push(Token::Identifier(identifier, line_number));
+                    }
+                }
+                c if c.is_whitespace() => {
+                    chars.next();
+                }
+                '+' => {
+                    chars.next();
+                    tokens.push(Token::BinaryOp(Op::Add, line_number));
+                }
+                '-' => {
+                    chars.next();
+                    tokens.push(Token::BinaryOp(Op::Sub, line_number));
+                }
+                '.' => {
+                    chars.next();
+                    // Peek instead of `take_while` so the character that ends
+                    // the directive name is left for the next token.
+                    let mut directive = String::new();
+                    while let Some(&c) = chars.peek() {
+                        if c.is_alphanumeric() || c == '_' {
+                            directive.push(c);
+                            chars.next();
+                        } else {
+                            break;
+                        }
+                    }
+                    tokens.push(Token::Directive(directive, line_number));
+                }
+                '"' => {
+                    chars.next();
+                    let mut string_literal = String::new();
+                    let mut terminated = false;
+                    for c in chars.by_ref() {
+                        if c == '"' {
+                            terminated = true;
+                            break;
+                        }
+                        string_literal.push(c);
+                    }
+                    // Lines never contain '\n', so reaching the end of the
+                    // line without a closing quote is the unterminated case.
+                    if !terminated {
+                        return Err(format!("Unterminated string literal on line {}", line_number));
+                    }
+                    tokens.push(Token::StringLiteral(string_literal, line_number));
+                }
+                '[' => {
+                    chars.next();
+                    tokens.push(Token::LeftBracket(line_number));
+                }
+                ']' => {
+                    chars.next();
+                    tokens.push(Token::RightBracket(line_number));
+                }
+                ',' => {
+                    chars.next();
+                    tokens.push(Token::Comma(line_number));
+                }
+                _ => {
+                    return Err(format!("Unexpected character: '{}' on line {}", c, line_number));
+                }
+            }
+        }
+    }
+    Ok(tokens)
+}
+
+/// Returns `line` with any trailing `//` or `#` comment removed.
+///
+/// Comment markers that appear between double quotes are left alone.
+fn strip_comment(line: &str) -> &str {
+    let mut in_string = false;
+    let mut prev_slash = false;
+    for (idx, c) in line.char_indices() {
+        match c {
+            '"' => {
+                in_string = !in_string;
+                prev_slash = false;
+            }
+            '#' if !in_string => return &line[..idx],
+            '/' if !in_string => {
+                if prev_slash {
+                    // `idx - 1` is the first slash of the pair ('/' is one byte).
+                    return &line[..idx - 1];
+                }
+                prev_slash = true;
+            }
+            _ => prev_slash = false,
+        }
+    }
+    line
+}

+ 69 - 0
crates/assembler/src/lib.rs

@@ -0,0 +1,69 @@
+extern crate num_traits;
+extern crate num_derive;
+extern crate anyhow;
+
+use std::path::Path;
+use std::fs::File;
+use anyhow::{Error, Result};
+
+// Tokenizer and parser
+pub mod parser;
+pub mod lexer;
+pub mod opcode;
+pub mod instruction_verifier;
+pub mod utils;
+
+// Intermediate Representation
+pub mod astnode;
+pub mod dynsym;
+
+// ELF header, program, section
+pub mod header;
+pub mod program;
+pub mod section;
+
+// Debug info
+pub mod debuginfo;
+
+#[cfg(test)]
+mod tests;
+
+// Type aliases for error handling
+pub type ParserError = String;
+pub type ProgramError = String;
+pub type TokenizerError = String;
+
+pub use self::{
+    parser::Parser,
+    program::Program,
+    lexer::tokenize,
+};
+
+/// Assembles the source file at `src` and writes the bytecode into `deploy`.
+///
+/// The output file is placed in the `deploy` directory and takes the source
+/// file's name with its extension replaced by `.so` (for example
+/// `prog.s` becomes `prog.so`).
+///
+/// # Errors
+///
+/// Returns an error when the source cannot be read, when tokenizing or
+/// parsing fails (prefixed with `Tokenizer error:` / `Parser error:`), when
+/// `src` has no file-name component, or when the output cannot be written.
+pub fn assemble(src: &str, deploy: &str) -> Result<()> {
+    let source_code = std::fs::read_to_string(src)?;
+    let tokens = tokenize(&source_code)
+        .map_err(|e| Error::msg(format!("Tokenizer error: {}", e)))?;
+
+    let mut parser = Parser::new(tokens);
+    let parse_result = parser
+        .parse()
+        .map_err(|e| Error::msg(format!("Parser error: {}", e)))?;
+
+    let program = Program::from_parse_result(parse_result);
+
+    let bytecode = program.emit_bytecode();
+
+    // Swap only the extension; a textual replace of ".s" would also rewrite
+    // matches in the middle of the name (e.g. "test.sbpf.s").
+    let file_name = Path::new(src)
+        .file_name()
+        .ok_or_else(|| Error::msg(format!("Source path has no file name: {}", src)))?;
+    let output_path = Path::new(deploy).join(Path::new(file_name).with_extension("so"));
+
+    std::fs::write(output_path, bytecode)?;
+    Ok(())
+}

+ 611 - 0
crates/assembler/src/opcode.rs

@@ -0,0 +1,611 @@
+use num_derive::FromPrimitive;
+
+/// Every instruction mnemonic and instruction encoding known to the assembler.
+///
+/// Many operations appear three times: a bare variant (e.g. `Add64`), which is
+/// what `Opcode::from_str` returns for the mnemonic, and `…Imm` / `…Reg`
+/// variants, which are the ones `Opcode::to_bytecode` and `Opcode::from_u8`
+/// associate with a concrete opcode byte.
+///
+/// No explicit discriminants are given, so the `#[repr(u8)]` value of a
+/// variant is its declaration index, not its bytecode value.
+/// NOTE(review): the derived `FromPrimitive` conversion presumably maps by
+/// that declaration index, which differs from the inherent
+/// `Opcode::from_u8` — confirm which one call sites intend.
+#[derive(Debug, Clone, Copy, PartialEq, FromPrimitive)]
+#[repr(u8)]
+pub enum Opcode {
+    // Loads
+    Lddw,
+    Ldxb,
+    Ldxh,
+    Ldxw,
+    Ldxdw,
+    // Stores
+    Stb,
+    Sth,
+    Stw,
+    Stdw,
+    Stxb,
+    Stxh,
+    Stxw,
+    Stxdw,
+    // 32-bit arithmetic / logic
+    Add32,
+    Add32Imm,
+    Add32Reg,
+    Sub32,
+    Sub32Imm,
+    Sub32Reg,
+    Mul32,
+    Mul32Imm,
+    Mul32Reg,
+    Div32,
+    Div32Imm,
+    Div32Reg,
+    Or32,
+    Or32Imm,
+    Or32Reg,
+    And32,
+    And32Imm,
+    And32Reg,
+    Lsh32,
+    Lsh32Imm,
+    Lsh32Reg,
+    Rsh32,
+    Rsh32Imm,
+    Rsh32Reg,
+    Mod32,
+    Mod32Imm,
+    Mod32Reg,
+    Xor32,
+    Xor32Imm,
+    Xor32Reg,
+    Mov32,
+    Mov32Imm,
+    Mov32Reg,
+    Arsh32,
+    Arsh32Imm,
+    Arsh32Reg,
+    Lmul32,
+    Lmul32Imm,
+    Lmul32Reg,
+    Udiv32,
+    Udiv32Imm,
+    Udiv32Reg,
+    Urem32,
+    Urem32Imm,
+    Urem32Reg,
+    Sdiv32,
+    Sdiv32Imm,
+    Sdiv32Reg,
+    Srem32,
+    Srem32Imm,
+    Srem32Reg,
+    // `le` / `be` mnemonics
+    Le,
+    Be,
+    // 64-bit arithmetic / logic
+    Add64,
+    Add64Imm,
+    Add64Reg,
+    Sub64,
+    Sub64Imm,
+    Sub64Reg,
+    Mul64,
+    Mul64Imm,
+    Mul64Reg,
+    Div64,
+    Div64Imm,
+    Div64Reg,
+    Or64,
+    Or64Imm,
+    Or64Reg,
+    And64,
+    And64Imm,
+    And64Reg,
+    Lsh64,
+    Lsh64Imm,
+    Lsh64Reg,
+    Rsh64,
+    Rsh64Imm,
+    Rsh64Reg,
+    Mod64,
+    Mod64Imm,
+    Mod64Reg,
+    Xor64,
+    Xor64Imm,
+    Xor64Reg,
+    Mov64,
+    Mov64Imm,
+    Mov64Reg,
+    Arsh64,
+    Arsh64Imm,
+    Arsh64Reg,
+    // The `hor64` mnemonic maps straight to this variant; there is no bare
+    // `Hor64` or `Hor64Reg`.
+    Hor64Imm,
+    Lmul64,
+    Lmul64Imm,
+    Lmul64Reg,
+    Uhmul64,
+    Uhmul64Imm,
+    Uhmul64Reg,
+    Udiv64,
+    Udiv64Imm,
+    Udiv64Reg,
+    Urem64,
+    Urem64Imm,
+    Urem64Reg,
+    Shmul64,
+    Shmul64Imm,
+    Shmul64Reg,
+    Sdiv64,
+    Sdiv64Imm,
+    Sdiv64Reg,
+    Srem64,
+    Srem64Imm,
+    Srem64Reg,
+    // Negation
+    Neg32,
+    Neg64,
+    // Jumps
+    Ja,
+    Jeq,
+    JeqImm,
+    JeqReg,
+    Jgt,
+    JgtImm,
+    JgtReg,
+    Jge,
+    JgeImm,
+    JgeReg,
+    Jlt,
+    JltImm,
+    JltReg,
+    Jle,
+    JleImm,
+    JleReg,
+    Jset,
+    JsetImm,
+    JsetReg,
+    Jne,
+    JneImm,
+    JneReg,
+    Jsgt,
+    JsgtImm,
+    JsgtReg,
+    Jsge,
+    JsgeImm,
+    JsgeReg,
+    Jslt,
+    JsltImm,
+    JsltReg,
+    Jsle,
+    JsleImm,
+    JsleReg,
+    // Calls and exit
+    Call,
+    Callx,
+    Exit,
+}
+
+impl Opcode {
+    /// Looks up the opcode for an assembly mnemonic, ignoring letter case.
+    ///
+    /// Mnemonics that exist in immediate and register forms resolve to the
+    /// bare variant (e.g. `"add64"` gives `Opcode::Add64`); `"hor64"` resolves
+    /// to `Opcode::Hor64Imm`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err("Invalid opcode")` for any unrecognised string.
+    pub fn from_str(s: &str) -> Result<Self, &'static str> {
+        let mnemonic = s.to_lowercase();
+        let opcode = match mnemonic.as_str() {
+            "lddw" => Self::Lddw,
+            "ldxb" => Self::Ldxb,
+            "ldxh" => Self::Ldxh,
+            "ldxw" => Self::Ldxw,
+            "ldxdw" => Self::Ldxdw,
+            "stb" => Self::Stb,
+            "sth" => Self::Sth,
+            "stw" => Self::Stw,
+            "stdw" => Self::Stdw,
+            "stxb" => Self::Stxb,
+            "stxh" => Self::Stxh,
+            "stxw" => Self::Stxw,
+            "stxdw" => Self::Stxdw,
+            "add32" => Self::Add32,
+            "sub32" => Self::Sub32,
+            "mul32" => Self::Mul32,
+            "div32" => Self::Div32,
+            "or32" => Self::Or32,
+            "and32" => Self::And32,
+            "lsh32" => Self::Lsh32,
+            "rsh32" => Self::Rsh32,
+            "neg32" => Self::Neg32,
+            "mod32" => Self::Mod32,
+            "xor32" => Self::Xor32,
+            "mov32" => Self::Mov32,
+            "arsh32" => Self::Arsh32,
+            "lmul32" => Self::Lmul32,
+            "udiv32" => Self::Udiv32,
+            "urem32" => Self::Urem32,
+            "sdiv32" => Self::Sdiv32,
+            "srem32" => Self::Srem32,
+            "le" => Self::Le,
+            "be" => Self::Be,
+            "add64" => Self::Add64,
+            "sub64" => Self::Sub64,
+            "mul64" => Self::Mul64,
+            "div64" => Self::Div64,
+            "or64" => Self::Or64,
+            "and64" => Self::And64,
+            "lsh64" => Self::Lsh64,
+            "rsh64" => Self::Rsh64,
+            "neg64" => Self::Neg64,
+            "mod64" => Self::Mod64,
+            "xor64" => Self::Xor64,
+            "mov64" => Self::Mov64,
+            "arsh64" => Self::Arsh64,
+            "hor64" => Self::Hor64Imm,
+            "lmul64" => Self::Lmul64,
+            "uhmul64" => Self::Uhmul64,
+            "udiv64" => Self::Udiv64,
+            "urem64" => Self::Urem64,
+            "shmul64" => Self::Shmul64,
+            "sdiv64" => Self::Sdiv64,
+            "srem64" => Self::Srem64,
+            "ja" => Self::Ja,
+            "jeq" => Self::Jeq,
+            "jgt" => Self::Jgt,
+            "jge" => Self::Jge,
+            "jlt" => Self::Jlt,
+            "jle" => Self::Jle,
+            "jset" => Self::Jset,
+            "jne" => Self::Jne,
+            "jsgt" => Self::Jsgt,
+            "jsge" => Self::Jsge,
+            "jslt" => Self::Jslt,
+            "jsle" => Self::Jsle,
+            "call" => Self::Call,
+            "callx" => Self::Callx,
+            "exit" => Self::Exit,
+            _ => return Err("Invalid opcode"),
+        };
+        Ok(opcode)
+    }
+
+    /// Decodes an opcode byte into its `Opcode` variant.
+    ///
+    /// Only concrete encodings are produced: operations with immediate and
+    /// register forms decode to their `…Imm` / `…Reg` variant, never to the
+    /// bare one. Returns `None` for a byte that is not in the table.
+    pub fn from_u8(u: u8) -> Option<Self> {
+        let decoded = match u {
+            0x18 => Self::Lddw,
+            0x71 => Self::Ldxb,
+            0x69 => Self::Ldxh,
+            0x61 => Self::Ldxw,
+            0x79 => Self::Ldxdw,
+            0x72 => Self::Stb,
+            0x6a => Self::Sth,
+            0x62 => Self::Stw,
+            0x7a => Self::Stdw,
+            0x73 => Self::Stxb,
+            0x6b => Self::Stxh,
+            0x63 => Self::Stxw,
+            0x7b => Self::Stxdw,
+            0x04 => Self::Add32Imm,
+            0x0c => Self::Add32Reg,
+            0x14 => Self::Sub32Imm,
+            0x1c => Self::Sub32Reg,
+            0x24 => Self::Mul32Imm,
+            0x2c => Self::Mul32Reg,
+            0x34 => Self::Div32Imm,
+            0x3c => Self::Div32Reg,
+            0x44 => Self::Or32Imm,
+            0x4c => Self::Or32Reg,
+            0x54 => Self::And32Imm,
+            0x5c => Self::And32Reg,
+            0x64 => Self::Lsh32Imm,
+            0x6c => Self::Lsh32Reg,
+            0x74 => Self::Rsh32Imm,
+            0x7c => Self::Rsh32Reg,
+            0x94 => Self::Mod32Imm,
+            0x9c => Self::Mod32Reg,
+            0xa4 => Self::Xor32Imm,
+            0xac => Self::Xor32Reg,
+            0xb4 => Self::Mov32Imm,
+            0xbc => Self::Mov32Reg,
+            0xc4 => Self::Arsh32Imm,
+            0xcc => Self::Arsh32Reg,
+            0x86 => Self::Lmul32Imm,
+            0x8e => Self::Lmul32Reg,
+            0x46 => Self::Udiv32Imm,
+            0x4e => Self::Udiv32Reg,
+            0x66 => Self::Urem32Imm,
+            0x6e => Self::Urem32Reg,
+            0xc6 => Self::Sdiv32Imm,
+            0xce => Self::Sdiv32Reg,
+            0xe6 => Self::Srem32Imm,
+            0xee => Self::Srem32Reg,
+            0xd4 => Self::Le,
+            0xdc => Self::Be,
+            0x07 => Self::Add64Imm,
+            0x0f => Self::Add64Reg,
+            0x17 => Self::Sub64Imm,
+            0x1f => Self::Sub64Reg,
+            0x27 => Self::Mul64Imm,
+            0x2f => Self::Mul64Reg,
+            0x37 => Self::Div64Imm,
+            0x3f => Self::Div64Reg,
+            0x47 => Self::Or64Imm,
+            0x4f => Self::Or64Reg,
+            0x57 => Self::And64Imm,
+            0x5f => Self::And64Reg,
+            0x67 => Self::Lsh64Imm,
+            0x6f => Self::Lsh64Reg,
+            0x77 => Self::Rsh64Imm,
+            0x7f => Self::Rsh64Reg,
+            0x97 => Self::Mod64Imm,
+            0x9f => Self::Mod64Reg,
+            0xa7 => Self::Xor64Imm,
+            0xaf => Self::Xor64Reg,
+            0xb7 => Self::Mov64Imm,
+            0xbf => Self::Mov64Reg,
+            0xc7 => Self::Arsh64Imm,
+            0xcf => Self::Arsh64Reg,
+            0xf7 => Self::Hor64Imm,
+            0x96 => Self::Lmul64Imm,
+            0x9e => Self::Lmul64Reg,
+            0x36 => Self::Uhmul64Imm,
+            0x3e => Self::Uhmul64Reg,
+            0x56 => Self::Udiv64Imm,
+            0x5e => Self::Udiv64Reg,
+            0x76 => Self::Urem64Imm,
+            0x7e => Self::Urem64Reg,
+            0xb6 => Self::Shmul64Imm,
+            0xbe => Self::Shmul64Reg,
+            0xd6 => Self::Sdiv64Imm,
+            0xde => Self::Sdiv64Reg,
+            0xf6 => Self::Srem64Imm,
+            0xfe => Self::Srem64Reg,
+            0x84 => Self::Neg32,
+            0x87 => Self::Neg64,
+            0x05 => Self::Ja,
+            0x15 => Self::JeqImm,
+            0x1d => Self::JeqReg,
+            0x25 => Self::JgtImm,
+            0x2d => Self::JgtReg,
+            0x35 => Self::JgeImm,
+            0x3d => Self::JgeReg,
+            0xa5 => Self::JltImm,
+            0xad => Self::JltReg,
+            0xb5 => Self::JleImm,
+            0xbd => Self::JleReg,
+            0x45 => Self::JsetImm,
+            0x4d => Self::JsetReg,
+            0x55 => Self::JneImm,
+            0x5d => Self::JneReg,
+            0x65 => Self::JsgtImm,
+            0x6d => Self::JsgtReg,
+            0x75 => Self::JsgeImm,
+            0x7d => Self::JsgeReg,
+            0xc5 => Self::JsltImm,
+            0xcd => Self::JsltReg,
+            0xd5 => Self::JsleImm,
+            0xdd => Self::JsleReg,
+            0x85 => Self::Call,
+            0x8d => Self::Callx,
+            0x95 => Self::Exit,
+            _ => return None,
+        };
+        Some(decoded)
+    }
+
+    /// Returns the opcode byte for this variant.
+    ///
+    /// Only variants with a concrete encoding have an entry. The bare
+    /// mnemonic variants (e.g. `Add64`, `Jeq`), which stand for an operation
+    /// before its immediate/register form is chosen, fall through to the
+    /// catch-all and yield `0x00`.
+    pub fn to_bytecode(&self) -> u8 {
+        match *self {
+            Self::Lddw => 0x18,
+            Self::Ldxb => 0x71,
+            Self::Ldxh => 0x69,
+            Self::Ldxw => 0x61,
+            Self::Ldxdw => 0x79,
+            Self::Stb => 0x72,
+            Self::Sth => 0x6a,
+            Self::Stw => 0x62,
+            Self::Stdw => 0x7a,
+            Self::Stxb => 0x73,
+            Self::Stxh => 0x6b,
+            Self::Stxw => 0x63,
+            Self::Stxdw => 0x7b,
+            Self::Add32Imm => 0x04,
+            Self::Add32Reg => 0x0c,
+            Self::Sub32Imm => 0x14,
+            Self::Sub32Reg => 0x1c,
+            Self::Mul32Imm => 0x24,
+            Self::Mul32Reg => 0x2c,
+            Self::Div32Imm => 0x34,
+            Self::Div32Reg => 0x3c,
+            Self::Or32Imm => 0x44,
+            Self::Or32Reg => 0x4c,
+            Self::And32Imm => 0x54,
+            Self::And32Reg => 0x5c,
+            Self::Lsh32Imm => 0x64,
+            Self::Lsh32Reg => 0x6c,
+            Self::Rsh32Imm => 0x74,
+            Self::Rsh32Reg => 0x7c,
+            Self::Mod32Imm => 0x94,
+            Self::Mod32Reg => 0x9c,
+            Self::Xor32Imm => 0xa4,
+            Self::Xor32Reg => 0xac,
+            Self::Mov32Imm => 0xb4,
+            Self::Mov32Reg => 0xbc,
+            Self::Arsh32Imm => 0xc4,
+            Self::Arsh32Reg => 0xcc,
+            Self::Lmul32Imm => 0x86,
+            Self::Lmul32Reg => 0x8e,
+            Self::Udiv32Imm => 0x46,
+            Self::Udiv32Reg => 0x4e,
+            Self::Urem32Imm => 0x66,
+            Self::Urem32Reg => 0x6e,
+            Self::Sdiv32Imm => 0xc6,
+            Self::Sdiv32Reg => 0xce,
+            Self::Srem32Imm => 0xe6,
+            Self::Srem32Reg => 0xee,
+            Self::Le => 0xd4,
+            Self::Be => 0xdc,
+            Self::Add64Imm => 0x07,
+            Self::Add64Reg => 0x0f,
+            Self::Sub64Imm => 0x17,
+            Self::Sub64Reg => 0x1f,
+            Self::Mul64Imm => 0x27,
+            Self::Mul64Reg => 0x2f,
+            Self::Div64Imm => 0x37,
+            Self::Div64Reg => 0x3f,
+            Self::Or64Imm => 0x47,
+            Self::Or64Reg => 0x4f,
+            Self::And64Imm => 0x57,
+            Self::And64Reg => 0x5f,
+            Self::Lsh64Imm => 0x67,
+            Self::Lsh64Reg => 0x6f,
+            Self::Rsh64Imm => 0x77,
+            Self::Rsh64Reg => 0x7f,
+            Self::Mod64Imm => 0x97,
+            Self::Mod64Reg => 0x9f,
+            Self::Xor64Imm => 0xa7,
+            Self::Xor64Reg => 0xaf,
+            Self::Mov64Imm => 0xb7,
+            Self::Mov64Reg => 0xbf,
+            Self::Arsh64Imm => 0xc7,
+            Self::Arsh64Reg => 0xcf,
+            Self::Hor64Imm => 0xf7,
+            Self::Lmul64Imm => 0x96,
+            Self::Lmul64Reg => 0x9e,
+            Self::Uhmul64Imm => 0x36,
+            Self::Uhmul64Reg => 0x3e,
+            Self::Udiv64Imm => 0x56,
+            Self::Udiv64Reg => 0x5e,
+            Self::Urem64Imm => 0x76,
+            Self::Urem64Reg => 0x7e,
+            Self::Shmul64Imm => 0xb6,
+            Self::Shmul64Reg => 0xbe,
+            Self::Sdiv64Imm => 0xd6,
+            Self::Sdiv64Reg => 0xde,
+            Self::Srem64Imm => 0xf6,
+            Self::Srem64Reg => 0xfe,
+            Self::Neg32 => 0x84,
+            Self::Neg64 => 0x87,
+            Self::Ja => 0x05,
+            Self::JeqImm => 0x15,
+            Self::JeqReg => 0x1d,
+            Self::JgtImm => 0x25,
+            Self::JgtReg => 0x2d,
+            Self::JgeImm => 0x35,
+            Self::JgeReg => 0x3d,
+            Self::JltImm => 0xa5,
+            Self::JltReg => 0xad,
+            Self::JleImm => 0xb5,
+            Self::JleReg => 0xbd,
+            Self::JsetImm => 0x45,
+            Self::JsetReg => 0x4d,
+            Self::JneImm => 0x55,
+            Self::JneReg => 0x5d,
+            Self::JsgtImm => 0x65,
+            Self::JsgtReg => 0x6d,
+            Self::JsgeImm => 0x75,
+            Self::JsgeReg => 0x7d,
+            Self::JsltImm => 0xc5,
+            Self::JsltReg => 0xcd,
+            Self::JsleImm => 0xd5,
+            Self::JsleReg => 0xdd,
+            Self::Call => 0x85,
+            Self::Callx => 0x8d,
+            Self::Exit => 0x95,
+
+            // Bare mnemonic variants have no encoding of their own.
+            _ => 0x00,
+        }
+    }
+    
+    /// Returns the assembly mnemonic for this opcode.
+    ///
+    /// The bare, `…Imm` and `…Reg` variants of an operation all share one
+    /// mnemonic. Every returned string is accepted by `Opcode::from_str`;
+    /// in particular `Ja` yields `"ja"` and `Callx` yields `"callx"`.
+    pub fn to_str(&self) -> &'static str {
+        match self {
+            Opcode::Lddw => "lddw",
+            Opcode::Ldxb => "ldxb",
+            Opcode::Ldxh => "ldxh",
+            Opcode::Ldxw => "ldxw",
+            Opcode::Ldxdw => "ldxdw",
+            Opcode::Stb => "stb",
+            Opcode::Sth => "sth",
+            Opcode::Stw => "stw",
+            Opcode::Stdw => "stdw",
+            Opcode::Stxb => "stxb",
+            Opcode::Stxh => "stxh",
+            Opcode::Stxw => "stxw",
+            Opcode::Stxdw => "stxdw",
+            Opcode::Add32 | Opcode::Add32Imm | Opcode::Add32Reg => "add32",
+            Opcode::Sub32 | Opcode::Sub32Imm | Opcode::Sub32Reg => "sub32",
+            Opcode::Mul32 | Opcode::Mul32Imm | Opcode::Mul32Reg => "mul32",
+            Opcode::Div32 | Opcode::Div32Imm | Opcode::Div32Reg => "div32",
+            Opcode::Or32 | Opcode::Or32Imm | Opcode::Or32Reg => "or32",
+            Opcode::And32 | Opcode::And32Imm | Opcode::And32Reg => "and32",
+            Opcode::Lsh32 | Opcode::Lsh32Imm | Opcode::Lsh32Reg => "lsh32",
+            Opcode::Rsh32 | Opcode::Rsh32Imm | Opcode::Rsh32Reg => "rsh32",
+            Opcode::Neg32 => "neg32",
+            Opcode::Mod32 | Opcode::Mod32Imm | Opcode::Mod32Reg => "mod32",
+            Opcode::Xor32 | Opcode::Xor32Imm | Opcode::Xor32Reg => "xor32",
+            Opcode::Mov32 | Opcode::Mov32Imm | Opcode::Mov32Reg => "mov32",
+            Opcode::Arsh32 | Opcode::Arsh32Imm | Opcode::Arsh32Reg => "arsh32",
+            Opcode::Lmul32 | Opcode::Lmul32Imm | Opcode::Lmul32Reg => "lmul32",
+            Opcode::Udiv32 | Opcode::Udiv32Imm | Opcode::Udiv32Reg => "udiv32",
+            Opcode::Urem32 | Opcode::Urem32Imm | Opcode::Urem32Reg => "urem32",
+            Opcode::Sdiv32 | Opcode::Sdiv32Imm | Opcode::Sdiv32Reg => "sdiv32",
+            Opcode::Srem32 | Opcode::Srem32Imm | Opcode::Srem32Reg => "srem32",
+            Opcode::Le => "le",
+            Opcode::Be => "be",
+            Opcode::Add64 | Opcode::Add64Imm | Opcode::Add64Reg => "add64",
+            Opcode::Sub64 | Opcode::Sub64Imm | Opcode::Sub64Reg => "sub64",
+            Opcode::Mul64 | Opcode::Mul64Imm | Opcode::Mul64Reg => "mul64",
+            Opcode::Div64 | Opcode::Div64Imm | Opcode::Div64Reg => "div64",
+            Opcode::Or64 | Opcode::Or64Imm | Opcode::Or64Reg => "or64",
+            Opcode::And64 | Opcode::And64Imm | Opcode::And64Reg => "and64",
+            Opcode::Lsh64 | Opcode::Lsh64Imm | Opcode::Lsh64Reg => "lsh64",
+            Opcode::Rsh64 | Opcode::Rsh64Imm | Opcode::Rsh64Reg => "rsh64",
+            Opcode::Neg64 => "neg64",
+            Opcode::Mod64 | Opcode::Mod64Imm | Opcode::Mod64Reg => "mod64",
+            Opcode::Xor64 | Opcode::Xor64Imm | Opcode::Xor64Reg => "xor64",
+            Opcode::Mov64 | Opcode::Mov64Imm | Opcode::Mov64Reg => "mov64",
+            Opcode::Arsh64 | Opcode::Arsh64Imm | Opcode::Arsh64Reg => "arsh64",
+            Opcode::Hor64Imm => "hor64",
+            Opcode::Lmul64 | Opcode::Lmul64Imm | Opcode::Lmul64Reg => "lmul64",
+            Opcode::Uhmul64 | Opcode::Uhmul64Imm | Opcode::Uhmul64Reg => "uhmul64",
+            Opcode::Udiv64 | Opcode::Udiv64Imm | Opcode::Udiv64Reg => "udiv64",
+            Opcode::Urem64 | Opcode::Urem64Imm | Opcode::Urem64Reg => "urem64",
+            Opcode::Shmul64 | Opcode::Shmul64Imm | Opcode::Shmul64Reg => "shmul64",
+            Opcode::Sdiv64 | Opcode::Sdiv64Imm | Opcode::Sdiv64Reg => "sdiv64",
+            Opcode::Srem64 | Opcode::Srem64Imm | Opcode::Srem64Reg => "srem64",
+            // `ja` is its own mnemonic; it must not be reported as "jeq".
+            Opcode::Ja => "ja",
+            Opcode::Jeq | Opcode::JeqImm | Opcode::JeqReg => "jeq",
+            Opcode::Jgt | Opcode::JgtImm | Opcode::JgtReg => "jgt",
+            Opcode::Jge | Opcode::JgeImm | Opcode::JgeReg => "jge",
+            Opcode::Jlt | Opcode::JltImm | Opcode::JltReg => "jlt",
+            Opcode::Jle | Opcode::JleImm | Opcode::JleReg => "jle",
+            Opcode::Jset | Opcode::JsetImm | Opcode::JsetReg => "jset",
+            Opcode::Jne | Opcode::JneImm | Opcode::JneReg => "jne",
+            Opcode::Jsgt | Opcode::JsgtImm | Opcode::JsgtReg => "jsgt",
+            Opcode::Jsge | Opcode::JsgeImm | Opcode::JsgeReg => "jsge",
+            Opcode::Jslt | Opcode::JsltImm | Opcode::JsltReg => "jslt",
+            Opcode::Jsle | Opcode::JsleImm | Opcode::JsleReg => "jsle",
+            Opcode::Call => "call",
+            // `callx` has a distinct mnemonic (and opcode byte) from `call`.
+            Opcode::Callx => "callx",
+            Opcode::Exit => "exit",
+        }
+    }
+}

+ 643 - 0
crates/assembler/src/parser.rs

@@ -0,0 +1,643 @@
+use crate::lexer::Op;
+use crate::opcode::Opcode;
+use crate::lexer::{Token, ImmediateValue};
+use crate::section::{CodeSection, DataSection};
+use crate::astnode::{ASTNode, Directive, GlobalDecl, EquDecl, ExternDecl, RodataDecl, Label, Instruction, ROData};
+use crate::dynsym::{DynamicSymbolMap, RelDynMap, RelocationType};
+use num_traits::FromPrimitive;
+use std::collections::HashMap;
+
+pub struct Parser { // Parser state: the token stream plus the tables filled in while `parse` runs.
+    tokens: Vec<Token>, // token stream handed to `Parser::new`
+    current: usize, // set to 0 by `Parser::new`
+
+    pub m_prog_is_static: bool, // starts `true`; `parse` clears it when an instruction needs relocation
+    pub m_accum_offset: u64, // running offset; `parse` advances it by each instruction's `get_size()`
+
+    // TODO: consolidate all temporary parsing related information
+    m_const_map: HashMap<String, ImmediateValue>, // name -> value for every parsed `equ` declaration
+    m_label_offsets: HashMap<String, u64>, // label name -> value of `m_accum_offset` when the label was parsed
+
+    // TODO: consolidate all dynamic symbol information to one big map
+    m_entry_label: Option<String>, // label named by the most recent `global`/`globl` directive
+    m_dynamic_symbols: DynamicSymbolMap, // receives the entry point and syscall call targets
+    m_rel_dyns: RelDynMap, // one record per instruction that needs relocation
+
+    m_rodata_size: u64, // sum of `get_size()` over the parsed rodata entries
+}
+
+pub struct ParseResult { // Everything `Parser::parse` returns on success.
+    // TODO: parse result is basically 1. static part 2. dynamic part of the program
+    pub code_section: CodeSection, // built from the non-rodata AST nodes and the final accumulated offset
+
+    pub data_section: DataSection, // built from the rodata nodes and their total size
+
+    pub dynamic_symbols: DynamicSymbolMap, // copy of the parser's dynamic symbol map
+
+    pub relocation_data: RelDynMap, // copy of the parser's relocation records
+
+    // TODO: this should be determined by whether there is any dynamic symbol
+    pub prog_is_static: bool, // `false` once any parsed instruction needed relocation
+}
+
+pub trait Parse { // Parses one AST item from the front of a token slice.
+    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> // `Some((item, unconsumed tokens))` on a match, `None` otherwise
+        where Self: Sized;
+}
+
+pub trait ParseInstruction { // Parses one item from the front of a token slice, with a table of named constants available.
+    fn parse_instruction<'a>(tokens: &'a [Token], const_map: &HashMap<String, ImmediateValue>) -> Option<(Self, &'a [Token])> // `Some((item, unconsumed tokens))` on a match, `None` otherwise
+        where Self: Sized;
+}
+
+impl Parse for GlobalDecl {
+    /// Parses `<directive> <identifier>` into a [`GlobalDecl`].
+    ///
+    /// The first token is skipped without inspection (the caller has already
+    /// dispatched on it); the second must be an identifier, which becomes the
+    /// entry label and supplies the line number. Returns the declaration and
+    /// the tokens after the identifier, or `None` if the shape does not match.
+    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+        let [_, Token::Identifier(label, line), rest @ ..] = tokens else {
+            return None;
+        };
+        let decl = GlobalDecl {
+            entry_label: label.clone(),
+            line_number: *line,
+        };
+        Some((decl, rest))
+    }
+}
+
+impl Parse for EquDecl {
+    /// Parses `<directive> <identifier> , <immediate>` into an [`EquDecl`].
+    ///
+    /// The first token is skipped without inspection (the caller has already
+    /// dispatched on it). The identifier gives the constant's name and line
+    /// number; the immediate token is stored as-is in `value`.
+    ///
+    /// Returns the declaration and the tokens after the immediate, or `None`
+    /// when fewer than four tokens are available or the shape does not match.
+    /// The slice pattern checks the length itself, so a truncated declaration
+    /// (exactly three tokens) yields `None` instead of indexing out of bounds.
+    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+        let [_, Token::Identifier(name, line_number), Token::Comma(..), value @ Token::ImmediateValue(..), rest @ ..] = tokens else {
+            return None;
+        };
+        Some((
+            EquDecl {
+                name: name.clone(),
+                // TODO: infer the number type from the value
+                value: value.clone(),
+                line_number: *line_number,
+            },
+            rest,
+        ))
+    }
+}
+
+impl Parse for ExternDecl {
+    /// Parses `<directive> <identifier>+` into an [`ExternDecl`].
+    ///
+    /// Collects the run of consecutive identifier tokens that follows the first
+    /// token; the run ends at the first non-identifier or at the end of input.
+    /// Returns `None` if there are fewer than two tokens or no identifier
+    /// follows. The line number is taken from the leading directive token,
+    /// which callers are expected to have matched already (anything else hits
+    /// `unreachable!()`).
+    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+        if tokens.len() < 2 {
+            return None;
+        }
+        let args: Vec<Token> = tokens[1..]
+            .iter()
+            .take_while(|tok| matches!(tok, Token::Identifier(..)))
+            .cloned()
+            .collect();
+        if args.is_empty() {
+            return None;
+        }
+        let Token::Directive(_, line_number) = &tokens[0] else { unreachable!() };
+        // One directive token plus every collected identifier has been consumed.
+        let consumed = 1 + args.len();
+        Some((
+            ExternDecl {
+                args,
+                line_number: *line_number,
+            },
+            &tokens[consumed..],
+        ))
+    }
+}
+
+impl Parse for ROData {
+    /// Parses `<label> <directive> <string literal>` into an [`ROData`] entry.
+    ///
+    /// The label supplies the entry's name and line number; the directive and
+    /// string literal tokens are stored, in that order, as the entry's `args`.
+    /// Returns the entry and the tokens after the literal, or `None` if fewer
+    /// than three tokens are available or the shape does not match.
+    fn parse(tokens: &[Token]) -> Option<(Self, &[Token])> {
+        let [Token::Label(name, line_number), directive @ Token::Directive(..), literal @ Token::StringLiteral(..), rest @ ..] = tokens else {
+            return None;
+        };
+        let entry = ROData {
+            name: name.clone(),
+            args: vec![directive.clone(), literal.clone()],
+            line_number: *line_number,
+        };
+        Some((entry, rest))
+    }
+}
+
+impl ParseInstruction for Instruction {
+    /// Parses a single instruction from the front of `tokens`.
+    ///
+    /// `tokens[0]` must be a `Token::Opcode`; the operand layout expected after
+    /// it depends on the opcode family and is noted on each arm below. The
+    /// immediate operand of `lddw`, of the ALU opcodes and of the load offsets
+    /// goes through `inline_and_fold_constant`, so a name found in `const_map`
+    /// and any trailing `+`/`-` immediates are folded into one value. Folded
+    /// immediates are emitted with line number 0.
+    ///
+    /// Returns the instruction and the unconsumed tokens. Returns `None` when
+    /// `tokens` is empty, does not start with an opcode, is too short for the
+    /// opcode, does not match the opcode's operand layout, or the opcode is not
+    /// one handled here. Positions derived from folding are looked up with
+    /// `get`, so a truncated stream yields `None` rather than a panic.
+    ///
+    /// # Panics
+    ///
+    /// Panics with "Invalid opcode conversion" if an ALU or jump opcode has no
+    /// `Opcode` value at discriminant +1 (immediate form) / +2 (register form).
+    fn parse_instruction<'a>(tokens: &'a [Token], const_map: &HashMap<String, ImmediateValue>) -> Option<(Self, &'a [Token])> {
+        // `first()` rather than `tokens[0]`: an empty slice means "no instruction".
+        let Token::Opcode(opcode, line_number) = tokens.first()? else {
+            return None;
+        };
+        let mut opcode = opcode.clone();
+        let mut operands = Vec::new();
+        let next_token_num;
+        match opcode {
+            // lddw <reg> , <immediate | constant expression | identifier>
+            Opcode::Lddw => {
+                if tokens.len() < 4 {
+                    return None;
+                }
+                if !matches!((&tokens[1], &tokens[2]), (Token::Register(..), Token::Comma(..))) {
+                    return None;
+                }
+                let (folded, after) = inline_and_fold_constant(tokens, const_map, 3);
+                operands.push(tokens[1].clone());
+                match folded {
+                    Some(value) => {
+                        operands.push(Token::ImmediateValue(value, 0));
+                        next_token_num = after;
+                    }
+                    None => {
+                        // Not foldable: only a bare identifier is accepted, kept
+                        // as-is for later resolution.
+                        // external error: invalid syntax with opcode: lddw
+                        if !matches!(tokens[3], Token::Identifier(..)) {
+                            return None;
+                        }
+                        operands.push(tokens[3].clone());
+                        next_token_num = 4;
+                    }
+                }
+            }
+            // ldx* <reg> , [ <reg> <binary-op> <immediate | constant expression> ]
+            Opcode::Ldxw | Opcode::Ldxh | Opcode::Ldxb | Opcode::Ldxdw => {
+                if tokens.len() < 8 {
+                    return None;
+                }
+                let (Some(value), close_idx) = inline_and_fold_constant(tokens, const_map, 6) else {
+                    return None;
+                };
+                // NOTE(review): the token at index 5 is only checked for being a
+                // binary operator; which operator it is does not affect the
+                // offset stored here — confirm `[reg - imm]` is handled elsewhere.
+                match (
+                    &tokens[1],
+                    &tokens[2],
+                    &tokens[3],
+                    &tokens[4],
+                    &tokens[5],
+                    // Folding may have consumed tokens up to the end of the
+                    // stream, so the closing bracket is looked up with `get`.
+                    tokens.get(close_idx),
+                ) {
+                    (
+                        Token::Register(..),
+                        Token::Comma(..),
+                        Token::LeftBracket(..),
+                        Token::Register(..),
+                        Token::BinaryOp(..),
+                        Some(Token::RightBracket(..)),
+                    ) => {
+                        operands.push(tokens[1].clone());
+                        operands.push(tokens[4].clone());
+                        operands.push(Token::ImmediateValue(value, 0));
+                    }
+                    _ => return None,
+                }
+                next_token_num = close_idx + 1;
+            }
+            // st* [ <reg> <binary-op> <immediate> ] , <reg>
+            Opcode::Stw | Opcode::Sth | Opcode::Stb | Opcode::Stdw
+            | Opcode::Stxb | Opcode::Stxh | Opcode::Stxw | Opcode::Stxdw => {
+                let [
+                    _,
+                    Token::LeftBracket(..),
+                    base @ Token::Register(..),
+                    Token::BinaryOp(..),
+                    offset @ Token::ImmediateValue(..),
+                    Token::RightBracket(..),
+                    Token::Comma(..),
+                    source @ Token::Register(..),
+                    ..
+                ] = tokens else {
+                    return None;
+                };
+                operands.extend([base.clone(), offset.clone(), source.clone()]);
+                next_token_num = 8;
+            }
+            // <alu-op> <reg> , <immediate | constant expression | reg>
+            Opcode::Add32 | Opcode::Sub32 | Opcode::Mul32
+            | Opcode::Div32 | Opcode::Or32 | Opcode::And32
+            | Opcode::Lsh32 | Opcode::Rsh32 | Opcode::Mod32
+            | Opcode::Xor32 | Opcode::Mov32 | Opcode::Arsh32
+            | Opcode::Lmul32 | Opcode::Udiv32 | Opcode::Urem32
+            | Opcode::Sdiv32 | Opcode::Srem32 | Opcode::Neg32
+            | Opcode::Add64 | Opcode::Sub64 | Opcode::Mul64
+            | Opcode::Div64 | Opcode::Or64 | Opcode::And64
+            | Opcode::Lsh64 | Opcode::Rsh64 | Opcode::Mod64
+            | Opcode::Xor64 | Opcode::Mov64 | Opcode::Arsh64
+            | Opcode::Lmul64 | Opcode::Uhmul64 | Opcode::Udiv64
+            | Opcode::Urem64 | Opcode::Sdiv64 | Opcode::Srem64 => {
+                if tokens.len() < 4 {
+                    return None;
+                }
+                if !matches!((&tokens[1], &tokens[2]), (Token::Register(..), Token::Comma(..))) {
+                    return None;
+                }
+                let (folded, after) = inline_and_fold_constant(tokens, const_map, 3);
+                match folded {
+                    Some(value) => {
+                        // Immediate form: assumes it sits at discriminant +1.
+                        opcode = FromPrimitive::from_u8((opcode as u8) + 1).expect("Invalid opcode conversion");
+                        operands.push(tokens[1].clone());
+                        operands.push(Token::ImmediateValue(value, 0));
+                        next_token_num = after;
+                    }
+                    None => {
+                        if !matches!(tokens[3], Token::Register(..)) {
+                            return None;
+                        }
+                        // Register form: assumes it sits at discriminant +2.
+                        opcode = FromPrimitive::from_u8((opcode as u8) + 2).expect("Invalid opcode conversion");
+                        operands.push(tokens[1].clone());
+                        operands.push(tokens[3].clone());
+                        next_token_num = 4;
+                    }
+                }
+            }
+            // <jump-op> <reg> , <immediate | reg> , <identifier>
+            Opcode::Jeq | Opcode::Jgt | Opcode::Jge
+            | Opcode::Jlt | Opcode::Jle | Opcode::Jset
+            | Opcode::Jne | Opcode::Jsgt | Opcode::Jsge
+            | Opcode::Jslt | Opcode::Jsle => {
+                let [
+                    _,
+                    lhs @ Token::Register(..),
+                    Token::Comma(..),
+                    rhs,
+                    Token::Comma(..),
+                    target @ Token::Identifier(..),
+                    ..
+                ] = tokens else {
+                    return None;
+                };
+                // +1 selects the immediate form, +2 the register form.
+                let step = match rhs {
+                    Token::ImmediateValue(..) => 1,
+                    Token::Register(..) => 2,
+                    _ => return None,
+                };
+                opcode = FromPrimitive::from_u8((opcode as u8) + step).expect("Invalid opcode conversion");
+                operands.extend([lhs.clone(), rhs.clone(), target.clone()]);
+                next_token_num = 6;
+            }
+            // ja <identifier | immediate>
+            Opcode::Ja => {
+                match tokens.get(1) {
+                    Some(target @ (Token::Identifier(..) | Token::ImmediateValue(..))) => {
+                        operands.push(target.clone());
+                    }
+                    _ => return None,
+                }
+                next_token_num = 2;
+            }
+            // call <identifier>
+            Opcode::Call => {
+                match tokens.get(1) {
+                    Some(target @ Token::Identifier(..)) => operands.push(target.clone()),
+                    _ => return None,
+                }
+                next_token_num = 2;
+            }
+            // exit (no operands)
+            Opcode::Exit => {
+                next_token_num = 1;
+            }
+            // internal error: invalid opcode
+            _ => {
+                return None;
+            }
+        }
+        // `get` keeps a too-large continuation index from panicking.
+        let rest = tokens.get(next_token_num..)?;
+        Some((
+            Instruction {
+                opcode,
+                operands,
+                line_number: *line_number,
+            },
+            rest,
+        ))
+    }
+}
+
+/// Folds a chain of `<binary-op> <immediate>` pairs that follows `tokens[idx]`
+/// into `value`.
+///
+/// `value` is the running result for the operand ending at `idx`. While the two
+/// tokens after `idx` are an `Op::Add` or `Op::Sub` operator followed by an
+/// immediate, the immediate is added to / subtracted from `value` and the scan
+/// moves two tokens forward. Any other operator, any other token, or the end of
+/// the slice stops the fold.
+///
+/// Returns `(Some(folded value), index of the first token not consumed)`. The
+/// look-ahead uses `get`, so an operand sitting within two tokens of the end of
+/// `tokens` ends the fold instead of indexing out of bounds.
+fn inline_and_fold_constant_helper(tokens: &[Token]                             //
+                                , const_map: &HashMap<String, ImmediateValue>   //
+                                , value: ImmediateValue                         //
+                                , idx: usize) -> (Option<ImmediateValue>, usize) {
+    let (Some(Token::BinaryOp(op, _)), Some(Token::ImmediateValue(rhs, _))) =
+        (tokens.get(idx + 1), tokens.get(idx + 2))
+    else {
+        return (Some(value), idx + 1);
+    };
+    let result = match op {
+        Op::Add => value + rhs.clone(),
+        Op::Sub => value - rhs.clone(),
+        // Only addition and subtraction are folded; leave the rest to the caller.
+        _ => return (Some(value), idx + 1),
+    };
+    inline_and_fold_constant_helper(tokens, const_map, result, idx + 2)
+}
+
+/// Resolves the operand at `tokens[idx]` to a constant and folds any trailing
+/// `+`/`-` immediates into it.
+///
+/// An immediate token is used directly; an identifier is looked up in
+/// `const_map`. Returns `(None, idx + 1)` when the token is neither, or when the
+/// identifier is not a known constant. Otherwise returns whatever
+/// `inline_and_fold_constant_helper` produces for the resolved value.
+fn inline_and_fold_constant(tokens: &[Token]                            //
+                        , const_map: &HashMap<String, ImmediateValue>   //
+                        , idx: usize) -> (Option<ImmediateValue>, usize) {
+    let seed = match &tokens[idx] {
+        Token::ImmediateValue(literal, _) => Some(literal.clone()),
+        Token::Identifier(name, _) => const_map.get(name).cloned(),
+        _ => None,
+    };
+    match seed {
+        Some(start) => inline_and_fold_constant_helper(tokens, const_map, start, idx),
+        None => (None, idx + 1),
+    }
+}
+
+impl Parser {
+    /// Creates a parser over `tokens` with empty constant, label, symbol and
+    /// relocation tables. The program is treated as static until `parse` sees an
+    /// instruction that needs relocation.
+    pub fn new(tokens: Vec<Token>) -> Self {
+        Self {
+            tokens,
+            current: 0,
+            m_prog_is_static: true,
+            m_accum_offset: 0,
+            m_const_map: HashMap::new(),
+            m_label_offsets: HashMap::new(),
+            m_entry_label: None,
+            m_dynamic_symbols: DynamicSymbolMap::new(),
+            m_rel_dyns: RelDynMap::new(),
+            m_rodata_size: 0,
+        }
+    }
+
+    /// Parses the whole token stream into code and data sections.
+    ///
+    /// The first pass walks the tokens, dispatching on directives, labels and
+    /// opcodes, and records constants, label offsets, relocations and call
+    /// targets along the way. The second pass rewrites identifier operands of
+    /// jump and `lddw` instructions that name a known label into immediates.
+    /// Finally the entry label, if one was declared and defined, is registered
+    /// as the entry point.
+    ///
+    /// # Errors
+    ///
+    /// Returns a message when a directive, rodata entry or instruction is
+    /// malformed, when a directive name is not recognised, or when a token that
+    /// cannot start an item is encountered.
+    pub fn parse(&mut self) -> Result<ParseResult, String> {
+        let mut code_nodes = Vec::new();
+        let mut data_nodes = Vec::new();
+        let mut in_rodata = false;
+
+        let mut remaining = self.tokens.as_slice();
+
+        // First pass: build the AST and collect symbol information.
+        while let Some(head) = remaining.first() {
+            match head {
+                Token::Directive(name, line_number) => match name.as_str() {
+                    "global" | "globl" => {
+                        let Some((decl, rest)) = GlobalDecl::parse(remaining) else {
+                            return Err("Invalid global declaration".to_string());
+                        };
+                        self.m_entry_label = Some(decl.get_entry_label());
+                        code_nodes.push(ASTNode::GlobalDecl(decl));
+                        remaining = rest;
+                    }
+                    "extern" => {
+                        let Some((decl, rest)) = ExternDecl::parse(remaining) else {
+                            return Err("Invalid extern declaration".to_string());
+                        };
+                        code_nodes.push(ASTNode::ExternDecl(decl));
+                        remaining = rest;
+                    }
+                    "rodata" => {
+                        // From here on, labels introduce rodata entries.
+                        code_nodes.push(ASTNode::RodataDecl(RodataDecl { line_number: *line_number }));
+                        in_rodata = true;
+                        remaining = &remaining[1..];
+                    }
+                    "equ" => {
+                        let Some((decl, rest)) = EquDecl::parse(remaining) else {
+                            return Err("Invalid equ declaration".to_string());
+                        };
+                        self.m_const_map.insert(decl.get_name(), decl.get_val());
+                        code_nodes.push(ASTNode::EquDecl(decl));
+                        remaining = rest;
+                    }
+                    "section" => {
+                        code_nodes.push(ASTNode::Directive(Directive {
+                            name: name.clone(),
+                            args: Vec::new(),
+                            line_number: *line_number,
+                        }));
+                        remaining = &remaining[1..];
+                    }
+                    _ => {
+                        return Err(format!("Invalid directive: {}", name));
+                    }
+                },
+                Token::Label(name, line_number) => {
+                    if in_rodata {
+                        let Some((rodata, rest)) = ROData::parse(remaining) else {
+                            return Err("Invalid rodata declaration".to_string());
+                        };
+                        self.m_rodata_size += rodata.get_size();
+                        data_nodes.push(ASTNode::ROData { rodata, offset: self.m_accum_offset });
+                        remaining = rest;
+                    } else {
+                        code_nodes.push(ASTNode::Label(Label {
+                            name: name.clone(),
+                            line_number: *line_number,
+                        }));
+                        remaining = &remaining[1..];
+                    }
+                    // NOTE(review): rodata entries do not advance `m_accum_offset`,
+                    // so consecutive rodata labels are recorded at the same
+                    // offset — confirm this is intended.
+                    self.m_label_offsets.insert(name.clone(), self.m_accum_offset);
+                }
+                Token::Opcode(_, line_number) => {
+                    let Some((inst, rest)) = Instruction::parse_instruction(remaining, &self.m_const_map) else {
+                        return Err(format!("Invalid instruction at line {}", line_number));
+                    };
+                    let offset = self.m_accum_offset;
+                    if inst.needs_relocation() {
+                        self.m_prog_is_static = false;
+                        let (reloc_type, label) = inst.get_relocation_info();
+                        self.m_rel_dyns.add_rel_dyn(offset, reloc_type, label.clone());
+                        if reloc_type == RelocationType::RSbfSyscall {
+                            self.m_dynamic_symbols.add_call_target(label.clone(), offset);
+                        }
+                    }
+                    self.m_accum_offset += inst.get_size();
+                    code_nodes.push(ASTNode::Instruction { instruction: inst, offset });
+                    remaining = rest;
+                }
+                _ => {
+                    return Err(format!("Unexpected token: {:?}", head));
+                }
+            }
+        }
+
+        // Size of the ELF header (64 bytes) plus the program headers (56 bytes
+        // each; one for a static program, three otherwise). `m_prog_is_static`
+        // no longer changes after the first pass, so this is computed once.
+        let ph_count: u64 = if self.m_prog_is_static { 1 } else { 3 };
+        let ph_offset = 64 + (ph_count * 56) as i64;
+
+        // Second pass: resolve label operands now that every offset is known.
+        for node in code_nodes.iter_mut() {
+            let ASTNode::Instruction { instruction: Instruction { opcode, operands, .. }, offset } = node else {
+                continue;
+            };
+
+            // Offset of the label named by the last operand, if it is an
+            // identifier with a recorded offset. Unknown names are left as-is.
+            // NOTE(review): an undefined jump target is not reported here —
+            // confirm a later stage rejects it.
+            let target_offset = match operands.last() {
+                Some(Token::Identifier(label, _)) => self.m_label_offsets.get(label).copied(),
+                _ => None,
+            };
+            let Some(target_offset) = target_offset else {
+                continue;
+            };
+
+            let is_jump = matches!(
+                *opcode,
+                Opcode::Ja
+                    | Opcode::JeqImm | Opcode::JgtImm | Opcode::JgeImm | Opcode::JltImm
+                    | Opcode::JleImm | Opcode::JsetImm | Opcode::JneImm | Opcode::JsgtImm
+                    | Opcode::JsgeImm | Opcode::JsltImm | Opcode::JsleImm
+                    | Opcode::JeqReg | Opcode::JgtReg | Opcode::JgeReg | Opcode::JltReg
+                    | Opcode::JleReg | Opcode::JsetReg | Opcode::JneReg | Opcode::JsgtReg
+                    | Opcode::JsgeReg | Opcode::JsltReg | Opcode::JsleReg
+            );
+
+            let resolved = if is_jump {
+                // Jumps take the distance in 8-byte slots, counted from the
+                // instruction after the jump.
+                let rel_offset = (target_offset as i64 - *offset as i64) / 8 - 1;
+                Token::ImmediateValue(ImmediateValue::Int(rel_offset), 0)
+            } else if *opcode == Opcode::Lddw {
+                // `lddw` takes the label's offset shifted past the headers.
+                let abs_offset = target_offset as i64 + ph_offset;
+                Token::ImmediateValue(ImmediateValue::Addr(abs_offset), 0)
+            } else {
+                continue;
+            };
+
+            // Replace the label operand with the computed immediate.
+            if let Some(slot) = operands.last_mut() {
+                *slot = resolved;
+            }
+        }
+
+        // Register the entry point if an entry label was declared and defined.
+        if let Some(entry_label) = &self.m_entry_label {
+            if let Some(entry_offset) = self.m_label_offsets.get(entry_label) {
+                self.m_dynamic_symbols.add_entry_point(entry_label.clone(), *entry_offset);
+            }
+        }
+
+        Ok(ParseResult {
+            code_section: CodeSection::new(code_nodes, self.m_accum_offset),
+            data_section: DataSection::new(data_nodes, self.m_rodata_size),
+            dynamic_symbols: DynamicSymbolMap::copy(&self.m_dynamic_symbols),
+            relocation_data: RelDynMap::copy(&self.m_rel_dyns),
+            prog_is_static: self.m_prog_is_static,
+        })
+    }
+}

+ 262 - 0
crates/assembler/src/program.rs

@@ -0,0 +1,262 @@
+use crate::header::ElfHeader;
+use crate::header::ProgramHeader;
+use crate::section::{Section, NullSection, DynamicSection, ShStrTabSection, SectionType, DynStrSection, DynSymSection, RelDynSection};
+use crate::dynsym::{DynamicSymbol, RelDyn, RelocationType};
+use crate::parser::ParseResult;
+use crate::debuginfo::DebugInfo;
+use std::fs::File;
+use std::io::Write;
+use std::path::Path;
+use std::collections::HashMap;
+/// In-memory model of the ELF image produced by the assembler: the ELF
+/// header, its program headers and the ordered list of sections.
+#[derive(Debug)]
+pub struct Program {
+    /// `true` when the image was laid out without the dynamic sections
+    /// (copied from `ParseResult::prog_is_static` in `from_parse_result`).
+    pub is_static: bool,
+    /// ELF file header; `emit_bytecode` writes it first.
+    pub elf_header: ElfHeader,
+    /// Program headers; `emit_bytecode` writes them right after the ELF header.
+    pub program_headers: Vec<ProgramHeader>,
+    /// Sections in file order; `emit_bytecode` writes all section bodies and
+    /// then all section headers in this order.
+    pub sections: Vec<SectionType>,
+}
+
+impl Program {
+    /// Lays out a complete ELF image from the assembler's parse result.
+    ///
+    /// File layout built here, in order: the ELF header (64 bytes), `ph_count`
+    /// program headers (56 bytes each), `.text`, `.rodata` (only when it is
+    /// non-empty) and, for non-static programs, `.dynamic`, `.dynsym`,
+    /// `.dynstr` and `.rel.dyn`; `.shstrtab` always comes last. `e_shoff` is
+    /// set to the 8-byte-aligned offset that follows `.shstrtab`.
+    ///
+    /// # Panics
+    ///
+    /// Panics when an `RSbfSyscall` relocation refers to a name that is
+    /// neither an entry point nor a call target of `dynamic_symbols`.
+    pub fn from_parse_result(
+        ParseResult {
+            code_section,
+            data_section,
+            dynamic_symbols,
+            relocation_data,
+            prog_is_static: is_static,
+        }: ParseResult,
+    ) -> Self {
+        let mut elf_header = ElfHeader::new();
+       
+        // Static images only get the load header; dynamic images also get the
+        // read-only load header and the dynamic header pushed further below.
+        let ph_count = if is_static { 1 } else { 3 };
+        elf_header.e_phnum = ph_count;
+        
+        // Calculate base offset after ELF header and program headers
+        let mut current_offset = 64 + (ph_count as u64 * 56); // 64 bytes ELF header, 56 bytes per program header
+        // The entry address is the first byte after the headers, i.e. the
+        // start of `.text`.
+        elf_header.e_entry = current_offset;
+
+        // Create program headers vector starting with the Read+Execute header
+        // (it spans the code and the read-only data together).
+        let mut program_headers = vec![
+            ProgramHeader::new_load(
+                elf_header.e_entry,
+                code_section.size() + data_section.size(),
+                true,   // executable
+            )
+        ];
+
+        // Create a vector of sections
+        // The null section always occupies index 0.
+        let mut sections = Vec::new();
+        sections.push(SectionType::Default(NullSection::new()));
+
+        // Names collected here feed `.shstrtab` in the dynamic branch.
+        let mut section_names = Vec::new();
+        
+        // Code section
+        let mut text_section = SectionType::Code(code_section);
+        text_section.set_offset(current_offset);
+        current_offset += text_section.size();
+        section_names.push(text_section.name().to_string());
+        sections.push(text_section);
+
+        // Data section
+        // Emitted only when there is read-only data, so section indices after
+        // `.text` depend on whether `.rodata` exists.
+        if data_section.size() > 0 {
+            let mut rodata_section = SectionType::Data(data_section);
+            rodata_section.set_offset(current_offset);
+            current_offset += rodata_section.size();
+            section_names.push(rodata_section.name().to_string());
+            sections.push(rodata_section);
+        }
+        
+        // Round the running offset up to the next multiple of 8 before the
+        // following sections are placed.
+        let padding = (8 - (current_offset % 8)) % 8;
+        current_offset += padding;
+
+        if !is_static {
+            let mut dynamic_section = SectionType::Dynamic(DynamicSection::new());
+            dynamic_section.set_offset(current_offset);
+            current_offset += dynamic_section.size();
+            section_names.push(dynamic_section.name().to_string());
+
+            let mut symbol_names = Vec::new();
+            let mut dyn_syms = Vec::new();
+            // Name offsets start at 1 because `.dynstr` begins with a single
+            // null byte (see `DynStrSection::bytecode`).
+            let mut dyn_str_offset = 1;
+            
+            // Symbol 0 is the all-zero placeholder entry.
+            dyn_syms.push(DynamicSymbol::new(0, 0, 0, 0, 0, 0));
+
+            // all symbols handled right now are all global symbols
+            // TODO: handle local symbols
+
+            // NOTE(review): the value paired with each entry-point name is
+            // ignored and every entry symbol is given `e_entry` (the start of
+            // `.text`) as its fifth argument — confirm this is intended for
+            // entry labels that are not at offset 0.
+            for (name, _) in dynamic_symbols.get_entry_points() {
+                symbol_names.push(name.clone());
+                dyn_syms.push(DynamicSymbol::new(dyn_str_offset as u32, 0x10, 0, 1, elf_header.e_entry, 0));
+                dyn_str_offset += name.len() + 1;
+            }
+
+            for (name, _) in dynamic_symbols.get_call_targets() {
+                symbol_names.push(name.clone());                 
+                dyn_syms.push(DynamicSymbol::new(dyn_str_offset as u32, 0x10, 0, 0, 0, 0));
+                dyn_str_offset += name.len() + 1;
+            }
+
+            // `rel_count` counts only the `RSbf64Relative` entries; it is
+            // handed to the dynamic section via `set_rel_count` below.
+            let mut rel_count = 0;
+            let mut rel_dyns = Vec::new();
+            for (offset, rel_type, name) in relocation_data.get_rel_dyns() {
+                if rel_type == RelocationType::RSbfSyscall {
+                    // `index + 1` skips the placeholder symbol at position 0
+                    // of `dyn_syms`.
+                    if let Some(index) = symbol_names.iter().position(|n| *n == name) {
+                        rel_dyns.push(RelDyn::new(offset + elf_header.e_entry, rel_type as u64, index as u64 + 1));
+                    } else {
+                        panic!("Symbol {} not found in symbol_names", name);
+                    }
+                } else if rel_type == RelocationType::RSbf64Relative {
+                    rel_count += 1;
+                    rel_dyns.push(RelDyn::new(offset + elf_header.e_entry, rel_type as u64, 0));
+                }
+                // Any other relocation type is silently dropped here.
+            }
+
+            let mut dynsym_section = DynSymSection::new(dyn_syms);
+            dynsym_section.set_offset(current_offset);
+            current_offset += dynsym_section.size();
+            section_names.push(dynsym_section.name().to_string());
+
+            let mut dynstr_section = DynStrSection::new(symbol_names);
+            dynstr_section.set_offset(current_offset);
+            current_offset += dynstr_section.size();
+            section_names.push(dynstr_section.name().to_string());
+
+            let mut rel_dyn_section = RelDynSection::new(rel_dyns);
+            rel_dyn_section.set_offset(current_offset);
+            current_offset += rel_dyn_section.size();
+            section_names.push(rel_dyn_section.name().to_string());
+
+            // Now that the dependent sections have offsets and sizes, record
+            // them in the dynamic section so its entries can point at them.
+            if let SectionType::Dynamic(ref mut dynamic_section) = dynamic_section {
+                dynamic_section.set_rel_offset(rel_dyn_section.offset());
+                dynamic_section.set_rel_size(rel_dyn_section.size());
+                dynamic_section.set_rel_count(rel_count);
+                dynamic_section.set_dynsym_offset(dynsym_section.offset());
+                dynamic_section.set_dynstr_offset(dynstr_section.offset());
+                dynamic_section.set_dynstr_size(dynstr_section.size());
+            }
+
+            let mut shstrtab_section = ShStrTabSection::new(section_names);
+            shstrtab_section.set_offset(current_offset);
+            current_offset += shstrtab_section.size();
+
+            // Non-executable load header spanning `.dynsym`, `.dynstr` and
+            // `.rel.dyn`, which were placed back to back above.
+            let ro_header = ProgramHeader::new_load(
+                dynsym_section.offset(),
+                dynsym_section.size() + dynstr_section.size() + rel_dyn_section.size(),
+                false
+            );
+
+            let dynamic_header = ProgramHeader::new_dynamic(
+                dynamic_section.offset(),
+                dynamic_section.size(),
+            );
+
+            // Push order fixes the section indices; `.shstrtab` must stay last
+            // because `e_shstrndx` is computed as `sections.len() - 1`.
+            sections.push(dynamic_section);
+            sections.push(SectionType::DynSym(dynsym_section));
+            sections.push(SectionType::DynStr(dynstr_section));
+            sections.push(SectionType::RelDyn(rel_dyn_section));
+            sections.push(SectionType::ShStrTab(shstrtab_section));
+
+            program_headers.push(ro_header);
+            program_headers.push(dynamic_header);
+        } else {
+            // Create a vector of section names
+            // (this shadows the outer `section_names`; the null section
+            // contributes an empty name, which `.shstrtab` skips).
+            let mut section_names = Vec::new();
+            for section in &sections {
+                section_names.push(section.name().to_string());
+            }
+
+            let mut shstrtab_section = ShStrTabSection::new(section_names);
+            shstrtab_section.set_offset(current_offset);
+            current_offset += shstrtab_section.size();
+            sections.push(SectionType::ShStrTab(shstrtab_section));
+        }
+
+        // Update section header offset in ELF header
+        // `ShStrTabSection::size` is unpadded while its bytecode is padded to
+        // a multiple of 8, so the same rounding is applied to the offset here.
+        let padding = (8 - (current_offset % 8)) % 8;
+        elf_header.e_shoff = current_offset + padding;
+        elf_header.e_shnum = sections.len() as u16;
+        elf_header.e_shstrndx = sections.len() as u16 - 1;
+        
+        Self {
+            is_static,
+            elf_header,
+            program_headers,
+            sections,
+        }
+    }
+    
+    /// Serializes the whole program into a single byte buffer.
+    ///
+    /// The pieces are concatenated in this order: the ELF header, every
+    /// program header, every section body and finally every section header.
+    pub fn emit_bytecode(&self) -> Vec<u8> {
+        let mut image = Vec::new();
+
+        // File header first, then the program header table.
+        image.extend(self.elf_header.bytecode());
+        for header_bytes in self.program_headers.iter().map(|ph| ph.bytecode()) {
+            image.extend(header_bytes);
+        }
+
+        // All section bodies, in section order.
+        for body in self.sections.iter().map(SectionType::bytecode) {
+            image.extend(body);
+        }
+
+        // The section header table goes last, in the same order.
+        for header in self.sections.iter().map(SectionType::section_header_bytecode) {
+            image.extend(header);
+        }
+
+        image
+    }
+
+    /// Reports whether the program contains a section named `.rodata`.
+    pub fn has_rodata(&self) -> bool {
+        self.sections
+            .iter()
+            .map(SectionType::name)
+            .any(|section_name| section_name == ".rodata")
+    }
+
+    /// Returns the string entries of the read-only data section as
+    /// `(label, offset, string literal)` tuples (see `DataSection::rodata`).
+    ///
+    /// A program without a `.rodata` section yields an empty vector. The
+    /// section is omitted whenever the data is empty, so this used to panic
+    /// for such programs unless the caller checked `has_rodata` first.
+    pub fn parse_rodata(&self) -> Vec<(String, usize, String)> {
+        self.sections
+            .iter()
+            // Only `SectionType::Data` carries read-only data, and it is the
+            // only variant whose name is ".rodata".
+            .find_map(|section| match section {
+                SectionType::Data(data_section) => Some(data_section.rodata()),
+                _ => None,
+            })
+            .unwrap_or_default()
+    }
+
+    /// Returns a copy of the `.text` section's offset-to-line map.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no section is named `.text`, or if that section is not a
+    /// code section.
+    pub fn get_line_map(&self) -> HashMap<u64, usize> {
+        let text = self
+            .sections
+            .iter()
+            .find(|section| section.name() == ".text")
+            .unwrap();
+        match text {
+            SectionType::Code(code_section) => code_section.get_line_map().clone(),
+            _ => panic!("Code section not found"),
+        }
+    }
+
+    /// Returns a copy of the `.text` section's offset-to-debug-info map.
+    ///
+    /// # Panics
+    ///
+    /// Panics if no section is named `.text`, or if that section is not a
+    /// code section.
+    pub fn get_debug_map(&self) -> HashMap<u64, DebugInfo> {
+        let text = self
+            .sections
+            .iter()
+            .find(|section| section.name() == ".text")
+            .unwrap();
+        match text {
+            SectionType::Code(code_section) => code_section.get_debug_map().clone(),
+            _ => panic!("Code section not found"),
+        }
+    }
+    
+    /// Writes the emitted image to `<stem>.so`, where `<stem>` is the file
+    /// stem of `input_path` (or `output` when no UTF-8 stem is available).
+    ///
+    /// The output name has no directory component, so the file is created
+    /// relative to the current working directory.
+    ///
+    /// # Errors
+    ///
+    /// Returns any I/O error raised while creating or writing the file.
+    pub fn save_to_file(&self, input_path: &str) -> std::io::Result<()> {
+        // Derive the output name from the input's file stem.
+        let stem = Path::new(input_path)
+            .file_stem()
+            .and_then(|os_stem| os_stem.to_str())
+            .unwrap_or("output");
+        let destination = format!("{}.so", stem);
+
+        // Build the image before touching the file system.
+        let image = self.emit_bytecode();
+
+        File::create(destination)?.write_all(&image)
+    }
+}

+ 784 - 0
crates/assembler/src/section.rs

@@ -0,0 +1,784 @@
+use crate::astnode::ASTNode;
+use crate::header::SectionHeader;
+use crate::dynsym::DynamicSymbol;
+use crate::dynsym::RelDyn;
+use crate::lexer::Token;
+use crate::debuginfo::DebugInfo;
+use std::collections::HashMap;
+use crate::astnode::ROData;
+
+// Base Section trait
+/// Behaviour shared by every section the assembler can emit.
+///
+/// Every method has a default, so an implementor only overrides what differs.
+pub trait Section {
+    /// Section name; `".unknown"` unless overridden.
+    fn name(&self) -> &str {
+        ".unknown"
+    }
+
+    /// Raw bytes of the section body; empty unless overridden.
+    fn bytecode(&self) -> Vec<u8> {
+        vec![]
+    }
+
+    /// Size of the section in bytes; by default the length of `bytecode()`.
+    fn size(&self) -> u64 {
+        let body = self.bytecode();
+        body.len() as u64
+    }
+}
+
+// Code Section implementation
+/// The `.text` section: the parsed nodes plus lookup tables derived from them.
+#[derive(Debug)]
+pub struct CodeSection {
+    /// Always `".text"`.
+    name: String,
+    /// Nodes whose bytecode makes up the section body.
+    nodes: Vec<ASTNode>,
+    /// Section size in bytes, as supplied by the caller of `new`.
+    size: u64,
+    /// File offset of the section; 0 until `set_offset` is called.
+    offset: u64,
+    /// Offset -> source line, derived from `debug_map`.
+    line_map: HashMap<u64, usize>,
+    /// Offset -> debug info, merged from every node's debug map.
+    debug_map: HashMap<u64, DebugInfo>,
+}
+
+impl CodeSection {
+    /// Builds the section from its nodes and a precomputed byte size.
+    ///
+    /// The debug map is the union of the maps reported by each node's
+    /// `bytecode_with_debug_map`. The line map is derived from it so that
+    /// `get_line_map` agrees with `get_line_number`; it used to be left empty.
+    pub fn new(nodes: Vec<ASTNode>, size: u64) -> Self {
+        let mut debug_map: HashMap<u64, DebugInfo> = HashMap::new();
+        for node in &nodes {
+            if let Some((_, node_debug_map)) = node.bytecode_with_debug_map() {
+                debug_map.extend(node_debug_map);
+            }
+        }
+
+        // Same keys as `debug_map`, keeping only the line number per entry.
+        let line_map = debug_map
+            .iter()
+            .map(|(offset, debug_info)| (*offset, debug_info.line_number))
+            .collect();
+
+        Self {
+            name: String::from(".text"),
+            nodes,
+            size,
+            offset: 0,
+            line_map,
+            debug_map,
+        }
+    }
+
+    /// Looks up the source line recorded for `offset`, if any.
+    pub fn get_line_number(&self, offset: u64) -> Option<usize> {
+        self.debug_map.get(&offset).map(|debug_info| debug_info.line_number)
+    }
+
+    /// Nodes that make up the section.
+    pub fn get_nodes(&self) -> &Vec<ASTNode> {
+        &self.nodes
+    }
+
+    /// Section size in bytes, as given to `new`.
+    pub fn get_size(&self) -> u64 {
+        self.size
+    }
+
+    /// Offset -> source line table.
+    pub fn get_line_map(&self) -> &HashMap<u64, usize> {
+        &self.line_map
+    }
+
+    /// Offset -> debug info table.
+    pub fn get_debug_map(&self) -> &HashMap<u64, DebugInfo> {
+        &self.debug_map
+    }
+
+    /// Records the file offset of the section.
+    pub fn set_offset(&mut self, offset: u64) {
+        self.offset = offset;
+    }
+
+    /// File offset of the section.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+
+    /// Serialized section header for `.text` (allocated, executable).
+    ///
+    /// The section offset is passed for both the fourth and fifth arguments.
+    /// NOTE(review): the leading `1` is presumably the name offset of ".text"
+    /// inside `.shstrtab` (first name after the leading null byte) — confirm
+    /// against `SectionHeader::new`.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        let flags = SectionHeader::SHF_ALLOC | SectionHeader::SHF_EXECINSTR;
+        SectionHeader::new(
+            1,
+            SectionHeader::SHT_PROGBITS,
+            flags,
+            self.offset,
+            self.offset,
+            self.size,
+            0,
+            0,
+            4,
+            0
+        ).bytecode()
+    }
+}
+
+impl Section for CodeSection {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Concatenates the bytecode of every node that produces any.
+    fn bytecode(&self) -> Vec<u8> {
+        let mut bytecode = Vec::new();
+        for node in &self.nodes {
+            if let Some(node_bytes) = node.bytecode() {
+                bytecode.extend(node_bytes);
+            }
+        }
+        bytecode
+    }
+
+    /// Returns the stored size instead of re-encoding the nodes.
+    fn size(&self) -> u64 {
+        self.size
+    }
+}
+
+// Data Section implementation
+/// The `.rodata` section: read-only data nodes and their placement.
+/// Line and debug maps are not tracked for this section.
+#[derive(Debug)]
+pub struct DataSection {
+    /// Always `".rodata"`.
+    name: String,
+    /// Nodes whose bytecode makes up the section body.
+    nodes: Vec<ASTNode>,
+    /// Section size in bytes, as supplied by the caller of `new`.
+    size: u64,
+    /// File offset of the section; 0 until `set_offset` is called.
+    offset: u64,
+}
+
+impl DataSection {
+    /// Builds the section from its nodes and a precomputed byte size.
+    pub fn new(nodes: Vec<ASTNode>, size: u64) -> Self {
+        let name = String::from(".rodata");
+        Self { name, nodes, size, offset: 0 }
+    }
+
+    /// Nodes that make up the section.
+    pub fn get_nodes(&self) -> &Vec<ASTNode> {
+        &self.nodes
+    }
+
+    /// Section size in bytes, as given to `new`.
+    pub fn get_size(&self) -> u64 {
+        self.size
+    }
+
+    /// Records the file offset of the section.
+    pub fn set_offset(&mut self, offset: u64) {
+        self.offset = offset;
+    }
+
+    /// File offset of the section.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+
+    /// Lists `(label, offset, string literal)` for every read-only data node
+    /// whose second argument is a string literal; other nodes are skipped.
+    pub fn rodata(&self) -> Vec<(String, usize, String)> {
+        self.nodes
+            .iter()
+            .filter_map(|node| match node {
+                ASTNode::ROData { rodata: ROData { name, args, .. }, offset } => {
+                    match args.get(1) {
+                        Some(Token::StringLiteral(text, _)) => {
+                            Some((name.clone(), *offset as usize, text.clone()))
+                        }
+                        _ => None,
+                    }
+                }
+                _ => None,
+            })
+            .collect()
+    }
+
+    /// Serialized section header for `.rodata` (allocated, not writable).
+    ///
+    /// NOTE(review): the leading `7` is presumably the name offset of
+    /// ".rodata" inside `.shstrtab`, which only holds when ".text" is the
+    /// first name in that table — confirm against `SectionHeader::new`.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        SectionHeader::new(
+            7,
+            SectionHeader::SHT_PROGBITS,
+            SectionHeader::SHF_ALLOC,
+            self.offset,
+            self.offset,
+            self.size,
+            0,
+            0,
+            1,
+            0,
+        )
+        .bytecode()
+    }
+}
+
+impl Section for DataSection {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Returns the stored size; it is not rounded up like `bytecode()` is.
+    fn size(&self) -> u64 {
+        self.size
+    }
+
+    /// Concatenates the node bytecode and zero-pads it to a multiple of 8.
+    fn bytecode(&self) -> Vec<u8> {
+        let mut bytes = Vec::new();
+        for node_bytes in self.nodes.iter().filter_map(|node| node.bytecode()) {
+            bytes.extend(node_bytes);
+        }
+
+        // Round the length up to the next multiple of 8 with zero bytes.
+        let padded_len = (bytes.len() + 7) & !7;
+        bytes.resize(padded_len, 0);
+        bytes
+    }
+}
+
+/// Placeholder section with an empty name and an all-zero section header.
+#[derive(Debug)]
+pub struct NullSection {
+    /// Always the empty string.
+    name: String,
+    /// Always 0; there is no setter.
+    offset: u64,
+}
+
+impl NullSection {
+    /// Creates the placeholder section.
+    pub fn new() -> Self {
+        Self { name: String::new(), offset: 0 }
+    }
+
+    /// Serialized section header: type `SHT_NULL`, every other argument zero.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        let header = SectionHeader::new(0, SectionHeader::SHT_NULL, 0, 0, 0, 0, 0, 0, 0, 0);
+        header.bytecode()
+    }
+
+    /// File offset of the section (always 0).
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+}
+
+// Every `Section` method keeps its default: no body bytes and size 0.
+impl Section for NullSection {}
+
+/// The `.shstrtab` section: the table of section names.
+#[derive(Debug)]
+pub struct ShStrTabSection {
+    /// Always `".shstrtab"`.
+    name: String,
+    /// Names stored in the table, ending with `".shstrtab"` itself.
+    section_names: Vec<String>,
+    /// File offset of the section; 0 until `set_offset` is called.
+    offset: u64,
+}
+
+impl ShStrTabSection {
+    /// Builds the table from the given names and appends the table's own name.
+    pub fn new(section_names: Vec<String>) -> Self {
+        let mut section_names = section_names;
+        section_names.push(".shstrtab".to_string());
+        Self {
+            name: String::from(".shstrtab"),
+            section_names,
+            offset: 0,
+        }
+    }
+
+    /// Records the file offset of the section.
+    pub fn set_offset(&mut self, offset: u64) {
+        self.offset = offset;
+    }
+
+    /// Serialized section header for `.shstrtab`.
+    ///
+    /// NOTE(review): the leading `49` is presumably the name offset of
+    /// ".shstrtab" inside this table, which only holds when all of ".text",
+    /// ".rodata", ".dynamic", ".dynsym", ".dynstr" and ".rel.dyn" precede it —
+    /// confirm for static programs and programs without read-only data.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        let table_size = self.size();
+        SectionHeader::new(
+            49,
+            SectionHeader::SHT_STRTAB,
+            0,
+            0,
+            self.offset,
+            table_size,
+            0,
+            0,
+            1,
+            0,
+        )
+        .bytecode()
+    }
+
+    /// File offset of the section.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+}
+
+impl Section for ShStrTabSection {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Encodes the table: a leading null byte, each non-empty name followed
+    /// by a null terminator, then zero padding up to a multiple of 8.
+    fn bytecode(&self) -> Vec<u8> {
+        let mut table = vec![0u8];
+
+        for section_name in self.section_names.iter().filter(|n| !n.is_empty()) {
+            table.extend_from_slice(section_name.as_bytes());
+            table.push(0);
+        }
+
+        let padded_len = (table.len() + 7) & !7;
+        table.resize(padded_len, 0);
+        table
+    }
+
+    /// Unpadded size of the table: the leading null byte plus every non-empty
+    /// name with its terminator. Unlike `bytecode()`, this is not rounded up.
+    fn size(&self) -> u64 {
+        let names_len: usize = self
+            .section_names
+            .iter()
+            .filter(|n| !n.is_empty())
+            .map(|n| n.len() + 1)
+            .sum();
+
+        (names_len + 1) as u64
+    }
+}
+
+/// The `.dynamic` section: a fixed list of eleven tag/value entries that
+/// point at the relocation, symbol and string tables.
+#[derive(Debug)]
+pub struct DynamicSection {
+    /// Always `".dynamic"`.
+    name: String,
+    /// File offset of this section; 0 until `set_offset` is called.
+    offset: u64,
+    /// Value written for the `DT_REL` entry.
+    rel_offset: u64,
+    /// Value written for the `DT_RELSZ` entry.
+    rel_size: u64,
+    /// Value written for the `DT_RELCOUNT` entry.
+    rel_count: u64,
+    /// Value written for the `DT_SYMTAB` entry.
+    dynsym_offset: u64,
+    /// Value written for the `DT_STRTAB` entry.
+    dynstr_offset: u64,
+    /// Value written for the `DT_STRSZ` entry.
+    dynstr_size: u64,
+}
+
+impl DynamicSection {
+    /// Creates the section with every offset, size and count set to zero.
+    pub fn new() -> Self {
+        Self {
+            name: ".dynamic".to_string(),
+            offset: 0,
+            rel_offset: 0,
+            rel_size: 0,
+            rel_count: 0,
+            dynsym_offset: 0,
+            dynstr_offset: 0,
+            dynstr_size: 0,
+        }
+    }
+
+    /// Records the file offset of this section.
+    pub fn set_offset(&mut self, offset: u64) {
+        self.offset = offset;
+    }
+
+    /// Sets the value of the `DT_REL` entry.
+    pub fn set_rel_offset(&mut self, offset: u64) {
+        self.rel_offset = offset;
+    }
+
+    /// Sets the value of the `DT_RELSZ` entry.
+    pub fn set_rel_size(&mut self, size: u64) {
+        self.rel_size = size;
+    }
+
+    /// Sets the value of the `DT_RELCOUNT` entry.
+    pub fn set_rel_count(&mut self, count: u64) {
+        self.rel_count = count;
+    }
+
+    /// Sets the value of the `DT_SYMTAB` entry.
+    pub fn set_dynsym_offset(&mut self, offset: u64) {
+        self.dynsym_offset = offset;
+    }
+
+    /// Sets the value of the `DT_STRTAB` entry.
+    pub fn set_dynstr_offset(&mut self, offset: u64) {
+        self.dynstr_offset = offset;
+    }
+
+    /// Sets the value of the `DT_STRSZ` entry.
+    pub fn set_dynstr_size(&mut self, size: u64) {
+        self.dynstr_size = size;
+    }
+
+    /// Serialized section header for `.dynamic` (allocated, writable).
+    ///
+    /// NOTE(review): the leading `15` and the `5` in the seventh position
+    /// look like a hard-coded name offset and a link to section index 5;
+    /// both only line up when a `.rodata` section is present — confirm
+    /// against `SectionHeader::new`.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        let flags = SectionHeader::SHF_ALLOC | SectionHeader::SHF_WRITE;
+        SectionHeader::new(
+            15,
+            SectionHeader::SHT_DYNAMIC,
+            flags,
+            self.offset,
+            self.offset,
+            self.size(),
+            5,
+            0,
+            8,
+            16,
+        )
+        .bytecode()
+    }
+
+    /// File offset of this section.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+}
+
+impl Section for DynamicSection {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Encodes the eleven entries, each as a little-endian `u64` tag
+    /// followed by a little-endian `u64` value.
+    fn bytecode(&self) -> Vec<u8> {
+        let entries: [(u64, u64); 11] = [
+            (0x1e, 0x04),               // DT_FLAGS (DF_TEXTREL)
+            (0x11, self.rel_offset),    // DT_REL
+            (0x12, self.rel_size),      // DT_RELSZ
+            (0x13, 0x10),               // DT_RELENT, constant: 16 bytes per entry
+            (0x6fffff_fa, self.rel_count), // DT_RELCOUNT: number of relative relocation entries
+            (0x06, self.dynsym_offset), // DT_SYMTAB
+            (0x0b, 0x18),               // DT_SYMENT, constant: 24 bytes per symbol
+            (0x05, self.dynstr_offset), // DT_STRTAB
+            (0x0a, self.dynstr_size),   // DT_STRSZ
+            (0x16, 0x00),               // DT_TEXTREL
+            (0x00, 0x00),               // DT_NULL
+        ];
+
+        let mut bytes = Vec::with_capacity(entries.len() * 16);
+        for (tag, value) in entries.iter() {
+            bytes.extend_from_slice(&tag.to_le_bytes());
+            bytes.extend_from_slice(&value.to_le_bytes());
+        }
+        bytes
+    }
+
+    /// Fixed size: eleven entries of 16 bytes each.
+    fn size(&self) -> u64 {
+        11 * 16
+    }
+}
+
+/// The `.dynstr` section: the string table of dynamic symbol names.
+#[derive(Debug)]
+pub struct DynStrSection {
+    /// Always `".dynstr"`.
+    name: String,
+    /// Symbol names, stored in this order after the leading null byte.
+    symbol_names: Vec<String>,
+    /// File offset of the section; 0 until `set_offset` is called.
+    offset: u64,
+}
+
+impl DynStrSection {
+    /// Builds the string table from the given symbol names.
+    pub fn new(symbol_names: Vec<String>) -> Self {
+        Self {
+            name: ".dynstr".to_string(),
+            symbol_names,
+            offset: 0,
+        }
+    }
+
+    /// Records the file offset of the section.
+    pub fn set_offset(&mut self, offset: u64) {
+        self.offset = offset;
+    }
+
+    /// Serialized section header for `.dynstr` (allocated string table).
+    ///
+    /// NOTE(review): the leading `32` is presumably a hard-coded name offset
+    /// into `.shstrtab`; it only lines up when a `.rodata` section is
+    /// present — confirm against `SectionHeader::new`.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        let table_size = self.size();
+        SectionHeader::new(
+            32,
+            SectionHeader::SHT_STRTAB,
+            SectionHeader::SHF_ALLOC,
+            self.offset,
+            self.offset,
+            table_size,
+            0,
+            0,
+            1,
+            0,
+        )
+        .bytecode()
+    }
+
+    /// File offset of the section.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+}
+
+impl Section for DynStrSection {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Encodes the table: a leading null byte, each name followed by a null
+    /// terminator, then zero padding up to a multiple of 8.
+    fn bytecode(&self) -> Vec<u8> {
+        let mut table = vec![0u8];
+        for symbol_name in &self.symbol_names {
+            table.extend_from_slice(symbol_name.as_bytes());
+            table.push(0);
+        }
+
+        let padded_len = (table.len() + 7) & !7;
+        table.resize(padded_len, 0);
+        table
+    }
+
+    /// Padded size of the table; matches the length of `bytecode()`.
+    fn size(&self) -> u64 {
+        // Leading null byte plus every name with its terminator.
+        let raw_len = 1 + self
+            .symbol_names
+            .iter()
+            .map(|symbol_name| symbol_name.len() + 1)
+            .sum::<usize>();
+
+        // Round up to the next multiple of 8.
+        ((raw_len + 7) & !7) as u64
+    }
+}
+
+/// The `.dynsym` section: the table of dynamic symbols.
+#[derive(Debug)]
+pub struct DynSymSection {
+    /// Always `".dynsym"`.
+    name: String,
+    /// File offset of the section; 0 until `set_offset` is called.
+    offset: u64,
+    /// Symbols in table order.
+    symbols: Vec<DynamicSymbol>,
+}
+
+impl DynSymSection {
+    /// Builds the table from the given symbols.
+    pub fn new(symbols: Vec<DynamicSymbol>) -> Self {
+        Self {
+            name: ".dynsym".to_string(),
+            offset: 0,
+            symbols,
+        }
+    }
+
+    /// Records the file offset of the section.
+    pub fn set_offset(&mut self, offset: u64) {
+        self.offset = offset;
+    }
+
+    /// Serialized section header for `.dynsym` (allocated, 24-byte entries).
+    ///
+    /// NOTE(review): the leading `24` and the `5` in the seventh position
+    /// look like a hard-coded name offset and a link to section index 5;
+    /// both only line up when a `.rodata` section is present — confirm
+    /// against `SectionHeader::new`.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        let table_size = self.size();
+        SectionHeader::new(
+            24,
+            SectionHeader::SHT_DYNSYM,
+            SectionHeader::SHF_ALLOC,
+            self.offset,
+            self.offset,
+            table_size,
+            5,
+            1,
+            8,
+            24,
+        )
+        .bytecode()
+    }
+
+    /// File offset of the section.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+}
+
+impl Section for DynSymSection {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Table size: 24 bytes per symbol.
+    fn size(&self) -> u64 {
+        let symbol_count = self.symbols.len() as u64;
+        symbol_count * 24
+    }
+
+    /// Concatenates the encoding of every symbol, in table order.
+    fn bytecode(&self) -> Vec<u8> {
+        let mut table = Vec::new();
+        for encoded in self.symbols.iter().map(|symbol| symbol.bytecode()) {
+            table.extend(encoded);
+        }
+        table
+    }
+}
+
+/// The `.rel.dyn` section: the table of dynamic relocation entries.
+#[derive(Debug)]
+pub struct RelDynSection {
+    /// Always `".rel.dyn"`.
+    name: String,
+    /// File offset of the section; 0 until `set_offset` is called.
+    offset: u64,
+    /// Relocation entries in table order.
+    entries: Vec<RelDyn>,
+}
+
+impl RelDynSection {
+    /// Builds the table from the given relocation entries.
+    pub fn new(entries: Vec<RelDyn>) -> Self {
+        Self {
+            name: ".rel.dyn".to_string(),
+            offset: 0,
+            entries,
+        }
+    }
+
+    /// Records the file offset of the section.
+    pub fn set_offset(&mut self, offset: u64) {
+        self.offset = offset;
+    }
+
+    /// Table size: 16 bytes per relocation entry.
+    pub fn size(&self) -> u64 {
+        let entry_count = self.entries.len();
+        (entry_count * 16) as u64
+    }
+
+    /// Serialized section header for `.rel.dyn` (allocated, 16-byte entries).
+    ///
+    /// NOTE(review): the leading `40` and the `4` in the seventh position
+    /// look like a hard-coded name offset and a link to section index 4;
+    /// both only line up when a `.rodata` section is present — confirm
+    /// against `SectionHeader::new`.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        let table_size = self.size();
+        SectionHeader::new(
+            40,
+            SectionHeader::SHT_REL,
+            SectionHeader::SHF_ALLOC,
+            self.offset,
+            self.offset,
+            table_size,
+            4,
+            0,
+            8,
+            16,
+        )
+        .bytecode()
+    }
+
+    /// File offset of the section.
+    pub fn offset(&self) -> u64 {
+        self.offset
+    }
+}
+
+impl Section for RelDynSection {
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Delegates to the inherent `RelDynSection::size`, which takes
+    /// precedence over this trait method during method resolution.
+    fn size(&self) -> u64 {
+        RelDynSection::size(self)
+    }
+
+    /// Concatenates the encoding of every relocation entry, in table order.
+    fn bytecode(&self) -> Vec<u8> {
+        let mut table = Vec::new();
+        for encoded in self.entries.iter().map(|entry| entry.bytecode()) {
+            table.extend(encoded);
+        }
+        table
+    }
+}
+
+/// Closed set of section kinds; each method forwards to the wrapped section.
+#[derive(Debug)]
+pub enum SectionType {
+    Code(CodeSection),
+    Data(DataSection),
+    ShStrTab(ShStrTabSection),
+    Dynamic(DynamicSection),
+    DynStr(DynStrSection),
+    DynSym(DynSymSection),
+    Default(NullSection),
+    RelDyn(RelDynSection),
+}
+
+impl SectionType {
+    /// Name stored in the wrapped section's `name` field. This reads the
+    /// field directly, so the null section reports an empty name.
+    pub fn name(&self) -> &str {
+        match self {
+            Self::Code(s) => &s.name,
+            Self::Data(s) => &s.name,
+            Self::ShStrTab(s) => &s.name,
+            Self::Dynamic(s) => &s.name,
+            Self::DynStr(s) => &s.name,
+            Self::DynSym(s) => &s.name,
+            Self::Default(s) => &s.name,
+            Self::RelDyn(s) => &s.name,
+        }
+    }
+
+    /// Body bytes of the wrapped section.
+    pub fn bytecode(&self) -> Vec<u8> {
+        match self {
+            Self::Code(s) => s.bytecode(),
+            Self::Data(s) => s.bytecode(),
+            Self::ShStrTab(s) => s.bytecode(),
+            Self::Dynamic(s) => s.bytecode(),
+            Self::DynStr(s) => s.bytecode(),
+            Self::DynSym(s) => s.bytecode(),
+            Self::Default(s) => s.bytecode(),
+            Self::RelDyn(s) => s.bytecode(),
+        }
+    }
+
+    /// Size in bytes reported by the wrapped section.
+    pub fn size(&self) -> u64 {
+        match self {
+            Self::Code(s) => s.size(),
+            Self::Data(s) => s.size(),
+            Self::ShStrTab(s) => s.size(),
+            Self::Dynamic(s) => s.size(),
+            Self::DynStr(s) => s.size(),
+            Self::DynSym(s) => s.size(),
+            Self::Default(s) => s.size(),
+            Self::RelDyn(s) => s.size(),
+        }
+    }
+
+    /// Serialized section header of the wrapped section.
+    pub fn section_header_bytecode(&self) -> Vec<u8> {
+        match self {
+            Self::Code(s) => s.section_header_bytecode(),
+            Self::Data(s) => s.section_header_bytecode(),
+            Self::ShStrTab(s) => s.section_header_bytecode(),
+            Self::Dynamic(s) => s.section_header_bytecode(),
+            Self::DynStr(s) => s.section_header_bytecode(),
+            Self::DynSym(s) => s.section_header_bytecode(),
+            Self::Default(s) => s.section_header_bytecode(),
+            Self::RelDyn(s) => s.section_header_bytecode(),
+        }
+    }
+
+    /// Records the file offset on the wrapped section. The null section has
+    /// no setter, so the call is a no-op for it.
+    pub fn set_offset(&mut self, offset: u64) {
+        match self {
+            Self::Code(s) => s.set_offset(offset),
+            Self::Data(s) => s.set_offset(offset),
+            Self::ShStrTab(s) => s.set_offset(offset),
+            Self::Dynamic(s) => s.set_offset(offset),
+            Self::DynStr(s) => s.set_offset(offset),
+            Self::DynSym(s) => s.set_offset(offset),
+            Self::RelDyn(s) => s.set_offset(offset),
+            Self::Default(_) => {}
+        }
+    }
+
+    /// File offset of the wrapped section.
+    pub fn offset(&self) -> u64 {
+        match self {
+            Self::Code(s) => s.offset(),
+            Self::Data(s) => s.offset(),
+            Self::ShStrTab(s) => s.offset(),
+            Self::Dynamic(s) => s.offset(),
+            Self::DynStr(s) => s.offset(),
+            Self::DynSym(s) => s.offset(),
+            Self::Default(s) => s.offset(),
+            Self::RelDyn(s) => s.offset(),
+        }
+    }
+}
+
+

+ 165 - 0
crates/assembler/src/tokenizer.rs

@@ -0,0 +1,165 @@
+use crate::opcode::Opcode;
+use crate::utils::evaluate_constant_expression;
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum ImmediateValue { // numeric payload carried by `Token::ImmediateValue`
+    Int(i64), // what `tokenize` builds from a run of decimal digits
+    Addr(i64), // not constructed by `tokenize` in this file; presumably an address-valued immediate — confirm against the parser
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token { // lexical token; the trailing `usize` is the 1-based source line number assigned by `tokenize`
+    Directive(String, usize), // any `.name` not covered by the dedicated variants below; name stored without the dot
+    Global(usize), // `.global` or `.globl`
+    Extern(usize), // `.extern`
+    Equ(usize), // `.equ`
+    Rodata(usize), // `.rodata`
+    Label(String, usize), // `name:` (colon stripped), an identifier right after a `Directive`, or any identifier not otherwise recognized
+    Opcode(Opcode, usize), // identifier accepted by `Opcode::from_str`
+    Register(String, usize), // identifier made of `r` followed only by decimal digits, stored as written
+    ImmediateValue(ImmediateValue, usize), // run of decimal digits
+    StringLiteral(String, usize), // text between double quotes, quotes excluded
+    Expression(String, usize), // contents of `[...]` after `evaluate_constant_expression`
+    Comma(usize), // `,`
+
+    // for refactoring (`tokenize` in this file does not emit any of the variants below)
+    Identifier(String, usize),
+    Number(i64, usize),
+    Colon(usize),
+    LeftBracket(usize),
+    RightBracket(usize),
+    BinaryOp(String, usize),
+}
+
+/// Returns `line` truncated at the first `//` that is not inside a
+/// double-quoted string literal, or `line` unchanged when there is none.
+///
+/// String literals have no escape sequences in this tokenizer, so every `"`
+/// simply toggles the "inside a string" state.
+fn strip_line_comment(line: &str) -> &str {
+    let mut in_string = false;
+    let mut prev_was_slash = false;
+    for (idx, ch) in line.char_indices() {
+        match ch {
+            '"' => {
+                in_string = !in_string;
+                prev_was_slash = false;
+            }
+            '/' if !in_string => {
+                if prev_was_slash {
+                    // The previous char was '/', which is one byte in UTF-8,
+                    // so `idx - 1` is the start of the comment marker.
+                    return &line[..idx - 1];
+                }
+                prev_was_slash = true;
+            }
+            _ => prev_was_slash = false,
+        }
+    }
+    line
+}
+
+/// Splits assembly `source` into a flat list of [`Token`]s.
+///
+/// The input is processed line by line; every token records the 1-based
+/// number of the line it came from. `//` (outside string literals) and `#`
+/// start a comment that runs to the end of the line.
+///
+/// # Errors
+///
+/// Returns a message when a string literal or `[...]` expression is not
+/// closed on its line, when `evaluate_constant_expression` rejects the
+/// contents of a `[...]`, when a digit run does not fit in `i64`, or when a
+/// character that starts no known token is found.
+///
+/// NOTE(review): directives, numbers and identifiers are collected with
+/// `take_while`, which also consumes the first character that does not match.
+/// A delimiter directly following one of them (for example the comma in
+/// `r1,`) is therefore dropped without producing a token. This is left as is
+/// because the parser is not visible here and may depend on the current token
+/// stream; confirm before changing it.
+pub fn tokenize(source: &str) -> Result<Vec<Token>, String> {
+    let mut tokens = Vec::new();
+
+    for (index, raw_line) in source.lines().enumerate() {
+        let line_number = index + 1;
+
+        // Drop the trailing `//` comment first, then surrounding whitespace.
+        let line = strip_line_comment(raw_line).trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        let mut chars = line.chars().peekable();
+
+        // iterate over chars
+        while let Some(&c) = chars.peek() {
+            match c {
+                // handle directives
+                '.' => {
+                    chars.next();
+                    let directive: String = chars.by_ref()
+                        .take_while(|&c| c.is_alphanumeric() || c == '_')
+                        .collect();
+                    // TODO: text section doesn't always have a global directive
+                    if directive == "global" || directive == "globl" {
+                        tokens.push(Token::Global(line_number));
+                    } else if directive == "extern" {
+                        tokens.push(Token::Extern(line_number));
+                    } else if directive == "rodata" {
+                        tokens.push(Token::Rodata(line_number));
+                    } else if directive == "equ" {
+                        tokens.push(Token::Equ(line_number));
+                    } else {
+                        tokens.push(Token::Directive(directive, line_number));
+                    }
+                }
+                // handle string literals
+                '"' => {
+                    chars.next(); // consume opening quote
+                    let mut string_literal = String::new();
+                    let mut terminated = false;
+                    for ch in chars.by_ref() {
+                        if ch == '"' {
+                            terminated = true;
+                            break;
+                        }
+                        string_literal.push(ch);
+                    }
+                    // `str::lines` never yields '\n', so running out of
+                    // characters is the only way a literal can be unterminated.
+                    if !terminated {
+                        return Err(format!("Unterminated string literal on line {}", line_number));
+                    }
+                    tokens.push(Token::StringLiteral(string_literal, line_number));
+                }
+                // handle []
+                '[' => {
+                    chars.next(); // consume opening bracket
+                    let mut expression = String::new();
+                    let mut terminated = false;
+                    for ch in chars.by_ref() {
+                        if ch == ']' {
+                            terminated = true;
+                            break;
+                        }
+                        expression.push(ch);
+                    }
+                    if !terminated {
+                        return Err(format!("Unterminated square bracket on line {}", line_number));
+                    }
+                    let expression = evaluate_constant_expression(&expression)?;
+                    tokens.push(Token::Expression(expression, line_number));
+                }
+                // handle comma
+                ',' => {
+                    chars.next();
+                    tokens.push(Token::Comma(line_number));
+                }
+                // `#` comment: nothing else on this line is tokenized
+                '#' => break,
+                c if c.is_digit(10) => {
+                    let number: String = chars.by_ref()
+                        .take_while(|&c| c.is_digit(10)).collect();
+                    tokens.push(
+                        Token::ImmediateValue(
+                            ImmediateValue::Int(number.parse::<i64>().map_err(|_| "Invalid number")?),
+                            line_number
+                        )
+                    );
+                }
+
+                c if c.is_alphanumeric() || c == '_' => {
+                    let identifier: String = chars.by_ref()
+                        .take_while(|&c| c.is_alphanumeric() || c == '_' || c == ':')
+                        .collect();
+                    // `r` followed by at least one digit; a bare `r` is not a register
+                    let is_register = identifier
+                        .strip_prefix('r')
+                        .map_or(false, |digits| {
+                            !digits.is_empty() && digits.chars().all(|d| d.is_digit(10))
+                        });
+                    // A trailing ':' marks a label definition
+                    if identifier.ends_with(':') {
+                        let label_name = identifier.trim_end_matches(':').to_string();
+                        tokens.push(Token::Label(label_name, line_number));
+                    } else if let Some(Token::Directive(_, _)) = tokens.last() {
+                        tokens.push(Token::Label(identifier, line_number));
+                    } else if is_register {
+                        tokens.push(Token::Register(identifier, line_number));
+                    } else if let Ok(opcode) = Opcode::from_str(&identifier) {
+                        tokens.push(Token::Opcode(opcode, line_number));
+                    } else {
+                        tokens.push(Token::Label(identifier, line_number));
+                    }
+                }
+                c if c.is_whitespace() => {
+                    chars.next();
+                }
+                _ => return Err(format!("Unexpected charcter: '{}' on line {}", c, line_number)),
+            }
+        }
+    }
+
+    Ok(tokens)
+}

+ 66 - 0
crates/assembler/src/utils.rs

@@ -0,0 +1,66 @@
+use std::str::FromStr;
+
+/// Folds the integer terms of a `+`/`-` expression into a single constant.
+///
+/// `expr` may contain alphanumeric/`_` words, `+`, `-` and spaces. Spaces are
+/// discarded (they do not separate words). Every word that parses as `i32` is
+/// folded into one running constant. A leading word that is not an integer
+/// (e.g. a register or symbol name) is kept verbatim. The result is the kept
+/// word(s) followed by the constant when it is non-zero: `"r1+8+4"` becomes
+/// `"r1+12"`.
+///
+/// NOTE(review): the parts are joined with `"+"` when the constant is
+/// positive and `"-"` otherwise, and a negative constant carries its own
+/// sign, so `"r10-8"` yields `"r10--8"`. This output format is preserved
+/// because its consumer is not visible here; confirm before changing it.
+///
+/// # Errors
+///
+/// Returns a message when `expr` contains any other character, when an
+/// operator is last in the expression, when an operator is followed by
+/// something that is not an `i32`, or when the constant overflows `i32`.
+pub fn evaluate_constant_expression(expr: &str) -> Result<String, String> {
+    // Phase 1: split into words and single-character operators.
+    let mut tokens: Vec<String> = Vec::new();
+    let mut cur_token = String::new();
+    for c in expr.chars() {
+        if c.is_alphanumeric() || c == '_' {
+            cur_token.push(c);
+        } else if c == '+' || c == '-' {
+            if !cur_token.is_empty() {
+                tokens.push(std::mem::take(&mut cur_token));
+            }
+            tokens.push(c.to_string());
+        } else if c != ' ' {
+            return Err(format!("Invalid character in expression: {}", c));
+        }
+    }
+    if !cur_token.is_empty() {
+        tokens.push(cur_token);
+    }
+
+    // Phase 2: fold the integer terms. Each operator consumes exactly the
+    // one operand that follows it, so the next operator is never skipped.
+    let mut result_tokens = Vec::<String>::new();
+    let mut constant: i32 = 0;
+    let overflow = || format!("Constant overflow in expression: {}", expr);
+
+    let mut remaining = tokens.iter();
+    while let Some(token) = remaining.next() {
+        let text = token.as_str();
+        if text == "+" || text == "-" {
+            let operand = remaining
+                .next()
+                .ok_or_else(|| format!("Operator {} has no operand", text))?;
+            let num = i32::from_str(operand)
+                .map_err(|_| format!("Invalid token after {}: {}", text, operand))?;
+            let updated = if text == "+" {
+                constant.checked_add(num)
+            } else {
+                constant.checked_sub(num)
+            };
+            constant = updated.ok_or_else(overflow)?;
+        } else if let Ok(num) = i32::from_str(text) {
+            constant = constant.checked_add(num).ok_or_else(overflow)?;
+        } else {
+            result_tokens.push(text.to_string());
+        }
+    }
+
+    if constant != 0 {
+        result_tokens.push(constant.to_string());
+    }
+
+    Ok(result_tokens.join(if constant > 0 { "+" } else { "-" }))
+}

+ 1 - 36
src/commands/build.rs

@@ -1,7 +1,6 @@
 use anyhow::{Error, Result};
 use dirs::home_dir;
-use ed25519_dalek::SigningKey;
-use rand::rngs::OsRng;
+
 use std::fs;
 use std::fs::create_dir_all;
 use std::io;
@@ -119,40 +118,6 @@ pub fn build() -> Result<()> {
         Ok(())
     }
 
-    // Function to check if keypair file exists.
-    fn has_keypair_file(dir: &Path) -> bool {
-        if dir.exists() && dir.is_dir() {
-            match fs::read_dir(dir) {
-                Ok(entries) => entries.filter_map(Result::ok).any(|entry| {
-                    entry
-                        .path()
-                        .file_name()
-                        .and_then(|name| name.to_str())
-                        .map(|name| name.ends_with("-keypair.json"))
-                        .unwrap_or(false)
-                }),
-                Err(_) => false,
-            }
-        } else {
-            false
-        }
-    }
-
-    // Check if keypair file exists. If not, create one.
-    let deploy_path = Path::new(deploy);
-    if !has_keypair_file(deploy_path) {
-        let project_path = std::env::current_dir()?;
-        let project_name = project_path
-            .file_name()
-            .and_then(|n| n.to_str())
-            .unwrap_or("program");
-        let mut rng = OsRng;
-        fs::write(
-            deploy_path.join(format!("{}-keypair.json", project_name)),
-            serde_json::json!(SigningKey::generate(&mut rng).to_keypair_bytes()[..]).to_string(),
-        )?;
-    }
-
     // Processing directories
     let src_path = Path::new(src);
     for entry in src_path.read_dir()? {

+ 4 - 2
src/commands/common.rs

@@ -146,6 +146,8 @@ pub const TSCONFIG: &str = r#"
 }
 "#;
 
+// mollusk-svm 0.1.5
+// solana-sdk 2.2.1
 pub const CARGO_TOML: &str = r#"[package]
 name = "default_project_name"
 version = "0.1.0"
@@ -154,8 +156,8 @@ edition = "2021"
 [dependencies]
 
 [dev-dependencies]
-mollusk-svm = "0.1.5"
-solana-sdk = "2.2.1"
+mollusk-svm = "0.0.11"
+solana-sdk = "2.1.0"
 
 [features]
 test-sbf = []"#;

+ 39 - 0
src/commands/deploy.rs

@@ -1,10 +1,49 @@
 use std::io;
 use std::path::Path;
 use std::process::Command;
+use ed25519_dalek::SigningKey;
+use rand::rngs::OsRng;
 
+use std::fs;
 use anyhow::{Error, Result};
 
 fn deploy_program(program_name: &str, url: &str) -> Result<(), Error> {
+    let deploy = "deploy";
+
+    // Reports whether `dir` is a directory holding at least one entry whose
+    // file name ends in "-keypair.json". Unreadable directories and entries
+    // count as "no keypair".
+    fn has_keypair_file(dir: &Path) -> bool {
+        if !(dir.exists() && dir.is_dir()) {
+            return false;
+        }
+        let entries = match fs::read_dir(dir) {
+            Ok(entries) => entries,
+            Err(_) => return false,
+        };
+        for entry in entries.filter_map(Result::ok) {
+            let entry_path = entry.path();
+            let is_keypair = entry_path
+                .file_name()
+                .and_then(|name| name.to_str())
+                .map(|name| name.ends_with("-keypair.json"))
+                .unwrap_or(false);
+            if is_keypair {
+                return true;
+            }
+        }
+        false
+    }
+
+    // Check if keypair file exists. If not, create one.
+    let deploy_path = Path::new(deploy);
+    if !has_keypair_file(deploy_path) {
+        let project_path = std::env::current_dir()?;
+        let project_name = project_path
+            .file_name()
+            .and_then(|n| n.to_str())
+            .unwrap_or("program");
+        let mut rng = OsRng;
+        fs::write(
+            deploy_path.join(format!("{}-keypair.json", project_name)),
+            serde_json::json!(SigningKey::generate(&mut rng).to_keypair_bytes()[..]).to_string(),
+        )?;
+    }
+
     let program_id_file = format!("./deploy/{}-keypair.json", program_name);
     let program_file = format!("./deploy/{}.so", program_name);
 

+ 45 - 0
src/commands/light_build.rs

@@ -0,0 +1,45 @@
+use sbpf_assembler::assemble;
+
+use anyhow::{Error, Result};
+use std::path::Path;
+use std::time::Instant;
+use std::fs::create_dir_all;
+
+/// Assembles every program found under `src` using the built-in assembler.
+///
+/// The `deploy` directory is created first. Then, for each sub-directory
+/// `src/<name>` that contains a file `<name>.s`, `assemble` is called with
+/// that file's path and `"deploy"`, and the elapsed time is printed.
+/// Sub-directories without a matching `.s` file, and entries that are not
+/// directories, are ignored.
+///
+/// # Errors
+///
+/// Propagates failures from creating `deploy`, from reading `src`, and from
+/// `assemble`; the first failure stops the build.
+pub fn light_build() -> Result<()> {
+    // Set src/out directory
+    let src = "src";
+    let deploy = "deploy";
+
+    // The output directory has to exist before anything is assembled.
+    create_dir_all(deploy)?;
+
+    for entry in Path::new(src).read_dir()? {
+        let path = entry?.path();
+        if !path.is_dir() {
+            continue;
+        }
+
+        // Skip directories whose name is not valid UTF-8.
+        let subdir = match path.file_name().and_then(|name| name.to_str()) {
+            Some(name) => name,
+            None => continue,
+        };
+
+        let asm_file = format!("{}/{}/{}.s", src, subdir, subdir);
+        if !Path::new(&asm_file).exists() {
+            continue;
+        }
+
+        println!("⚡️ Light building \"{}\"", subdir);
+        let start = Instant::now();
+        assemble(asm_file.as_str(), deploy)?;
+        let elapsed_ms = start.elapsed().as_micros() as f64 / 1000.0;
+        println!(
+            "✅ \"{}\" built successfully in {}ms!",
+            subdir,
+            elapsed_ms
+        );
+    }
+
+    Ok(())
+}

+ 3 - 0
src/commands/mod.rs

@@ -4,6 +4,9 @@ pub use init::*;
 pub mod build;
 pub use build::*;
 
+pub mod light_build;
+pub use light_build::*;
+
 pub mod deploy;
 pub use deploy::*;
 

+ 5 - 1
src/main.rs

@@ -1,7 +1,7 @@
 pub mod commands;
 use anyhow::Error;
 use clap::{Args, Parser, Subcommand};
-use commands::{build, clean, deploy, init, test};
+use commands::{build, light_build, clean, deploy, init, test};
 
 #[derive(Parser)]
 #[command(version, about, long_about = None)]
@@ -17,6 +17,8 @@ enum Commands {
     Init(InitArgs),
     #[command(about = "Compile into a Solana program executable")]
     Build,
+    #[command(about = "Compile without any platform tools")]
+    LightBuild,
     #[command(about = "Build and deploy the program")]
     Deploy(DeployArgs),
     #[command(about = "Test deployed program")]
@@ -50,8 +52,10 @@ fn main() -> Result<(), Error> {
     match &cli.command {
         Commands::Init(args) => init(args.name.clone(), args.ts_tests),
         Commands::Build => build(),
+        Commands::LightBuild => light_build(),
         Commands::Deploy(args) => deploy(args.name.clone(), args.url.clone()),
         Commands::Test => test(),
+        // use arg to specify if use light build
         Commands::E2E(args) => {
             build()?;
             deploy(args.name.clone(), args.url.clone())?;