Переглянути джерело

Calculate whether string or bytes types are read-only

If a string or bytes type is readonly, it can be reduced to a slice. A
slice is a readonly pointer to the data and the length, so there is no
need for a vector. This may also avoid a memcpy.

The idea is to know if the data referred to by the reference type has been
modified since the variable was first defined.

	bytes x = hex"010203";

	x[1] = 1;

	// now x is not readonly; the data we are referring to has changed.

This needs an analysis pass since the variable x can be assigned to another
variable, then the original vector can be modified through that:

	bytes x = "abcd";

	bytes y = x;

	y[0] = 65;

In this case x must be a vector since it is modified through y.

Signed-off-by: Sean Young <sean@mess.org>
Sean Young 4 роки тому
батько
коміт
dddf1fb80d

+ 5 - 3
src/codegen/cfg.rs

@@ -8,6 +8,7 @@ use super::constant_folding;
 use super::expression::expression;
 use super::reaching_definitions;
 use super::statements::{statement, LoopScopes};
+use super::vector_to_slice;
 use crate::parser::pt;
 use crate::sema::ast::{
     CallTy, Contract, Expression, Function, Namespace, Parameter, StringLocation, Type,
@@ -57,8 +58,8 @@ pub enum Instr {
     /// In storage slot, set the value at the offset
     SetStorageBytes {
         value: Expression,
-        storage: Box<Expression>,
-        offset: Box<Expression>,
+        storage: Expression,
+        offset: Expression,
     },
     /// Push element on memory array
     PushMemory {
@@ -921,7 +922,7 @@ impl ControlFlowGraph {
                     .map(|(var_no, defs)| format!(
                         " {}:[{}]",
                         &self.vars[var_no].id.name,
-                        defs.iter()
+                        defs.keys()
                             .map(|d| format!("{}:{}", d.block_no, d.instr_no))
                             .collect::<Vec<String>>()
                             .join(", ")
@@ -1019,6 +1020,7 @@ pub fn generate_cfg(
 
     reaching_definitions::find(&mut cfg);
     constant_folding::constant_folding(&mut cfg, ns);
+    vector_to_slice::vector_to_slice(&mut cfg, ns);
 
     all_cfgs[cfg_no] = cfg;
 }

+ 6 - 3
src/codegen/constant_folding.rs

@@ -117,9 +117,9 @@ pub fn constant_folding(cfg: &mut ControlFlowGraph, ns: &mut Namespace) {
                     let (offset, _) = expression(offset, Some(&vars), &cur, cfg, ns);
 
                     cfg.blocks[block_no].instr[instr_no] = Instr::SetStorageBytes {
-                        storage: Box::new(storage),
+                        storage,
                         value,
-                        offset: Box::new(offset),
+                        offset,
                     };
                 }
                 Instr::PushMemory {
@@ -558,7 +558,7 @@ fn expression(
                         // There must be at least one definition, and all should evaluate to the same value
                         let mut v = None;
 
-                        for def in defs {
+                        for def in defs.keys() {
                             if let Some(expr) = get_definition(def, cfg) {
                                 let expr = expression(expr, None, pos, cfg, ns);
 
@@ -575,6 +575,9 @@ fn expression(
                                     v = None;
                                     break;
                                 }
+                            } else {
+                                v = None;
+                                break;
                             }
                         }
 

+ 2 - 2
src/codegen/expression.rs

@@ -986,8 +986,8 @@ pub fn assign_single(
                             vartab,
                             Instr::SetStorageBytes {
                                 value: Expression::Variable(left.loc(), ty.clone(), pos),
-                                storage: array,
-                                offset: index,
+                                storage: *array,
+                                offset: *index,
                             },
                         );
                     } else {

+ 2 - 0
src/codegen/mod.rs

@@ -5,6 +5,7 @@ mod external_functions;
 mod reaching_definitions;
 mod statements;
 mod storage;
+mod vector_to_slice;
 
 use self::cfg::{ControlFlowGraph, Instr, Vartable};
 use self::expression::expression;
@@ -90,6 +91,7 @@ fn storage_initializer(contract_no: usize, ns: &mut Namespace) -> ControlFlowGra
 
     reaching_definitions::find(&mut cfg);
     constant_folding::constant_folding(&mut cfg, ns);
+    vector_to_slice::vector_to_slice(&mut cfg, ns);
 
     cfg
 }

+ 80 - 8
src/codegen/reaching_definitions.rs

@@ -1,4 +1,5 @@
 use super::cfg::{BasicBlock, ControlFlowGraph, Instr};
+use crate::sema::ast::Expression;
 use std::collections::{HashMap, HashSet};
 use std::fmt;
 
@@ -11,6 +12,8 @@ pub struct Def {
 #[derive(Clone, Copy, PartialEq)]
 pub enum Transfer {
     Gen { def: Def, var_no: usize },
+    Mod { var_no: usize },
+    Copy { var_no: usize, src: usize },
     Kill { var_no: usize },
 }
 
@@ -20,6 +23,12 @@ impl fmt::Display for Transfer {
             Transfer::Gen { def, var_no } => {
                 write!(f, "Gen %{} = ({}, {})", var_no, def.block_no, def.instr_no)
             }
+            Transfer::Mod { var_no } => {
+                write!(f, "Mod %{}", var_no)
+            }
+            Transfer::Copy { var_no, src } => {
+                write!(f, "Copy %{} from %{}", var_no, src)
+            }
             Transfer::Kill { var_no } => {
                 write!(f, "Kill %{}", var_no)
             }
@@ -27,7 +36,7 @@ impl fmt::Display for Transfer {
     }
 }
 
-pub type VarDefs = HashMap<usize, HashSet<Def>>;
+pub type VarDefs = HashMap<usize, HashMap<Def, bool>>;
 
 /// Calculate all the reaching definitions for the contract. This is a flow
 /// analysis which is used for further optimizations
@@ -56,7 +65,13 @@ pub fn find(cfg: &mut ControlFlowGraph) {
                 // merge incoming set
                 for (var_no, defs) in &vars {
                     if let Some(entry) = cfg.blocks[edge].defs.get_mut(var_no) {
-                        entry.extend(defs);
+                        for (incoming_def, incoming_modified) in defs {
+                            if let Some(e) = entry.get_mut(incoming_def) {
+                                *e |= *incoming_modified;
+                            } else {
+                                entry.insert(*incoming_def, *incoming_modified);
+                            }
+                        }
                     } else {
                         cfg.blocks[edge].defs.insert(*var_no, defs.clone());
                     }
@@ -89,17 +104,50 @@ fn instr_transfers(block_no: usize, block: &BasicBlock) -> Vec<Vec<Transfer>> {
         };
 
         transfers.push(match instr {
+            Instr::Set {
+                res,
+                expr: Expression::Variable(_, _, src),
+                ..
+            } => {
+                vec![
+                    Transfer::Kill { var_no: *res },
+                    Transfer::Copy {
+                        var_no: *res,
+                        src: *src,
+                    },
+                ]
+            }
             Instr::Set { res, .. } => set_var(&[*res]),
             Instr::Call { res, .. } => set_var(res),
             Instr::AbiDecode { res, .. } => set_var(res),
-            Instr::PushMemory { res, .. }
-            | Instr::AbiEncodeVector { res, .. }
+            Instr::PushMemory { array, res, .. } => {
+                let mut v = set_var(&[*res]);
+                v.push(Transfer::Mod { var_no: *array });
+
+                v
+            }
+            Instr::PopMemory { array, .. } => {
+                vec![Transfer::Mod { var_no: *array }]
+            }
+            Instr::AbiEncodeVector { res, .. }
             | Instr::ExternalCall {
                 success: Some(res), ..
             }
             | Instr::Constructor {
                 success: None, res, ..
             } => set_var(&[*res]),
+            Instr::ClearStorage { storage: dest, .. }
+            | Instr::SetStorageBytes { storage: dest, .. }
+            | Instr::SetStorage { storage: dest, .. }
+            | Instr::Store { dest, .. } => {
+                let mut v = Vec::new();
+
+                if let Some(var_no) = array_var(dest) {
+                    v.push(Transfer::Mod { var_no });
+                }
+
+                v
+            }
             Instr::Constructor {
                 success: Some(success),
                 res,
@@ -112,18 +160,42 @@ fn instr_transfers(block_no: usize, block: &BasicBlock) -> Vec<Vec<Transfer>> {
     transfers
 }
 
-pub fn apply_transfers(transfers: &[Transfer], vars: &mut HashMap<usize, HashSet<Def>>) {
+fn array_var(expr: &Expression) -> Option<usize> {
+    match expr {
+        Expression::Variable(_, _, var_no) => Some(*var_no),
+        Expression::DynamicArraySubscript(_, _, expr, _)
+        | Expression::ArraySubscript(_, _, expr, _)
+        | Expression::StructMember(_, _, expr, _) => array_var(expr),
+        _ => None,
+    }
+}
+
+pub fn apply_transfers(transfers: &[Transfer], vars: &mut HashMap<usize, HashMap<Def, bool>>) {
     for transfer in transfers {
         match transfer {
             Transfer::Kill { var_no } => {
                 vars.remove(var_no);
             }
+            Transfer::Mod { var_no } => {
+                if let Some(entry) = vars.get_mut(var_no) {
+                    for e in entry.values_mut() {
+                        *e = true;
+                    }
+                }
+            }
+            Transfer::Copy { var_no, src } => {
+                if let Some(defs) = vars.get(src) {
+                    let defs = defs.clone();
+
+                    vars.insert(*var_no, defs);
+                }
+            }
             Transfer::Gen { var_no, def } => {
                 if let Some(entry) = vars.get_mut(var_no) {
-                    entry.insert(*def);
+                    entry.insert(*def, false);
                 } else {
-                    let mut v = HashSet::new();
-                    v.insert(*def);
+                    let mut v = HashMap::new();
+                    v.insert(*def, false);
                     vars.insert(*var_no, v);
                 }
             }

+ 168 - 0
src/codegen/vector_to_slice.rs

@@ -0,0 +1,168 @@
+use super::cfg::{BasicBlock, ControlFlowGraph, Instr};
+use super::reaching_definitions::{Def, Transfer};
+use crate::sema::ast::{Expression, Namespace, Type};
+use std::collections::{HashMap, HashSet};
+
+/// A vector is a modifiable struct with a length, size and data. A slice is a readonly
+/// pointer to some data, plus the length. By using a slice, often a memcpy can be avoided.
+///
+/// Codegen generates vectors. Here we walk the cfg to find all vectors which can be converted
+/// to slices. In addition, we add some notes to the namespace so the language server can display
+/// some information when hovering over a variable.
+pub fn vector_to_slice(cfg: &mut ControlFlowGraph, _ns: &mut Namespace) {
+    // first, we need to find all the defs which have modified their referent
+    // note that variables can alias each other
+    let mut writable = HashSet::new();
+
+    for block_no in 0..cfg.blocks.len() {
+        let mut vars = cfg.blocks[block_no].defs.clone();
+
+        find_writable_vectors(&cfg.blocks[block_no], &mut vars, &mut writable);
+    }
+
+    // Now we have a list of all vector defs that get written to (via variables)
+
+    // walk the cfg and expressions and update the type of vectors
+    for block_no in 0..cfg.blocks.len() {
+        update_vectors_to_slice(block_no, &writable, cfg);
+    }
+}
+
+fn find_writable_vectors(
+    block: &BasicBlock,
+    vars: &mut HashMap<usize, HashMap<Def, bool>>,
+    writable: &mut HashSet<Def>,
+) {
+    for instr_no in 0..block.instr.len() {
+        match &block.instr[instr_no] {
+            Instr::Set {
+                res,
+                expr: Expression::Variable(_, _, var_no),
+                ..
+            } => {
+                // is this aliasing a vector var
+                if let Some(defs) = vars.get(var_no) {
+                    let defs = defs.clone();
+
+                    apply_transfers(&block.transfers[instr_no], vars, writable);
+
+                    vars.insert(*res, defs);
+                } else {
+                    apply_transfers(&block.transfers[instr_no], vars, writable);
+                }
+            }
+            // Call and return do not take slices
+            Instr::Return { value: args } | Instr::Call { args, .. } => {
+                for arg in args {
+                    if let Expression::Variable(_, _, var_no) = arg {
+                        if let Some(entry) = vars.get_mut(var_no) {
+                            writable.extend(entry.keys());
+                        }
+                    }
+                }
+
+                apply_transfers(&block.transfers[instr_no], vars, writable);
+            }
+            Instr::PushMemory { value, .. } => {
+                if let Expression::Variable(_, _, var_no) = value.as_ref() {
+                    if let Some(entry) = vars.get_mut(var_no) {
+                        writable.extend(entry.keys());
+                    }
+                }
+
+                apply_transfers(&block.transfers[instr_no], vars, writable);
+            }
+            Instr::Store { pos, .. } => {
+                if let Some(entry) = vars.get_mut(pos) {
+                    writable.extend(entry.keys());
+                }
+
+                apply_transfers(&block.transfers[instr_no], vars, writable);
+            }
+            // These instructions are fine with vectors
+            Instr::Set { .. }
+            | Instr::Branch { .. }
+            | Instr::BranchCond { .. }
+            | Instr::PopMemory { .. }
+            | Instr::SetStorage { .. }
+            | Instr::ClearStorage { .. }
+            | Instr::SetStorageBytes { .. }
+            | Instr::SelfDestruct { .. }
+            | Instr::EmitEvent { .. }
+            | Instr::AbiDecode { .. }
+            | Instr::ExternalCall { .. }
+            | Instr::Constructor { .. }
+            | Instr::Unreachable
+            | Instr::Print { .. }
+            | Instr::AbiEncodeVector { .. }
+            | Instr::AssertFailure { .. } => {
+                apply_transfers(&block.transfers[instr_no], vars, writable);
+            }
+        }
+    }
+}
+
+fn apply_transfers(
+    transfers: &[Transfer],
+    vars: &mut HashMap<usize, HashMap<Def, bool>>,
+    writable: &mut HashSet<Def>,
+) {
+    for transfer in transfers {
+        match transfer {
+            Transfer::Kill { var_no } => {
+                vars.remove(var_no);
+            }
+            Transfer::Mod { var_no } => {
+                if let Some(entry) = vars.get_mut(var_no) {
+                    for e in entry.values_mut() {
+                        *e = true;
+                    }
+
+                    writable.extend(entry.keys());
+                }
+            }
+            Transfer::Copy { var_no, src } => {
+                if let Some(defs) = vars.get(src) {
+                    let defs = defs.clone();
+
+                    vars.insert(*var_no, defs);
+                }
+            }
+            Transfer::Gen { var_no, def } => {
+                if let Some(entry) = vars.get_mut(var_no) {
+                    entry.insert(*def, false);
+                } else {
+                    let mut v = HashMap::new();
+                    v.insert(*def, false);
+                    vars.insert(*var_no, v);
+                }
+            }
+        }
+    }
+}
+
+fn update_vectors_to_slice(block_no: usize, writable: &HashSet<Def>, cfg: &mut ControlFlowGraph) {
+    for instr_no in 0..cfg.blocks[block_no].instr.len() {
+        let cur = Def { block_no, instr_no };
+
+        if let Instr::Set {
+            loc,
+            res,
+            expr: Expression::AllocDynamicArray(_, _, len, Some(bs)),
+        } = &cfg.blocks[block_no].instr[instr_no]
+        {
+            if !writable.contains(&cur) {
+                cfg.blocks[block_no].instr[instr_no] = Instr::Set {
+                    loc: *loc,
+                    res: *res,
+                    expr: Expression::AllocDynamicArray(
+                        *loc,
+                        Type::Slice,
+                        len.clone(),
+                        Some(bs.clone()),
+                    ),
+                };
+            }
+        }
+    }
+}

+ 4 - 4
src/emit/ewasm.rs

@@ -844,10 +844,10 @@ impl<'a> TargetRuntime<'a> for EwasmTarget {
 
     fn set_storage_string(
         &self,
-        _contract: &Contract,
-        _function: FunctionValue,
-        _slot: PointerValue,
-        _dest: PointerValue,
+        _contract: &Contract<'a>,
+        _function: FunctionValue<'a>,
+        _slot: PointerValue<'a>,
+        _dest: BasicValueEnum<'a>,
     ) {
         unimplemented!();
     }

+ 6 - 37
src/emit/generic.rs

@@ -243,35 +243,11 @@ impl<'a> TargetRuntime<'a> for GenericTarget {
 
     fn set_storage_string(
         &self,
-        contract: &Contract,
-        _function: FunctionValue,
-        slot: PointerValue,
-        dest: PointerValue,
+        contract: &Contract<'a>,
+        _function: FunctionValue<'a>,
+        slot: PointerValue<'a>,
+        dest: BasicValueEnum<'a>,
     ) {
-        let len = unsafe {
-            contract.builder.build_gep(
-                dest,
-                &[
-                    contract.context.i32_type().const_zero(),
-                    contract.context.i32_type().const_zero(),
-                ],
-                "ptr.string.len",
-            )
-        };
-
-        let len = contract.builder.build_load(len, "string.len");
-
-        let data = unsafe {
-            contract.builder.build_gep(
-                dest,
-                &[
-                    contract.context.i32_type().const_zero(),
-                    contract.context.i32_type().const_int(2, false),
-                ],
-                "ptr.string.data",
-            )
-        };
-
         // TODO: check for non-zero
         contract.builder.build_call(
             contract.module.get_function("solang_storage_set").unwrap(),
@@ -284,15 +260,8 @@ impl<'a> TargetRuntime<'a> for GenericTarget {
                         "",
                     )
                     .into(),
-                contract
-                    .builder
-                    .build_pointer_cast(
-                        data,
-                        contract.context.i8_type().ptr_type(AddressSpace::Generic),
-                        "",
-                    )
-                    .into(),
-                len,
+                contract.vector_bytes(dest).into(),
+                contract.vector_len(dest).into(),
             ],
             "",
         );

+ 122 - 202
src/emit/mod.rs

@@ -128,10 +128,10 @@ pub trait TargetRuntime<'a> {
     // Bytes and string have special storage layout
     fn set_storage_string(
         &self,
-        contract: &Contract,
-        function: FunctionValue,
-        slot: PointerValue,
-        dest: PointerValue,
+        contract: &Contract<'a>,
+        function: FunctionValue<'a>,
+        slot: PointerValue<'a>,
+        dest: BasicValueEnum<'a>,
     );
     fn get_storage_string(
         &self,
@@ -859,7 +859,7 @@ pub trait TargetRuntime<'a> {
             ast::Type::String | ast::Type::DynamicBytes => {
                 contract.builder.build_store(slot_ptr, *slot);
 
-                self.set_storage_string(contract, function, slot_ptr, dest.into_pointer_value());
+                self.set_storage_string(contract, function, slot_ptr, dest);
             }
             ast::Type::ExternalFunction { .. } => {
                 contract.builder.build_store(slot_ptr, *slot);
@@ -1898,23 +1898,9 @@ pub trait TargetRuntime<'a> {
                     .into()
             }
             Expression::BytesCast(_, ast::Type::DynamicBytes, ast::Type::Bytes(n), e) => {
-                let array = self
-                    .expression(contract, e, vartab, function)
-                    .into_pointer_value();
-                let len_ptr = unsafe {
-                    contract.builder.build_gep(
-                        array,
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_zero(),
-                        ],
-                        "array_len",
-                    )
-                };
-                let len = contract
-                    .builder
-                    .build_load(len_ptr, "array_len")
-                    .into_int_value();
+                let array = self.expression(contract, e, vartab, function);
+
+                let len = contract.vector_len(array);
 
                 // Check if equal to n
                 let is_equal_to_n = contract.builder.build_int_compare(
@@ -1941,24 +1927,9 @@ pub trait TargetRuntime<'a> {
                 );
 
                 contract.builder.position_at_end(cast);
-                let bytes_ptr = unsafe {
-                    contract.builder.build_gep(
-                        array,
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_int(2, false),
-                        ],
-                        "data",
-                    )
-                };
+                let bytes_ptr = contract.vector_bytes(array);
 
                 // Switch byte order
-                let bytes_ptr = contract.builder.build_pointer_cast(
-                    bytes_ptr,
-                    contract.context.i8_type().ptr_type(AddressSpace::Generic),
-                    "bytes_ptr",
-                );
-
                 let ty = contract.context.custom_width_int_type(*n as u32 * 8);
                 let le_bytes_ptr = contract.builder.build_alloca(ty, "le_bytes");
 
@@ -2132,9 +2103,7 @@ pub trait TargetRuntime<'a> {
                     .into()
             }
             Expression::DynamicArraySubscript(_, elem_ty, a, i) => {
-                let array = self
-                    .expression(contract, a, vartab, function)
-                    .into_pointer_value();
+                let array = self.expression(contract, a, vartab, function);
 
                 let ty = contract.llvm_var(elem_ty);
 
@@ -2163,12 +2132,8 @@ pub trait TargetRuntime<'a> {
 
                 let elem = unsafe {
                     contract.builder.build_gep(
-                        array,
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_int(2, false),
-                            index,
-                        ],
+                        contract.vector_bytes(array),
+                        &[index],
                         "index_access",
                     )
                 };
@@ -2314,27 +2279,46 @@ pub trait TargetRuntime<'a> {
                 array.into()
             }
             Expression::AllocDynamicArray(_, ty, size, init) => {
-                let elem = match ty {
-                    ast::Type::String | ast::Type::DynamicBytes => ast::Type::Bytes(1),
-                    _ => ty.array_elem(),
-                };
+                if *ty == ast::Type::Slice {
+                    let init = init.as_ref().unwrap();
 
-                let size = self
-                    .expression(contract, size, vartab, function)
-                    .into_int_value();
+                    let data = contract.emit_global_string("const_string", init, true);
 
-                let elem_size = contract
-                    .llvm_type(&elem)
-                    .size_of()
-                    .unwrap()
-                    .const_cast(contract.context.i32_type(), false);
+                    contract
+                        .llvm_type(ty)
+                        .into_struct_type()
+                        .const_named_struct(&[
+                            data.into(),
+                            contract
+                                .context
+                                .i32_type()
+                                .const_int(init.len() as u64, false)
+                                .into(),
+                        ])
+                        .into()
+                } else {
+                    let elem = match ty {
+                        ast::Type::Slice | ast::Type::String | ast::Type::DynamicBytes => {
+                            ast::Type::Bytes(1)
+                        }
+                        _ => ty.array_elem(),
+                    };
+
+                    let size = self
+                        .expression(contract, size, vartab, function)
+                        .into_int_value();
 
-                contract.vector_new(size, elem_size, init.as_ref()).into()
+                    let elem_size = contract
+                        .llvm_type(&elem)
+                        .size_of()
+                        .unwrap()
+                        .const_cast(contract.context.i32_type(), false);
+
+                    contract.vector_new(size, elem_size, init.as_ref()).into()
+                }
             }
             Expression::DynamicArrayLength(_, a) => {
-                let array = self
-                    .expression(contract, a, vartab, function)
-                    .into_pointer_value();
+                let array = self.expression(contract, a, vartab, function);
 
                 contract.vector_len(array).into()
             }
@@ -2849,31 +2833,7 @@ pub trait TargetRuntime<'a> {
             | Expression::Builtin(_, _, hash @ Builtin::Blake2_128, args)
             | Expression::Builtin(_, _, hash @ Builtin::Blake2_256, args)
             | Expression::Builtin(_, _, hash @ Builtin::Sha256, args) => {
-                let v = self
-                    .expression(contract, &args[0], vartab, function)
-                    .into_pointer_value();
-
-                let data = unsafe {
-                    contract.builder.build_gep(
-                        v,
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_int(2, false),
-                        ],
-                        "data",
-                    )
-                };
-
-                let data_len = unsafe {
-                    contract.builder.build_gep(
-                        v,
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_zero(),
-                        ],
-                        "data_len",
-                    )
-                };
+                let v = self.expression(contract, &args[0], vartab, function);
 
                 let hash = match hash {
                     Builtin::Ripemd160 => HashTy::Ripemd160,
@@ -2887,15 +2847,8 @@ pub trait TargetRuntime<'a> {
                 self.hash(
                     &contract,
                     hash,
-                    contract.builder.build_pointer_cast(
-                        data,
-                        contract.context.i8_type().ptr_type(AddressSpace::Generic),
-                        "data",
-                    ),
-                    contract
-                        .builder
-                        .build_load(data_len, "data_len")
-                        .into_int_value(),
+                    contract.vector_bytes(v),
+                    contract.vector_len(v),
                 )
                 .into()
             }
@@ -2928,43 +2881,9 @@ pub trait TargetRuntime<'a> {
                     .const_int(literal.len() as u64, false),
             ),
             StringLocation::RunTime(e) => {
-                let v = self
-                    .expression(contract, e, vartab, function)
-                    .into_pointer_value();
+                let v = self.expression(contract, e, vartab, function);
 
-                let data = unsafe {
-                    contract.builder.build_gep(
-                        v,
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_int(2, false),
-                        ],
-                        "data",
-                    )
-                };
-
-                let data_len = unsafe {
-                    contract.builder.build_gep(
-                        v,
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_zero(),
-                        ],
-                        "data_len",
-                    )
-                };
-
-                (
-                    contract.builder.build_pointer_cast(
-                        data,
-                        contract.context.i8_type().ptr_type(AddressSpace::Generic),
-                        "data",
-                    ),
-                    contract
-                        .builder
-                        .build_load(data_len, "data_len")
-                        .into_int_value(),
-                )
+                (contract.vector_bytes(v), contract.vector_len(v))
             }
         }
     }
@@ -3579,43 +3498,12 @@ pub trait TargetRuntime<'a> {
                         self.assert_failure(contract, data, len);
                     }
                     Instr::Print { expr } => {
-                        let v = self
-                            .expression(contract, expr, &w.vars, function)
-                            .into_pointer_value();
-
-                        let data = unsafe {
-                            contract.builder.build_gep(
-                                v,
-                                &[
-                                    contract.context.i32_type().const_zero(),
-                                    contract.context.i32_type().const_int(2, false),
-                                ],
-                                "data",
-                            )
-                        };
-
-                        let data_len = unsafe {
-                            contract.builder.build_gep(
-                                v,
-                                &[
-                                    contract.context.i32_type().const_zero(),
-                                    contract.context.i32_type().const_zero(),
-                                ],
-                                "data_len",
-                            )
-                        };
+                        let expr = self.expression(contract, expr, &w.vars, function);
 
                         self.print(
                             &contract,
-                            contract.builder.build_pointer_cast(
-                                data,
-                                contract.context.i8_type().ptr_type(AddressSpace::Generic),
-                                "data",
-                            ),
-                            contract
-                                .builder
-                                .build_load(data_len, "data_len")
-                                .into_int_value(),
+                            contract.vector_bytes(expr),
+                            contract.vector_len(expr),
                         );
                     }
                     Instr::Call {
@@ -4659,7 +4547,7 @@ pub trait TargetRuntime<'a> {
 
                         evaluated_arg[i] = Some(val);
 
-                        contract.vector_len(val.into_pointer_value())
+                        contract.vector_len(val)
                     }
                     ast::Type::DynamicBytes => {
                         let val = self.expression(contract, arg, vartab, function);
@@ -4667,7 +4555,7 @@ pub trait TargetRuntime<'a> {
                         evaluated_arg[i] = Some(val);
 
                         // will be hex encoded, so double
-                        let len = contract.vector_len(val.into_pointer_value());
+                        let len = contract.vector_len(val);
 
                         contract.builder.build_int_add(len, len, "hex_len")
                     }
@@ -4714,7 +4602,7 @@ pub trait TargetRuntime<'a> {
             None,
         );
 
-        let output_start = contract.vector_bytes(vector);
+        let output_start = contract.vector_bytes(vector.into());
 
         // now encode each of the arguments
         let mut output = output_start;
@@ -4770,8 +4658,8 @@ pub trait TargetRuntime<'a> {
                         output = unsafe { contract.builder.build_gep(output, &[len], "") };
                     }
                     ast::Type::String => {
-                        let s = contract.vector_bytes(val.into_pointer_value());
-                        let len = contract.vector_len(val.into_pointer_value());
+                        let s = contract.vector_bytes(val);
+                        let len = contract.vector_len(val);
 
                         contract.builder.build_call(
                             contract.module.get_function("__memcpy").unwrap(),
@@ -4782,8 +4670,8 @@ pub trait TargetRuntime<'a> {
                         output = unsafe { contract.builder.build_gep(output, &[len], "") };
                     }
                     ast::Type::DynamicBytes => {
-                        let s = contract.vector_bytes(val.into_pointer_value());
-                        let len = contract.vector_len(val.into_pointer_value());
+                        let s = contract.vector_bytes(val);
+                        let len = contract.vector_len(val);
 
                         contract.builder.build_call(
                             contract.module.get_function("hex_encode").unwrap(),
@@ -6137,6 +6025,18 @@ impl<'a> Contract<'a> {
                         .ptr_type(AddressSpace::Generic),
                 )
             }
+            ast::Type::Slice => BasicTypeEnum::StructType(
+                self.context.struct_type(
+                    &[
+                        self.context
+                            .i8_type()
+                            .ptr_type(AddressSpace::Generic)
+                            .into(),
+                        self.context.i32_type().into(),
+                    ],
+                    false,
+                ),
+            ),
             _ => unreachable!(),
         }
     }
@@ -6205,40 +6105,60 @@ impl<'a> Contract<'a> {
     }
 
     /// Number of elements in a vector
-    fn vector_len(&self, vector: PointerValue<'a>) -> IntValue<'a> {
-        // field 0 is the length
-        let len = unsafe {
-            self.builder.build_gep(
-                vector,
-                &[
-                    self.context.i32_type().const_zero(),
-                    self.context.i32_type().const_zero(),
-                ],
-                "vector_len",
-            )
-        };
+    fn vector_len(&self, vector: BasicValueEnum<'a>) -> IntValue<'a> {
+        if vector.is_struct_value() {
+            // slice
+            let slice = vector.into_struct_value();
+
+            self.builder
+                .build_extract_value(slice, 1, "slice_len")
+                .unwrap()
+                .into_int_value()
+        } else {
+            // field 0 is the length
+            let len = unsafe {
+                self.builder.build_gep(
+                    vector.into_pointer_value(),
+                    &[
+                        self.context.i32_type().const_zero(),
+                        self.context.i32_type().const_zero(),
+                    ],
+                    "vector_len",
+                )
+            };
 
-        self.builder.build_load(len, "vector_len").into_int_value()
+            self.builder.build_load(len, "vector_len").into_int_value()
+        }
     }
 
     /// Return the pointer to the actual bytes in the vector
-    fn vector_bytes(&self, vector: PointerValue<'a>) -> PointerValue<'a> {
-        let data = unsafe {
-            self.builder.build_gep(
-                vector,
-                &[
-                    self.context.i32_type().const_zero(),
-                    self.context.i32_type().const_int(2, false),
-                ],
+    fn vector_bytes(&self, vector: BasicValueEnum<'a>) -> PointerValue<'a> {
+        if vector.is_struct_value() {
+            // slice
+            let slice = vector.into_struct_value();
+
+            self.builder
+                .build_extract_value(slice, 0, "slice_data")
+                .unwrap()
+                .into_pointer_value()
+        } else {
+            let data = unsafe {
+                self.builder.build_gep(
+                    vector.into_pointer_value(),
+                    &[
+                        self.context.i32_type().const_zero(),
+                        self.context.i32_type().const_int(2, false),
+                    ],
+                    "data",
+                )
+            };
+
+            self.builder.build_pointer_cast(
+                data,
+                self.context.i8_type().ptr_type(AddressSpace::Generic),
                 "data",
             )
-        };
-
-        self.builder.build_pointer_cast(
-            data,
-            self.context.i8_type().ptr_type(AddressSpace::Generic),
-            "data",
-        )
+        }
     }
 }
 

+ 4 - 4
src/emit/sabre.rs

@@ -379,10 +379,10 @@ impl<'a> TargetRuntime<'a> for SabreTarget {
 
     fn set_storage_string(
         &self,
-        _contract: &Contract,
-        _function: FunctionValue,
-        _slot: PointerValue,
-        _dest: PointerValue,
+        _contract: &Contract<'a>,
+        _function: FunctionValue<'a>,
+        _slot: PointerValue<'a>,
+        _dest: BasicValueEnum<'a>,
     ) {
         unimplemented!();
     }

+ 4 - 4
src/emit/solana.rs

@@ -296,10 +296,10 @@ impl<'a> TargetRuntime<'a> for SolanaTarget {
 
     fn set_storage_string(
         &self,
-        _contract: &Contract,
-        _function: FunctionValue,
-        _slot: PointerValue,
-        _dest: PointerValue,
+        _contract: &Contract<'a>,
+        _function: FunctionValue<'a>,
+        _slot: PointerValue<'a>,
+        _dest: BasicValueEnum<'a>,
     ) {
         unimplemented!();
     }

+ 16 - 86
src/emit/substrate.rs

@@ -1289,6 +1289,10 @@ impl SubstrateTarget {
                     arg
                 };
 
+                let string_len = contract.vector_len(arg);
+
+                let string_data = contract.vector_bytes(arg);
+
                 if !packed {
                     let function = contract.module.get_function("scale_encode_string").unwrap();
 
@@ -1296,19 +1300,7 @@ impl SubstrateTarget {
                         .builder
                         .build_call(
                             function,
-                            &[
-                                (*data).into(),
-                                // when we call LinkModules2() some types like vector get renamed to vector.1
-                                contract
-                                    .builder
-                                    .build_pointer_cast(
-                                        arg.into_pointer_value(),
-                                        function.get_type().get_param_types()[1]
-                                            .into_pointer_type(),
-                                        "vector",
-                                    )
-                                    .into(),
-                            ],
+                            &[(*data).into(), string_data.into(), string_len.into()],
                             "",
                         )
                         .try_as_basic_value()
@@ -1316,33 +1308,6 @@ impl SubstrateTarget {
                         .unwrap()
                         .into_pointer_value();
                 } else {
-                    let len = unsafe {
-                        contract.builder.build_gep(
-                            arg.into_pointer_value(),
-                            &[
-                                contract.context.i32_type().const_zero(),
-                                contract.context.i32_type().const_zero(),
-                            ],
-                            "string.len",
-                        )
-                    };
-
-                    let p = unsafe {
-                        contract.builder.build_gep(
-                            arg.into_pointer_value(),
-                            &[
-                                contract.context.i32_type().const_zero(),
-                                contract.context.i32_type().const_int(2, false),
-                            ],
-                            "string.data",
-                        )
-                    };
-
-                    let len = contract
-                        .builder
-                        .build_load(len, "array.len")
-                        .into_int_value();
-
                     contract.builder.build_call(
                         contract.module.get_function("__memcpy").unwrap(),
                         &[
@@ -1350,17 +1315,17 @@ impl SubstrateTarget {
                             contract
                                 .builder
                                 .build_pointer_cast(
-                                    p,
+                                    string_data,
                                     contract.context.i8_type().ptr_type(AddressSpace::Generic),
                                     "",
                                 )
                                 .into(),
-                            len.into(),
+                            string_len.into(),
                         ],
                         "",
                     );
 
-                    *data = unsafe { contract.builder.build_gep(*data, &[len], "") };
+                    *data = unsafe { contract.builder.build_gep(*data, &[string_len], "") };
                 }
             }
             ast::Type::ExternalFunction { .. } => {
@@ -1653,21 +1618,7 @@ impl SubstrateTarget {
                 // A string or bytes type has to be encoded by: one compact integer for
                 // the length, followed by the bytes themselves. Here we assume that the
                 // length requires 5 bytes.
-                let len = unsafe {
-                    contract.builder.build_gep(
-                        arg.into_pointer_value(),
-                        &[
-                            contract.context.i32_type().const_zero(),
-                            contract.context.i32_type().const_zero(),
-                        ],
-                        "string.len",
-                    )
-                };
-
-                let len = contract
-                    .builder
-                    .build_load(len, "string.len")
-                    .into_int_value();
+                let len = contract.vector_len(arg);
 
                 if packed {
                     len
@@ -1870,34 +1821,13 @@ impl<'a> TargetRuntime<'a> for SubstrateTarget {
 
     fn set_storage_string(
         &self,
-        contract: &Contract,
-        _function: FunctionValue,
-        slot: PointerValue,
-        dest: PointerValue,
+        contract: &Contract<'a>,
+        _function: FunctionValue<'a>,
+        slot: PointerValue<'a>,
+        dest: BasicValueEnum<'a>,
     ) {
-        let len = unsafe {
-            contract.builder.build_gep(
-                dest,
-                &[
-                    contract.context.i32_type().const_zero(),
-                    contract.context.i32_type().const_zero(),
-                ],
-                "ptr.string.len",
-            )
-        };
-
-        let len = contract.builder.build_load(len, "string.len");
-
-        let data = unsafe {
-            contract.builder.build_gep(
-                dest,
-                &[
-                    contract.context.i32_type().const_zero(),
-                    contract.context.i32_type().const_int(2, false),
-                ],
-                "ptr.string.data",
-            )
-        };
+        let len = contract.vector_len(dest);
+        let data = contract.vector_bytes(dest);
 
         // TODO: check for non-zero
         contract.builder.build_call(
@@ -1919,7 +1849,7 @@ impl<'a> TargetRuntime<'a> for SubstrateTarget {
                         "",
                     )
                     .into(),
-                len,
+                len.into(),
             ],
             "",
         );

+ 2 - 0
src/sema/ast.rs

@@ -38,6 +38,8 @@ pub enum Type {
     Value,
     Void,
     Unreachable,
+    /// DynamicBytes and String are lowered to a vector; when read-only they can be lowered to a slice (data pointer and length) instead.
+    Slice,
 }
 
 #[derive(PartialEq, Clone, Debug)]

+ 17 - 0
src/sema/contracts.rs

@@ -59,6 +59,23 @@ impl ast::Contract {
                     cfg.ty, cfg.name, cfg.public, cfg.nonpayable
                 );
 
+                out += &format!(
+                    "# params: {}\n",
+                    cfg.params
+                        .iter()
+                        .map(|p| p.ty.to_string(ns))
+                        .collect::<Vec<String>>()
+                        .join(",")
+                );
+                out += &format!(
+                    "# returns: {}\n",
+                    cfg.returns
+                        .iter()
+                        .map(|p| p.ty.to_string(ns))
+                        .collect::<Vec<String>>()
+                        .join(",")
+                );
+
                 out += &cfg.to_string(self, ns);
             }
         }

+ 1 - 0
src/sema/types.rs

@@ -682,6 +682,7 @@ impl Type {
             Type::StorageRef(ty) => format!("{} storage", ty.to_string(ns)),
             Type::Void => "void".to_owned(),
             Type::Unreachable => "unreachable".to_owned(),
+            Type::Slice => "slice".to_owned(),
         }
     }
 

+ 1 - 3
stdlib/substrate.c

@@ -55,11 +55,9 @@ uint8_t *compact_decode_u32(uint8_t *dest, uint32_t *val)
     return dest;
 }
 
-uint8_t *scale_encode_string(uint8_t *dest, struct vector *s)
+uint8_t *scale_encode_string(uint8_t *dest, uint8_t *data, uint32_t len)
 {
-    uint32_t len = s->len;
     uint8_t *data_dst = compact_encode_u32(dest, len);
-    uint8_t *data = s->data;
 
     while (len--)
     {

BIN
stdlib/wasm/substrate.bc


+ 34 - 8
tests/codegen.rs

@@ -16,6 +16,12 @@ fn testcases() {
     }
 }
 
+#[derive(Debug)]
+enum Test {
+    Check(String),
+    Rewind,
+}
+
 fn testcase(path: PathBuf) {
     // find the args to run.
     println!("testcase: {}", path.display());
@@ -31,7 +37,10 @@ fn testcase(path: PathBuf) {
 
             command_line = Some(String::from(args));
         } else if let Some(check) = line.strip_prefix("// CHECK:") {
-            checks.push(check.trim().to_string());
+            checks.push(Test::Check(check.trim().to_string()));
+        } else if let Some(check) = line.strip_prefix("// BEGIN-CHECK:") {
+            checks.push(Test::Rewind);
+            checks.push(Test::Check(check.trim().to_string()));
         }
     }
 
@@ -48,16 +57,33 @@ fn testcase(path: PathBuf) {
 
     let stdout = String::from_utf8_lossy(&output.stdout);
 
-    let mut check_done = 0;
+    let mut current_check = 0;
+    let mut current_line = 0;
+    let lines: Vec<&str> = stdout.split('\n').collect();
+
+    while current_line < lines.len() {
+        let line = lines[current_line];
 
-    for line in stdout.split('\n') {
-        // have we done all checks
-        if check_done < checks.len() && line.find(&checks[check_done]).is_some() {
-            check_done += 1;
+        match checks.get(current_check) {
+            Some(Test::Check(needle)) => {
+                if line.find(needle).is_some() {
+                    current_check += 1;
+                }
+            }
+            Some(Test::Rewind) => {
+                current_line = 0;
+                current_check += 1;
+                continue;
+            }
+            None => (),
         }
+
+        current_line += 1;
     }
 
-    if check_done < checks.len() {
-        panic!("NOT FOUND CHECK: {}", checks[check_done]);
+    if current_check < checks.len() {
+        println!("OUTPUT: \n===8<===8<===\n{}===8<===8<===\n", stdout);
+
+        panic!("NOT FOUND CHECK: {:?}", checks[current_check]);
     }
 }

+ 77 - 0
tests/codegen_testcases/slice1.sol

@@ -0,0 +1,77 @@
+// RUN: --emit cfg
+contract c {
+// BEGIN-CHECK: c::test1
+	function test1() public pure {
+		bytes x = "foo1";
+		// x is not being used, so it can be a slice
+// CHECK: alloc slice uint32 4 "foo1"
+	}
+
+// BEGIN-CHECK: c::test2
+	function test2() public pure {
+		bytes x = "foo2";
+
+		x[1] = 0;
+		// x is being modified, so it must be a vector
+// CHECK: alloc bytes uint32 4 "foo2"
+	}
+
+	function foo(bytes x) pure internal {
+
+	}
+
+// BEGIN-CHECK: c::test3
+	function test3() public pure {
+		bytes x = "foo3";
+
+		foo(x);
+		// no slices for function arguments yet, so it must be a vector
+// CHECK: alloc bytes uint32 4 "foo3"
+	}
+
+
+// BEGIN-CHECK: c::test4
+	function test4() public pure {
+		string x = "foo4";
+
+		// a bunch of stuff that does not need a vector
+		if (x == "bar") {
+			bool y = true;
+		}
+
+		string y = x + "if";
+
+		print(x);
+// CHECK: alloc slice uint32 4 "foo4"
+	}
+
+// BEGIN-CHECK: c::test5
+	function test5() public pure {
+		bytes x = "foo5";
+
+		x.push(0);
+		// push modifies vector
+// CHECK: alloc bytes uint32 4 "foo5"
+	}
+
+// BEGIN-CHECK: c::test6
+	function test6() public pure {
+		bytes x = "foo6";
+
+		x.pop();
+		// pop modifies vector
+// CHECK: alloc bytes uint32 4 "foo6"
+	}
+
+
+// BEGIN-CHECK: c::test7
+	function test7() public pure {
+		bytes x = "foo7";
+
+		bytes y = x;
+		y[1] = 0;
+
+		// x modified via y
+// CHECK: alloc bytes uint32 4 "foo7"
+	}
+}