scale_encoding.rs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. // SPDX-License-Identifier: Apache-2.0
  2. use crate::codegen::cfg::{ControlFlowGraph, Instr};
  3. use crate::codegen::encoding::AbiEncoding;
  4. use crate::codegen::vartable::Vartable;
  5. use crate::codegen::{Builtin, Expression};
  6. use crate::sema::ast::StructType;
  7. use crate::sema::ast::{Namespace, Type, Type::Uint};
  8. use parity_scale_codec::Encode;
  9. use primitive_types::U256;
  10. use solang_parser::pt::Loc::Codegen;
  11. use std::collections::HashMap;
  12. use super::buffer_validator::BufferValidator;
/// State of the SCALE implementation of `AbiEncoding`.
pub(super) struct ScaleEncoding {
    /// Expressions cached per argument number; filled by `storage_cache_insert()`
    /// and drained by `storage_cache_remove()`.
    storage_cache: HashMap<usize, Expression>,
    /// Flag handed to `new()` and reported back by `is_packed()`.
    packed_encoder: bool,
}
  17. impl ScaleEncoding {
  18. pub fn new(packed: bool) -> Self {
  19. Self {
  20. storage_cache: HashMap::new(),
  21. packed_encoder: packed,
  22. }
  23. }
  24. }
  25. /// Decoding the compact integer at current `offset` inside `buffer`.
  26. /// Returns the variable number of the decoded integer (32bit) and the width in bytes of the encoded version.
  27. /// More information can found in the /// [SCALE documentation](https://docs.substrate.io/reference/scale-codec/).
  28. fn decode_compact(
  29. buffer: &Expression,
  30. offset: &Expression,
  31. vartab: &mut Vartable,
  32. cfg: &mut ControlFlowGraph,
  33. ) -> (usize, Expression) {
  34. let decoded_var = vartab.temp_anonymous(&Uint(32));
  35. let size_width_var = vartab.temp_anonymous(&Uint(32));
  36. vartab.new_dirty_tracker();
  37. let read_byte = Expression::Builtin {
  38. loc: Codegen,
  39. tys: vec![Uint(8)],
  40. kind: Builtin::ReadFromBuffer,
  41. args: vec![buffer.clone(), offset.clone()],
  42. };
  43. cfg.add(
  44. vartab,
  45. Instr::Set {
  46. loc: Codegen,
  47. res: size_width_var,
  48. expr: Expression::ZeroExt {
  49. loc: Codegen,
  50. ty: Uint(32),
  51. expr: read_byte.into(),
  52. },
  53. },
  54. );
  55. let size_width = Expression::Variable {
  56. loc: Codegen,
  57. ty: Uint(32),
  58. var_no: size_width_var,
  59. };
  60. let two = Expression::NumberLiteral {
  61. loc: Codegen,
  62. ty: Uint(32),
  63. value: 2.into(),
  64. };
  65. let three = Expression::NumberLiteral {
  66. loc: Codegen,
  67. ty: Uint(32),
  68. value: 3.into(),
  69. };
  70. let cond = Expression::BitwiseAnd {
  71. loc: Codegen,
  72. ty: Uint(32),
  73. left: size_width.clone().into(),
  74. right: three.into(),
  75. };
  76. let cases = &[
  77. (
  78. Expression::NumberLiteral {
  79. loc: Codegen,
  80. ty: Uint(32),
  81. value: 0.into(),
  82. },
  83. cfg.new_basic_block("case_0".into()),
  84. ),
  85. (
  86. Expression::NumberLiteral {
  87. loc: Codegen,
  88. ty: Uint(32),
  89. value: 1.into(),
  90. },
  91. cfg.new_basic_block("case_1".into()),
  92. ),
  93. (
  94. Expression::NumberLiteral {
  95. loc: Codegen,
  96. ty: Uint(32),
  97. value: 2.into(),
  98. },
  99. cfg.new_basic_block("case_2".into()),
  100. ),
  101. ];
  102. let default = cfg.new_basic_block("case_default".into());
  103. cfg.add(
  104. vartab,
  105. Instr::Switch {
  106. cond,
  107. cases: cases.to_vec(),
  108. default,
  109. },
  110. );
  111. let done = cfg.new_basic_block("done".into());
  112. // We will land in the default block for sizes of 2**30 (1GB) or larger.
  113. // Such big sizes are invalid for smart contracts and should never occur anyways.
  114. cfg.set_basic_block(default);
  115. cfg.add(vartab, Instr::AssertFailure { encoded_args: None });
  116. cfg.set_basic_block(cases[0].1);
  117. let expr = Expression::ShiftRight {
  118. loc: Codegen,
  119. ty: Uint(32),
  120. left: size_width.clone().into(),
  121. right: two.clone().into(),
  122. signed: false,
  123. };
  124. cfg.add(
  125. vartab,
  126. Instr::Set {
  127. loc: Codegen,
  128. res: decoded_var,
  129. expr,
  130. },
  131. );
  132. cfg.add(
  133. vartab,
  134. Instr::Set {
  135. loc: Codegen,
  136. res: size_width_var,
  137. expr: Expression::NumberLiteral {
  138. loc: Codegen,
  139. ty: Uint(32),
  140. value: 1.into(),
  141. },
  142. },
  143. );
  144. cfg.add(vartab, Instr::Branch { block: done });
  145. cfg.set_basic_block(cases[1].1);
  146. let read_byte = Expression::Builtin {
  147. loc: Codegen,
  148. tys: vec![Uint(16)],
  149. kind: Builtin::ReadFromBuffer,
  150. args: vec![buffer.clone(), offset.clone()],
  151. };
  152. let expr = Expression::ShiftRight {
  153. loc: Codegen,
  154. ty: Uint(32),
  155. left: Expression::ZeroExt {
  156. loc: Codegen,
  157. ty: Uint(32),
  158. expr: read_byte.into(),
  159. }
  160. .into(),
  161. right: two.clone().into(),
  162. signed: false,
  163. };
  164. cfg.add(
  165. vartab,
  166. Instr::Set {
  167. loc: Codegen,
  168. res: decoded_var,
  169. expr,
  170. },
  171. );
  172. cfg.add(
  173. vartab,
  174. Instr::Set {
  175. loc: Codegen,
  176. res: size_width_var,
  177. expr: two.clone(),
  178. },
  179. );
  180. cfg.add(vartab, Instr::Branch { block: done });
  181. cfg.set_basic_block(cases[2].1);
  182. let read_byte = Expression::Builtin {
  183. loc: Codegen,
  184. tys: vec![Uint(32)],
  185. kind: Builtin::ReadFromBuffer,
  186. args: vec![buffer.clone(), offset.clone()],
  187. };
  188. let expr = Expression::ShiftRight {
  189. loc: Codegen,
  190. ty: Uint(32),
  191. left: read_byte.into(),
  192. right: two.into(),
  193. signed: false,
  194. };
  195. cfg.add(
  196. vartab,
  197. Instr::Set {
  198. loc: Codegen,
  199. res: decoded_var,
  200. expr,
  201. },
  202. );
  203. cfg.add(
  204. vartab,
  205. Instr::Set {
  206. loc: Codegen,
  207. res: size_width_var,
  208. expr: Expression::NumberLiteral {
  209. loc: Codegen,
  210. ty: Uint(32),
  211. value: 4.into(),
  212. },
  213. },
  214. );
  215. cfg.add(vartab, Instr::Branch { block: done });
  216. vartab.set_dirty(decoded_var);
  217. vartab.set_dirty(size_width_var);
  218. cfg.set_basic_block(done);
  219. cfg.set_phis(done, vartab.pop_dirty_tracker());
  220. (decoded_var, size_width)
  221. }
  222. /// Encode `expr` into `buffer` as a compact integer. More information can found in the
  223. /// [SCALE documentation](https://docs.substrate.io/reference/scale-codec/).
  224. fn encode_compact(
  225. expr: &Expression,
  226. buffer: Option<&Expression>,
  227. offset: Option<&Expression>,
  228. vartab: &mut Vartable,
  229. cfg: &mut ControlFlowGraph,
  230. ) -> Expression {
  231. let small = cfg.new_basic_block("small".into());
  232. let medium = cfg.new_basic_block("medium".into());
  233. let medium_or_big = cfg.new_basic_block("medium_or_big".into());
  234. let big = cfg.new_basic_block("big".into());
  235. let done = cfg.new_basic_block("done".into());
  236. let fail = cfg.new_basic_block("fail".into());
  237. let prepare = cfg.new_basic_block("prepare".into());
  238. let cmp_val = Expression::NumberLiteral {
  239. loc: Codegen,
  240. ty: Uint(32),
  241. value: (0x40000000 - 1).into(),
  242. };
  243. let compare = Expression::More {
  244. loc: Codegen,
  245. signed: false,
  246. left: expr.clone().into(),
  247. right: cmp_val.into(),
  248. };
  249. cfg.add(
  250. vartab,
  251. Instr::BranchCond {
  252. cond: compare,
  253. true_block: fail,
  254. false_block: prepare,
  255. },
  256. );
  257. cfg.set_basic_block(fail);
  258. cfg.add(vartab, Instr::AssertFailure { encoded_args: None });
  259. cfg.set_basic_block(prepare);
  260. let cmp_val = Expression::NumberLiteral {
  261. loc: Codegen,
  262. ty: Uint(32),
  263. value: (0x40 - 1).into(),
  264. };
  265. let compare = Expression::More {
  266. loc: Codegen,
  267. signed: false,
  268. left: expr.clone().into(),
  269. right: cmp_val.into(),
  270. };
  271. cfg.add(
  272. vartab,
  273. Instr::BranchCond {
  274. cond: compare,
  275. true_block: medium_or_big,
  276. false_block: small,
  277. },
  278. );
  279. cfg.set_basic_block(medium_or_big);
  280. let cmp_val = Expression::NumberLiteral {
  281. loc: Codegen,
  282. ty: Uint(32),
  283. value: (0x4000 - 1).into(),
  284. };
  285. let compare = Expression::More {
  286. loc: Codegen,
  287. signed: false,
  288. left: expr.clone().into(),
  289. right: cmp_val.into(),
  290. };
  291. cfg.add(
  292. vartab,
  293. Instr::BranchCond {
  294. cond: compare,
  295. true_block: big,
  296. false_block: medium,
  297. },
  298. );
  299. let size_variable = vartab.temp_anonymous(&Uint(32));
  300. vartab.new_dirty_tracker();
  301. let four = Expression::NumberLiteral {
  302. loc: Codegen,
  303. ty: Uint(32),
  304. value: 4.into(),
  305. }
  306. .into();
  307. let mul = Expression::Multiply {
  308. loc: Codegen,
  309. ty: Uint(32),
  310. overflowing: false,
  311. left: expr.clone().into(),
  312. right: four,
  313. };
  314. cfg.set_basic_block(small);
  315. if let (Some(buffer), Some(offset)) = (buffer, offset) {
  316. cfg.add(
  317. vartab,
  318. Instr::WriteBuffer {
  319. buf: buffer.clone(),
  320. offset: offset.clone(),
  321. value: Expression::Cast {
  322. loc: Codegen,
  323. ty: Uint(8),
  324. expr: mul.clone().into(),
  325. },
  326. },
  327. );
  328. }
  329. let one = Expression::NumberLiteral {
  330. loc: Codegen,
  331. ty: Uint(32),
  332. value: 1.into(),
  333. };
  334. cfg.add(
  335. vartab,
  336. Instr::Set {
  337. loc: Codegen,
  338. res: size_variable,
  339. expr: one.clone(),
  340. },
  341. );
  342. cfg.add(vartab, Instr::Branch { block: done });
  343. cfg.set_basic_block(medium);
  344. if let (Some(buffer), Some(offset)) = (buffer, offset) {
  345. let mul = Expression::BitwiseOr {
  346. loc: Codegen,
  347. ty: Uint(32),
  348. left: mul.clone().into(),
  349. right: one.into(),
  350. };
  351. cfg.add(
  352. vartab,
  353. Instr::WriteBuffer {
  354. buf: buffer.clone(),
  355. offset: offset.clone(),
  356. value: Expression::Cast {
  357. loc: Codegen,
  358. ty: Uint(16),
  359. expr: mul.into(),
  360. },
  361. },
  362. );
  363. }
  364. let two = Expression::NumberLiteral {
  365. loc: Codegen,
  366. ty: Uint(32),
  367. value: 2.into(),
  368. };
  369. cfg.add(
  370. vartab,
  371. Instr::Set {
  372. loc: Codegen,
  373. res: size_variable,
  374. expr: two.clone(),
  375. },
  376. );
  377. cfg.add(vartab, Instr::Branch { block: done });
  378. cfg.set_basic_block(big);
  379. if let (Some(buffer), Some(offset)) = (buffer, offset) {
  380. cfg.add(
  381. vartab,
  382. Instr::WriteBuffer {
  383. buf: buffer.clone(),
  384. offset: offset.clone(),
  385. value: Expression::BitwiseOr {
  386. loc: Codegen,
  387. ty: Uint(32),
  388. left: mul.into(),
  389. right: two.into(),
  390. },
  391. },
  392. );
  393. }
  394. cfg.add(
  395. vartab,
  396. Instr::Set {
  397. loc: Codegen,
  398. res: size_variable,
  399. expr: Expression::NumberLiteral {
  400. loc: Codegen,
  401. ty: Uint(32),
  402. value: 4.into(),
  403. },
  404. },
  405. );
  406. cfg.add(vartab, Instr::Branch { block: done });
  407. cfg.set_basic_block(done);
  408. cfg.set_phis(done, vartab.pop_dirty_tracker());
  409. Expression::Variable {
  410. loc: Codegen,
  411. ty: Uint(32),
  412. var_no: size_variable,
  413. }
  414. }
  415. impl AbiEncoding for ScaleEncoding {
  416. fn size_width(
  417. &self,
  418. size: &Expression,
  419. vartab: &mut Vartable,
  420. cfg: &mut ControlFlowGraph,
  421. ) -> Expression {
  422. // FIXME:
  423. // It should be possible to optimize this to estimate always 4 bytes.
  424. // `codegen::abi_encode()` also returns the actual encoded size,
  425. // so slightly overestimating it shouldn't matter.
  426. // However, the actual length of the encoded data produced by `codegen::abi_encode()`
  427. // is ignored in some places, wich results in buggy contracts if we have not an exact estimate.
  428. // Once this is fixed (the encoded size return by `codegen::abi_encode()` must never be ignored),
  429. // this can just be always 4 bytes .
  430. encode_compact(size, None, None, vartab, cfg)
  431. }
  432. fn encode_external_function(
  433. &mut self,
  434. expr: &Expression,
  435. buffer: &Expression,
  436. offset: &Expression,
  437. ns: &Namespace,
  438. vartab: &mut Vartable,
  439. cfg: &mut ControlFlowGraph,
  440. ) -> Expression {
  441. let addr_len = ns.address_length.into();
  442. let address = expr.external_function_address();
  443. let size = self.encode_directly(&address, buffer, offset, vartab, cfg, addr_len);
  444. let offset = Expression::Add {
  445. loc: Codegen,
  446. ty: Uint(32),
  447. overflowing: false,
  448. left: offset.clone().into(),
  449. right: size.into(),
  450. };
  451. let selector = expr.external_function_selector();
  452. self.encode_directly(&selector, buffer, &offset, vartab, cfg, 4.into());
  453. Expression::NumberLiteral {
  454. loc: Codegen,
  455. ty: Uint(32),
  456. value: (ns.address_length + 4).into(),
  457. }
  458. }
  459. fn encode_size(
  460. &mut self,
  461. expr: &Expression,
  462. buffer: &Expression,
  463. offset: &Expression,
  464. _ns: &Namespace,
  465. vartab: &mut Vartable,
  466. cfg: &mut ControlFlowGraph,
  467. ) -> Expression {
  468. encode_compact(expr, Some(buffer), Some(offset), vartab, cfg)
  469. }
  470. fn decode_external_function(
  471. &self,
  472. buffer: &Expression,
  473. offset: &Expression,
  474. ty: &Type,
  475. validator: &mut BufferValidator,
  476. ns: &Namespace,
  477. vartab: &mut Vartable,
  478. cfg: &mut ControlFlowGraph,
  479. ) -> (Expression, Expression) {
  480. let size = Expression::NumberLiteral {
  481. loc: Codegen,
  482. ty: Uint(32),
  483. value: (ns.address_length + 4).into(),
  484. };
  485. validator.validate_offset_plus_size(offset, &size, ns, vartab, cfg);
  486. let address = Expression::Builtin {
  487. loc: Codegen,
  488. tys: vec![Type::Address(false)],
  489. kind: Builtin::ReadFromBuffer,
  490. args: vec![buffer.clone(), offset.clone()],
  491. };
  492. let new_offset = offset.clone().add_u32(Expression::NumberLiteral {
  493. loc: Codegen,
  494. ty: Uint(32),
  495. value: ns.address_length.into(),
  496. });
  497. let selector = Expression::Builtin {
  498. loc: Codegen,
  499. tys: vec![Type::FunctionSelector],
  500. kind: Builtin::ReadFromBuffer,
  501. args: vec![buffer.clone(), new_offset],
  502. };
  503. let ext_func = Expression::StructLiteral {
  504. loc: Codegen,
  505. ty: Type::Struct(StructType::ExternalFunction),
  506. values: vec![selector, address],
  507. };
  508. (
  509. Expression::Cast {
  510. loc: Codegen,
  511. ty: ty.clone(),
  512. expr: ext_func.into(),
  513. },
  514. size,
  515. )
  516. }
  517. fn retrieve_array_length(
  518. &self,
  519. buffer: &Expression,
  520. offset: &Expression,
  521. vartab: &mut Vartable,
  522. cfg: &mut ControlFlowGraph,
  523. ) -> (usize, Expression) {
  524. decode_compact(buffer, offset, vartab, cfg)
  525. }
  526. fn storage_cache_insert(&mut self, arg_no: usize, expr: Expression) {
  527. self.storage_cache.insert(arg_no, expr);
  528. }
  529. fn storage_cache_remove(&mut self, arg_no: usize) -> Option<Expression> {
  530. self.storage_cache.remove(&arg_no)
  531. }
  532. fn calculate_string_size(
  533. &self,
  534. expr: &Expression,
  535. vartab: &mut Vartable,
  536. cfg: &mut ControlFlowGraph,
  537. ) -> Expression {
  538. // When encoding a variable length array, the total size is "compact encoded array length + N elements"
  539. let length = Expression::Builtin {
  540. loc: Codegen,
  541. tys: vec![Uint(32)],
  542. kind: Builtin::ArrayLength,
  543. args: vec![expr.clone()],
  544. };
  545. if self.is_packed() {
  546. length
  547. } else {
  548. encode_compact(&length, None, None, vartab, cfg).add_u32(length)
  549. }
  550. }
  551. fn is_packed(&self) -> bool {
  552. self.packed_encoder
  553. }
  554. /// TODO: This is used and tested for error data (Error and Panic) only.
  555. fn const_encode(&self, args: &[Expression]) -> Option<Vec<u8>> {
  556. let mut result = vec![];
  557. for arg in args {
  558. match arg {
  559. Expression::AllocDynamicBytes {
  560. initializer: Some(data),
  561. ty: Type::String | Type::DynamicBytes,
  562. ..
  563. } => result.extend_from_slice(&data.encode()),
  564. Expression::AllocDynamicBytes {
  565. initializer: Some(data),
  566. ty: Type::Slice(inner),
  567. ..
  568. } if matches!(**inner, Type::Bytes(1)) => result.extend_from_slice(data),
  569. Expression::NumberLiteral {
  570. ty: Type::Bytes(4),
  571. value,
  572. ..
  573. } => {
  574. let bytes = value.to_bytes_be().1;
  575. if bytes.len() < 4 {
  576. let mut buf = Vec::new();
  577. buf.resize(4 - bytes.len(), 0);
  578. result.extend_from_slice(&buf);
  579. }
  580. result.extend_from_slice(&bytes[..]);
  581. }
  582. Expression::NumberLiteral {
  583. ty: Type::Uint(256),
  584. value,
  585. ..
  586. } => {
  587. let bytes = value.to_bytes_be().1;
  588. result.extend_from_slice(&U256::from_big_endian(&bytes).encode()[..]);
  589. }
  590. _ => return None,
  591. }
  592. }
  593. result.into()
  594. }
  595. }
  596. #[cfg(test)]
  597. mod tests {
  598. use num_bigint::{BigInt, Sign};
  599. use parity_scale_codec::Encode;
  600. use primitive_types::U256;
  601. use crate::{
  602. codegen::{
  603. encoding::{scale_encoding::ScaleEncoding, AbiEncoding},
  604. Expression,
  605. },
  606. sema::ast::Type,
  607. };
  608. #[test]
  609. fn const_encode_dynamic_bytes() {
  610. let data = vec![0x41, 0x41];
  611. let encoder = ScaleEncoding::new(false);
  612. let expr = Expression::AllocDynamicBytes {
  613. loc: Default::default(),
  614. ty: Type::DynamicBytes,
  615. size: Expression::Poison.into(),
  616. initializer: data.clone().into(),
  617. };
  618. let encoded = encoder.const_encode(&[expr]).unwrap();
  619. assert_eq!(encoded, data.encode());
  620. }
  621. #[test]
  622. fn const_encode_uint() {
  623. let encoder = ScaleEncoding::new(false);
  624. for value in [U256::MAX, U256::zero(), U256::one()] {
  625. let mut bytes = [0u8; 32].to_vec();
  626. value.to_big_endian(&mut bytes);
  627. let data = BigInt::from_bytes_be(Sign::Plus, &bytes);
  628. let expr = Expression::NumberLiteral {
  629. loc: Default::default(),
  630. ty: Type::Uint(256),
  631. value: data,
  632. };
  633. let encoded = encoder.const_encode(&[expr]).unwrap();
  634. assert_eq!(encoded, value.encode());
  635. }
  636. }
  637. #[test]
  638. fn const_encode_bytes4() {
  639. let encoder = ScaleEncoding::new(false);
  640. for value in [
  641. [0x00, 0x00, 0xff, 0xff],
  642. [0x00, 0xff, 0xff, 0x00],
  643. [0xff, 0xff, 0x00, 0x00],
  644. [0xff, 0xff, 0xff, 0xff],
  645. [0x00, 0x00, 0x00, 0x00],
  646. [0xde, 0xad, 0xbe, 0xef],
  647. [0x01, 0x00, 0x00, 0x00],
  648. [0x00, 0x00, 0x00, 0x01],
  649. ] {
  650. let expr = Expression::NumberLiteral {
  651. ty: Type::Bytes(4),
  652. value: BigInt::from_bytes_be(Sign::Plus, &value),
  653. loc: Default::default(),
  654. };
  655. assert_eq!(&encoder.const_encode(&[expr]).unwrap(), &value.encode());
  656. }
  657. }
  658. }