buffer.rs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460
  1. // SPDX-License-Identifier: Apache-2.0
  2. //! Format buffer.
  3. use crate::{
  4. comments::{CommentState, CommentStringExt},
  5. string::{QuoteState, QuotedStringExt},
  6. };
  7. use std::fmt::Write;
  /// A single level of indentation tracked by the format buffer.
  ///
  /// A group may optionally skip the first line: while `skip_line` is set, the group is not
  /// counted towards the indent level.
  #[derive(Clone, Debug, Default)]
  struct IndentGroup {
      /// When `true`, this group is excluded from the indent level computation.
      skip_line: bool,
  }
  13. #[derive(Clone, Copy, Debug)]
  14. enum WriteState {
  15. LineStart(CommentState),
  16. WriteTokens(CommentState),
  17. WriteString(char),
  18. }
  19. impl WriteState {
  20. fn comment_state(&self) -> CommentState {
  21. match self {
  22. Self::LineStart(state) => *state,
  23. Self::WriteTokens(state) => *state,
  24. Self::WriteString(_) => CommentState::None,
  25. }
  26. }
  27. }
  28. impl Default for WriteState {
  29. fn default() -> Self {
  30. Self::LineStart(CommentState::default())
  31. }
  32. }
  33. /// A wrapper around a `std::fmt::Write` interface. The wrapper keeps track of indentation as well
  34. /// as information about the last `write_str` command if available. The formatter may also be
  35. /// restricted to a single line, in which case it will throw an error on a newline
  36. #[derive(Clone, Debug)]
  37. pub struct FormatBuffer<W> {
  38. pub w: W,
  39. indents: Vec<IndentGroup>,
  40. base_indent_len: usize,
  41. tab_width: usize,
  42. last_char: Option<char>,
  43. current_line_len: usize,
  44. restrict_to_single_line: bool,
  45. state: WriteState,
  46. }
  47. impl<W> FormatBuffer<W> {
  48. pub fn new(w: W, tab_width: usize) -> Self {
  49. Self {
  50. w,
  51. tab_width,
  52. base_indent_len: 0,
  53. indents: vec![],
  54. current_line_len: 0,
  55. last_char: None,
  56. restrict_to_single_line: false,
  57. state: WriteState::default(),
  58. }
  59. }
  60. /// Create a new temporary buffer based on an existing buffer which retains information about
  61. /// the buffer state, but has a blank String as its underlying `Write` interface
  62. pub fn create_temp_buf(&self) -> FormatBuffer<String> {
  63. let mut new = FormatBuffer::new(String::new(), self.tab_width);
  64. new.base_indent_len = self.total_indent_len();
  65. new.current_line_len = self.current_line_len();
  66. new.last_char = self.last_char;
  67. new.restrict_to_single_line = self.restrict_to_single_line;
  68. new.state = match self.state {
  69. WriteState::WriteTokens(state) | WriteState::LineStart(state) => {
  70. WriteState::LineStart(state)
  71. }
  72. WriteState::WriteString(ch) => WriteState::WriteString(ch),
  73. };
  74. new
  75. }
  76. /// Restrict the buffer to a single line
  77. pub fn restrict_to_single_line(&mut self, restricted: bool) {
  78. self.restrict_to_single_line = restricted;
  79. }
  80. /// Indent the buffer by delta
  81. pub fn indent(&mut self, delta: usize) {
  82. self.indents
  83. .extend(std::iter::repeat_n(IndentGroup::default(), delta));
  84. }
  85. /// Dedent the buffer by `delta` levels. Panics if `delta` is too large.
  86. pub fn dedent(&mut self, delta: usize) {
  87. if delta > self.indents.len() {
  88. panic!(
  89. "Cannot dedent by {} levels: only {} levels present",
  90. delta,
  91. self.indents.len()
  92. );
  93. }
  94. self.indents.truncate(self.indents.len() - delta);
  95. }
  96. /// Get the current level of the indent. This is multiplied by the tab width to get the
  97. /// resulting indent
  98. fn level(&self) -> usize {
  99. self.indents.iter().filter(|i| !i.skip_line).count()
  100. }
  101. /// Check if the last indent group is being skipped
  102. pub fn last_indent_group_skipped(&self) -> bool {
  103. self.indents.last().map(|i| i.skip_line).unwrap_or(false)
  104. }
  105. /// Set whether the last indent group should be skipped
  106. pub fn set_last_indent_group_skipped(&mut self, skip_line: bool) {
  107. if let Some(i) = self.indents.last_mut() {
  108. i.skip_line = skip_line
  109. }
  110. }
  111. /// Get the current indent size (level * tab_width)
  112. pub fn current_indent_len(&self) -> usize {
  113. self.level() * self.tab_width
  114. }
  115. /// Get the total indent size
  116. pub fn total_indent_len(&self) -> usize {
  117. self.current_indent_len() + self.base_indent_len
  118. }
  119. /// Get the current written position (this does not include the indent size)
  120. pub fn current_line_len(&self) -> usize {
  121. self.current_line_len
  122. }
  123. /// Check if the buffer is at the beginning of a new line
  124. pub fn is_beginning_of_line(&self) -> bool {
  125. matches!(self.state, WriteState::LineStart(_))
  126. }
  127. /// Start a new indent group (skips first indent)
  128. pub fn start_group(&mut self) {
  129. self.indents.push(IndentGroup { skip_line: true });
  130. }
  131. /// End the last indent group
  132. pub fn end_group(&mut self) {
  133. self.indents.pop();
  134. }
  135. /// Get the last char written to the buffer
  136. pub fn last_char(&self) -> Option<char> {
  137. self.last_char
  138. }
  139. /// When writing a newline apply state changes
  140. fn handle_newline(&mut self, mut comment_state: CommentState) {
  141. if comment_state == CommentState::Line {
  142. comment_state = CommentState::None;
  143. }
  144. self.current_line_len = 0;
  145. self.set_last_indent_group_skipped(false);
  146. self.last_char = Some('\n');
  147. self.state = WriteState::LineStart(comment_state);
  148. }
  149. }
  150. impl<W: Write> FormatBuffer<W> {
  151. /// Write a raw string to the buffer. This will ignore indents and remove the indents of the
  152. /// written string to match the current base indent of this buffer if it is a temp buffer
  153. pub fn write_raw(&mut self, s: impl AsRef<str>) -> std::fmt::Result {
  154. let mut lines = s.as_ref().lines().peekable();
  155. let mut comment_state = self.state.comment_state();
  156. while let Some(line) = lines.next() {
  157. // remove the whitespace that covered by the base indent length (this is normally the
  158. // case with temporary buffers as this will be readded by the underlying IndentWriter
  159. // later on
  160. let (new_comment_state, line_start) = line
  161. .comment_state_char_indices()
  162. .with_state(comment_state)
  163. .take(self.base_indent_len)
  164. .take_while(|(_, _, ch)| ch.is_whitespace())
  165. .last()
  166. .map(|(state, idx, _)| (state, idx + 1))
  167. .unwrap_or((comment_state, 0));
  168. comment_state = new_comment_state;
  169. let trimmed_line = &line[line_start..];
  170. if !trimmed_line.is_empty() {
  171. self.w.write_str(trimmed_line)?;
  172. self.current_line_len += trimmed_line.len();
  173. self.last_char = trimmed_line.chars().next_back();
  174. self.state = WriteState::WriteTokens(comment_state);
  175. }
  176. if lines.peek().is_some() || s.as_ref().ends_with('\n') {
  177. if self.restrict_to_single_line {
  178. return Err(std::fmt::Error);
  179. }
  180. self.w.write_char('\n')?;
  181. self.handle_newline(comment_state);
  182. }
  183. }
  184. Ok(())
  185. }
  186. }
  187. impl<W: Write> Write for FormatBuffer<W> {
  188. fn write_str(&mut self, mut s: &str) -> std::fmt::Result {
  189. if s.is_empty() {
  190. return Ok(());
  191. }
  192. let mut indent = " ".repeat(self.current_indent_len());
  193. loop {
  194. match self.state {
  195. WriteState::LineStart(mut comment_state) => {
  196. match s.find(|b| b != '\n') {
  197. // No non-empty lines in input, write the entire string (only newlines)
  198. None => {
  199. if !s.is_empty() {
  200. self.w.write_str(s)?;
  201. self.handle_newline(comment_state);
  202. }
  203. break;
  204. }
  205. // We can see the next non-empty line. Write up to the
  206. // beginning of that line, then insert an indent, then
  207. // continue.
  208. Some(len) => {
  209. let (head, tail) = s.split_at(len);
  210. self.w.write_str(head)?;
  211. self.w.write_str(&indent)?;
  212. self.current_line_len = 0;
  213. self.last_char = Some(' ');
  214. // a newline has been inserted
  215. if len > 0 {
  216. if self.last_indent_group_skipped() {
  217. indent = " ".repeat(self.current_indent_len() + self.tab_width);
  218. self.set_last_indent_group_skipped(false);
  219. }
  220. if comment_state == CommentState::Line {
  221. comment_state = CommentState::None;
  222. }
  223. }
  224. s = tail;
  225. self.state = WriteState::WriteTokens(comment_state);
  226. }
  227. }
  228. }
  229. WriteState::WriteTokens(comment_state) => {
  230. if s.is_empty() {
  231. break;
  232. }
  233. // find the next newline or non-comment string separator (e.g. ' or ")
  234. let mut len = 0;
  235. let mut new_state = WriteState::WriteTokens(comment_state);
  236. for (state, idx, ch) in s.comment_state_char_indices().with_state(comment_state)
  237. {
  238. len = idx;
  239. if ch == '\n' {
  240. if self.restrict_to_single_line {
  241. return Err(std::fmt::Error);
  242. }
  243. new_state = WriteState::LineStart(state);
  244. break;
  245. } else if state == CommentState::None && (ch == '\'' || ch == '"') {
  246. new_state = WriteState::WriteString(ch);
  247. break;
  248. } else {
  249. new_state = WriteState::WriteTokens(state);
  250. }
  251. }
  252. if matches!(new_state, WriteState::WriteTokens(_)) {
  253. // No newlines or strings found, write the entire string
  254. self.w.write_str(s)?;
  255. self.current_line_len += s.len();
  256. self.last_char = s.chars().next_back();
  257. self.state = new_state;
  258. break;
  259. } else {
  260. // A newline or string has been found. Write up to that character and
  261. // continue on the tail
  262. let (head, tail) = s.split_at(len + 1);
  263. self.w.write_str(head)?;
  264. s = tail;
  265. match new_state {
  266. WriteState::LineStart(comment_state) => {
  267. self.handle_newline(comment_state)
  268. }
  269. new_state => {
  270. self.current_line_len += head.len();
  271. self.last_char = head.chars().next_back();
  272. self.state = new_state;
  273. }
  274. }
  275. }
  276. }
  277. WriteState::WriteString(quote) => {
  278. match s
  279. .quoted_ranges()
  280. .with_state(QuoteState::String(quote))
  281. .next()
  282. {
  283. // No end found, write the rest of the string
  284. None => {
  285. self.w.write_str(s)?;
  286. self.current_line_len += s.len();
  287. self.last_char = s.chars().next_back();
  288. break;
  289. }
  290. // String end found, write the string and continue to add tokens after
  291. Some((_, _, len)) => {
  292. let (head, tail) = s.split_at(len + 1);
  293. self.w.write_str(head)?;
  294. if let Some((_, last)) = head.rsplit_once('\n') {
  295. self.set_last_indent_group_skipped(false);
  296. self.current_line_len = last.len();
  297. } else {
  298. self.current_line_len += head.len();
  299. }
  300. self.last_char = Some(quote);
  301. s = tail;
  302. self.state = WriteState::WriteTokens(CommentState::None);
  303. }
  304. }
  305. }
  306. }
  307. }
  308. Ok(())
  309. }
  310. }
  #[cfg(test)]
  mod tests {
      use super::*;
      use std::panic::{catch_unwind, AssertUnwindSafe};

      const TAB_WIDTH: usize = 4;

      /// Assert the number of tracked indent groups, the effective level and the indent width
      /// that results from that level.
      fn assert_indent_state(buf: &FormatBuffer<String>, groups: usize, level: usize) {
          assert_eq!(buf.indents.len(), groups);
          assert_eq!(buf.level(), level);
          assert_eq!(buf.current_indent_len(), level * TAB_WIDTH);
      }

      /// Indenting, skipping the last group and dedenting keep the level bookkeeping consistent.
      #[test]
      fn test_buffer_indents() {
          let delta = 1;

          let mut buf = FormatBuffer::new(String::new(), TAB_WIDTH);
          assert_indent_state(&buf, 0, 0);

          buf.indent(delta);
          assert_indent_state(&buf, delta, delta);

          // a skipped group is tracked but does not add to the level
          buf.indent(delta);
          buf.set_last_indent_group_skipped(true);
          assert!(buf.last_indent_group_skipped());
          assert_indent_state(&buf, delta * 2, delta);

          buf.dedent(delta);
          buf.dedent(delta);
          assert_indent_state(&buf, 0, 0);

          // should panic on extra dedent
          let outcome = catch_unwind(AssertUnwindSafe(|| {
              let mut over_dedented = buf.clone();
              over_dedented.dedent(delta);
          }));
          assert!(
              outcome.is_err(),
              "Expected panic on extra dedent, but did not get one"
          );
      }

      /// Writing through a temporary buffer and copying its output back gives the same result as
      /// writing to the original buffer directly.
      #[test]
      fn test_identical_temp_buf() -> std::fmt::Result {
          let content = "test string";
          let multiline_content = "test\nmultiline\nmultiple";
          let mut buf = FormatBuffer::new(String::new(), TAB_WIDTH);

          // create identical temp buf
          let mut temp = buf.create_temp_buf();
          writeln!(buf, "{content}")?;
          writeln!(temp, "{content}")?;
          assert_eq!(buf.w, format!("{content}\n"));
          assert_eq!(temp.w, buf.w);
          assert_eq!(temp.current_line_len, buf.current_line_len);
          assert_eq!(temp.base_indent_len, buf.total_indent_len());

          let delta = 1;
          buf.indent(delta);

          let mut temp_indented = buf.create_temp_buf();
          assert!(temp_indented.w.is_empty());
          assert_eq!(temp_indented.base_indent_len, buf.total_indent_len());
          assert_eq!(temp_indented.level() + delta, buf.level());

          let indent = " ".repeat(delta * TAB_WIDTH);
          let mut original_buf = buf.clone();
          write!(buf, "{multiline_content}")?;

          let indented_lines = multiline_content
              .lines()
              .collect::<Vec<_>>()
              .join(&format!("\n{indent}"));
          let expected_content = format!("{content}\n{indent}{indented_lines}");
          assert_eq!(buf.w, expected_content);

          write!(temp_indented, "{multiline_content}")?;
          // write temp buf to original and assert the result
          write!(original_buf, "{}", temp_indented.w)?;
          assert_eq!(buf.w, original_buf.w);

          Ok(())
      }

      /// Without any indentation configured, content passes through the buffer unchanged.
      #[test]
      fn test_preserves_original_content_with_default_settings() -> std::fmt::Result {
          let contents = [
              "simple line",
              "\nsome\nmultiline\ncontent",
              "// comment",
              "/* comment */",
              "mutliline\ncontent\n// comment1\nwith comments\n/* comment2 */ ",
          ];

          for content in &contents {
              let mut buf = FormatBuffer::new(String::new(), TAB_WIDTH);
              write!(buf, "{content}")?;
              assert_eq!(&buf.w, content);
          }

          Ok(())
      }
  }