From c3215249c2fa7e556ce8110d109a02ea2bacb0c3 Mon Sep 17 00:00:00 2001 From: Ryan Jones-Ward Date: Tue, 9 Jul 2024 15:09:23 +0100 Subject: [PATCH] Initial work on the data section parsing. --- Cargo.toml | 2 + redox-core/Cargo.toml | 2 + redox-core/src/parsing/asm_parser.rs | 80 +++++++++++++++++++++++++++- redox-terminal/src/main.rs | 10 +++- 4 files changed, 92 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f91862e..69da7d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,8 @@ rand_xoshiro = "0.6.0" strum = "0.26.3" strum_macros = "0.26.4" thiserror = "1.0.61" +unescape = "0.1.0" +unicode-segmentation = "1.11.0" winres = "0.1.12" [profile.dev] diff --git a/redox-core/Cargo.toml b/redox-core/Cargo.toml index 8713b67..d0b0207 100644 --- a/redox-core/Cargo.toml +++ b/redox-core/Cargo.toml @@ -23,6 +23,8 @@ rand.workspace = true rand_xoshiro.workspace = true strum.workspace = true strum_macros.workspace = true +unescape.workspace = true +unicode-segmentation.workspace = true thiserror.workspace = true [dev-dependencies] diff --git a/redox-core/src/parsing/asm_parser.rs b/redox-core/src/parsing/asm_parser.rs index 78d89a6..643302c 100644 --- a/redox-core/src/parsing/asm_parser.rs +++ b/redox-core/src/parsing/asm_parser.rs @@ -157,7 +157,7 @@ impl<'a> AsmParser<'a> { FileSection::Text => { instructions.push(self.parse_code_line(line)); } - FileSection::Data => todo!(), + FileSection::Data => self.parse_data_line(line), FileSection::ReadOnlyData => todo!(), } } @@ -369,6 +369,68 @@ impl<'a> AsmParser<'a> { AsmParser::try_build_instruction(final_option.opcode, &arguments, label) } + /// Parse a data line of an assembly file. + /// + /// # Arguments + /// + /// * `line` - A data line to be parsed. 
+ fn parse_data_line(&mut self, line: &str) { + use unicode_segmentation::UnicodeSegmentation; + + let graphemes: Vec<&str> = line.graphemes(true).collect(); + + let mut in_quoted_string = false; + + let mut escaped_args = vec![]; + let mut buffer = String::new(); + for (i, g) in graphemes.iter().enumerate() { + let is_escaped = i > 0 && graphemes[i - 1] == "\\"; + + if !is_escaped && (*g == "\"" || *g == "'") { + // We have reached the end of a quoted segment. + if in_quoted_string { + AsmParser::push_buffer_if_not_empty(&mut buffer, &mut escaped_args); + } + + in_quoted_string = !in_quoted_string; + continue; + } + + if !in_quoted_string && (*g == " " || *g == "\t" || *g == ",") { + // We have reached the end of a segment. + if !buffer.is_empty() { + AsmParser::push_buffer_if_not_empty(&mut buffer, &mut escaped_args); + } + + continue; + } + + buffer.push_str(g); + } + + assert!( + !in_quoted_string, + "invalid syntax - closing quotation mark not present for line: {line}." + ); + + // Push the final argument to the list. + AsmParser::push_buffer_if_not_empty(&mut buffer, &mut escaped_args); + + assert!( + escaped_args.len() >= 3, + "invalid syntax - insufficient arguments for line: {line}." + ); + + // Now we want to ensure we correctly handle any escape sequences. + let mut arguments = vec![]; + for argument in escaped_args { + let un = unescape::unescape(&argument).unwrap_or(argument); + arguments.push(un); + } + + println!("{arguments:?}"); + } + /// Parse a section line of an ASM file. /// /// # Arguments @@ -389,6 +451,22 @@ impl<'a> AsmParser<'a> { } } + /// Push a string buffer onto a string argument vector. + /// + /// # Arguments + /// + /// * `arg` - The argument string. + /// * `vec` - The vector that the argument should be pushed to, if it isn't empty. 
+ #[inline] + fn push_buffer_if_not_empty(arg: &mut String, vec: &mut Vec<String>) { + if arg.is_empty() { + return; + } + + vec.push(arg.clone()); + arg.clear(); + } + /// Try to build an [`Instruction`] from an [`OpCode`] and a set of arguments. /// /// # Arguments diff --git a/redox-terminal/src/main.rs b/redox-terminal/src/main.rs index 4dcb8dc..9ac0edd 100644 --- a/redox-terminal/src/main.rs +++ b/redox-terminal/src/main.rs @@ -66,7 +66,15 @@ fn main() { panic!("currently unsupported"); } - let code = "section .text\r\npush 0\r\ncall :LABEL_1\r\nhlt\r\n:LABEL_1\r\nmov 0xdeadbeef, EAX\r\niret"; + let code = "section .data + banana db \"apples\" + section .text + push 0 + call :LABEL_1 + hlt + :LABEL_1 + mov 0xdeadbeef, EAX + iret"; let mut compiler = Compiler::new(); let data = compiler.compile_assembly(code, true);