From d73ea90543858e91aab18670e5596687ac253755 Mon Sep 17 00:00:00 2001 From: Mark Tyrkba Date: Sat, 22 Feb 2025 11:33:18 +0300 Subject: [PATCH] Stop doing redundant allocations in arena in `cr.rs`. Introduce new `util` module --- src/cr.rs | 15 ++++----------- src/graph.rs | 5 ++--- src/main.rs | 11 +++++------ src/parser.rs | 35 +++-------------------------------- src/template.rs | 13 ------------- src/util.rs | 37 +++++++++++++++++++++++++++++++++++++ 6 files changed, 51 insertions(+), 65 deletions(-) create mode 100644 src/util.rs diff --git a/src/cr.rs b/src/cr.rs index f532634..d98847c 100644 --- a/src/cr.rs +++ b/src/cr.rs @@ -17,7 +17,6 @@ use std::hash::{Hash, Hasher}; use std::path::{Path, PathBuf}; use std::sync::atomic::{Ordering, AtomicBool, AtomicUsize}; -use bumpalo::Bump; use dashmap::DashMap; #[derive(PartialEq)] @@ -26,8 +25,6 @@ enum ExecutorFlow { Ok, Stop } #[cfg_attr(feature = "dbg", derive(Debug))] pub struct CommandRunner<'a> { - arena: &'a Bump, - context: &'a Compiled<'a>, clean: Command<'a>, @@ -94,8 +91,7 @@ impl<'a> CommandRunner<'a> { fn new( clean: Command<'a>, - arena: &'a Bump, - context: &'a Compiled, + context: &'a Compiled<'a>, graph: Graph<'a>, transitive_deps: Graph<'a>, flags: Flags, @@ -113,7 +109,6 @@ impl<'a> CommandRunner<'a> { Self { clean, graph, - arena, poller, context, db_read, @@ -167,7 +162,6 @@ impl<'a> CommandRunner<'a> { pub fn run( clean: Command<'a>, - arena: &'a Bump, context: &'a Compiled, graph: Graph<'a>, transitive_deps: Graph<'a>, @@ -177,7 +171,6 @@ impl<'a> CommandRunner<'a> { ) -> Db<'a> { let mut cr = Self::new( clean, - arena, context, graph, transitive_deps, @@ -202,7 +195,7 @@ impl<'a> CommandRunner<'a> { } #[inline] - fn execute_command(&self, command: &Command<'a>, target: &str) -> io::Result::<()> { + fn execute_command(&self, command: &Command, target: &str) -> io::Result::<()> { if self.flags.print_commands() { let output = command.to_string(&self.flags); println!("{output}"); @@ -339,8 +332,8 @@ impl<'a> CommandRunner<'a> { }); let target = job.target; - let command = self.arena.alloc_str(&command); - let description = description.as_ref().map(|d| self.arena.alloc_str(d) as &_); + let command = command.as_ref(); + let description = description.as_deref(); let command = Command {target, command, description}; _ = self.execute_command(&command, job.target).map(|_| self.executed_jobs += 1); diff --git a/src/graph.rs b/src/graph.rs index 6157bce..a955e76 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,9 +1,9 @@ +use crate::util; use crate::parser::comp::Phony; use crate::dbg_unwrap::DbgUnwrap; use crate::parser::{Compiled, DefaultJob}; use crate::types::{StrHashMap, StrHashSet, StrIndexSet}; -use std::fs; use std::sync::Arc; use std::collections::VecDeque; @@ -49,8 +49,7 @@ pub fn build_dependency_graph<'a>( // check if depfile exists, if it does => read it, extend our deps with the ones that are listed in the .d file if let Some(job) = parsed.jobs.get(node) { if let Some(depfile_path) = job.depfile() { - if let Ok(depfile) = fs::read_to_string(depfile_path) { - let depfile = arena.alloc_str(&depfile); + if let Ok(depfile) = util::read_file_into_arena_str(arena, depfile_path) { let colon_idx = depfile.find(':').unwrap_dbg(); let depfile_deps = depfile[colon_idx + 1..] .split_ascii_whitespace() diff --git a/src/main.rs b/src/main.rs index db410b4..707a29e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ mod loc; mod ux; mod cr; mod db; +mod util; mod poll; mod flags; mod types; @@ -73,12 +74,11 @@ fn main() -> ExitCode { let content_ = unsafe { std::str::from_utf8_unchecked(&mmap[..]) }; let mut content = String::with_capacity(content_.len()); let escaped_indexes = Parser::preprocess_content(content_, &mut content); - let arena = Bump::with_capacity(1024 * 1024); + + let arena_cap = content.len().max(1024 * 1024); + + let arena = Bump::with_capacity(arena_cap); let context = Parser::parse(&arena, &content, &escaped_indexes); - // let reserve = context.guess_preallocation(); - // #[cfg(feature = "dbg")] { - // println!("guessed size: {reserve}") - // } let context = context.compile(&arena); @@ -149,7 +149,6 @@ fn main() -> ExitCode { _ = CommandRunner::run( clean, - &arena, &context, graph, transitive_deps, diff --git a/src/parser.rs b/src/parser.rs index 23760a9..867961f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,3 +1,4 @@ +use crate::util; use crate::db::Db; use crate::loc::Loc; use crate::flags::Flags; @@ -10,7 +11,6 @@ use crate::types::{StrHashMap, StrHashSet}; use crate::consts::{CLEAN_TARGET, PHONY_TARGETS, BUILD_DIR_VARIABLE}; use std::mem; -use std::io::Read; use std::sync::Arc; use std::borrow::Cow; use std::fs::{self, File}; @@ -250,24 +250,6 @@ impl<'a> Parsed<'a> { self.rules.get_mut(name).unwrap_dbg() } - #[inline] - pub fn guess_preallocation(&self) -> usize { - self.jobs.values().map(|j| { - j.target_template.guess_compiled_size() + match &j.phony { - prep::Phony::Phony { .. } => 0, - prep::Phony::NotPhony { inputs_templates, deps_templates, .. } => { - inputs_templates.iter() - .chain(deps_templates.iter()) - .map(|t| t.guess_compiled_size()) - .sum::() - } - } - }).sum::() + self.rules.values().map(|r| { - r.command.guess_compiled_size() + - r.description.as_ref().map_or(0, |d| d.guess_compiled_size()) - }).sum::() - } - #[cfg_attr(feature = "dbg", tramer("nanos"))] pub fn compile(self, arena: &'a Bump) -> Compiled<'a> { let Parsed { defs, jobs, rules, phonys, default_target } = self; @@ -762,7 +744,8 @@ impl<'a> Parser<'a> { match first_token { SUBRUSH => { let path = second_token; - let mut file = match File::open(path) { + + let content = match util::read_file_into_arena_str(self.arena, path) { Ok(ok) => ok, Err(e) => report_panic!{ Loc(self.cursor), @@ -770,18 +753,6 @@ impl<'a> Parser<'a> { } }; - let file_size = file.metadata().unwrap().len() as usize; - let content = self.arena.alloc_slice_fill_default(file_size); - - if let Err(e) = file.read_exact(content) { - report_panic!{ - Loc(self.cursor), - "could not read: {path}: {e}" - } - } - - let content = unsafe { std::str::from_utf8_unchecked(&*content) }; - // TODO: preprocess allocated string in-place, without additional allocation let buf = self.arena.alloc_slice_fill_default(content.len()); diff --git a/src/template.rs b/src/template.rs index 23de9e4..20a9933 100644 --- a/src/template.rs +++ b/src/template.rs @@ -22,7 +22,6 @@ pub struct Template<'a> { } impl Template<'_> { - const PLACEHOLDER_MULTIPLIER: usize = 7; const AVERAGE_COMPILED_CHUNK_SIZE: usize = 24; const CONSTANT_PLACEHOLDERS: &'static [&'static str] = &["in", "out"]; @@ -116,18 +115,6 @@ impl Template<'_> { Template { loc, in_used, statics_len, chunks } } - #[inline] - pub fn guess_compiled_size(&self) -> usize { - self.chunks.iter().map(|c| { - match c { - TemplateChunk::Static(s) => s.len() + 1, - TemplateChunk::JoinedStatic(s) => s.len(), - TemplateChunk::Placeholder(p) => Self::PLACEHOLDER_MULTIPLIER * p.len() + 1, - TemplateChunk::JoinedPlaceholder(p) => Self::PLACEHOLDER_MULTIPLIER * p.len() - } - }).sum::() + self.chunks.len() * 8 - } - #[inline] pub fn check(&self, shadows: &Shadows, defs: &Defs) -> Result::<(), String> { for placeholder in self.chunks.iter().filter_map(|c| { diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..d544a8c --- /dev/null +++ b/src/util.rs @@ -0,0 +1,37 @@ +use std::fs::File; +use std::path::Path; +use std::io::{self, Read, Error, ErrorKind}; + +use bumpalo::Bump; + +#[inline] +#[cfg_attr(feature = "dbg", track_caller)] +pub fn read_file_into_arena<'bump, P>(arena: &'bump Bump, path: P) -> io::Result::<&'bump mut [u8]> +where + P: AsRef:: +{ + let mut file = File::open(path)?; + + let file_size = file.metadata().unwrap().len() as usize; + let content = arena.alloc_slice_fill_default(file_size); + + file.read_exact(content).map(|_| content) +} + +#[inline] +#[cfg_attr(feature = "dbg", track_caller)] +pub fn read_file_into_arena_str<'bump, P>(arena: &'bump Bump, path: P) -> io::Result::<&'bump str> +where + P: AsRef:: +{ + match read_file_into_arena(arena, path) { + Ok(bytes) => { + std::str::from_utf8(&*bytes).map_err(|e| { + let kind = ErrorKind::InvalidData; + let err = Error::new(kind, e); + err + }) + } + Err(e) => Err(e) + } +}