Skip to content

Commit

Permalink
feat(runtime): set exact file size limit
Browse files: browse the repository at this point in the history
  • Loading branch information
loichyan committed Jul 12, 2024
1 parent db421eb commit b0a6e53
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 77 deletions.
56 changes: 7 additions & 49 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ license = "MIT OR Apache-2.0"
edition = "2021"

[dependencies]
bytesize = "1.3"
clap = { version = ">=4.0, <4.5", features = ["derive"] }
codespan-reporting = "0.11.1"
content_inspector = "0.2.4"
Expand All @@ -21,7 +22,6 @@ regex = "1.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
shadow-rs = { version = "0.29.0", default-features = false }
sysinfo = { version = "0.30.13", default-features = false }
thisctx = "0.4.0"
thiserror = "1.0"
tracing = "0.1.40"
Expand Down
15 changes: 9 additions & 6 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::path::PathBuf;
use std::str::FromStr;
use std::{fmt, fs, io};

use bytesize::ByteSize;
use clap::{Parser, Subcommand, ValueEnum};
use shadow_rs::formatcp;
use thisctx::IntoError;
Expand All @@ -17,6 +18,8 @@ const V_PATH: &str = "PATH";
const V_SOURCE: &str = "SOURCE";
const V_SUBSTITUTION: &str = "SUBSTITUTION";
const V_FORMAT: &str = "FORMAT";
const V_SIZE: &str = "SIZE";
const DEFAULT_SIZE: &str = "16MB";
const INDEX_REV: &str = include_str!("index-rev");
const CLAP_LONG_VERSION: &str = formatcp!("{}\ncheat-sheet: {}", shadow::PKG_VERSION, INDEX_REV);

Expand Down Expand Up @@ -96,9 +99,9 @@ pub enum Command {
/// Do not skip binary files.
#[arg(long)]
include_binary: bool,
/// Do not skip large files.
#[arg(long)]
include_large: bool,
/// Set the file size limit (0 to disable it).
#[arg(long, value_name= V_SIZE, default_value = DEFAULT_SIZE)]
size_limit: ByteSize,
/// Path(s) of files to check.
#[arg(value_name = V_PATH)]
source: Vec<IoPath>,
Expand All @@ -120,9 +123,9 @@ pub enum Command {
/// Do not skip binary files.
#[arg(long)]
include_binary: bool,
/// Do not skip large files.
#[arg(long)]
include_large: bool,
/// Set the file size limit (0 to disable it).
#[arg(long, value_name= V_SIZE, default_value = DEFAULT_SIZE)]
size_limit: ByteSize,
/// Path tuple(s) of files to read from and write to.
///
/// Each tuple is an input path followed by an optional output path,
Expand Down
8 changes: 4 additions & 4 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,14 @@ fn main_impl() -> error::Result<()> {
source,
recursive,
include_binary,
include_large,
size_limit,
} => {
let rt = rt.build();
let mut context = CheckerContext {
format,
writer: Box::new(std::io::stdout()),
include_binary,
include_large,
size_limit: size_limit.as_u64(),
..Default::default()
};
for source in walk(source.into_iter().map(|p| Source(p, None)), recursive) {
Expand All @@ -134,7 +134,7 @@ fn main_impl() -> error::Result<()> {
select_first,
recursive,
include_binary,
include_large,
size_limit,
source,
} => {
if yes {
Expand All @@ -145,7 +145,7 @@ fn main_impl() -> error::Result<()> {
write,
select_first,
include_binary,
include_large,
size_limit: size_limit.as_u64(),
..Default::default()
};
let mut buffer = String::new();
Expand Down
20 changes: 3 additions & 17 deletions src/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ const PAD_LEN: usize = 2;
const WARP: f32 = 3.0;
const THRESHOLD: f32 = 0.7;
const MAX_CHOICES: usize = 4;
const MAX_MEM: u64 = 1024 * 1024 * 1024 * 8;

pub type NGram = noodler::NGram<(String, usize)>;

Expand All @@ -34,7 +33,6 @@ pub struct Runtime {
corpus: OnceCell<Rc<NGram>>,
exact_sub: HashMap<String, String>,
prefix_sub: Vec<Substitution>,
file_size_limit: OnceCell<u64>,
}

#[derive(Default)]
Expand Down Expand Up @@ -114,18 +112,6 @@ impl Runtime {
Ok(())
}

pub fn file_size_limit(&self) -> u64 {
*self.file_size_limit.get_or_init(|| {
let mut sys = sysinfo::System::new();
sys.refresh_memory();
let mut max_mem = sys.total_memory();
if let Some(limit) = sys.cgroup_limits() {
max_mem = limit.total_memory;
}
std::cmp::min(max_mem * 3 / 4, MAX_MEM)
})
}

pub fn check(
&self,
context: &mut CheckerContext,
Expand All @@ -134,7 +120,7 @@ impl Runtime {
) -> error::Result<bool> {
info!("Check input file from '{}'", input);

if !context.include_large && input.file_size()?.unwrap_or(0) >= self.file_size_limit() {
if context.size_limit != 0 && input.file_size()?.unwrap_or(0) >= context.size_limit {
warn!("Skip large file '{}'", input);
return Ok(false);
}
Expand Down Expand Up @@ -389,7 +375,7 @@ pub struct CheckerContext {
pub write: bool,
pub select_first: bool,
pub include_binary: bool,
pub include_large: bool,
pub size_limit: u64,
}

impl Default for CheckerContext {
Expand All @@ -402,7 +388,7 @@ impl Default for CheckerContext {
write: false,
select_first: false,
include_binary: false,
include_large: false,
size_limit: 0,
}
}
}
Expand Down

0 comments on commit b0a6e53

Please sign in to comment.