Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

--filter argument for split #1681

Merged
merged 37 commits into from
Jan 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
bcbef3d
first `--filter` implementation
FelipeLema Jan 5, 2021
400450d
don't borrow stdin from child process
FelipeLema Jan 5, 2021
3403c42
add documentation for "line size"
FelipeLema Jan 5, 2021
7fc2e93
add unit test
FelipeLema Jan 5, 2021
1656c3e
improve docs in tests internal utilities
FelipeLema Jan 6, 2021
00f51b2
fix unit test
FelipeLema Jan 6, 2021
8d2c602
borrow, don't move (it's not necessary)
FelipeLema Jan 6, 2021
7dee374
improve doc / format code
FelipeLema Jan 6, 2021
931959b
correct assertion
FelipeLema Jan 6, 2021
62e29cf
Update src/uu/split/src/split.rs
FelipeLema Jan 6, 2021
f0a41ea
remove mid-dev markers, comment on code branches
FelipeLema Jan 7, 2021
fbbfee7
remove redundancy / simplify fetching CLI parameter
FelipeLema Jan 7, 2021
ee09691
Merge branch 'split-filter' of github.com:FelipeLema/coreutils into s…
FelipeLema Jan 7, 2021
48ee1a1
proper shell command for windows
FelipeLema Jan 7, 2021
3701f28
flush() before closing shell process' stdin
FelipeLema Jan 7, 2021
b900093
re-write "with-env-var" as RAII
FelipeLema Jan 7, 2021
9dcdbaa
test handling of previously set env and $FILE
FelipeLema Jan 7, 2021
0b21b52
correctly forward shell command failure
FelipeLema Jan 14, 2021
a0934e0
add / correct documentation
FelipeLema Jan 14, 2021
a5c850a
add windows test
FelipeLema Jan 14, 2021
501d621
correct windows arguments to CMD
FelipeLema Jan 15, 2021
9967684
add some more documentation
FelipeLema Jan 15, 2021
66e1d69
try an alternative to "cat for windows"
FelipeLema Jan 15, 2021
1e550b7
re-organize unix-vs-win code
FelipeLema Jan 15, 2021
ad8ecdf
quoting $FILE in windows, just in case
FelipeLema Jan 15, 2021
70914b2
leaving `--filter` as non-windows-only feature
FelipeLema Jan 15, 2021
a1842f1
windows: fix compilation errors & warnings
FelipeLema Jan 16, 2021
85045c7
Make explicit "windows-only feature" in --help
FelipeLema Jan 16, 2021
3bc94c0
fulfill requests from github review
FelipeLema Jan 16, 2021
3bf4e8f
Merge branch 'split-filter' of github.com:FelipeLema/coreutils into s…
FelipeLema Jan 16, 2021
0716b97
suggestion from github review
FelipeLema Jan 16, 2021
6f18db1
don't assert, that's already done in `fails()`
FelipeLema Jan 16, 2021
22268cb
revert Cargo.lock to master
FelipeLema Jan 16, 2021
056e182
remove windows warning (suggested from github)
FelipeLema Jan 18, 2021
9c85575
migrate platform-dependent code to separate modules
FelipeLema Jan 18, 2021
ecdf756
Merge branch 'split-filter' of github.com:FelipeLema/coreutils into s…
FelipeLema Jan 18, 2021
a839836
Explain unix-only filter tests
FelipeLema Jan 18, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/uu/split/src/platform/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#[cfg(unix)]
pub use self::unix::instantiate_current_writer;

#[cfg(windows)]
pub use self::windows::instantiate_current_writer;

#[cfg(unix)]
mod unix;

#[cfg(windows)]
mod windows;
124 changes: 124 additions & 0 deletions src/uu/split/src/platform/unix.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
use std::env;
use std::io::Write;
use std::io::{BufWriter, Result};
use std::process::{Child, Command, Stdio};
/// A writer that writes to a shell_process' stdin
///
/// We use a shell process (not directy calling a sub-process) so we can forward the name of the
/// corresponding output file (xaa, xab, xac… ). This is the way it was implemented in GNU split.
struct FilterWriter {
/// Running shell process
shell_process: Child,
}

impl Write for FilterWriter {
fn write(&mut self, buf: &[u8]) -> Result<usize> {
self.shell_process
.stdin
.as_mut()
.expect("failed to get shell stdin")
.write(buf)
}
fn flush(&mut self) -> Result<()> {
self.shell_process
.stdin
.as_mut()
.expect("failed to get shell stdin")
.flush()
}
}

/// Have an environment variable set at a value during this lifetime
struct WithEnvVarSet {
/// Env var key
_previous_var_key: String,
/// Previous value set to this key
_previous_var_value: std::result::Result<String, env::VarError>,
}
impl WithEnvVarSet {
/// Save previous value assigned to key, set key=value
fn new(key: &str, value: &str) -> WithEnvVarSet {
let previous_env_value = env::var(key);
env::set_var(key, value);
WithEnvVarSet {
_previous_var_key: String::from(key),
_previous_var_value: previous_env_value,
}
}
}

impl Drop for WithEnvVarSet {
/// Restore previous value now that this is being dropped by context
fn drop(&mut self) {
if let Ok(ref prev_value) = self._previous_var_value {
env::set_var(&self._previous_var_key, &prev_value);
} else {
env::remove_var(&self._previous_var_key)
}
}
}
impl FilterWriter {
/// Create a new filter running a command with $FILE pointing at the output name
///
/// #Arguments
///
/// * `command` - The shell command to execute
/// * `filepath` - Path of the output file (forwarded to command as $FILE)
fn new(command: &str, filepath: &str) -> FilterWriter {
// set $FILE, save previous value (if there was one)
let _with_env_var_set = WithEnvVarSet::new("FILE", &filepath);

let shell_process = Command::new(env::var("SHELL").unwrap_or("/bin/sh".to_owned()))
.arg("-c")
.arg(command)
.stdin(Stdio::piped())
.spawn()
.expect("Couldn't spawn filter command");

FilterWriter {
shell_process: shell_process,
}
}
}

impl Drop for FilterWriter {
/// flush stdin, close it and wait on `shell_process` before dropping self
fn drop(&mut self) {
{
// close stdin by dropping it
let _stdin = self.shell_process.stdin.as_mut();
}
let exit_status = self
.shell_process
.wait()
.expect("Couldn't wait for child process");
if let Some(return_code) = exit_status.code() {
if return_code != 0 {
crash!(1, "Shell process returned {}", return_code);
}
} else {
crash!(1, "Shell process terminated by signal")
}
}
}

/// Instantiate either a file writer or a "write to shell process's stdin" writer
pub fn instantiate_current_writer(
filter: &Option<String>,
filename: &str,
) -> BufWriter<Box<dyn Write>> {
match filter {
None => BufWriter::new(Box::new(
// write to the next file
std::fs::OpenOptions::new()
.write(true)
.create(true)
.open(std::path::Path::new(&filename))
.unwrap(),
) as Box<dyn Write>),
Some(ref filter_command) => BufWriter::new(Box::new(
// spawn a shell command and write to it
FilterWriter::new(&filter_command, &filename),
) as Box<dyn Write>),
}
}
19 changes: 19 additions & 0 deletions src/uu/split/src/platform/windows.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
use std::io::BufWriter;
use std::io::Write;
/// Get a file writer
///
/// Unlike the unix version of this function, this _always_ returns
/// a file writer
pub fn instantiate_current_writer(
_filter: &Option<String>,
filename: &str,
) -> BufWriter<Box<dyn Write>> {
BufWriter::new(Box::new(
// write to the next file
std::fs::OpenOptions::new()
.write(true)
.create(true)
.open(std::path::Path::new(&filename))
.unwrap(),
) as Box<dyn Write>)
}
35 changes: 23 additions & 12 deletions src/uu/split/src/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@
#[macro_use]
extern crate uucore;

mod platform;

use std::char;
use std::fs::{File, OpenOptions};
use std::env;
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::path::Path;

Expand Down Expand Up @@ -47,6 +50,12 @@ pub fn uumain(args: impl uucore::Args) -> i32 {
"additional suffix to append to output file names",
"SUFFIX",
);
opts.optopt(
FelipeLema marked this conversation as resolved.
Show resolved Hide resolved
"",
"filter",
"write to shell COMMAND file name is $FILE (Currently not implemented for Windows)",
"COMMAND",
);
opts.optopt("l", "lines", "put NUMBER lines per output file", "NUMBER");
opts.optflag(
"",
Expand Down Expand Up @@ -92,6 +101,7 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
suffix_length: 0,
additional_suffix: "".to_owned(),
input: "".to_owned(),
filter: None,
strategy: "".to_owned(),
strategy_param: "".to_owned(),
verbose: false,
Expand Down Expand Up @@ -138,6 +148,14 @@ size is 1000, and default PREFIX is 'x'. With no INPUT, or when INPUT is
settings.input = input;
settings.prefix = prefix;

settings.filter = matches.opt_str("filter");

if settings.filter.is_some() && cfg!(windows) {
// see https://github.com/rust-lang/rust/issues/29494
show_error!("--filter is currently not supported in this platform");
exit!(-1);
}

split(&settings)
}

Expand All @@ -147,6 +165,8 @@ struct Settings {
suffix_length: usize,
additional_suffix: String,
input: String,
/// When supplied, a shell command to output to instead of xaa, xab …
filter: Option<String>,
strategy: String,
strategy_param: String,
verbose: bool,
Expand Down Expand Up @@ -323,7 +343,6 @@ fn split(settings: &Settings) -> i32 {
_ => {}
}
}

if control.request_new_file {
let mut filename = settings.prefix.clone();
filename.push_str(
Expand All @@ -336,17 +355,9 @@ fn split(settings: &Settings) -> i32 {
);
filename.push_str(settings.additional_suffix.as_ref());

if fileno != 0 {
crash_if_err!(1, writer.flush());
}
crash_if_err!(1, writer.flush());
fileno += 1;
writer = BufWriter::new(Box::new(
OpenOptions::new()
.write(true)
.create(true)
.open(Path::new(&filename))
.unwrap(),
) as Box<dyn Write>);
writer = platform::instantiate_current_writer(&settings.filter, filename.as_str());
control.request_new_file = false;
if settings.verbose {
println!("creating file '{}'", filename);
Expand Down
74 changes: 72 additions & 2 deletions tests/by-util/test_split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ extern crate regex;
use self::rand::{thread_rng, Rng};
use self::regex::Regex;
use crate::common::util::*;
#[cfg(not(windows))]
use std::env;
FelipeLema marked this conversation as resolved.
Show resolved Hide resolved
FelipeLema marked this conversation as resolved.
Show resolved Hide resolved
use std::fs::{read_dir, File};
use std::io::Write;
use std::path::Path;
Expand Down Expand Up @@ -32,6 +34,7 @@ impl Glob {
self.collect().len()
}

/// Get all files in `self.directory` that match `self.regex`
fn collect(&self) -> Vec<String> {
read_dir(Path::new(&self.directory.subdir))
.unwrap()
Expand All @@ -49,6 +52,7 @@ impl Glob {
.collect()
}

/// Accumulate bytes of all files in `self.collect()`
fn collate(&self) -> Vec<u8> {
let mut files = self.collect();
files.sort();
Expand All @@ -60,11 +64,16 @@ impl Glob {
}
}

/// File handle that user can add random bytes (line-formatted or not) to
struct RandomFile {
inner: File,
}

impl RandomFile {
/// Size of each line that's being generated
const LINESIZE: usize = 32;

/// `create()` file handle located at `at` / `name`
fn new(at: &AtPath, name: &str) -> RandomFile {
RandomFile {
inner: File::create(&at.plus(name)).unwrap(),
Expand All @@ -81,11 +90,11 @@ impl RandomFile {
let _ = write!(self.inner, "{}", random_chars(n));
}

/// Add n lines each of size `RandomFile::LINESIZE`
fn add_lines(&mut self, lines: usize) {
let line_size: usize = 32;
let mut n = lines;
while n > 0 {
let _ = writeln!(self.inner, "{}", random_chars(line_size));
let _ = writeln!(self.inner, "{}", random_chars(RandomFile::LINESIZE));
n -= 1;
}
}
Expand Down Expand Up @@ -156,3 +165,64 @@ fn test_split_additional_suffix() {
assert_eq!(glob.count(), 2);
assert_eq!(glob.collate(), at.read(name).into_bytes());
}

// note: the test_filter* tests below are unix-only
// windows support has been waived for now because of the difficulty of getting
// the `cmd` call right
// see https://github.com/rust-lang/rust/issues/29494

#[test]
#[cfg(unix)]
FelipeLema marked this conversation as resolved.
Show resolved Hide resolved
fn test_filter() {
// like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);

// change all characters to 'i'
ucmd.args(&["--filter=sed s/./i/g > $FILE", name])
.succeeds();
// assert all characters are 'i' / no character is not 'i'
// (assert that command succeded)
assert!(
glob.collate().iter().find(|&&c| {
// is not i
c != ('i' as u8)
// is not newline
&& c != ('\n' as u8)
}) == None
);
}

#[test]
#[cfg(unix)]
fn test_filter_with_env_var_set() {
// This test will ensure that if $FILE env var was set before running --filter, it'll stay that
// way
// implemented like `test_split_default()` but run a command before writing
let (at, mut ucmd) = at_and_ucmd!();
let name = "filtered";
let glob = Glob::new(&at, ".", r"x[[:alpha:]][[:alpha:]]$");
let n_lines = 3;
RandomFile::new(&at, name).add_lines(n_lines);

let env_var_value = "somevalue";
env::set_var("FILE", &env_var_value);
ucmd.args(&[format!("--filter={}", "cat > $FILE").as_str(), name])
.succeeds();
assert_eq!(glob.collate(), at.read(name).into_bytes());
assert!(env::var("FILE").unwrap_or("var was unset".to_owned()) == env_var_value);
}

#[test]
#[cfg(unix)]
fn test_filter_command_fails() {
let (at, mut ucmd) = at_and_ucmd!();
let name = "filter-will-fail";
RandomFile::new(&at, name).add_lines(4);

ucmd.args(&["--filter=/a/path/that/totally/does/not/exist", name])
.fails();
}
4 changes: 4 additions & 0 deletions tests/common/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,12 @@ pub fn repeat_str(s: &str, n: u32) -> String {
pub struct CmdResult {
//tmpd is used for convenience functions for asserts against fixtures
tmpd: Option<Rc<TempDir>>,
/// zero-exit from running the Command?
FelipeLema marked this conversation as resolved.
Show resolved Hide resolved
/// see [`success`]
pub success: bool,
/// captured utf-8 standard output after running the Command
pub stdout: String,
/// captured utf-8 standard error after running the Command
pub stderr: String,
}

Expand Down