Skip to content

Commit

Permalink
Weird thiserror error in debug builds
Browse files Browse the repository at this point in the history
  • Loading branch information
Scripter17 committed Jan 2, 2024
1 parent 86d73cb commit f4b3988
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 82 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/target
/301-cache.txt
/konsole*
/benchmarking/stdin
/benchmarking/output-*
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ wasm-bindgen = "0.2.88"
reqwest = { version = "0.11.22", features = ["blocking"], optional = true}
const-str = { version = "0.5.6", optional = true }
atty = { version = "0.2.14", optional = true }
thiserror = "1.0.50"
thiserror = "1.0.52"
serde-wasm-bindgen = "0.6.3"
regex = { version = "1.10.2", optional = true }
glob = { version = "0.3.1", optional = true }
Expand Down
16 changes: 16 additions & 0 deletions benchmarking/benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/bash

rm output*

URLS=("https://x.com?a=2" "https://example.com?fb_action_ids&mc_eid&ml_subscriber_hash&oft_ck&s_cid&unicorn_click_id")

cargo build -r
for url in "${URLS[@]}"; do
echo $url > stdin
for num in $(seq 3); do
out="output-$(echo $url | rg / -r=-)-$num"
hyperfine -n "$url - $(cat stdin | wc -l)" -w 5 --input ./stdin "../target/release/url-cleaner" --export-json "$out"

printf "$(cat stdin)\n%0.s" {1..100} > stdin
done
done
4 changes: 0 additions & 4 deletions default-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,5 @@
{
"condition": {"UnqualifiedAnyTld": "amazon"},
"mapper": {"RemoveSomeQueryParams": ["ref"]}
},
{
"condition": "Always",
"mapper": "Expand301"
}
]
128 changes: 72 additions & 56 deletions src/glue/command/enabled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use thiserror::Error;
use std::borrow::Cow;
use std::collections::HashMap;
use std::env;
use std::convert::Infallible;

use serde::{
Serialize, Deserialize,
Expand All @@ -18,16 +19,78 @@ use serde::{
/// The enabled form of the wrapper around [`Command`].
/// Only the necessary methods are exposed for the sake of simplicity.
/// Note that if the `command` feature is disabled, this struct is empty and all provided functions will always panic.
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize)]
pub struct CommandWrapper {
/// The command being wrapped around.
#[serde(flatten, serialize_with = "serialize_command", deserialize_with = "deserialize_command")]
#[serde(flatten, serialize_with = "serialize_command")]
pub inner: Command,
/// The rule for how the command's output is handled and returned in [`CommandWrapper::get_url`].
#[serde(default)]
pub output_handler: OutputHandler
}

#[derive(Debug, Serialize, Deserialize)]
struct CommandParts {
program: String,
#[serde(default)]
args: Vec<String>,
#[serde(default)]
current_dir: Option<PathBuf>,
#[serde(default)]
envs: HashMap<String, String>,
#[serde(default)]
output_handler: OutputHandler
}

impl FromStr for CommandParts {
type Err = Infallible;

fn from_str(x: &str) -> Result<Self, Self::Err> {
Ok(Self {
program: x.to_string(),
args: Vec::new(),
current_dir: None,
envs: HashMap::new(),
output_handler: OutputHandler::default()
})
}
}

impl<'de> Deserialize<'de> for CommandWrapper {
/// TODO: Deserializing from a list.
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let parts: CommandParts = crate::glue::string_or_struct(deserializer)?;
Ok(Self {
output_handler: parts.output_handler.clone(),
inner: parts.into()
})
}
}

impl From<CommandParts> for Command {
fn from(parts: CommandParts) -> Self {
let mut ret=Command::new(parts.program);
ret.args(parts.args);
if let Some(dir) = parts.current_dir {
ret.current_dir(dir);
}
ret.envs(parts.envs);
ret
}
}

fn serialize_command<S: Serializer>(command: &Command, serializer: S) -> Result<S::Ok, S::Error> {
let mut state = serializer.serialize_struct("Comamnd", 3)?;
state.serialize_field("program", command.get_program().to_str().ok_or_else(|| S::Error::custom("The command's program name/path is not UTF-8"))?)?;
state.serialize_field("args", &command.get_args().map(|x| x.to_str()).collect::<Option<Vec<_>>>().ok_or_else(|| S::Error::custom("One of the command's arguments isn't UTF-8"))?)?;
state.serialize_field("envs", &command.get_envs().filter_map(
|(k, v)| match (k.to_str(), v.map(|x| x.to_str())) {
(Some(k), Some(Some(v))) => Some((k, v)),
_ => None
}
).collect::<HashMap<_, _>>())?;
state.end()
}

/// The enabled form of `OutputHandler`.
/// Note that if the `command` feature is disabled, this enum is empty and all provided functions will always panic.
/// Before a [`CommandWrapper`] returns its output, it passes it through this to allow for piping and control flow.
Expand Down Expand Up @@ -102,48 +165,14 @@ impl OutputHandler {
}
}

#[derive(Debug, Serialize, Deserialize)]
struct CommandParts {
program: String,
#[serde(default)]
args: Vec<String>,
#[serde(default)]
current_dir: Option<PathBuf>,
#[serde(default)]
envs: HashMap<String, String>
}

fn deserialize_command<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Command, D::Error> {
let command_parts: CommandParts = Deserialize::deserialize(deserializer)?;
let mut ret=Command::new(command_parts.program);
ret.args(command_parts.args);
if let Some(dir) = command_parts.current_dir {
ret.current_dir(dir);
}
ret.envs(command_parts.envs);
Ok(ret)
}

fn serialize_command<S: Serializer>(command: &Command, serializer: S) -> Result<S::Ok, S::Error> {
let mut state = serializer.serialize_struct("Comamnd", 3)?;
state.serialize_field("program", command.get_program().to_str().ok_or_else(|| S::Error::custom("The command's program name/path is not UTF-8"))?)?;
state.serialize_field("args", &command.get_args().map(|x| x.to_str()).collect::<Option<Vec<_>>>().ok_or_else(|| S::Error::custom("One of the command's arguments isn't UTF-8"))?)?;
state.serialize_field("envs", &command.get_envs().filter_map(|(k, v)| {
match (k.to_str(), v.map(|x| x.to_str())) {
(Some(k), Some(Some(v))) => Some((k, v)),
_ => None
}
}).collect::<HashMap<_, _>>())?;
state.end()
}

impl CommandWrapper {
/// Checks if the command's [`std::process::Command::program`] exists. Checks the system's PATH.
/// Uses [this StackOverflow post](https://stackoverflow.com/a/37499032/10720231) to check the PATH.
pub fn exists(&self) -> bool {
PathBuf::from(self.inner.get_program()).exists() || find_it(self.inner.get_program()).is_some()
}

/// Runs the command and gets the exit code. Returns [`Err(CommandError::SignalTermination)`] if the command returns no exit code.
pub fn exit_code(&self, url: &Url) -> Result<i32, CommandError> {
self.clone().apply_url(url).inner.status()?.code().ok_or(CommandError::SignalTermination)
Expand All @@ -159,7 +188,8 @@ impl CommandWrapper {
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
let child_stdin=child.stdin.as_mut().expect("The STDIN inserted a few lines ago to have worked");
#[allow(clippy::unwrap_used)]
let child_stdin=child.stdin.as_mut().unwrap(); // This never panics. If it ever does, so will I.
child_stdin.write_all(stdin)?;
self.output_handler.handle(url, child.wait_with_output()?)
},
Expand All @@ -181,12 +211,7 @@ impl CommandWrapper {
if let Some(dir) = self.inner.get_current_dir() {
ret.current_dir(dir);
}
ret.envs(self.inner.get_envs().filter_map(|(k, v)| {
match (k, v) {
(k, Some(v)) => Some((k.to_owned(), v.to_owned())),
_ => None
}
}));
ret.envs(self.inner.get_envs().filter_map(|(k, v)| v.map(|v| (k, v))));
Self {inner: ret, output_handler: self.output_handler.clone()}
}
}
Expand All @@ -198,12 +223,7 @@ impl Clone for CommandWrapper {
if let Some(dir) = self.inner.get_current_dir() {
ret.current_dir(dir);
}
ret.envs(self.inner.get_envs().filter_map(|(k, v)|
match (k, v) {
(k, Some(v)) => Some((k, v)),
_ => None
}
));
ret.envs(self.inner.get_envs().filter_map(|(k, v)| v.map(|v| (k, v))));
Self {inner: ret, output_handler: self.output_handler.clone()}
}
}
Expand All @@ -220,11 +240,7 @@ fn find_it<P: AsRef<Path>>(exe_name: P) -> Option<PathBuf> {
env::var_os("PATH").and_then(|paths| {
env::split_paths(&paths).filter_map(|dir| {
let full_path = dir.join(&exe_name);
if full_path.is_file() {
Some(full_path)
} else {
None
}
full_path.is_file().then_some(full_path)
}).next()
})
}
Expand Down
13 changes: 4 additions & 9 deletions src/glue/regex/enabled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::convert::Infallible;

use serde::{
Serialize,
ser::Serializer,
{de::Error as _, Deserialize, Deserializer}
};
use regex::{Regex, RegexBuilder, Replacer, Error as RegexError};
Expand All @@ -13,12 +12,14 @@ use regex::{Regex, RegexBuilder, Replacer, Error as RegexError};
/// Note that if the `regex` feature is disabled, this struct is empty and all provided functions will always panic.
/// Because converting a [`Regex`] to [`RegexParts`] is way more complicated than it should be, various [`From`]/[`Into`] and [`TryFrom`]/[`TryInto`] conversions are defined instead of making the filds public.
/// Only the necessary methods are exposed for the sake of simplicity.
#[derive(Clone, Debug)]
#[derive(Clone, Debug, Serialize)]
#[serde(into = "RegexParts")]
pub struct RegexWrapper {
/// The compiled [`regex::Regex`].
inner: Regex,
/// The [`RegexParts`] used to construct the above [`regex::Regex`].
/// Exists here primarily for the sake of [`Clone`].
/// Let's see YOU implement clone for [`regex::Regex`]. It's a mess.
parts: RegexParts
}

Expand Down Expand Up @@ -152,7 +153,7 @@ impl TryFrom<RegexParts> for RegexWrapper {
impl From<RegexWrapper> for Regex {fn from(value: RegexWrapper) -> Self {value.inner}}
impl From<RegexWrapper> for RegexParts {fn from(value: RegexWrapper) -> Self {value.parts}}

impl<'de> Deserialize<'de> for RegexWrapper {
impl<'de> Deserialize<'de> for RegexWrapper {
fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
let parts: RegexParts = crate::glue::string_or_struct(deserializer)?;
Ok(RegexWrapper {
Expand All @@ -162,12 +163,6 @@ impl<'de> Deserialize<'de> for RegexWrapper {
}
}

impl Serialize for RegexWrapper {
fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
self.parts.serialize(serializer)
}
}

impl RegexWrapper {
/// Wrapper for `regex::Regex::is_match`.
pub fn is_match(&self, str: &str) -> bool {
Expand Down
16 changes: 7 additions & 9 deletions src/rules/conditions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,8 @@ impl Condition {
match url.domain() {
Some(url_domain) => match crate::suffix::get_tlds()?.suffix(url_domain.as_bytes()) {
Some(suffix) => {
url_domain.as_bytes().split(|x| *x==b'.').collect::<Vec<_>>()
.ends_with(&name.as_bytes().split(|x| *x==b'.').chain(suffix.as_bytes().split(|x| *x==b'.')).collect::<Vec<_>>())
// https://github.com/rust-lang/libs-team/issues/212
url_domain.as_bytes().strip_suffix(suffix.as_bytes()).is_some_and(|x| x.strip_suffix(b".").is_some_and(|y| y.ends_with(name.as_bytes()) && y.get(y.len()-name.bytes().len()-1).map_or(true, |x| *x==b'.')))
},
None => false
},
Expand All @@ -225,9 +225,7 @@ impl Condition {
match url.domain() {
Some(url_domain) => match crate::suffix::get_tlds()?.suffix(url_domain.as_bytes()) {
Some(suffix) => {
url_domain.as_bytes().split(|x| *x==b'.').eq(
name.as_bytes().split(|x| *x==b'.').chain(suffix.as_bytes().split(|x| *x==b'.'))
)
url_domain.as_bytes().strip_suffix(suffix.as_bytes()).is_some_and(|x| x.strip_suffix(b".")==Some(name.as_bytes()))
},
None => false
},
Expand All @@ -238,7 +236,7 @@ impl Condition {
Self::QueryHasParam(name) => url.query_pairs().any(|(ref name2, _)| name2==name),
Self::QueryParamValueIs{name, value} => url.query_pairs().any(|(ref name2, ref value2)| name2==name && value2==value),
Self::UrlPartIs{part_name, none_to_empty_string, value} => value==part_name.get_from(url)
.ok_or(ConditionError::UrlPartNotFound).or_else(|_| if *none_to_empty_string {Ok(Cow::Owned("".to_string()))} else {Err(ConditionError::UrlPartNotFound)})?.as_ref(),
.ok_or(ConditionError::UrlPartNotFound).or_else(|_| if *none_to_empty_string {Ok(Cow::Borrowed(""))} else {Err(ConditionError::UrlPartNotFound)})?.as_ref(),

// Disablable conditions

Expand All @@ -248,7 +246,7 @@ impl Condition {
DomainConditionRule::Always => true,
DomainConditionRule::Never => false,
// Somewhatly annoyingly `DomainConditionRule::Url(Url) | DomainConditionRule::UseUrlBeingCloned` doesn't desugar to this.
// I get it's a nich and weird case, but in this one specific instance it'd be nice.
// I get it's a niche and weird case, but in this one specific instance it'd be nice.
DomainConditionRule::Url(url) => {
if let Some(host)=url.host_str() {
(yes_domains.iter().any(|domain| domain==host) || yes_domains_regexes.iter().any(|regex| regex.is_match(host))) &&
Expand All @@ -274,7 +272,7 @@ impl Condition {
#[cfg(feature = "regex")] Self::QueryParamValueMatchesRegex{name, regex} => url.query_pairs().any(|(ref name2, ref value2)| name2==name && regex.is_match(value2)),
#[cfg(feature = "regex")] Self::PathMatchesRegex(regex) => regex.is_match(url.path()),
#[cfg(feature = "regex")] Self::UrlPartMatchesRegex {part_name, none_to_empty_string, regex} => regex.is_match(part_name.get_from(url)
.ok_or(ConditionError::UrlPartNotFound).or_else(|_| if *none_to_empty_string {Ok(Cow::Owned("".to_string()))} else {Err(ConditionError::UrlPartNotFound)})?.as_ref()),
.ok_or(ConditionError::UrlPartNotFound).or_else(|_| if *none_to_empty_string {Ok(Cow::Borrowed(""))} else {Err(ConditionError::UrlPartNotFound)})?.as_ref()),

#[cfg(not(feature = "regex"))] Self::QueryParamValueMatchesRegex{..} => Err(ConditionError::ConditionDisabled)?,
#[cfg(not(feature = "regex"))] Self::PathMatchesRegex(..) => Err(ConditionError::ConditionDisabled)?,
Expand All @@ -283,7 +281,7 @@ impl Condition {
#[cfg(feature = "glob")] Self::QueryParamValueMatchesGlob {name, glob} => url.query_pairs().any(|(ref name2, ref value2)| name2==name && glob.matches(value2)),
#[cfg(feature = "glob")] Self::PathMatchesGlob (glob) => glob .matches(url.path()),
#[cfg(feature = "glob")] Self::UrlPartMatchesGlob {part_name, none_to_empty_string, glob} => glob.matches(part_name.get_from(url)
.ok_or(ConditionError::UrlPartNotFound).or_else(|_| if *none_to_empty_string {Ok(Cow::Owned("".to_string()))} else {Err(ConditionError::UrlPartNotFound)})?.as_ref()),
.ok_or(ConditionError::UrlPartNotFound).or_else(|_| if *none_to_empty_string {Ok(Cow::Borrowed(""))} else {Err(ConditionError::UrlPartNotFound)})?.as_ref()),

#[cfg(not(feature = "glob"))] Self::QueryParamValueMatchesGlob{..} => Err(ConditionError::ConditionDisabled)?,
#[cfg(not(feature = "glob"))] Self::PathMatchesGlob(..) => Err(ConditionError::ConditionDisabled)?,
Expand Down
6 changes: 3 additions & 3 deletions src/rules/mappers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ impl Mapper {
#[cfg(feature = "http")]
Self::Expand301 => {
#[cfg(feature = "cache-redirects")]
if let Ok(lines) = read_lines("redirect-cache.txt") {
if let Ok(lines) = read_lines("301-cache.txt") {
for line in lines.map_while(Result::ok) {
if let Some((short, long)) = line.split_once('\t') {
if url.as_str()==short {
Expand All @@ -238,8 +238,8 @@ impl Mapper {
// enum Warning<T, W, E> {Ok(T), Warning(T, W), Error(E)} is obvious.
// But I'd want to bubble up a warning then return the Ok value with it.
#[cfg(feature = "cache-redirects")]
if let Ok(mut x) = OpenOptions::new().create(true).append(true).open("redirect-cache.txt") {
let _=x.write(format!("{}\t{}", url.as_str(), new_url.as_str()).as_bytes());
if let Ok(mut x) = OpenOptions::new().create(true).append(true).open("301-cache.txt") {
let _=x.write(format!("\n{}\t{}", url.as_str(), new_url.as_str()).as_bytes());
}
}
#[cfg(target_family = "wasm")]
Expand Down

0 comments on commit f4b3988

Please sign in to comment.