Commit c622438

add ether units parsing to lenient uint tokenizer (#255)
Allows `LenientTokenizer` to parse `uint256` values with units: `ether|gwei|nano|nanoether|wei`. For example, [Foundry](https://github.com/gakonst/foundry) uses the `LenientTokenizer` to parse function arguments given by the user through its CLI, where this helps when inserting large values. I don't know if adding units to this library is desirable... The reason I'm proposing the change here is for cases such as arrays: `[1ether, 1gwei]`, which would be quite troublesome to parse in higher-level libraries.
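For a sense of the resulting call sites, here is a minimal sketch mirroring the new tests below; it assumes `ethabi` is built with the `full-serde` feature, which is what pulls in the new dependencies:

```rust
use ethabi::{
	token::{LenientTokenizer, Tokenizer},
	ParamType,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
	// "1ether" expands to 10^18 wei; spaces between the number and
	// the unit are also accepted ("0.1 ether" works too).
	let one_ether = LenientTokenizer::tokenize(&ParamType::Uint(256), "1ether")?;

	// The motivating case: unit suffixes inside an array literal.
	let amounts = LenientTokenizer::tokenize(
		&ParamType::Array(Box::new(ParamType::Uint(256))),
		"[1ether,1gwei]",
	)?;

	println!("{:?}\n{:?}", one_ether, amounts);
	Ok(())
}
```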
1 parent 5781964 commit c622438

2 files changed: +183 −1 lines


Diff for: ethabi/Cargo.toml (+4)

```diff
@@ -20,6 +20,8 @@ sha3 = { version = "0.9", default-features = false }
 ethereum-types = { version = "0.12.0", default-features = false }
 thiserror = { version = "1", optional = true }
 uint = { version = "0.9.0", optional = true }
+regex = { version = "1.5.4", optional = true }
+once_cell = { version = "1.9.0", optional = true }
 
 [dev-dependencies]
 hex-literal = "0.3"
@@ -46,6 +48,8 @@ full-serde = [
 	"serde_json",
 	"uint",
 	"ethereum-types/serialize",
+	"regex",
+	"once_cell"
 ]
 
 rlp = [
```
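Both new dependencies are declared optional and only enabled through the `full-serde` feature, so default builds of `ethabi` are unaffected.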

Diff for: ethabi/src/token/lenient.rs (+179 −1)

```diff
@@ -13,6 +13,10 @@ use crate::{
 };
 use std::borrow::Cow;
 
+use once_cell::sync::Lazy;
+static RE: Lazy<regex::Regex> =
+	Lazy::new(|| regex::Regex::new(r"^([0-9]+)(\.[0-9]+)?\s*(ether|gwei|nanoether|nano|wei)$").expect("invalid regex"));
+
 /// Tries to parse string as a token. Does not require string to clearly represent the value.
 pub struct LenientTokenizer;
 
@@ -43,7 +47,49 @@ impl Tokenizer for LenientTokenizer {
 			return result;
 		}
 
-		let uint = Uint::from_dec_str(value)?;
+		// Try to parse the value as-is first. If that fails, check for
+		// known units in the format 'Number[Spaces]Unit'.
+		// If the regex doesn't match either, the original FromDecStrErr takes priority.
+		let uint = match Uint::from_dec_str(value) {
+			Ok(_uint) => _uint,
+			Err(dec_error) => {
+				let original_dec_error = dec_error.to_string();
+
+				match RE.captures(value) {
+					Some(captures) => {
+						let integer = captures.get(1).expect("capture group does not exist").as_str();
+						let fract = captures.get(2).map(|c| c.as_str().trim_start_matches('.')).unwrap_or_else(|| "");
+						let units = captures.get(3).expect("capture group does not exist").as_str();
+
+						let units = Uint::from(match units.to_lowercase().as_str() {
+							"ether" => 18,
+							"gwei" | "nano" | "nanoether" => 9,
+							"wei" => 0,
+							_ => return Err(dec_error.into()),
+						});
+
+						let integer = Uint::from_dec_str(integer)?.checked_mul(Uint::from(10u32).pow(units));
+
+						if fract.is_empty() {
+							integer.ok_or(dec_error)?
+						} else {
+							// Ensure the fraction doesn't exceed the unit's number of decimals.
+							let fract_pow = units.checked_sub(Uint::from(fract.len())).ok_or(dec_error)?;
+
+							let fract = Uint::from_dec_str(fract)?
+								.checked_mul(Uint::from(10u32).pow(fract_pow))
+								.ok_or_else(|| Error::Other(Cow::Owned(original_dec_error.clone())))?;
+
+							integer
+								.and_then(|integer| integer.checked_add(fract))
+								.ok_or(Error::Other(Cow::Owned(original_dec_error)))?
+						}
+					}
+					None => return Err(dec_error.into()),
+				}
+			}
+		};
+
 		Ok(uint.into())
 	}
```
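To make the unit arithmetic above concrete, here is a hand-trace of the fractional branch for `"0.1 gwei"`, with plain `u128` standing in for `Uint` (a sketch, not library code):

```rust
fn main() {
	// Regex captures for "0.1 gwei": integer = "0", fract = "1"
	// (leading '.' stripped), unit = "gwei" => exponent 9.
	let units: u32 = 9;
	let integer: u128 = 0;
	let fract: u128 = 1;
	let fract_len: u32 = 1; // number of fractional digits

	// 9 - 1 = 8. For "0.1 wei" this would be 0 - 1: checked_sub yields
	// None and the tokenizer surfaces the original decimal-parse error.
	let fract_pow = units.checked_sub(fract_len).expect("too many decimals for unit");

	let value = integer * 10u128.pow(units) + fract * 10u128.pow(fract_pow);
	assert_eq!(value, 100_000_000); // matches the "0.1 gwei" test below
}
```

The same underflow check is what rejects every "more decimals than the unit holds" case in the `tokenize_uint_invalid_units` test.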

```diff
@@ -74,3 +120,135 @@ impl Tokenizer for LenientTokenizer {
 		Ok(int.into())
 	}
 }
+
+#[cfg(test)]
+mod tests {
+	use ethereum_types::FromDecStrErr;
+
+	use crate::{
+		errors::Error,
+		token::{LenientTokenizer, Token, Tokenizer},
+		ParamType, Uint,
+	};
+
+	#[test]
+	fn tokenize_uint() {
+		assert_eq!(
+			LenientTokenizer::tokenize(
+				&ParamType::Uint(256),
+				"1111111111111111111111111111111111111111111111111111111111111111"
+			)
+			.unwrap(),
+			Token::Uint([0x11u8; 32].into())
+		);
+	}
+
+	#[test]
+	fn tokenize_uint_wei() {
+		assert_eq!(LenientTokenizer::tokenize(&ParamType::Uint(256), "1wei").unwrap(), Token::Uint(Uint::from(1)));
+
+		assert_eq!(LenientTokenizer::tokenize(&ParamType::Uint(256), "1 wei").unwrap(), Token::Uint(Uint::from(1)));
+	}
+
+	#[test]
+	fn tokenize_uint_gwei() {
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "1nano").unwrap(),
+			Token::Uint(Uint::from_dec_str("1000000000").unwrap())
+		);
+
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "1nanoether").unwrap(),
+			Token::Uint(Uint::from_dec_str("1000000000").unwrap())
+		);
+
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "1gwei").unwrap(),
+			Token::Uint(Uint::from_dec_str("1000000000").unwrap())
+		);
+
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "0.1 gwei").unwrap(),
+			Token::Uint(Uint::from_dec_str("100000000").unwrap())
+		);
+	}
+
+	#[test]
+	fn tokenize_uint_ether() {
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "10000000000ether").unwrap(),
+			Token::Uint(Uint::from_dec_str("10000000000000000000000000000").unwrap())
+		);
+
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "1ether").unwrap(),
+			Token::Uint(Uint::from_dec_str("1000000000000000000").unwrap())
+		);
+
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "0.01 ether").unwrap(),
+			Token::Uint(Uint::from_dec_str("10000000000000000").unwrap())
+		);
+
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "0.000000000000000001ether").unwrap(),
+			Token::Uint(Uint::from_dec_str("1").unwrap())
+		);
+
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "0.000000000000000001ether").unwrap(),
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "1wei").unwrap(),
+		);
+	}
+
+	#[test]
+	fn tokenize_uint_array_ether() {
+		assert_eq!(
+			LenientTokenizer::tokenize(&ParamType::Array(Box::new(ParamType::Uint(256))), "[1ether,0.1 ether]")
+				.unwrap(),
+			Token::Array(vec![
+				Token::Uint(Uint::from_dec_str("1000000000000000000").unwrap()),
+				Token::Uint(Uint::from_dec_str("100000000000000000").unwrap())
+			])
+		);
+	}
+
+	#[test]
+	fn tokenize_uint_invalid_units() {
+		let _error = Error::from(FromDecStrErr::InvalidCharacter);
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "0.1 wei"), Err(_error)));
+
+		// 0.1 wei
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "0.0000000000000000001ether"), Err(_error)));
+
+		// 1 ether + 0.1 wei
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "1.0000000000000000001ether"), Err(_error)));
+
+		// 1_000_000_000 ether + 0.1 wei
+		assert!(matches!(
+			LenientTokenizer::tokenize(&ParamType::Uint(256), "1000000000.0000000000000000001ether"),
+			Err(_error)
+		));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "0..1 gwei"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "..1 gwei"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "1. gwei"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), ".1 gwei"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "2.1.1 gwei"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), ".1.1 gwei"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "1abc"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "1 gwei "), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "g 1 gwei"), Err(_error)));
+
+		assert!(matches!(LenientTokenizer::tokenize(&ParamType::Uint(256), "1gwei 1 gwei"), Err(_error)));
+	}
+}
```
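One caveat on the negative tests: in `matches!(..., Err(_error))`, the `_error` inside the pattern is a fresh wildcard binding rather than a comparison against the `Error::from(FromDecStrErr::InvalidCharacter)` built above, so these assertions only verify that some error is returned.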
