diff --git a/.github/codecov.yml b/.github/codecov.yml index 4513d92b96..28faeef110 100644 --- a/.github/codecov.yml +++ b/.github/codecov.yml @@ -9,4 +9,3 @@ ignore: - vm/src/types/errors - hint_accountant - vm/src/hint_processor/cairo-1-hint-processor # TODO: Remove this line - - ./deps diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 943713cbd8..213ace61af 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -14,10 +14,6 @@ jobs: uses: actions/checkout@v2 - name: Install stable toolchain uses: dtolnay/rust-toolchain@1.69.0 - - name: Publish crate cairo-take_until_unbalanced - env: - CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} - run: cargo publish --token ${CRATES_TOKEN} --all-features --manifest-path ./deps/parse-hyperlinks/Cargo.toml - name: Publish crate cairo-felt env: CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/Cargo.lock b/Cargo.lock index ebd5a1dd7c..06d004a299 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -700,14 +700,6 @@ dependencies = [ "time", ] -[[package]] -name = "cairo-take_until_unbalanced" -version = "0.32.0" -dependencies = [ - "nom", - "wasm-bindgen-test", -] - [[package]] name = "cairo-vm" version = "0.8.0" @@ -721,7 +713,6 @@ dependencies = [ "cairo-felt 0.8.0", "cairo-lang-casm", "cairo-lang-starknet", - "cairo-take_until_unbalanced", "criterion", "generic-array", "hashbrown 0.13.2", diff --git a/Cargo.toml b/Cargo.toml index 2d25da5dc5..7e2695fefd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,6 @@ members = [ "felt", "vm", "hint_accountant", - "./deps/parse-hyperlinks", ] exclude = ["ensure-no_std"] @@ -47,14 +46,6 @@ anyhow = { version = "1.0.69", default-features = false } thiserror = { version = "1.0.32", default-features = false } thiserror-no-std = { version = "2.0.2", default-features = false } -# This crate has only one function `take_until_unbalanced` that is -# very useful for our parsing purposes: -# 
https://stackoverflow.com/questions/70630556/parse-allowing-nested-parentheses-in-nom -# There is a proposal for extending nom::delimited to use this function: -# https://github.com/Geal/nom/issues/1253 -parse-hyperlinks = { package = "cairo-take_until_unbalanced", path = "./deps/parse-hyperlinks", version = "0.32.0", default-features = false, features = [ - "alloc", -] } felt = { package = "cairo-felt", path = "./felt", version = "0.8.0", default-features = false, features = [ "alloc", ] } diff --git a/RELEASE.md b/RELEASE.md index 20708d1ab2..65552ae72a 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -17,8 +17,6 @@ - `cairo-vm-cli/Cargo.toml`: update the version string and also the `cairo-vm` dependency version to match the above. - `felt/Cargo.toml`: update the version string. - - `deps/parse-hyperlinks/Cargo.toml`: this vendored dependency needs its - version bumped, but does not need to match the other crate versions. - [Here](https://github.com/lambdaclass/cairo-rs/pull/1257/files) is an example pull request with these changes. - [ ] Run `cargo update` and `git add Cargo.lock` @@ -36,7 +34,6 @@ versions. - [cairo-vm](https://crates.io/crates/cairo-vm) - [cairo-felt](https://crates.io/crates/cairo-felt) - - [cairo-take-until-unbalanced](https://crates.io/crates/cairo-take_until_unbalanced) - [ ] Create a release in Github. - Select the recently created tag. - Set the title to the version string. diff --git a/deps/parse-hyperlinks/Cargo.lock b/deps/parse-hyperlinks/Cargo.lock deleted file mode 100644 index 12d2b52d4b..0000000000 --- a/deps/parse-hyperlinks/Cargo.lock +++ /dev/null @@ -1,32 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "nom" -version = "7.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "parse-hyperlinks" -version = "0.23.4" -dependencies = [ - "nom", -] diff --git a/deps/parse-hyperlinks/Cargo.toml b/deps/parse-hyperlinks/Cargo.toml deleted file mode 100644 index 3f5fdc05c6..0000000000 --- a/deps/parse-hyperlinks/Cargo.toml +++ /dev/null @@ -1,26 +0,0 @@ -[package] -name = "cairo-take_until_unbalanced" -version = "0.32.0" -authors = [ - "Jens Getreu ", - "LambdaClass ", -] -edition = "2021" -keywords = ["parser"] -license = "MIT/Apache-2.0" -readme = "README.md" -repository = "https://gitlab.com/lambdaclass/cairo-rs" -homepage = "https://gitlab.com/lambdaclass/cairo-rs" -description = "A Nom parser library for contents between unbalanced tags." 
-categories = ["parser-implementations"] - -[dependencies] -nom = { version = "7.1.1", default-features = false } - -[features] -default = ["std"] -std = ["nom/std"] -alloc = ["nom/alloc"] - -[target.'cfg(target_arch = "wasm32")'.dev-dependencies] -wasm-bindgen-test = "0.3.34" diff --git a/deps/parse-hyperlinks/LICENSE b/deps/parse-hyperlinks/LICENSE deleted file mode 120000 index 30cff7403d..0000000000 --- a/deps/parse-hyperlinks/LICENSE +++ /dev/null @@ -1 +0,0 @@ -../../LICENSE \ No newline at end of file diff --git a/deps/parse-hyperlinks/README.md b/deps/parse-hyperlinks/README.md deleted file mode 100644 index 2f8231dbaa..0000000000 --- a/deps/parse-hyperlinks/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Parse hyperlinks - -[Parse-hyperlinks](https://crates.io/crates/parse-hyperlinks), -a parser library written with [Nom](https://crates.io/crates/nom) to -recognize hyperlinks and link reference definitions in Markdown, -reStructuredText, Asciidoc and HTML formatted text input. - -[![Cargo](https://img.shields.io/crates/v/parse-hyperlinks.svg)]( -https://crates.io/crates/parse-hyperlinks) -[![Documentation](https://docs.rs/parse-hyperlinks/badge.svg)]( -https://docs.rs/parse-hyperlinks) -[![License](https://img.shields.io/badge/license-MIT%2FApache--2.0-blue.svg)]( -https://gitlab.com/getreu/parse-hyperlinks) - -The library implements the -[CommonMark Specification 0.30](https://spec.commonmark.org/0.30/), -[reStructuredText Markup Specification](https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html) -(revision 8571, date 2020-10-28), the specifications in -[Asciidoctor User Manual, chapter 26](https://asciidoctor.org/docs/user-manual/#url) (date 2020-12-03) -and [HTML 5.2: section 4.5](https://www.w3.org/TR/html52/textlevel-semantics.html#the-a-element). 
- -To illustrate the usage and the -[API of the library](https://docs.rs/parse-hyperlinks/0.19.6/parse_hyperlinks/index.html), -[Parse-hyperlinks](https://crates.io/crates/parse-hyperlinks) comes with a -simple command line application: -[Atext2html](https://crates.io/crates/atext2html) - diff --git a/deps/parse-hyperlinks/src/lib.rs b/deps/parse-hyperlinks/src/lib.rs deleted file mode 100644 index 5bbbc7dd10..0000000000 --- a/deps/parse-hyperlinks/src/lib.rs +++ /dev/null @@ -1,173 +0,0 @@ -#![allow(dead_code)] -#![cfg_attr(not(feature = "std"), no_std)] - -use nom::error::Error; -use nom::error::ErrorKind; -use nom::error::ParseError; -use nom::Err; -use nom::IResult; - -/// A parser similar to `nom::bytes::complete::take_until()`, except that this -/// one does not stop at balanced opening and closing tags. It is designed to -/// work inside the `nom::sequence::delimited()` parser. -/// -/// # Basic usage -/// ``` -/// use nom::bytes::complete::tag; -/// use nom::sequence::delimited; -/// use cairo_take_until_unbalanced::take_until_unbalanced; -/// -/// let mut parser = delimited(tag("<"), take_until_unbalanced('<', '>'), tag(">")); -/// assert_eq!(parser("<inside>abc"), Ok(("abc", "inside"))); -/// ``` -/// It skips nested brackets until it finds an extra unbalanced closing bracket. Escaped brackets -/// like `\<` and `\>` are not considered as brackets and are not counted. This function is -/// very similar to `nom::bytes::complete::take_until(">")`, except it also takes nested brackets. -/// NOTE: trimmed down from https://docs.rs/parse-hyperlinks to fix a pending out-of-bounds access. -pub fn take_until_unbalanced( - opening_bracket: char, - closing_bracket: char, -) -> impl Fn(&str) -> IResult<&str, &str> { - move |i: &str| { - let mut index = 0; - let mut bracket_counter = 0; - while let Some(n) = &i - .get(index..) - .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))? 
- .find(&[opening_bracket, closing_bracket, '\\'][..]) - { - index += n; - let mut it = i - .get(index..) - .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))? - .chars(); - match it.next().unwrap_or_default() { - c if c == '\\' => { - // Skip the escape char `\`. - index += '\\'.len_utf8(); - // Skip also the following char. - let c = it.next().unwrap_or_default(); - index += c.len_utf8(); - } - c if c == opening_bracket => { - bracket_counter += 1; - index += opening_bracket.len_utf8(); - } - c if c == closing_bracket => { - // Closing bracket. - bracket_counter -= 1; - index += closing_bracket.len_utf8(); - } - // Can not happen. - _ => unreachable!(), - }; - // We found the unmatched closing bracket. - if bracket_counter == -1 { - // We do not consume it. - index -= closing_bracket.len_utf8(); - let remaining = i - .get(index..) - .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))?; - let matching = i - .get(0..index) - .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))?; - return Ok((remaining, matching)); - }; - } - - if bracket_counter == 0 { - Ok(("", i)) - } else { - Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use nom::error::ErrorKind; - - #[cfg(target_arch = "wasm32")] - use wasm_bindgen_test::*; - - #[test] - #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] - fn test_take_until_unmatched() { - assert_eq!(take_until_unbalanced('(', ')')("abc"), Ok(("", "abc"))); - assert_eq!( - take_until_unbalanced('(', ')')("url)abc"), - Ok((")abc", "url")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u()rl)abc"), - Ok((")abc", "u()rl")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u(())rl)abc"), - Ok((")abc", "u(())rl")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u(())r()l)abc"), - Ok((")abc", "u(())r()l")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u(())r()labc"), - 
Ok(("", "u(())r()labc")) - ); - assert_eq!( - take_until_unbalanced('(', ')')(r#"u\((\))r()labc"#), - Ok(("", r#"u\((\))r()labc"#)) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u(())r(labc"), - Err(nom::Err::Error(nom::error::Error::new( - "u(())r(labc", - ErrorKind::TakeUntil - ))) - ); - assert_eq!( - take_until_unbalanced('€', 'ü')("€uü€€üürlüabc"), - Ok(("üabc", "€uü€€üürl")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u(())r()labc\\"), - Err(nom::Err::Error(nom::error::Error::new( - "u(())r()labc\\", - ErrorKind::TakeUntil - ))) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u\\rl)abc"), - Ok((")abc", "u\\rl")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u\\\\rl)abc"), - Ok((")abc", "u\\\\rl")) - ); - // 'µ' used to check for escaped multi-byte character - assert_eq!( - take_until_unbalanced('(', ')')("u\\µrl)"), - Ok((")", "u\\µrl")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("u\\µ)rl"), - Ok((")rl", "u\\µ")) - ); - assert_eq!( - take_until_unbalanced('(', ')')("urlabc\\"), - Err(nom::Err::Error(nom::error::Error::new( - "urlabc\\", - ErrorKind::TakeUntil - ))) - ); - assert_eq!(take_until_unbalanced('(', ')')("abc"), Ok(("", "abc"))); - assert_eq!( - take_until_unbalanced('(', ')')("(abc"), - Err(nom::Err::Error(nom::error::Error::new( - "(abc", - ErrorKind::TakeUntil - ))) - ); - } -} diff --git a/vm/Cargo.toml b/vm/Cargo.toml index a7670d2ff1..0f48ddb1ac 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -16,7 +16,6 @@ std = [ "bincode/std", "anyhow/std", "starknet-crypto/std", - "parse-hyperlinks/std", "felt/std", "dep:num-prime", ] @@ -59,13 +58,6 @@ thiserror-no-std = { workspace = true } # only for std num-prime = { version = "0.4.3", features = ["big-int"], optional = true } - -# This crate has only one function `take_until_unbalanced` that is -# very useful for our parsing purposes: -# https://stackoverflow.com/questions/70630556/parse-allowing-nested-parentheses-in-nom -# There is a proposal for extending 
nom::delimited to use this function: -# https://github.com/Geal/nom/issues/1253 -parse-hyperlinks = { workspace = true } felt = { workspace = true } bitvec = { workspace = true } diff --git a/vm/src/serde/deserialize_utils.rs b/vm/src/serde/deserialize_utils.rs index 5c30fcb31b..a11eb2f8de 100644 --- a/vm/src/serde/deserialize_utils.rs +++ b/vm/src/serde/deserialize_utils.rs @@ -13,12 +13,11 @@ use nom::{ }, character::complete::digit1, combinator::{map_res, opt, value}, - error::{ErrorKind, ParseError}, + error::{Error, ErrorKind, ParseError}, sequence::{delimited, tuple}, Err, IResult, }; use num_integer::Integer; -use parse_hyperlinks::take_until_unbalanced; #[derive(Debug, PartialEq, Eq)] pub enum ReferenceParseError { @@ -212,6 +211,83 @@ pub fn parse_value(input: &str) -> IResult<&str, ValueAddress> { Ok((rem_input, value_address)) } +/// A parser similar to `nom::bytes::complete::take_until()`, except that this +/// one does not stop at balanced opening and closing tags. It is designed to +/// work inside the `nom::sequence::delimited()` parser. +/// +/// # Basic usage +/// ```ignore +/// use nom::bytes::complete::tag; +/// use nom::sequence::delimited; +/// use cairo_vm::serde::deserialize_utils::take_until_unbalanced; +/// +/// let mut parser = delimited(tag("<"), take_until_unbalanced('<', '>'), tag(">")); +/// assert_eq!(parser("<inside>abc"), Ok(("abc", "inside"))); +/// ``` +/// It skips nested brackets until it finds an extra unbalanced closing bracket. Escaped brackets +/// like `\<` and `\>` are not considered as brackets and are not counted. This function is +/// very similar to `nom::bytes::complete::take_until(">")`, except it also takes nested brackets. +/// NOTE: trimmed down from https://docs.rs/parse-hyperlinks to fix bugs. The project itself seems +/// abandoned. 
+pub fn take_until_unbalanced( + opening_bracket: char, + closing_bracket: char, +) -> impl Fn(&str) -> IResult<&str, &str> { + move |i: &str| { + let mut index = 0; + let mut bracket_counter = 0; + while let Some(n) = &i + .get(index..) + .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))? + .find(&[opening_bracket, closing_bracket, '\\'][..]) + { + index += n; + let mut it = i + .get(index..) + .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))? + .chars(); + match it.next().unwrap_or_default() { + c if c == '\\' => { + // Skip the escape char `\`. + index += '\\'.len_utf8(); + // Skip also the following char. + let c = it.next().unwrap_or_default(); + index += c.len_utf8(); + } + c if c == opening_bracket => { + bracket_counter += 1; + index += opening_bracket.len_utf8(); + } + c if c == closing_bracket => { + // Closing bracket. + bracket_counter -= 1; + index += closing_bracket.len_utf8(); + } + // Can not happen. + _ => unreachable!(), + }; + // We found the unmatched closing bracket. + if bracket_counter == -1 { + // We do not consume it. + index -= closing_bracket.len_utf8(); + let remaining = i + .get(index..) 
+ .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))?; + let matching = i + .get(0..index) + .ok_or_else(|| Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil)))?; + return Ok((remaining, matching)); + }; + } + + if bracket_counter == 0 { + Ok(("", i)) + } else { + Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))) + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -591,4 +667,84 @@ mod tests { )) ); } + + #[test] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test_take_until_unmatched() { + assert_eq!(take_until_unbalanced('(', ')')("abc"), Ok(("", "abc"))); + assert_eq!( + take_until_unbalanced('(', ')')("url)abc"), + Ok((")abc", "url")) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u()rl)abc"), + Ok((")abc", "u()rl")) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u(())rl)abc"), + Ok((")abc", "u(())rl")) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u(())r()l)abc"), + Ok((")abc", "u(())r()l")) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u(())r()labc"), + Ok(("", "u(())r()labc")) + ); + assert_eq!( + take_until_unbalanced('(', ')')(r#"u\((\))r()labc"#), + Ok(("", r#"u\((\))r()labc"#)) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u(())r(labc"), + Err(nom::Err::Error(nom::error::Error::new( + "u(())r(labc", + ErrorKind::TakeUntil + ))) + ); + assert_eq!( + take_until_unbalanced('€', 'ü')("€uü€€üürlüabc"), + Ok(("üabc", "€uü€€üürl")) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u(())r()labc\\"), + Err(nom::Err::Error(nom::error::Error::new( + "u(())r()labc\\", + ErrorKind::TakeUntil + ))) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u\\rl)abc"), + Ok((")abc", "u\\rl")) + ); + assert_eq!( + take_until_unbalanced('(', ')')("u\\\\rl)abc"), + Ok((")abc", "u\\\\rl")) + ); + // 'µ' used to check for escaped multi-byte character + assert_eq!( + take_until_unbalanced('(', ')')("u\\µrl)"), + Ok((")", "u\\µrl")) + ); + assert_eq!( + 
take_until_unbalanced('(', ')')("u\\µ)rl"), + Ok((")rl", "u\\µ")) + ); + assert_eq!( + take_until_unbalanced('(', ')')("urlabc\\"), + Err(nom::Err::Error(nom::error::Error::new( + "urlabc\\", + ErrorKind::TakeUntil + ))) + ); + assert_eq!(take_until_unbalanced('(', ')')("abc"), Ok(("", "abc"))); + assert_eq!( + take_until_unbalanced('(', ')')("(abc"), + Err(nom::Err::Error(nom::error::Error::new( + "(abc", + ErrorKind::TakeUntil + ))) + ); + } }