From 79cc55b9d76aa4094baf10282379165e1b00cd41 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Mon, 27 Apr 2020 15:30:24 -0400 Subject: [PATCH 1/5] [WIP] add rcc backend for expression evaluation - Rewrite parse_macro using rcc - Pass in macro definitions to rcc - Remove cexpr - Remove null-terminator from rcc tokens --- Cargo.lock | 124 ++++++++++++++++++++++++++++++++++++++-------- Cargo.toml | 6 ++- src/clang.rs | 56 +++++++++++---------- src/ir/context.rs | 18 +++---- src/ir/var.rs | 124 ++++++++++++++++++++++------------------------ 5 files changed, 205 insertions(+), 123 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b4a5544398..e7219d03a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,12 +25,16 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "bindgen" version = "0.55.1" dependencies = [ "bitflags 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "cexpr 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "clang-sys 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -42,6 +46,7 @@ dependencies = [ "peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "rcc 0.9.0 (git+https://github.com/jyn514/rcc/?branch=bindgen)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -58,14 +63,6 @@ name = "byteorder" version = "1.3.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "cexpr" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "cfg-if" version = "0.1.10" @@ -95,6 +92,31 @@ dependencies = [ "vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "codespan" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "codespan-reporting 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "codespan-reporting" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "counter" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "diff" version = "0.1.11" @@ -117,6 +139,11 @@ name = "glob" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "hexponent" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "humantime" version = "1.3.0" @@ -125,6 +152,11 @@ dependencies = [ "quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "lasso" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "lazy_static" version = "1.4.0" @@ -162,12 +194,11 @@ version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "nom" -version = "5.1.1" +name = "num-traits" +version = "0.2.12" source = 
"registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -196,6 +227,20 @@ dependencies = [ "proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "rcc" +version = "0.9.0" +source = "git+https://github.com/jyn514/rcc/?branch=bindgen#907b52d6b49446c07f4586c9f3fdd9f40323c293" +dependencies = [ + "codespan 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", + "counter 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "hexponent 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lasso 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "target-lexicon 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "regex" version = "1.3.1" @@ -230,6 +275,21 @@ name = "strsim" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "syn" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "target-lexicon" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "termcolor" version = "1.0.5" @@ -246,6 +306,24 @@ dependencies = [ "unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = 
"thiserror" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "thread_local" version = "0.3.6" @@ -269,11 +347,6 @@ name = "vec_map" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "version_check" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "which" version = "3.0.0" @@ -322,39 +395,48 @@ dependencies = [ "checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" +"checksum autocfg 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" "checksum bitflags 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8a606a02debe2813760609f57a64a2ffd27d9fdf5b2f133eaca0b248dd92cdd2" "checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" -"checksum cexpr 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" "checksum clang-sys 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9da1484c6a890e374ca5086062d4847e0a2c1e5eba9afa5d48c09e8eb39b2519" "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" +"checksum codespan 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "8ebaf6bb6a863ad6aa3a18729e9710c53d75df03306714d9cc1f7357a00cd789" +"checksum codespan-reporting 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6e0762455306b1ed42bc651ef6a2197aabda5e1d4a43c34d5eab5c1a3634e81d" +"checksum counter 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d84b66ce02c964fa8047286289b36797ce48a52a44034e013ce3e5219b6cb360" "checksum diff 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "3c2b69f912779fbb121ceb775d74d51e915af17aaebc38d28a592843a2dd0a3a" "checksum env_logger 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "39ecdb7dd54465526f0a56d666e3b2dd5f3a218665a030b6e4ad9e70fa95d8fa" "checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +"checksum hexponent 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "07482eb12527cb107ba354de9717dbce361ce82ec553fe9e70a98db29fd00d3d" "checksum humantime 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +"checksum lasso 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19486800b9a2b9ca2d5ce6ad9a221f5b3788f1bebcfb94942df6342d6632cb44" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum lazycell 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b294d6fa9ee409a054354afc4352b0b9ef7ca222c69b8812cbea9e7d2bf3783f" "checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" "checksum libloading 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "2cadb8e769f070c45df05c78c7520eb4cd17061d4ab262e43cfc68b4d00ac71c" "checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" -"checksum nom 5.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b471253da97532da4b61552249c521e01e736071f71c1a4f7ebbfbf0a06aad6" +"checksum num-traits 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)" = "ac267bcc07f48ee5f8935ab0d24f316fb722d7a1292e2913f0cc196b29ffd611" "checksum peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" "checksum proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afdc77cc74ec70ed262262942ebb7dac3d479e9e5cfa2da1841c0806f6cdabcc" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" +"checksum rcc 0.9.0 (git+https://github.com/jyn514/rcc/?branch=bindgen)" = "" "checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd" "checksum regex-syntax 0.6.12 
(registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716" "checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8" "checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +"checksum syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)" = "dff0acdb207ae2fe6d5976617f887eb1e35a2ba52c13c7234c790960cdad9238" +"checksum target-lexicon 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ab0e7238dcc7b40a7be719a25365910f6807bd864f4cce6b2e6b873658e2b19d" "checksum termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "96d6098003bde162e4277c70665bd87c326f5a0c3f3fbfb285787fa482d54e6e" "checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +"checksum thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "7dfdd070ccd8ccb78f4ad66bf1982dc37f620ef696c6b5028fe2ed83dd3d0d08" +"checksum thiserror-impl 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)" = "bd80fc12f73063ac132ac92aceea36734f04a1d93c1240c6944e23a3b8841793" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" "checksum vec_map 0.8.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" -"checksum version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" "checksum which 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "240a31163872f7e8e49f35b42b58485e35355b07eb009d9f3686733541339a69" "checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" diff --git a/Cargo.toml b/Cargo.toml index 7ab43a21dc..a61036852b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,7 +46,6 @@ shlex = "0.1" [dependencies] bitflags = "1.0.3" -cexpr = "0.4" cfg-if = "0.1.0" # This kinda sucks: https://github.com/rust-lang/cargo/issues/1982 clap = { version = "2", optional = true } @@ -71,6 +70,11 @@ version = "0.7" optional = true version = "0.4" +[dependencies.rcc] +git = "https://github.com/jyn514/rcc/" +branch = "bindgen" +default-features = false + [features] default = ["logging", "clap", "runtime", "which-rustfmt"] logging = ["env_logger", "log"] diff --git a/src/clang.rs b/src/clang.rs index 488660c434..071cdc93e5 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -4,9 +4,7 @@ #![allow(non_upper_case_globals, dead_code)] use crate::ir::context::BindgenContext; -use cexpr; use clang_sys::*; -use regex; use std::ffi::{CStr, CString}; use std::fmt; use std::hash::Hash; @@ -696,11 +694,11 @@ impl Cursor { RawTokens::new(self) } - /// Gets the tokens that correspond to that cursor as `cexpr` tokens. - pub fn cexpr_tokens(self) -> Vec { + /// Gets the tokens that correspond to that cursor as `rcc` tokens. 
+ pub fn rcc_tokens(self) -> Vec> { self.tokens() .iter() - .filter_map(|token| token.as_cexpr_token()) + .filter_map(|token| token.as_rcc_token()) .collect() } @@ -795,28 +793,34 @@ impl ClangToken { c_str.to_bytes() } - /// Converts a ClangToken to a `cexpr` token if possible. - pub fn as_cexpr_token(&self) -> Option { - use cexpr::token; + /// Converts a ClangToken to an `rcc` token if possible. + pub fn as_rcc_token(&self) -> Option> { + use rcc::{Files, Lexer, Literal, Token}; - let kind = match self.kind { - CXToken_Punctuation => token::Kind::Punctuation, - CXToken_Literal => token::Kind::Literal, - CXToken_Identifier => token::Kind::Identifier, - CXToken_Keyword => token::Kind::Keyword, - // NB: cexpr is not too happy about comments inside - // expressions, so we strip them down here. + match self.kind { + // `rcc` does not have a comment token CXToken_Comment => return None, + CXToken_Punctuation | + CXToken_Literal | + CXToken_Identifier | + CXToken_Keyword => { + let spelling = std::str::from_utf8(self.spelling()).expect("invalid utf8 in token"); + let mut files = Files::new(); + let id = files.add("", "".into()); + let mut lexer = Lexer::new(id, spelling, false); + let mut token = lexer.next().unwrap().expect("rcc failed to parse clang token"); + // rcc generates null-terminated string immediately, + // but bindgen only adds the null-terminator during codegen. 
+ if let Token::Literal(Literal::Str(ref mut string)) = &mut token.data { + assert_eq!(string.pop(), Some(b'\0')); + } + Some(token) + } _ => { warn!("Found unexpected token kind: {:?}", self); - return None; + None } - }; - - Some(token::Token { - kind, - raw: self.spelling().to_vec().into_boxed_slice(), - }) + } } } @@ -836,11 +840,11 @@ impl<'a> Iterator for ClangTokenIterator<'a> { type Item = ClangToken; fn next(&mut self) -> Option { - let raw = self.raw.next()?; + let raw = *self.raw.next()?; unsafe { - let kind = clang_getTokenKind(*raw); - let spelling = clang_getTokenSpelling(self.tu, *raw); - let extent = clang_getTokenExtent(self.tu, *raw); + let kind = clang_getTokenKind(raw); + let spelling = clang_getTokenSpelling(self.tu, raw); + let extent = clang_getTokenExtent(self.tu, raw); Some(ClangToken { kind, extent, diff --git a/src/ir/context.rs b/src/ir/context.rs index bb0c3f9dc9..0d4b4b9d98 100644 --- a/src/ir/context.rs +++ b/src/ir/context.rs @@ -24,9 +24,9 @@ use crate::clang::{self, Cursor}; use crate::parse::ClangItemParser; use crate::BindgenOptions; use crate::{Entry, HashMap, HashSet}; -use cexpr; use clang_sys; use proc_macro2::{Ident, Span}; +use rcc::InternedStr; use std::borrow::Cow; use std::cell::Cell; use std::collections::HashMap as StdHashMap; @@ -351,8 +351,8 @@ pub struct BindgenContext { /// hard errors while parsing duplicated macros, as well to allow macro /// expression parsing. /// - /// This needs to be an std::HashMap because the cexpr API requires it. - parsed_macros: StdHashMap, cexpr::expr::EvalResult>, + /// This needs to be an std::HashMap because the rcc API requires it. + parsed_macros: StdHashMap, /// The active replacements collected from replaces="xxx" annotations. replacements: HashMap, ItemId>, @@ -1986,14 +1986,14 @@ If you encounter an error missing from this list, please file an issue or a PR!" } /// Have we parsed the macro named `macro_name` already? 
- pub fn parsed_macro(&self, macro_name: &[u8]) -> bool { - self.parsed_macros.contains_key(macro_name) + pub fn parsed_macro(&self, macro_name: InternedStr) -> bool { + self.parsed_macros.contains_key(&macro_name) } /// Get the currently parsed macros. pub fn parsed_macros( &self, - ) -> &StdHashMap<Vec<u8>, cexpr::expr::EvalResult> { + ) -> &StdHashMap<InternedStr, rcc::Definition> { debug_assert!(!self.in_codegen_phase()); &self.parsed_macros } @@ -2001,10 +2001,10 @@ If you encounter an error missing from this list, please file an issue or a PR!" /// Mark the macro named `macro_name` as parsed. pub fn note_parsed_macro( &mut self, - id: Vec<u8>, - value: cexpr::expr::EvalResult, + id: InternedStr, + value: rcc::Literal, ) { - self.parsed_macros.insert(id, value); + self.parsed_macros.insert(id, rcc::Definition::Object(vec![rcc::Token::Literal(value)])); } /// Are we in the codegen phase? diff --git a/src/ir/var.rs b/src/ir/var.rs index c6f121d74e..3a420bc8e1 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -13,9 +13,9 @@ use crate::clang::ClangToken; use crate::parse::{ ClangItemParser, ClangSubItemParser, ParseError, ParseResult, }; -use cexpr; use std::io; -use std::num::Wrapping; +use std::collections::HashMap; +use rcc::{InternedStr, Literal}; /// The type for a constant variable. #[derive(Debug)] @@ -188,8 +188,6 @@ impl ClangSubItemParser for Var { cursor: clang::Cursor, ctx: &mut BindgenContext, ) -> Result<ParseResult<Self>, ParseError> { - use cexpr::expr::EvalResult; - use cexpr::literal::CChar; use clang_sys::*; match cursor.kind() { CXCursor_MacroDefinition => { @@ -215,70 +213,55 @@ impl ClangSubItemParser for Var { assert!(!id.is_empty(), "Empty macro name?"); - let previously_defined = ctx.parsed_macro(&id); + let previously_defined = ctx.parsed_macro(id); // NB: It's important to "note" the macro even if the result is - // not an integer, otherwise we might loose other kind of + // not an integer, otherwise we might lose other kind of // derived macros. 
- ctx.note_parsed_macro(id.clone(), value.clone()); + ctx.note_parsed_macro(id, value.clone()); if previously_defined { - let name = String::from_utf8(id).unwrap(); - warn!("Duplicated macro definition: {}", name); + warn!("Duplicated macro definition: {}", id); return Err(ParseError::Continue); } - // NOTE: Unwrapping, here and above, is safe, because the - // identifier of a token comes straight from clang, and we - // enforce utf8 there, so we should have already panicked at - // this point. - let name = String::from_utf8(id).unwrap(); + let parse_int = |value| { + let kind = ctx + .parse_callbacks() + .and_then(|c| c.int_macro(rcc::get_str!(id), value)) + .unwrap_or_else(|| { + default_macro_constant_type(&ctx, value) + }); + + (TypeKind::Int(kind), VarType::Int(value)) + }; + let (type_kind, val) = match value { - EvalResult::Invalid => return Err(ParseError::Continue), - EvalResult::Float(f) => { + Literal::Float(f) => { (TypeKind::Float(FloatKind::Double), VarType::Float(f)) } - EvalResult::Char(c) => { - let c = match c { - CChar::Char(c) => { - assert_eq!(c.len_utf8(), 1); - c as u8 - } - CChar::Raw(c) => { - assert!(c <= ::std::u8::MAX as u64); - c as u8 - } - }; - + Literal::Char(c) => { (TypeKind::Int(IntKind::U8), VarType::Char(c)) } - EvalResult::Str(val) => { + Literal::Str(val) => { let char_ty = Item::builtin_type( TypeKind::Int(IntKind::U8), true, ctx, ); if let Some(callbacks) = ctx.parse_callbacks() { - callbacks.str_macro(&name, &val); + callbacks.str_macro(rcc::get_str!(id), &val); } (TypeKind::Pointer(char_ty), VarType::String(val)) } - EvalResult::Int(Wrapping(value)) => { - let kind = ctx - .parse_callbacks() - .and_then(|c| c.int_macro(&name, value)) - .unwrap_or_else(|| { - default_macro_constant_type(&ctx, value) - }); - - (TypeKind::Int(kind), VarType::Int(value)) - } + Literal::Int(i) => parse_int(i), + Literal::UnsignedInt(u) => parse_int(u as i64), }; let ty = Item::builtin_type(type_kind, true, ctx); Ok(ParseResult::New( - 
Var::new(name, None, ty, Some(val), true), + Var::new(id.resolve_and_clone(), None, ty, Some(val), true), Some(cursor), )) } @@ -370,25 +353,28 @@ impl ClangSubItemParser for Var { } } -/// Try and parse a macro using all the macros parsed until now. +/// Try and parse an object macro using all the macros parsed until now. +/// +/// The cursor includes the `id` token but not the `#define`. fn parse_macro( ctx: &BindgenContext, tokens: &[ClangToken], -) -> Option<(Vec, cexpr::expr::EvalResult)> { - use cexpr::expr; - - let mut cexpr_tokens: Vec<_> = tokens - .iter() - .filter_map(ClangToken::as_cexpr_token) - .collect(); +) -> Option<(InternedStr, Literal)> { + use rcc::Token; - let parser = expr::IdentifierParser::new(ctx.parsed_macros()); + let mut rcc_tokens = tokens.iter().filter_map(ClangToken::as_rcc_token); + let ident_str = match rcc_tokens.next()?.data { + Token::Id(id) => id, + _ => return None, + }; + if ident_str.is_empty() || tokens.len() < 2 { + return None; + } + let parsed_macros = ctx.parsed_macros(); - match parser.macro_definition(&cexpr_tokens) { - Ok((_, (id, val))) => { - return Some((id.into(), val)); - } - _ => {} + // TODO: remove this clone (will need changes in rcc) + if let Some(literal) = rcc_expr(rcc_tokens.clone(), &parsed_macros) { + return Some((ident_str, literal)); } // Try without the last token, to workaround a libclang bug in versions @@ -397,23 +383,29 @@ fn parse_macro( // See: // https://bugs.llvm.org//show_bug.cgi?id=9069 // https://reviews.llvm.org/D26446 - cexpr_tokens.pop()?; - - match parser.macro_definition(&cexpr_tokens) { - Ok((_, (id, val))) => Some((id.into(), val)), - _ => None, + let tokens = tokens[1..tokens.len() - 1].iter().filter_map(ClangToken::as_rcc_token); + if let Some(literal) = rcc_expr(tokens, &parsed_macros) { + Some((ident_str, literal)) + } else { + None } } -fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option { - use cexpr::expr; - use cexpr::expr::EvalResult; +fn rcc_expr(rcc_tokens: 
impl Iterator>, definitions: &HashMap) -> Option { + use rcc::PreProcessor; + + let mut rcc_tokens = rcc_tokens.peekable(); + let location = rcc_tokens.peek()?.location; + PreProcessor::cpp_expr(definitions, rcc_tokens, location).ok()?.const_fold().ok()?.into_literal().ok() +} - let cexpr_tokens = cursor.cexpr_tokens(); +fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option { + let rcc_tokens = cursor.rcc_tokens().into_iter(); // TODO(emilio): We can try to parse other kinds of literals. - match expr::expr(&cexpr_tokens) { - Ok((_, EvalResult::Int(Wrapping(val)))) => Some(val), + match rcc_expr(rcc_tokens, &HashMap::new()) { + Some(Literal::Int(i)) => Some(i), + Some(Literal::UnsignedInt(u)) => Some(u as i64), _ => None, } } From 2140881c62396e0b847bd714a4751c9b4287399b Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Wed, 27 May 2020 22:30:28 -0400 Subject: [PATCH 2/5] Update tests to match rcc output Just a few more harmless constants --- .../expectations/tests/issue-1676-macro-namespace-prefix.rs | 2 ++ tests/expectations/tests/layout_array.rs | 1 + tests/expectations/tests/layout_array_too_long.rs | 1 + tests/expectations/tests/layout_large_align_field.rs | 1 + tests/expectations/tests/layout_mbuf.rs | 2 ++ tests/expectations/tests/layout_mbuf_1_0.rs | 2 ++ tests/expectations/tests/namespace.rs | 5 +++-- 7 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/expectations/tests/issue-1676-macro-namespace-prefix.rs b/tests/expectations/tests/issue-1676-macro-namespace-prefix.rs index 131dbdf39f..b498f3f71c 100644 --- a/tests/expectations/tests/issue-1676-macro-namespace-prefix.rs +++ b/tests/expectations/tests/issue-1676-macro-namespace-prefix.rs @@ -4,3 +4,5 @@ non_camel_case_types, non_upper_case_globals )] + +pub const nssv_inline_ns: u32 = 0; diff --git a/tests/expectations/tests/layout_array.rs b/tests/expectations/tests/layout_array.rs index 3ca20b01d5..3e7e83cf59 100644 --- a/tests/expectations/tests/layout_array.rs +++ 
b/tests/expectations/tests/layout_array.rs @@ -6,6 +6,7 @@ )] pub const RTE_CACHE_LINE_SIZE: u32 = 64; +pub const __rte_cache_aligned: u32 = 0; pub const RTE_MEMPOOL_OPS_NAMESIZE: u32 = 32; pub const RTE_MEMPOOL_MAX_OPS_IDX: u32 = 16; pub const RTE_HEAP_NUM_FREELISTS: u32 = 13; diff --git a/tests/expectations/tests/layout_array_too_long.rs b/tests/expectations/tests/layout_array_too_long.rs index dbfd80d959..b331431112 100644 --- a/tests/expectations/tests/layout_array_too_long.rs +++ b/tests/expectations/tests/layout_array_too_long.rs @@ -6,6 +6,7 @@ )] pub const RTE_CACHE_LINE_SIZE: u32 = 64; +pub const __rte_cache_aligned: u32 = 0; pub const RTE_LIBRTE_IP_FRAG_MAX_FRAG: u32 = 4; pub const IP_LAST_FRAG_IDX: _bindgen_ty_1 = _bindgen_ty_1::IP_LAST_FRAG_IDX; pub const IP_FIRST_FRAG_IDX: _bindgen_ty_1 = _bindgen_ty_1::IP_FIRST_FRAG_IDX; diff --git a/tests/expectations/tests/layout_large_align_field.rs b/tests/expectations/tests/layout_large_align_field.rs index cbfb286c2b..0ec3c8ed0c 100644 --- a/tests/expectations/tests/layout_large_align_field.rs +++ b/tests/expectations/tests/layout_large_align_field.rs @@ -36,6 +36,7 @@ impl ::std::fmt::Debug for __IncompleteArrayField { } } pub const RTE_CACHE_LINE_SIZE: u32 = 64; +pub const __rte_cache_aligned: u32 = 0; pub const RTE_LIBRTE_IP_FRAG_MAX_FRAG: u32 = 4; pub const IP_LAST_FRAG_IDX: _bindgen_ty_1 = _bindgen_ty_1::IP_LAST_FRAG_IDX; pub const IP_FIRST_FRAG_IDX: _bindgen_ty_1 = _bindgen_ty_1::IP_FIRST_FRAG_IDX; diff --git a/tests/expectations/tests/layout_mbuf.rs b/tests/expectations/tests/layout_mbuf.rs index 2048f33544..73f4817ecc 100644 --- a/tests/expectations/tests/layout_mbuf.rs +++ b/tests/expectations/tests/layout_mbuf.rs @@ -94,6 +94,8 @@ where } pub const RTE_CACHE_LINE_MIN_SIZE: u32 = 64; pub const RTE_CACHE_LINE_SIZE: u32 = 64; +pub const __rte_cache_aligned: u32 = 0; +pub const __rte_cache_min_aligned: u32 = 0; pub type phys_addr_t = u64; pub type MARKER = [*mut ::std::os::raw::c_void; 0usize]; pub type 
MARKER8 = [u8; 0usize]; diff --git a/tests/expectations/tests/layout_mbuf_1_0.rs b/tests/expectations/tests/layout_mbuf_1_0.rs index 1475cc2141..9c93cdbd79 100644 --- a/tests/expectations/tests/layout_mbuf_1_0.rs +++ b/tests/expectations/tests/layout_mbuf_1_0.rs @@ -137,6 +137,8 @@ impl ::std::cmp::PartialEq for __BindgenUnionField { impl ::std::cmp::Eq for __BindgenUnionField {} pub const RTE_CACHE_LINE_MIN_SIZE: u32 = 64; pub const RTE_CACHE_LINE_SIZE: u32 = 64; +pub const __rte_cache_aligned: u32 = 0; +pub const __rte_cache_min_aligned: u32 = 0; pub type phys_addr_t = u64; pub type MARKER = [*mut ::std::os::raw::c_void; 0usize]; pub type MARKER8 = [u8; 0usize]; diff --git a/tests/expectations/tests/namespace.rs b/tests/expectations/tests/namespace.rs index d5d10e2945..e602971800 100644 --- a/tests/expectations/tests/namespace.rs +++ b/tests/expectations/tests/namespace.rs @@ -9,6 +9,7 @@ pub mod root { #[allow(unused_imports)] use self::super::root; + pub const NAMESPACE: u32 = 0; extern "C" { #[link_name = "\u{1}_Z9top_levelv"] pub fn top_level(); @@ -22,7 +23,7 @@ pub mod root { pub fn in_whatever(); } } - pub mod _bindgen_mod_id_17 { + pub mod _bindgen_mod_id_18 { #[allow(unused_imports)] use self::super::super::root; #[repr(C)] @@ -57,7 +58,7 @@ pub mod root { #[repr(C)] #[derive(Debug)] pub struct C { - pub _base: root::_bindgen_mod_id_17::A, + pub _base: root::_bindgen_mod_id_18::A, pub m_c: T, pub m_c_ptr: *mut T, pub m_c_arr: [T; 10usize], From 820ee95f91449679f5b27cabbf43d8a7418514cf Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Jun 2020 17:21:35 -0400 Subject: [PATCH 3/5] Use published version of rcc; rcc -> saltwater --- Cargo.lock | 32 ++++++++++++++-------------- Cargo.toml | 7 ++---- src/clang.rs | 34 +++++++++++++++++------------ src/ir/context.rs | 17 +++++++++------ src/ir/var.rs | 54 ++++++++++++++++++++++++++++------------------- 5 files changed, 81 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
e7219d03a2..04ce1a55e7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -46,9 +46,9 @@ dependencies = [ "peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "rcc 0.9.0 (git+https://github.com/jyn514/rcc/?branch=bindgen)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "saltwater 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", "shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "which 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -227,20 +227,6 @@ dependencies = [ "proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "rcc" -version = "0.9.0" -source = "git+https://github.com/jyn514/rcc/?branch=bindgen#907b52d6b49446c07f4586c9f3fdd9f40323c293" -dependencies = [ - "codespan 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", - "counter 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "hexponent 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", - "lasso 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "target-lexicon 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "regex" version = "1.3.1" @@ -265,6 +251,20 @@ dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "saltwater" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "codespan 0.9.5 (registry+https://github.com/rust-lang/crates.io-index)", + "counter 0.4.3 
(registry+https://github.com/rust-lang/crates.io-index)", + "hexponent 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "lasso 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "target-lexicon 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "thiserror 1.0.20 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "shlex" version = "0.1.1" @@ -421,10 +421,10 @@ dependencies = [ "checksum proc-macro2 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afdc77cc74ec70ed262262942ebb7dac3d479e9e5cfa2da1841c0806f6cdabcc" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" "checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" -"checksum rcc 0.9.0 (git+https://github.com/jyn514/rcc/?branch=bindgen)" = "" "checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd" "checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716" "checksum rustc-hash 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7540fc8b0c49f096ee9c961cda096467dce8084bec6bdca2fc83895fd9b28cb8" +"checksum saltwater 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4529ce7b69f31e0683a6f5982b98789fa25a87f0bc67c1db0eaa966fe366a605" "checksum shlex 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" "checksum syn 
1.0.11 (registry+https://github.com/rust-lang/crates.io-index)" = "dff0acdb207ae2fe6d5976617f887eb1e35a2ba52c13c7234c790960cdad9238" diff --git a/Cargo.toml b/Cargo.toml index a61036852b..87019f67d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,6 +58,8 @@ regex = { version = "1.0", default-features = false , features = [ "std", "unico which = { version = "3.0", optional = true, default-features = false } shlex = "0.1" rustc-hash = "1.0.1" +saltwater = { version = "0.10", default-features = false } + # New validation in 0.3.6 breaks bindgen-integration: # https://github.com/alexcrichton/proc-macro2/commit/489c642. proc-macro2 = { version = "1", default-features = false } @@ -70,11 +72,6 @@ version = "0.7" optional = true version = "0.4" -[dependencies.rcc] -git = "https://github.com/jyn514/rcc/" -branch = "bindgen" -default-features = false - [features] default = ["logging", "clap", "runtime", "which-rustfmt"] logging = ["env_logger", "log"] diff --git a/src/clang.rs b/src/clang.rs index 071cdc93e5..810e141f9d 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -694,11 +694,11 @@ impl Cursor { RawTokens::new(self) } - /// Gets the tokens that correspond to that cursor as `rcc` tokens. - pub fn rcc_tokens(self) -> Vec> { + /// Gets the tokens that correspond to that cursor as `saltwater` tokens. + pub fn swcc_tokens(self) -> Vec> { self.tokens() .iter() - .filter_map(|token| token.as_rcc_token()) + .filter_map(|token| token.as_swcc_token()) .collect() } @@ -793,25 +793,31 @@ impl ClangToken { c_str.to_bytes() } - /// Converts a ClangToken to an `rcc` token if possible. - pub fn as_rcc_token(&self) -> Option> { - use rcc::{Files, Lexer, Literal, Token}; + /// Converts a ClangToken to a `saltwater` token if possible. 
+ pub fn as_swcc_token( + &self, + ) -> Option> { + use saltwater::{Files, Lexer, Literal, Token}; match self.kind { - // `rcc` does not have a comment token + // `saltwater` does not have a comment token CXToken_Comment => return None, - CXToken_Punctuation | - CXToken_Literal | - CXToken_Identifier | + CXToken_Punctuation | CXToken_Literal | CXToken_Identifier | CXToken_Keyword => { - let spelling = std::str::from_utf8(self.spelling()).expect("invalid utf8 in token"); + let spelling = std::str::from_utf8(self.spelling()) + .expect("invalid utf8 in token"); let mut files = Files::new(); let id = files.add("", "".into()); let mut lexer = Lexer::new(id, spelling, false); - let mut token = lexer.next().unwrap().expect("rcc failed to parse clang token"); - // rcc generates null-terminated string immediately, + let mut token = lexer + .next() + .unwrap() + .expect("saltwater failed to parse clang token"); + // saltwater generates null-terminated string immediately, // but bindgen only adds the null-terminator during codegen. - if let Token::Literal(Literal::Str(ref mut string)) = &mut token.data { + if let Token::Literal(Literal::Str(ref mut string)) = + &mut token.data + { assert_eq!(string.pop(), Some(b'\0')); } Some(token) diff --git a/src/ir/context.rs b/src/ir/context.rs index 0d4b4b9d98..0cd85dc133 100644 --- a/src/ir/context.rs +++ b/src/ir/context.rs @@ -26,7 +26,7 @@ use crate::BindgenOptions; use crate::{Entry, HashMap, HashSet}; use clang_sys; use proc_macro2::{Ident, Span}; -use rcc::InternedStr; +use saltwater::InternedStr; use std::borrow::Cow; use std::cell::Cell; use std::collections::HashMap as StdHashMap; @@ -351,8 +351,8 @@ pub struct BindgenContext { /// hard errors while parsing duplicated macros, as well to allow macro /// expression parsing. /// - /// This needs to be an std::HashMap because the rcc API requires it. - parsed_macros: StdHashMap, + /// This needs to be an std::HashMap because the saltwater API requires it. 
+ parsed_macros: StdHashMap, /// The active replacements collected from replaces="xxx" annotations. replacements: HashMap, ItemId>, @@ -1993,7 +1993,7 @@ If you encounter an error missing from this list, please file an issue or a PR!" /// Get the currently parsed macros. pub fn parsed_macros( &self, - ) -> &StdHashMap { + ) -> &StdHashMap { debug_assert!(!self.in_codegen_phase()); &self.parsed_macros } @@ -2002,9 +2002,14 @@ If you encounter an error missing from this list, please file an issue or a PR!" pub fn note_parsed_macro( &mut self, id: InternedStr, - value: rcc::Literal, + value: saltwater::Literal, ) { - self.parsed_macros.insert(id, rcc::Definition::Object(vec![rcc::Token::Literal(value)])); + self.parsed_macros.insert( + id, + saltwater::Definition::Object(vec![saltwater::Token::Literal( + value, + )]), + ); } /// Are we in the codegen phase? diff --git a/src/ir/var.rs b/src/ir/var.rs index 3a420bc8e1..1dc5660583 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -13,9 +13,9 @@ use crate::clang::ClangToken; use crate::parse::{ ClangItemParser, ClangSubItemParser, ParseError, ParseResult, }; -use std::io; +use saltwater::{InternedStr, Literal}; use std::collections::HashMap; -use rcc::{InternedStr, Literal}; +use std::io; /// The type for a constant variable. 
#[derive(Debug)] @@ -228,10 +228,10 @@ impl ClangSubItemParser for Var { let parse_int = |value| { let kind = ctx .parse_callbacks() - .and_then(|c| c.int_macro(rcc::get_str!(id), value)) - .unwrap_or_else(|| { - default_macro_constant_type(&ctx, value) - }); + .and_then(|c| { + c.int_macro(saltwater::get_str!(id), value) + }) + .unwrap_or_else(|| default_macro_constant_type(&ctx, value)); (TypeKind::Int(kind), VarType::Int(value)) }; @@ -250,7 +250,7 @@ impl ClangSubItemParser for Var { ctx, ); if let Some(callbacks) = ctx.parse_callbacks() { - callbacks.str_macro(rcc::get_str!(id), &val); + callbacks.str_macro(saltwater::get_str!(id), &val); } (TypeKind::Pointer(char_ty), VarType::String(val)) } @@ -360,10 +360,10 @@ fn parse_macro( ctx: &BindgenContext, tokens: &[ClangToken], ) -> Option<(InternedStr, Literal)> { - use rcc::Token; + use saltwater::Token; - let mut rcc_tokens = tokens.iter().filter_map(ClangToken::as_rcc_token); - let ident_str = match rcc_tokens.next()?.data { + let mut swcc_tokens = tokens.iter().filter_map(ClangToken::as_swcc_token); + let ident_str = match swcc_tokens.next()?.data { Token::Id(id) => id, _ => return None, }; @@ -372,8 +372,8 @@ fn parse_macro( } let parsed_macros = ctx.parsed_macros(); - // TODO: remove this clone (will need changes in rcc) - if let Some(literal) = rcc_expr(rcc_tokens.clone(), &parsed_macros) { + // TODO: remove this clone (will need changes in saltwater) + if let Some(literal) = swcc_expr(swcc_tokens.clone(), &parsed_macros) { return Some((ident_str, literal)); } @@ -383,27 +383,37 @@ fn parse_macro( // See: // https://bugs.llvm.org//show_bug.cgi?id=9069 // https://reviews.llvm.org/D26446 - let tokens = tokens[1..tokens.len() - 1].iter().filter_map(ClangToken::as_rcc_token); - if let Some(literal) = rcc_expr(tokens, &parsed_macros) { + let tokens = tokens[1..tokens.len() - 1] + .iter() + .filter_map(ClangToken::as_swcc_token); + if let Some(literal) = swcc_expr(tokens, &parsed_macros) { Some((ident_str, 
literal)) } else { None } } -fn rcc_expr(rcc_tokens: impl Iterator>, definitions: &HashMap) -> Option { - use rcc::PreProcessor; - - let mut rcc_tokens = rcc_tokens.peekable(); - let location = rcc_tokens.peek()?.location; - PreProcessor::cpp_expr(definitions, rcc_tokens, location).ok()?.const_fold().ok()?.into_literal().ok() +fn swcc_expr( + swcc_tokens: impl Iterator>, + definitions: &HashMap, +) -> Option { + use saltwater::PreProcessor; + + let mut swcc_tokens = swcc_tokens.peekable(); + let location = swcc_tokens.peek()?.location; + PreProcessor::cpp_expr(definitions, swcc_tokens, location) + .ok()? + .const_fold() + .ok()? + .into_literal() + .ok() } fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option { - let rcc_tokens = cursor.rcc_tokens().into_iter(); + let swcc_tokens = cursor.swcc_tokens().into_iter(); // TODO(emilio): We can try to parse other kinds of literals. - match rcc_expr(rcc_tokens, &HashMap::new()) { + match swcc_expr(swcc_tokens, &HashMap::new()) { Some(Literal::Int(i)) => Some(i), Some(Literal::UnsignedInt(u)) => Some(u as i64), _ => None, From 4a7a6dc772fad200ad5145359e22a256efd89419 Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Jun 2020 20:14:44 -0400 Subject: [PATCH 4/5] Retry parsing with unsigned integers if signed parsing fails - Don't try to add 'u' if it's already there --- src/clang.rs | 45 +++++++++++++++---- .../expectations/tests/jsval_layout_opaque.rs | 12 +++++ .../tests/jsval_layout_opaque_1_0.rs | 12 +++++ 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/src/clang.rs b/src/clang.rs index 810e141f9d..835fb8b801 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -797,7 +797,9 @@ impl ClangToken { pub fn as_swcc_token( &self, ) -> Option> { - use saltwater::{Files, Lexer, Literal, Token}; + use saltwater::{ + error::LexError, Files, Lexer, Literal, Locatable, Token, + }; match self.kind { // `saltwater` does not have a comment token @@ -806,13 +808,40 @@ impl ClangToken { CXToken_Keyword => { let 
spelling = std::str::from_utf8(self.spelling()) .expect("invalid utf8 in token"); - let mut files = Files::new(); - let id = files.add("", "".into()); - let mut lexer = Lexer::new(id, spelling, false); - let mut token = lexer - .next() - .unwrap() - .expect("saltwater failed to parse clang token"); + let parse = |spelling| { + let mut files = Files::new(); + let id = files.add("", "".into()); + let mut lexer = Lexer::new(id, spelling, false); + lexer.next().unwrap() + }; + let failed_parse = |err: Locatable<_>| { + panic!( + "saltwater failed to parse clang token '{}': {}", + spelling, err.data + ); + }; + let mut token = match parse(spelling) { + Ok(token) => token, + Err(Locatable { + data: + LexError::IntegerOverflow { + is_signed: Some(true), + }, + .. + }) => { + warn!("integer does not fit into `long long`, trying again with `unsigned long long`"); + // saltwater ignores trailing `LL`, but requires any `u` suffix to come before `LL` + let mut spelling = String::from( + spelling + .trim_end_matches('l') + .trim_end_matches('L'), + ); + spelling.push('u'); + parse(&spelling).unwrap_or_else(failed_parse) + } + Err(err) => failed_parse(err), + }; + // saltwater generates null-terminated string immediately, // but bindgen only adds the null-terminator during codegen. 
if let Token::Literal(Literal::Str(ref mut string)) = diff --git a/tests/expectations/tests/jsval_layout_opaque.rs b/tests/expectations/tests/jsval_layout_opaque.rs index 233aff7b41..6c7c7b1520 100644 --- a/tests/expectations/tests/jsval_layout_opaque.rs +++ b/tests/expectations/tests/jsval_layout_opaque.rs @@ -92,9 +92,21 @@ where } } } +pub const JSVAL_ALIGNMENT: u32 = 0; pub const JSVAL_TAG_SHIFT: u32 = 47; pub const JSVAL_PAYLOAD_MASK: u64 = 140737488355327; pub const JSVAL_TAG_MASK: i64 = -140737488355328; +pub const JSVAL_TYPE_TO_TAG: u32 = 0; +pub const JSVAL_LOWER_INCL_TAG_OF_OBJ_OR_NULL_SET: u32 = 0; +pub const JSVAL_UPPER_EXCL_TAG_OF_PRIMITIVE_SET: u32 = 0; +pub const JSVAL_UPPER_INCL_TAG_OF_NUMBER_SET: u32 = 0; +pub const JSVAL_LOWER_INCL_TAG_OF_GCTHING_SET: u32 = 0; +pub const JSVAL_LOWER_INCL_SHIFTED_TAG_OF_OBJ_OR_NULL_SET: u32 = 0; +pub const JSVAL_UPPER_EXCL_SHIFTED_TAG_OF_PRIMITIVE_SET: u32 = 0; +pub const JSVAL_UPPER_EXCL_SHIFTED_TAG_OF_NUMBER_SET: u32 = 0; +pub const JSVAL_LOWER_INCL_SHIFTED_TAG_OF_GCTHING_SET: u32 = 0; +pub const JS_VALUE_CONSTEXPR: u32 = 0; +pub const JS_VALUE_CONSTEXPR_VAR: u32 = 0; pub type size_t = ::std::os::raw::c_ulonglong; #[repr(u8)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] diff --git a/tests/expectations/tests/jsval_layout_opaque_1_0.rs b/tests/expectations/tests/jsval_layout_opaque_1_0.rs index f2433a4619..4fa64a8622 100644 --- a/tests/expectations/tests/jsval_layout_opaque_1_0.rs +++ b/tests/expectations/tests/jsval_layout_opaque_1_0.rs @@ -135,9 +135,21 @@ impl ::std::cmp::PartialEq for __BindgenUnionField { } } impl ::std::cmp::Eq for __BindgenUnionField {} +pub const JSVAL_ALIGNMENT: u32 = 0; pub const JSVAL_TAG_SHIFT: u32 = 47; pub const JSVAL_PAYLOAD_MASK: u64 = 140737488355327; pub const JSVAL_TAG_MASK: i64 = -140737488355328; +pub const JSVAL_TYPE_TO_TAG: u32 = 0; +pub const JSVAL_LOWER_INCL_TAG_OF_OBJ_OR_NULL_SET: u32 = 0; +pub const JSVAL_UPPER_EXCL_TAG_OF_PRIMITIVE_SET: u32 = 0; +pub const 
JSVAL_UPPER_INCL_TAG_OF_NUMBER_SET: u32 = 0; +pub const JSVAL_LOWER_INCL_TAG_OF_GCTHING_SET: u32 = 0; +pub const JSVAL_LOWER_INCL_SHIFTED_TAG_OF_OBJ_OR_NULL_SET: u32 = 0; +pub const JSVAL_UPPER_EXCL_SHIFTED_TAG_OF_PRIMITIVE_SET: u32 = 0; +pub const JSVAL_UPPER_EXCL_SHIFTED_TAG_OF_NUMBER_SET: u32 = 0; +pub const JSVAL_LOWER_INCL_SHIFTED_TAG_OF_GCTHING_SET: u32 = 0; +pub const JS_VALUE_CONSTEXPR: u32 = 0; +pub const JS_VALUE_CONSTEXPR_VAR: u32 = 0; pub type size_t = ::std::os::raw::c_ulonglong; #[repr(u8)] #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] From 89ad3bf0076c54e54d1a8f9c57374baf0ebcfa6a Mon Sep 17 00:00:00 2001 From: Joshua Nelson Date: Sun, 21 Jun 2020 22:31:49 -0400 Subject: [PATCH 5/5] Remove trailing tokens when parsing integer literals This was the same LLVM bug noted in parse_macro, but it seems that cexpr allowed trailing tokens while saltwater does not. --- src/ir/var.rs | 55 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/src/ir/var.rs b/src/ir/var.rs index 1dc5660583..2960f0a750 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -372,9 +372,30 @@ fn parse_macro( } let parsed_macros = ctx.parsed_macros(); - // TODO: remove this clone (will need changes in saltwater) - if let Some(literal) = swcc_expr(swcc_tokens.clone(), &parsed_macros) { - return Some((ident_str, literal)); + swcc_expr(swcc_tokens.collect(), &parsed_macros) + .map(|literal| (ident_str, literal)) +} + +fn swcc_expr( + mut tokens: Vec>, + definitions: &HashMap, +) -> Option { + use saltwater::{Locatable, PreProcessor}; + + let parse = |tokens: Vec>| { + let mut tokens = tokens.into_iter().peekable(); + let location = tokens.peek()?.location; + PreProcessor::cpp_expr(definitions, tokens, location) + .ok()? + .const_fold() + .ok()? 
+ .into_literal() + .ok() + }; + + // TODO: remove this clone (requires changes in saltwater) + if let Some(literal) = parse(tokens.clone()) { + return Some(literal); } // Try without the last token, to workaround a libclang bug in versions @@ -383,34 +404,12 @@ fn parse_macro( // See: // https://bugs.llvm.org//show_bug.cgi?id=9069 // https://reviews.llvm.org/D26446 - let tokens = tokens[1..tokens.len() - 1] - .iter() - .filter_map(ClangToken::as_swcc_token); - if let Some(literal) = swcc_expr(tokens, &parsed_macros) { - Some((ident_str, literal)) - } else { - None - } -} - -fn swcc_expr( - swcc_tokens: impl Iterator>, - definitions: &HashMap, -) -> Option { - use saltwater::PreProcessor; - - let mut swcc_tokens = swcc_tokens.peekable(); - let location = swcc_tokens.peek()?.location; - PreProcessor::cpp_expr(definitions, swcc_tokens, location) - .ok()? - .const_fold() - .ok()? - .into_literal() - .ok() + tokens.pop(); + parse(tokens) } fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option { - let swcc_tokens = cursor.swcc_tokens().into_iter(); + let swcc_tokens = cursor.swcc_tokens(); // TODO(emilio): We can try to parse other kinds of literals. match swcc_expr(swcc_tokens, &HashMap::new()) {