diff --git a/tasks/ast_tools/Cargo.toml b/tasks/ast_tools/Cargo.toml
index b4e37258136d1..6d590a0e76faf 100644
--- a/tasks/ast_tools/Cargo.toml
+++ b/tasks/ast_tools/Cargo.toml
@@ -48,15 +48,7 @@ rayon = { workspace = true }
 rustc-hash = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
-syn = { workspace = true, features = [
-  "clone-impls",
-  "derive",
-  "extra-traits",
-  "full",
-  "parsing",
-  "printing",
-  "proc-macro",
-] }
+syn = { workspace = true, features = ["extra-traits", "full", "parsing", "printing"] }
 toml = { workspace = true }
 
 [features]
diff --git a/tasks/ast_tools/src/parse/load.rs b/tasks/ast_tools/src/parse/load.rs
index c71d4c748dd12..b5e6e0af52234 100644
--- a/tasks/ast_tools/src/parse/load.rs
+++ b/tasks/ast_tools/src/parse/load.rs
@@ -1,6 +1,5 @@
 use std::{fs, path::Path};
 
-use indexmap::map::Entry;
 use syn::{
     Attribute, Generics, Ident, Item, ItemEnum, ItemMacro, ItemStruct, Meta, Token, Variant,
     Visibility, WhereClause, braced,
@@ -12,7 +11,6 @@
 use crate::{schema::FileId, utils::ident_name};
 
 use super::{
-    FxIndexMap,
     parse::convert_expr_to_string,
     skeleton::{EnumSkeleton, Skeleton, StructSkeleton},
 };
@@ -23,22 +21,22 @@ use super::{
 /// * Name of type.
 /// * Inherits of enums wrapped in `inherit_variants!` macro.
 ///
-/// Inserts [`Skeleton`]s into `skeletons` and `meta_skeletons`.
+/// Returns a list of [`Skeleton`]s found in the file.
+/// Each entry is `(type_name, skeleton, is_meta)`.
 ///
 /// This is the bare minimum to be able to "link up" types to each other in next pass.
 pub fn load_file(
     file_id: FileId,
     file_path: &str,
-    skeletons: &mut FxIndexMap<String, Skeleton>,
-    meta_skeletons: &mut FxIndexMap<String, Skeleton>,
     root_path: &Path,
-) {
+) -> Vec<(String, Skeleton, bool)> {
     let content = fs::read_to_string(root_path.join(file_path)).unwrap();
     let file = parse_file(content.as_str()).unwrap();
 
+    let mut results = Vec::new();
     for item in file.items {
-        let (name, skeleton, is_meta) = match item {
+        let entry = match item {
             Item::Struct(item) => {
                 let Some((skeleton, is_meta)) = parse_struct(item, file_id) else { continue };
                 (skeleton.name.clone(), Skeleton::Struct(skeleton), is_meta)
@@ -53,15 +51,9 @@ pub fn load_file(
             }
             _ => continue,
         };
-
-        let use_skeletons = if is_meta { &mut *meta_skeletons } else { &mut *skeletons };
-        match use_skeletons.entry(name) {
-            Entry::Occupied(entry) => panic!("2 types with same name: {}", entry.key()),
-            Entry::Vacant(entry) => {
-                entry.insert(skeleton);
-            }
-        }
+        results.push(entry);
     }
+    results
 }
 
 fn parse_struct(item: ItemStruct, file_id: FileId) -> Option<(StructSkeleton, /* is_meta */ bool)> {
diff --git a/tasks/ast_tools/src/parse/mod.rs b/tasks/ast_tools/src/parse/mod.rs
index 139f0492efec5..d68585623e979 100644
--- a/tasks/ast_tools/src/parse/mod.rs
+++ b/tasks/ast_tools/src/parse/mod.rs
@@ -18,7 +18,7 @@
 //! 2nd phase involves full parsing of each type, and linking types to each other.
 //!
 //! A [`TypeDef`] is generated for each type. The `IndexVec` that is created is indexed
-//! by [`TypeId`] - same order of entries as the `FxIndexMap` from phase 1.
+//! by [`TypeId`] - same order of entries as the `FxIndexSet` of type names from phase 1.
 //!
 //! `parse_attr` method is called on [`Derive`]s and [`Generator`]s which handle attributes,
 //! for the derive/generator to parse the attribute and update the [`TypeDef`] accordingly.
@@ -44,15 +44,18 @@
 //!
 //! [`TypeId`]: crate::schema::TypeId
 //! [`TypeDef`]: crate::schema::TypeDef
+//! [`Skeleton`]: skeleton::Skeleton
 //! [`Derive`]: crate::Derive
 //! [`Generator`]: crate::Generator
 
-use std::path::Path;
+use rayon::prelude::*;
+
+use oxc_index::IndexVec;
 
 use crate::{
     Codegen, log, log_success,
     schema::{Derives, File, FileId, Schema},
-    utils::FxIndexMap,
+    utils::FxIndexSet,
 };
 
 pub mod attr;
@@ -62,7 +65,6 @@ mod parse;
 mod skeleton;
 
 use load::load_file;
 use parse::parse;
-use skeleton::Skeleton;
 
 /// Analyse the files with provided paths, and generate a [`Schema`].
 pub fn parse_files(file_paths: &[String], codegen: &Codegen) -> Schema {
@@ -72,41 +74,70 @@
     // Meta types are not part of the AST, but associated with it.
     // `TypeId` is index into `skeletons`.
     // `MetaId` is index into `meta_skeletons`.
-    let mut skeletons = FxIndexMap::default();
-    let mut meta_skeletons = FxIndexMap::default();
-
-    let files = file_paths
-        .iter()
+    log!("Loading files... ");
+    let results = file_paths
+        .par_iter()
         .enumerate()
         .map(|(file_id, file_path)| {
             let file_id = FileId::from_usize(file_id);
-            analyse_file(
-                file_id,
-                file_path,
-                &mut skeletons,
-                &mut meta_skeletons,
-                codegen.root_path(),
-            )
+            let file_skeletons = load_file(file_id, file_path, codegen.root_path());
+            // `Skeleton` contains `syn` types which are `!Send` (see `AssertSend` below)
+            AssertSend((file_path, file_skeletons))
         })
-        .collect();
+        .collect::<Vec<_>>();
+    log_success!();
+
+    // Sequential phase: merge into name sets + skeleton vecs (preserving deterministic order).
+    let mut type_names = FxIndexSet::default();
+    let mut type_skeletons = Vec::new();
+    let mut meta_names = FxIndexSet::default();
+    let mut meta_skeletons = Vec::new();
+    let mut files = IndexVec::new();
+
+    for AssertSend((file_path, file_skeletons)) in results {
+        for (name, skeleton, is_meta) in file_skeletons {
+            let (names, skeletons) = if is_meta {
+                (&mut meta_names, &mut meta_skeletons)
+            } else {
+                (&mut type_names, &mut type_skeletons)
+            };
+
+            let (index, is_new) = names.insert_full(name);
+            assert!(is_new, "2 types with same name: {}", names.get_index(index).unwrap());
+            skeletons.push(skeleton);
+        }
+        files.push(File::new(file_path));
+    }
+
+    let type_skeletons = IndexVec::from_vec(type_skeletons);
+    let meta_skeletons = IndexVec::from_vec(meta_skeletons);
 
     // Convert skeletons into schema
-    parse(skeletons, meta_skeletons, files, codegen)
+    parse(type_names, type_skeletons, meta_names, meta_skeletons, files, codegen)
 }
 
-/// Analyse file with provided path and add types to `skeletons` and `meta_skeletons`.
+/// Wrapper to assert a type is safe to send across threads.
 ///
-/// Returns a [`File`].
-fn analyse_file(
-    file_id: FileId,
-    file_path: &str,
-    skeletons: &mut FxIndexMap<String, Skeleton>,
-    meta_skeletons: &mut FxIndexMap<String, Skeleton>,
-    root_path: &Path,
-) -> File {
-    log!("Load {file_path}... ");
-    load_file(file_id, file_path, skeletons, meta_skeletons, root_path);
-    log_success!();
+/// `syn` types are `!Send` because `proc_macro2::Span` contains an `Rc` internally
+/// (when the `proc-macro` feature is enabled on `proc-macro2`).
+///
+/// This crate does not enable the `proc-macro` feature on `syn` crate, which would usually make `syn` types `Send`.
+/// But unfortunately it gets enabled by transitive dependencies (`serde_derive`, `bpaf_derive`, etc),
+/// due to feature unification.
+///
+/// `Span` is embedded throughout the syn AST - in every `Ident`, token, `Type`, `Expr`, and `Attribute` -
+/// so there's no way to extract the data we need without it.
+///
+/// # Why this is sound
+///
+/// `Rc` is `!Send` because two `Rc`s pointing to the same allocation could be used concurrently from different threads,
+/// violating the non-atomic reference count. But that's only a problem if an `Rc` has been *cloned*. A sole owner of
+/// an `Rc` is safe to send - there's no other `Rc` to race with.
+///
+/// `syn::parse_file` parses a `&str` and returns a self-contained AST. The `Span`s in this tree are created fresh
+/// by `proc_macro2` and are not clones of any external `Rc`. So each parsed tree is the sole owner of all its `Rc`s,
+/// and sending it to another thread cannot cause a data race.
+struct AssertSend<T>(T);
 
-    File::new(file_path)
-}
+// SAFETY: See above
+unsafe impl<T> Send for AssertSend<T> {}
diff --git a/tasks/ast_tools/src/parse/parse.rs b/tasks/ast_tools/src/parse/parse.rs
index f5ed1402991e7..f881dbc14cab5 100644
--- a/tasks/ast_tools/src/parse/parse.rs
+++ b/tasks/ast_tools/src/parse/parse.rs
@@ -17,7 +17,7 @@ use crate::{
         PointerDef, PointerKind, PrimitiveDef, Schema, StructDef, TypeDef, TypeId, VariantDef, VecDef,
         Visibility,
     },
-    utils::{FxIndexMap, FxIndexSet, ident_name},
+    utils::{FxIndexSet, ident_name},
 };
 
 use super::{
@@ -28,18 +28,15 @@ use super::{
 /// Parse [`Skeleton`]s into [`TypeDef`]s.
 pub fn parse(
-    skeletons: FxIndexMap<String, Skeleton>,
-    meta_skeletons: FxIndexMap<String, Skeleton>,
+    type_names: FxIndexSet<String>,
+    type_skeletons: IndexVec<TypeId, Skeleton>,
+    meta_names: FxIndexSet<String>,
+    meta_skeletons: IndexVec<MetaId, Skeleton>,
     files: IndexVec<FileId, File>,
     codegen: &Codegen,
 ) -> Schema {
-    // Split `skeletons` into an `IndexSet` (type names) and `IndexVec` (skeletons)
-    let (type_names, skeletons_vec) = skeletons.into_iter().unzip();
-    // Split `meta_skeletons` into an `IndexSet` (meta names) and `IndexVec` (skeletons)
-    let (meta_names, meta_skeletons_vec) = meta_skeletons.into_iter().unzip();
-
     let parser = Parser::new(type_names, meta_names, files, codegen);
-    parser.parse_all(skeletons_vec, meta_skeletons_vec)
+    parser.parse_all(type_skeletons, meta_skeletons)
 }
 
 /// Types parser.