From dc1fc08dc9a6957a79cc24edde4dc016debf3fa7 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 24 Jun 2022 17:12:58 -0700 Subject: [PATCH 1/6] rustdoc: reference function signature types from the `p` array This reduces the size of the function signature index, because it's common to have many functions that operate on the same types. $ wc -c search-index-old.js search-index-new.js 5224374 search-index-old.js 3932314 search-index-new.js By my math, this reduces the uncompressed size of the search index by about 25% (the old index is roughly 33% larger than the new one). On compressed signatures, the wins are less drastic, a mere 8%: $ wc -c search-index-old.js.gz search-index-new.js.gz 404532 search-index-old.js.gz 371635 search-index-new.js.gz --- src/librustdoc/clean/types.rs | 4 - src/librustdoc/formats/item_type.rs | 2 - src/librustdoc/html/render/mod.rs | 81 ++++++----- src/librustdoc/html/render/search_index.rs | 153 ++++++++++++++------- src/librustdoc/html/static/js/search.js | 131 +++++++++++++----- src/librustdoc/json/conversions.rs | 1 - 6 files changed, 242 insertions(+), 130 deletions(-) diff --git a/src/librustdoc/clean/types.rs b/src/librustdoc/clean/types.rs index 2762d5e8502b2..2dbb2d46e026e 100644 --- a/src/librustdoc/clean/types.rs +++ b/src/librustdoc/clean/types.rs @@ -1671,10 +1671,6 @@ impl Type { matches!(self, Type::ImplTrait(_)) } - pub(crate) fn is_primitive(&self) -> bool { - self.primitive_type().is_some() - } - pub(crate) fn projection(&self) -> Option<(&Type, DefId, PathSegment)> { if let QPath { self_type, trait_, assoc, .. 
} = self { Some((self_type, trait_.def_id(), *assoc.clone())) diff --git a/src/librustdoc/formats/item_type.rs b/src/librustdoc/formats/item_type.rs index eca5501cd339d..9cb3327d7c781 100644 --- a/src/librustdoc/formats/item_type.rs +++ b/src/librustdoc/formats/item_type.rs @@ -48,7 +48,6 @@ pub(crate) enum ItemType { ProcAttribute = 23, ProcDerive = 24, TraitAlias = 25, - Generic = 26, } impl Serialize for ItemType { @@ -175,7 +174,6 @@ impl ItemType { ItemType::ProcAttribute => "attr", ItemType::ProcDerive => "derive", ItemType::TraitAlias => "traitalias", - ItemType::Generic => "generic", } } } diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index 3f426ee93e77e..b01afa02b8b94 100644 --- a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -110,63 +110,71 @@ pub(crate) struct IndexItem { /// A type used for the search index. #[derive(Debug)] pub(crate) struct RenderType { - name: Option, - generics: Option>, + id: Option, + generics: Option>, } -/// Full type of functions/methods in the search index. -#[derive(Debug)] -pub(crate) struct IndexItemFunctionType { - inputs: Vec, - output: Vec, -} - -impl Serialize for IndexItemFunctionType { +impl Serialize for RenderType { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - // If we couldn't figure out a type, just write `null`. 
- let has_missing = self.inputs.iter().chain(self.output.iter()).any(|i| i.ty.name.is_none()); - if has_missing { - serializer.serialize_none() - } else { + let id = match &self.id { + // 0 is a sentinel, everything else is one-indexed + None => 0, + Some(RenderTypeId::Index(idx)) => idx + 1, + _ => panic!("must convert render types to indexes before serializing"), + }; + if let Some(generics) = &self.generics { let mut seq = serializer.serialize_seq(None)?; - seq.serialize_element(&self.inputs)?; - match self.output.as_slice() { - [] => {} - [one] => seq.serialize_element(one)?, - all => seq.serialize_element(all)?, - } + seq.serialize_element(&id)?; + seq.serialize_element(generics)?; seq.end() + } else { + id.serialize(serializer) } } } -#[derive(Debug)] -pub(crate) struct TypeWithKind { - ty: RenderType, - kind: ItemType, +#[derive(Clone, Debug)] +pub(crate) enum RenderTypeId { + DefId(DefId), + Primitive(clean::PrimitiveType), + Index(usize), } -impl From<(RenderType, ItemType)> for TypeWithKind { - fn from(x: (RenderType, ItemType)) -> TypeWithKind { - TypeWithKind { ty: x.0, kind: x.1 } - } +/// Full type of functions/methods in the search index. +#[derive(Debug)] +pub(crate) struct IndexItemFunctionType { + inputs: Vec, + output: Vec, } -impl Serialize for TypeWithKind { +impl Serialize for IndexItemFunctionType { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - let mut seq = serializer.serialize_seq(None)?; - seq.serialize_element(&self.ty.name)?; - seq.serialize_element(&self.kind)?; - if let Some(generics) = &self.ty.generics { - seq.serialize_element(generics)?; + // If we couldn't figure out a type, just write `0`. + let has_missing = self + .inputs + .iter() + .chain(self.output.iter()) + .any(|i| i.id.is_none() && i.generics.is_none()); + if has_missing { + 0.serialize(serializer) + } else { + let mut seq = serializer.serialize_seq(None)?; + match &self.inputs[..] 
{ + [one] if one.generics.is_none() => seq.serialize_element(one)?, + _ => seq.serialize_element(&self.inputs)?, + } + match &self.output[..] { + [one] if one.generics.is_none() => seq.serialize_element(one)?, + _ => seq.serialize_element(&self.output)?, + } + seq.end() } - seq.end() } } @@ -2517,7 +2525,6 @@ fn item_ty_to_section(ty: ItemType) -> ItemSection { ItemType::ProcAttribute => ItemSection::AttributeMacros, ItemType::ProcDerive => ItemSection::DeriveMacros, ItemType::TraitAlias => ItemSection::TraitAliases, - ItemType::Generic => unreachable!(), } } diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 9f302cc256659..6520dfc8ab1a9 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -3,7 +3,8 @@ use std::collections::BTreeMap; use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty::TyCtxt; -use rustc_span::symbol::{kw, Symbol}; +use rustc_span::def_id::DefId; +use rustc_span::symbol::Symbol; use serde::ser::{Serialize, SerializeStruct, Serializer}; use crate::clean; @@ -12,7 +13,7 @@ use crate::formats::cache::{Cache, OrphanImplItem}; use crate::formats::item_type::ItemType; use crate::html::format::join_with_double_colon; use crate::html::markdown::short_markdown_summary; -use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, TypeWithKind}; +use crate::html::render::{IndexItem, IndexItemFunctionType, RenderType, RenderTypeId}; /// Builds the search index from the collected metadata pub(crate) fn build_index<'tcx>( @@ -48,14 +49,12 @@ pub(crate) fn build_index<'tcx>( .doc_value() .map_or_else(String::new, |s| short_markdown_summary(&s, &krate.module.link_names(cache))); - let Cache { ref mut search_index, ref paths, .. } = *cache; - // Aliases added through `#[doc(alias = "...")]`. Since a few items can have the same alias, // we need the alias element to have an array of items. 
let mut aliases: BTreeMap> = BTreeMap::new(); // Sort search index items. This improves the compressibility of the search index. - search_index.sort_unstable_by(|k1, k2| { + cache.search_index.sort_unstable_by(|k1, k2| { // `sort_unstable_by_key` produces lifetime errors let k1 = (&k1.path, &k1.name, &k1.ty, &k1.parent); let k2 = (&k2.path, &k2.name, &k2.ty, &k2.parent); @@ -63,7 +62,7 @@ pub(crate) fn build_index<'tcx>( }); // Set up alias indexes. - for (i, item) in search_index.iter().enumerate() { + for (i, item) in cache.search_index.iter().enumerate() { for alias in &item.aliases[..] { aliases.entry(alias.as_str().to_lowercase()).or_default().push(i); } @@ -74,6 +73,79 @@ pub(crate) fn build_index<'tcx>( let mut lastpath = ""; let mut lastpathid = 0usize; + // First, on function signatures + let mut search_index = std::mem::replace(&mut cache.search_index, Vec::new()); + for item in search_index.iter_mut() { + fn convert_render_type( + ty: &mut RenderType, + cache: &mut Cache, + defid_to_pathid: &mut FxHashMap, + lastpathid: &mut usize, + crate_paths: &mut Vec<(ItemType, Symbol)>, + ) { + if let Some(generics) = &mut ty.generics { + for item in generics { + convert_render_type(item, cache, defid_to_pathid, lastpathid, crate_paths); + } + } + let Cache { ref paths, ref external_paths, .. 
} = *cache; + let Some(id) = ty.id.clone() else { + assert!(ty.generics.is_some()); + return; + }; + let (defid, path, item_type) = match id { + RenderTypeId::DefId(defid) => { + if let Some(&(ref fqp, item_type)) = + paths.get(&defid).or_else(|| external_paths.get(&defid)) + { + (defid, *fqp.last().unwrap(), item_type) + } else { + ty.id = None; + return; + } + } + RenderTypeId::Primitive(primitive) => { + let defid = *cache.primitive_locations.get(&primitive).unwrap(); + (defid, primitive.as_sym(), ItemType::Primitive) + } + RenderTypeId::Index(_) => return, + }; + match defid_to_pathid.entry(defid) { + Entry::Occupied(entry) => ty.id = Some(RenderTypeId::Index(*entry.get())), + Entry::Vacant(entry) => { + let pathid = *lastpathid; + entry.insert(pathid); + *lastpathid += 1; + crate_paths.push((item_type, path)); + ty.id = Some(RenderTypeId::Index(pathid)); + } + } + } + if let Some(search_type) = &mut item.search_type { + for item in &mut search_type.inputs { + convert_render_type( + item, + cache, + &mut defid_to_pathid, + &mut lastpathid, + &mut crate_paths, + ); + } + for item in &mut search_type.output { + convert_render_type( + item, + cache, + &mut defid_to_pathid, + &mut lastpathid, + &mut crate_paths, + ); + } + } + } + + let Cache { ref paths, .. 
} = *cache; + + // Then, on parent modules let crate_items: Vec<&IndexItem> = search_index .iter_mut() .map(|item| { @@ -151,6 +223,7 @@ pub(crate) fn build_index<'tcx>( "`{}` is missing idx", item.name ); + // 0 is a sentinel, everything else is one-indexed item.parent_idx.map(|x| x + 1).unwrap_or(0) }) .collect::>(), @@ -202,36 +275,33 @@ pub(crate) fn get_function_type_for_search<'tcx>( _ => return None, }; - inputs.retain(|a| a.ty.name.is_some()); - output.retain(|a| a.ty.name.is_some()); + inputs.retain(|a| a.id.is_some() || a.generics.is_some()); + output.retain(|a| a.id.is_some() || a.generics.is_some()); Some(IndexItemFunctionType { inputs, output }) } -fn get_index_type(clean_type: &clean::Type, generics: Vec) -> RenderType { +fn get_index_type(clean_type: &clean::Type, generics: Vec) -> RenderType { RenderType { - name: get_index_type_name(clean_type).map(|s| s.as_str().to_ascii_lowercase()), + id: get_index_type_id(clean_type), generics: if generics.is_empty() { None } else { Some(generics) }, } } -fn get_index_type_name(clean_type: &clean::Type) -> Option { +fn get_index_type_id(clean_type: &clean::Type) -> Option { match *clean_type { - clean::Type::Path { ref path, .. } => { - let path_segment = path.segments.last().unwrap(); - Some(path_segment.name) - } + clean::Type::Path { ref path, .. } => Some(RenderTypeId::DefId(path.def_id())), clean::DynTrait(ref bounds, _) => { let path = &bounds[0].trait_; - Some(path.segments.last().unwrap().name) + Some(RenderTypeId::DefId(path.def_id())) } - // We return an empty name because we don't care about the generic name itself. - clean::Generic(_) | clean::ImplTrait(_) => Some(kw::Empty), - clean::Primitive(ref p) => Some(p.as_sym()), + clean::Primitive(p) => Some(RenderTypeId::Primitive(p)), clean::BorrowedRef { ref type_, .. 
} | clean::RawPointer(_, ref type_) => { - get_index_type_name(type_) + get_index_type_id(type_) } clean::BareFunction(_) + | clean::Generic(_) + | clean::ImplTrait(_) | clean::Tuple(_) | clean::Slice(_) | clean::Array(_, _) @@ -254,16 +324,10 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( arg: &'a Type, tcx: TyCtxt<'tcx>, recurse: usize, - res: &mut Vec, + res: &mut Vec, cache: &Cache, ) { - fn insert_ty( - res: &mut Vec, - tcx: TyCtxt<'_>, - ty: Type, - mut generics: Vec, - cache: &Cache, - ) { + fn insert_ty(res: &mut Vec, ty: Type, mut generics: Vec) { // generics and impl trait are both identified by their generics, // rather than a type name itself let anonymous = ty.is_full_generic() || ty.is_impl_trait(); @@ -316,20 +380,11 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( return; } } - let mut index_ty = get_index_type(&ty, generics); - if index_ty.name.as_ref().map(|s| s.is_empty() && generics_empty).unwrap_or(true) { + let index_ty = get_index_type(&ty, generics); + if index_ty.id.is_none() && generics_empty { return; } - if anonymous { - // We remove the name of the full generic because we have no use for it. - index_ty.name = Some(String::new()); - res.push(TypeWithKind::from((index_ty, ItemType::Generic))); - } else if let Some(kind) = ty.def_id(cache).map(|did| tcx.def_kind(did).into()) { - res.push(TypeWithKind::from((index_ty, kind))); - } else if ty.is_primitive() { - // This is a primitive, let's store it as such. - res.push(TypeWithKind::from((index_ty, ItemType::Primitive))); - } + res.push(index_ty); } if recurse >= 10 { @@ -379,7 +434,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( } } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } // Otherwise we check if the trait bounds are "inlined" like `T: Option`... 
if let Some(bound) = generics.params.iter().find(|g| g.is_type() && g.name == arg_s) { @@ -398,7 +453,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( ); } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } } else if let Type::ImplTrait(ref bounds) = *arg { let mut ty_generics = Vec::new(); @@ -416,7 +471,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( ); } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } else { // This is not a type parameter. So for example if we have `T, U: Option`, and we're // looking at `Option`, we enter this "else" condition, otherwise if it's `T`, we don't. @@ -437,7 +492,7 @@ fn add_generics_and_bounds_as_types<'tcx, 'a>( ); } } - insert_ty(res, tcx, arg.clone(), ty_generics, cache); + insert_ty(res, arg.clone(), ty_generics); } } @@ -450,7 +505,7 @@ fn get_fn_inputs_and_outputs<'tcx>( tcx: TyCtxt<'tcx>, impl_generics: Option<&(clean::Type, clean::Generics)>, cache: &Cache, -) -> (Vec, Vec) { +) -> (Vec, Vec) { let decl = &func.decl; let combined_generics; @@ -478,9 +533,7 @@ fn get_fn_inputs_and_outputs<'tcx>( if !args.is_empty() { all_types.extend(args); } else { - if let Some(kind) = arg.type_.def_id(cache).map(|did| tcx.def_kind(did).into()) { - all_types.push(TypeWithKind::from((get_index_type(&arg.type_, vec![]), kind))); - } + all_types.push(get_index_type(&arg.type_, vec![])); } } @@ -497,9 +550,7 @@ fn get_fn_inputs_and_outputs<'tcx>( cache, ); if ret_types.is_empty() { - if let Some(kind) = return_type.def_id(cache).map(|did| tcx.def_kind(did).into()) { - ret_types.push(TypeWithKind::from((get_index_type(return_type, vec![]), kind))); - } + ret_types.push(get_index_type(return_type, vec![])); } } _ => {} diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index cb1609d498340..54057627c92b6 100644 --- a/src/librustdoc/html/static/js/search.js +++ 
b/src/librustdoc/html/static/js/search.js @@ -114,10 +114,6 @@ function levenshtein(s1, s2) { function initSearch(rawSearchIndex) { const MAX_LEV_DISTANCE = 3; const MAX_RESULTS = 200; - const GENERICS_DATA = 2; - const NAME = 0; - const INPUTS_DATA = 0; - const OUTPUT_DATA = 1; const NO_TYPE_FILTER = -1; /** * @type {Array} @@ -895,21 +891,18 @@ function initSearch(rawSearchIndex) { * @return {integer} - Returns the best match (if any) or `MAX_LEV_DISTANCE + 1`. */ function checkGenerics(row, elem, defaultLev) { - if (row.length <= GENERICS_DATA || row[GENERICS_DATA].length === 0) { - return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1; - } else if (row[GENERICS_DATA].length > 0 && row[GENERICS_DATA][0][NAME] === "") { - if (row.length > GENERICS_DATA) { - return checkGenerics(row[GENERICS_DATA][0], elem, defaultLev); - } + if (row.generics.length === 0) { return elem.generics.length === 0 ? defaultLev : MAX_LEV_DISTANCE + 1; + } else if (row.generics.length > 0 && row.generics[0].name === null) { + return checkGenerics(row.generics[0], elem, defaultLev); } // The names match, but we need to be sure that all generics kinda // match as well. let elem_name; - if (elem.generics.length > 0 && row[GENERICS_DATA].length >= elem.generics.length) { + if (elem.generics.length > 0 && row.generics.length >= elem.generics.length) { const elems = Object.create(null); - for (const entry of row[GENERICS_DATA]) { - elem_name = entry[NAME]; + for (const entry of row.generics) { + elem_name = entry.name; if (elem_name === "") { // Pure generic, needs to check into it. 
if (checkGenerics(entry, elem, MAX_LEV_DISTANCE + 1) !== 0) { @@ -963,7 +956,7 @@ function initSearch(rawSearchIndex) { */ function checkIfInGenerics(row, elem) { let lev = MAX_LEV_DISTANCE + 1; - for (const entry of row[GENERICS_DATA]) { + for (const entry of row.generics) { lev = Math.min(checkType(entry, elem, true), lev); if (lev === 0) { break; @@ -984,23 +977,22 @@ function initSearch(rawSearchIndex) { * no match, returns `MAX_LEV_DISTANCE + 1`. */ function checkType(row, elem, literalSearch) { - if (row[NAME].length === 0) { + if (row.name === null) { // This is a pure "generic" search, no need to run other checks. - if (row.length > GENERICS_DATA) { + if (row.generics.length > 0) { return checkIfInGenerics(row, elem); } return MAX_LEV_DISTANCE + 1; } - let lev = levenshtein(row[NAME], elem.name); + let lev = levenshtein(row.name, elem.name); if (literalSearch) { if (lev !== 0) { // The name didn't match, let's try to check if the generics do. if (elem.generics.length === 0) { - const checkGeneric = (row.length > GENERICS_DATA && - row[GENERICS_DATA].length > 0); - if (checkGeneric && row[GENERICS_DATA] - .findIndex(tmp_elem => tmp_elem[NAME] === elem.name) !== -1) { + const checkGeneric = row.generics.length > 0; + if (checkGeneric && row.generics + .findIndex(tmp_elem => tmp_elem.name === elem.name) !== -1) { return 0; } } @@ -1009,7 +1001,7 @@ function initSearch(rawSearchIndex) { return checkGenerics(row, elem, MAX_LEV_DISTANCE + 1); } return 0; - } else if (row.length > GENERICS_DATA) { + } else if (row.generics.length > 0) { if (elem.generics.length === 0) { if (lev === 0) { return 0; @@ -1059,9 +1051,9 @@ function initSearch(rawSearchIndex) { function findArg(row, elem, typeFilter) { let lev = MAX_LEV_DISTANCE + 1; - if (row && row.type && row.type[INPUTS_DATA] && row.type[INPUTS_DATA].length > 0) { - for (const input of row.type[INPUTS_DATA]) { - if (!typePassesFilter(typeFilter, input[1])) { + if (row && row.type && row.type.inputs && 
row.type.inputs.length > 0) { + for (const input of row.type.inputs) { + if (!typePassesFilter(typeFilter, input.ty)) { continue; } lev = Math.min(lev, checkType(input, elem, parsedQuery.literalSearch)); @@ -1086,13 +1078,10 @@ function initSearch(rawSearchIndex) { function checkReturned(row, elem, typeFilter) { let lev = MAX_LEV_DISTANCE + 1; - if (row && row.type && row.type.length > OUTPUT_DATA) { - let ret = row.type[OUTPUT_DATA]; - if (typeof ret[0] === "string") { - ret = [ret]; - } + if (row && row.type && row.type.output.length > 0) { + const ret = row.type.output; for (const ret_ty of ret) { - if (!typePassesFilter(typeFilter, ret_ty[1])) { + if (!typePassesFilter(typeFilter, ret_ty.ty)) { continue; } lev = Math.min(lev, checkType(ret_ty, elem, parsedQuery.literalSearch)); @@ -1836,6 +1825,65 @@ function initSearch(rawSearchIndex) { filterCrates); } + function buildItemSearchTypeAll(types, lowercasePaths) { + const PATH_INDEX_DATA = 0; + const GENERICS_DATA = 1; + if (types === null) { + return []; + } + return types.map(type => { + let pathIndex, generics; + if (typeof type === "number") { + pathIndex = type; + generics = []; + } else { + pathIndex = type[PATH_INDEX_DATA]; + generics = buildItemSearchTypeAll(type[GENERICS_DATA], lowercasePaths); + } + return { + name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name, + ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty, + generics: generics, + }; + }); + } + + function buildFunctionSearchType(functionSearchType, lowercasePaths) { + const INPUTS_DATA = 0; + const OUTPUT_DATA = 1; + if (functionSearchType === 0 || functionSearchType === null) { + return null; + } + let inputs, output; + if (typeof functionSearchType[INPUTS_DATA] === "number") { + const pathIndex = functionSearchType[INPUTS_DATA]; + inputs = [{ + name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name, + ty: pathIndex === 0 ? 
null : lowercasePaths[pathIndex - 1].ty, + generics: [], + }]; + } else { + inputs = buildItemSearchTypeAll(functionSearchType[INPUTS_DATA], lowercasePaths); + } + if (functionSearchType.length > 1) { + if (typeof functionSearchType[OUTPUT_DATA] === "number") { + const pathIndex = functionSearchType[OUTPUT_DATA]; + output = [{ + name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name, + ty: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].ty, + generics: [], + }]; + } else { + output = buildItemSearchTypeAll(functionSearchType[OUTPUT_DATA], lowercasePaths); + } + } else { + output = null; + } + return { + inputs, output, + }; + } + function buildIndex(rawSearchIndex) { searchIndex = []; /** @@ -1862,14 +1910,22 @@ function initSearch(rawSearchIndex) { * q[i] contains the full path of the item, or an empty string indicating * "same as q[i-1]". * - * i[i], f[i] are a mystery. + * i[i] contains an item's parent, usually a module. For compactness, + * it is a set of indexes into the `p` array. + * + * f[i] contains function signatures, or `0` if the item isn't a function. + * Functions are themselves encoded as arrays. The first item is a list of + * types representing the function's inputs, and the second list item is a list + * of types representing the function's output. Tuples are flattened. + * Types are also represented as arrays; the first item is an index into the `p` + * array, while the second is a list of types representing any generic parameters. * * `a` defines aliases with an Array of pairs: [name, offset], where `offset` * points into the n/t/d/q/i/f arrays. * * `doc` contains the description of the crate. * - * `p` is a mystery and isn't the same length as n/t/d/q/i/f. + * `p` is a list of path/type pairs. It is used for parents and function parameters. 
* * @type {{ * doc: string, @@ -1879,7 +1935,7 @@ function initSearch(rawSearchIndex) { * d: Array, * q: Array, * i: Array, - * f: Array>, + * f: Array<0 | Object>, * p: Array, * }} */ @@ -1923,9 +1979,14 @@ function initSearch(rawSearchIndex) { // [Number] index to items] const aliases = crateCorpus.a; + // an array of [{name: String, ty: Number}] + const lowercasePaths = []; + // convert `rawPaths` entries into object form + // generate normalizedPaths for function search mode let len = paths.length; for (i = 0; i < len; ++i) { + lowercasePaths.push({ty: paths[i][0], name: paths[i][1].toLowerCase()}); paths[i] = {ty: paths[i][0], name: paths[i][1]}; } @@ -1955,7 +2016,7 @@ function initSearch(rawSearchIndex) { path: itemPaths[i] ? itemPaths[i] : lastPath, desc: itemDescs[i], parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined, - type: itemFunctionSearchTypes[i], + type: buildFunctionSearchType(itemFunctionSearchTypes[i], lowercasePaths), id: id, normalizedName: word.indexOf("_") === -1 ? 
word : word.replace(/_/g, ""), }; diff --git a/src/librustdoc/json/conversions.rs b/src/librustdoc/json/conversions.rs index c627dcc30d667..232c82087e5d0 100644 --- a/src/librustdoc/json/conversions.rs +++ b/src/librustdoc/json/conversions.rs @@ -753,7 +753,6 @@ impl FromWithTcx for ItemKind { TraitAlias => ItemKind::TraitAlias, ProcAttribute => ItemKind::ProcAttribute, ProcDerive => ItemKind::ProcDerive, - Generic => unreachable!(), } } } From 5deb396630f2cdaad85f94b558f34a236d284b94 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Fri, 24 Jun 2022 18:45:49 -0700 Subject: [PATCH 2/6] Fix rustdoc under `#[no_core]` --- src/librustdoc/html/render/search_index.rs | 60 ++++++++++++---------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index 6520dfc8ab1a9..f9192a5e64848 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -3,12 +3,14 @@ use std::collections::BTreeMap; use rustc_data_structures::fx::FxHashMap; use rustc_middle::ty::TyCtxt; -use rustc_span::def_id::DefId; +use rustc_span::def_id::LOCAL_CRATE; use rustc_span::symbol::Symbol; use serde::ser::{Serialize, SerializeStruct, Serializer}; use crate::clean; -use crate::clean::types::{FnRetTy, Function, GenericBound, Generics, Type, WherePredicate}; +use crate::clean::types::{ + FnRetTy, Function, GenericBound, Generics, ItemId, Type, WherePredicate, +}; use crate::formats::cache::{Cache, OrphanImplItem}; use crate::formats::item_type::ItemType; use crate::html::format::join_with_double_colon; @@ -21,7 +23,7 @@ pub(crate) fn build_index<'tcx>( cache: &mut Cache, tcx: TyCtxt<'tcx>, ) -> String { - let mut defid_to_pathid = FxHashMap::default(); + let mut itemid_to_pathid = FxHashMap::default(); let mut crate_paths = vec![]; // Attach all orphan items to the type's definition if the type @@ -79,13 +81,13 @@ pub(crate) fn build_index<'tcx>( fn 
convert_render_type( ty: &mut RenderType, cache: &mut Cache, - defid_to_pathid: &mut FxHashMap, + itemid_to_pathid: &mut FxHashMap, lastpathid: &mut usize, crate_paths: &mut Vec<(ItemType, Symbol)>, ) { if let Some(generics) = &mut ty.generics { for item in generics { - convert_render_type(item, cache, defid_to_pathid, lastpathid, crate_paths); + convert_render_type(item, cache, itemid_to_pathid, lastpathid, crate_paths); } } let Cache { ref paths, ref external_paths, .. } = *cache; @@ -93,24 +95,25 @@ pub(crate) fn build_index<'tcx>( assert!(ty.generics.is_some()); return; }; - let (defid, path, item_type) = match id { + let (itemid, path, item_type) = match id { RenderTypeId::DefId(defid) => { if let Some(&(ref fqp, item_type)) = paths.get(&defid).or_else(|| external_paths.get(&defid)) { - (defid, *fqp.last().unwrap(), item_type) + (ItemId::DefId(defid), *fqp.last().unwrap(), item_type) } else { ty.id = None; return; } } - RenderTypeId::Primitive(primitive) => { - let defid = *cache.primitive_locations.get(&primitive).unwrap(); - (defid, primitive.as_sym(), ItemType::Primitive) - } + RenderTypeId::Primitive(primitive) => ( + ItemId::Primitive(primitive, LOCAL_CRATE), + primitive.as_sym(), + ItemType::Primitive, + ), RenderTypeId::Index(_) => return, }; - match defid_to_pathid.entry(defid) { + match itemid_to_pathid.entry(itemid) { Entry::Occupied(entry) => ty.id = Some(RenderTypeId::Index(*entry.get())), Entry::Vacant(entry) => { let pathid = *lastpathid; @@ -126,7 +129,7 @@ pub(crate) fn build_index<'tcx>( convert_render_type( item, cache, - &mut defid_to_pathid, + &mut itemid_to_pathid, &mut lastpathid, &mut crate_paths, ); @@ -135,7 +138,7 @@ pub(crate) fn build_index<'tcx>( convert_render_type( item, cache, - &mut defid_to_pathid, + &mut itemid_to_pathid, &mut lastpathid, &mut crate_paths, ); @@ -149,21 +152,22 @@ pub(crate) fn build_index<'tcx>( let crate_items: Vec<&IndexItem> = search_index .iter_mut() .map(|item| { - item.parent_idx = 
item.parent.and_then(|defid| match defid_to_pathid.entry(defid) { - Entry::Occupied(entry) => Some(*entry.get()), - Entry::Vacant(entry) => { - let pathid = lastpathid; - entry.insert(pathid); - lastpathid += 1; - - if let Some(&(ref fqp, short)) = paths.get(&defid) { - crate_paths.push((short, *fqp.last().unwrap())); - Some(pathid) - } else { - None + item.parent_idx = + item.parent.and_then(|defid| match itemid_to_pathid.entry(ItemId::DefId(defid)) { + Entry::Occupied(entry) => Some(*entry.get()), + Entry::Vacant(entry) => { + let pathid = lastpathid; + entry.insert(pathid); + lastpathid += 1; + + if let Some(&(ref fqp, short)) = paths.get(&defid) { + crate_paths.push((short, *fqp.last().unwrap())); + Some(pathid) + } else { + None + } } - } - }); + }); // Omit the parent path if it is same to that of the prior item. if lastpath == &item.path { From 8081096a7f9b84fe780f4426d70f4c5bb767eba8 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Mon, 27 Jun 2022 11:07:16 -0700 Subject: [PATCH 3/6] Add documentation --- src/librustdoc/html/static/js/externs.js | 53 ++++++++++++++++++++++++ src/librustdoc/html/static/js/search.js | 35 +++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js index defdc20132e67..141d76d59e172 100644 --- a/src/librustdoc/html/static/js/externs.js +++ b/src/librustdoc/html/static/js/externs.js @@ -81,3 +81,56 @@ let ResultsTable; * }} */ let Results; + +/** + * A pair of [inputs, outputs], or 0 for null. This is gets stored in the search index. + * The JavaScript deserializes this into FunctionSearchType. + * + * An input or output can be encoded as just a number if there is only one of them, AND + * it has no generics. 
The no generics rule exists to avoid ambiguity: imagine if you had + * a function with a single output, and that output had a single generic: + * + * fn something() -> Result + * + * If output was allowed to be any RawFunctionType, it would look like this + * + * [[], [50, [3, 3]]] + * + * The problem is that the above output could be interpreted as either a type with ID 50 and two + * generics, or it could be interpreted as a pair of types, the first one with ID 50 and the second + * with ID 3 and a single generic parameter that is also ID 3. We avoid this ambiguity by choosing + * in favor of the pair of types interpretation. This is why the `(number|Array)` + * is used instead of `(RawFunctionType|Array)`. + * + * @typedef {( + * 0 | + * [(number|Array)] | + * [(number|Array), (number|Array)] + * )} + */ +let RawFunctionSearchType; + +/** + * A single function input or output type. This is either a single path ID, or a pair of + * [path ID, generics]. + * + * @typedef {number | [number, Array]} + */ +let RawFunctionType; + +/** + * @typedef {{ + * inputs: Array, + * outputs: Array, + * }} + */ +let FunctionSearchType; + +/** + * @typedef {{ + * name: (null|string), + * ty: (null|number), + * generics: Array, + * }} + */ +let FunctionType; diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 54057627c92b6..a766dd68e107c 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1825,6 +1825,24 @@ function initSearch(rawSearchIndex) { filterCrates); } + /** + * Convert a list of RawFunctionType / ID to object-based FunctionType. + * + * Crates often have lots of functions in them, and it's common to have a large number of + * functions that operate on a small set of data types, so the search index compresses them + * by encoding function parameter and return types as indexes into an array of names. 
+ * + * Even when a general-purpose compression algorithm is used, this is still a win. I checked. + * https://github.com/rust-lang/rust/pull/98475#issue-1284395985 + * + * The format for individual function types is encoded in + * librustdoc/html/render/mod.rs: impl Serialize for RenderType + * + * @param {null|Array} types + * @param {Array<{name: string, ty: number}>} lowercasePaths + * + * @return {Array} + */ function buildItemSearchTypeAll(types, lowercasePaths) { const PATH_INDEX_DATA = 0; const GENERICS_DATA = 1; @@ -1848,6 +1866,21 @@ function initSearch(rawSearchIndex) { }); } + /** + * Convert from RawFunctionSearchType to FunctionSearchType. + * + * Crates often have lots of functions in them, and function signatures are sometimes complex, + * so rustdoc uses a pretty tight encoding for them. This function converts it to a simpler, + * object-based encoding so that the actual search code is more readable and easier to debug. + * + * The raw function search type format is generated using serde in + * librustdoc/html/render/mod.rs: impl Serialize for IndexItemFunctionType + * + * @param {RawFunctionSearchType} functionSearchType + * @param {Array<{name: string, ty: number}>} lowercasePaths + * + * @return {null|FunctionSearchType} + */ function buildFunctionSearchType(functionSearchType, lowercasePaths) { const INPUTS_DATA = 0; const OUTPUT_DATA = 1; @@ -1935,7 +1968,7 @@ function initSearch(rawSearchIndex) { * d: Array, * q: Array, * i: Array, - * f: Array<0 | Object>, + * f: Array, * p: Array, * }} */ From 6f9a460fef36c427f477db4c3c2712267408fcff Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Mon, 27 Jun 2022 11:07:45 -0700 Subject: [PATCH 4/6] Put back, fix ()-output optimization --- src/librustdoc/html/render/mod.rs | 1 + src/librustdoc/html/static/js/search.js | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/librustdoc/html/render/mod.rs b/src/librustdoc/html/render/mod.rs index b01afa02b8b94..1ef41d62e5eb0 100644 --- 
a/src/librustdoc/html/render/mod.rs +++ b/src/librustdoc/html/render/mod.rs @@ -170,6 +170,7 @@ impl Serialize for IndexItemFunctionType { _ => seq.serialize_element(&self.inputs)?, } match &self.output[..] { + [] => {} [one] if one.generics.is_none() => seq.serialize_element(one)?, _ => seq.serialize_element(&self.output)?, } diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index a766dd68e107c..46fab7ee57bc6 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1910,7 +1910,7 @@ function initSearch(rawSearchIndex) { output = buildItemSearchTypeAll(functionSearchType[OUTPUT_DATA], lowercasePaths); } } else { - output = null; + output = []; } return { inputs, output, From b54e3e6c989a2b4b95d6405fccbb2ab2dc450f24 Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Mon, 27 Jun 2022 12:07:13 -0700 Subject: [PATCH 5/6] Update src/librustdoc/html/static/js/externs.js Co-authored-by: Guillaume Gomez --- src/librustdoc/html/static/js/externs.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js index 141d76d59e172..23947d002ab8d 100644 --- a/src/librustdoc/html/static/js/externs.js +++ b/src/librustdoc/html/static/js/externs.js @@ -83,7 +83,7 @@ let ResultsTable; let Results; /** - * A pair of [inputs, outputs], or 0 for null. This is gets stored in the search index. + * A pair of [inputs, outputs], or 0 for null. This is stored in the search index. * The JavaScript deserializes this into FunctionSearchType. 
* * An input or output can be encoded as just a number if there is only one of them, AND From 33cf9ea4a2aebb015e81071968659bd51218c5af Mon Sep 17 00:00:00 2001 From: Michael Howell Date: Mon, 27 Jun 2022 14:13:13 -0700 Subject: [PATCH 6/6] Add comments, fixes for `0` sentinel --- src/librustdoc/html/render/search_index.rs | 28 +++++++++++++++++++++- src/librustdoc/html/static/js/externs.js | 6 +++++ src/librustdoc/html/static/js/search.js | 7 +++--- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/librustdoc/html/render/search_index.rs b/src/librustdoc/html/render/search_index.rs index f9192a5e64848..d672f0bb5992b 100644 --- a/src/librustdoc/html/render/search_index.rs +++ b/src/librustdoc/html/render/search_index.rs @@ -234,7 +234,33 @@ pub(crate) fn build_index<'tcx>( )?; crate_data.serialize_field( "f", - &self.items.iter().map(|item| &item.search_type).collect::<Vec<_>>(), + &self + .items + .iter() + .map(|item| { + // Fake option to get `0` out as a sentinel instead of `null`. + // We want to use `0` because it's three less bytes. + enum FunctionOption<'a> { + Function(&'a IndexItemFunctionType), + None, + } + impl<'a> Serialize for FunctionOption<'a> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + match self { + FunctionOption::None => 0.serialize(serializer), + FunctionOption::Function(ty) => ty.serialize(serializer), + } + } + } + match &item.search_type { + Some(ty) => FunctionOption::Function(ty), + None => FunctionOption::None, + } + }) + .collect::<Vec<_>>(), )?; crate_data.serialize_field( "p", diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js index 23947d002ab8d..ecbe15a59da3c 100644 --- a/src/librustdoc/html/static/js/externs.js +++ b/src/librustdoc/html/static/js/externs.js @@ -86,6 +86,9 @@ let Results; * A pair of [inputs, outputs], or 0 for null. This is stored in the search index. * The JavaScript deserializes this into FunctionSearchType.
* + * Numeric IDs are *ONE-indexed* into the paths array (`p`). Zero is used as a sentinel for `null` + * because `null` is four bytes while `0` is one byte. + * * An input or output can be encoded as just a number if there is only one of them, AND * it has no generics. The no generics rule exists to avoid ambiguity: imagine if you had * a function with a single output, and that output had a single generic: @@ -114,6 +117,9 @@ let RawFunctionSearchType; * A single function input or output type. This is either a single path ID, or a pair of * [path ID, generics]. * + * Numeric IDs are *ONE-indexed* into the paths array (`p`). Zero is used as a sentinel for `null` + * because `null` is four bytes while `0` is one byte. + * * @typedef {number | [number, Array<RawFunctionType>]} */ let RawFunctionType; diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 46fab7ee57bc6..75c7bd45a2949 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -1846,9 +1846,6 @@ function initSearch(rawSearchIndex) { function buildItemSearchTypeAll(types, lowercasePaths) { const PATH_INDEX_DATA = 0; const GENERICS_DATA = 1; - if (types === null) { - return []; - } return types.map(type => { let pathIndex, generics; if (typeof type === "number") { @@ -1859,6 +1856,7 @@ function initSearch(rawSearchIndex) { generics = buildItemSearchTypeAll(type[GENERICS_DATA], lowercasePaths); } return { + // `0` is used as a sentinel because it's fewer bytes than `null` name: pathIndex === 0 ? null : lowercasePaths[pathIndex - 1].name, ty: pathIndex === 0 ?
null : lowercasePaths[pathIndex - 1].ty, generics: generics, @@ -1884,7 +1882,8 @@ function initSearch(rawSearchIndex) { function buildFunctionSearchType(functionSearchType, lowercasePaths) { const INPUTS_DATA = 0; const OUTPUT_DATA = 1; - if (functionSearchType === 0 || functionSearchType === null) { + // `0` is used as a sentinel because it's fewer bytes than `null` + if (functionSearchType === 0) { return null; } let inputs, output;