diff --git a/.ignore b/.ignore new file mode 100644 index 0000000000000..40d1513978fc6 --- /dev/null +++ b/.ignore @@ -0,0 +1,2 @@ +# Make vscode *not* count `config.toml` as ignored, so it is included in search +!/config.toml diff --git a/.reuse/dep5 b/.reuse/dep5 index 0e2650ff2c048..5706ea0b2046b 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -36,6 +36,7 @@ Files: compiler/* .gitignore .gitmodules .mailmap + .ignore Copyright: The Rust Project Developers (see https://thanks.rust-lang.org) License: MIT or Apache-2.0 diff --git a/Cargo.lock b/Cargo.lock index c7d2857b46365..96cef9070842e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3141,7 +3141,19 @@ dependencies = [ "bitflags 2.5.0", "getopts", "memchr", - "pulldown-cmark-escape", + "pulldown-cmark-escape 0.10.1", + "unicase", +] + +[[package]] +name = "pulldown-cmark" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0" +dependencies = [ + "bitflags 2.5.0", + "memchr", + "pulldown-cmark-escape 0.11.0", "unicase", ] @@ -3151,6 +3163,12 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3" +[[package]] +name = "pulldown-cmark-escape" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae" + [[package]] name = "pulldown-cmark-to-cmark" version = "13.0.0" @@ -4604,7 +4622,7 @@ name = "rustc_resolve" version = "0.0.0" dependencies = [ "bitflags 2.5.0", - "pulldown-cmark 0.9.6", + "pulldown-cmark 0.11.0", "rustc_arena", "rustc_ast", "rustc_ast_pretty", @@ -4760,8 +4778,6 @@ checksum = "8ba09476327c4b70ccefb6180f046ef588c26a24cf5d269a9feba316eb4f029f" name = "rustc_trait_selection" version = "0.0.0" dependencies = [ - "bitflags 2.5.0", - "derivative", "itertools", "rustc_ast", "rustc_ast_ir", @@ -4770,7 +4786,6 @@ dependencies = [ "rustc_errors", "rustc_fluent_macro", "rustc_hir", - "rustc_index", "rustc_infer", "rustc_macros", "rustc_middle", @@ -4783,7 +4798,6 @@ dependencies = [ "rustc_target", "rustc_transmute", "rustc_type_ir", - "rustc_type_ir_macros", "smallvec", "tracing", ] @@ -4887,6 +4901,7 @@ dependencies = [ "indexmap", "itertools", "minifier", + "pulldown-cmark 0.9.6", "regex", "rustdoc-json-types", "serde", diff --git a/compiler/rustc_resolve/Cargo.toml b/compiler/rustc_resolve/Cargo.toml index b6ae54010c242..b71853b871dc5 100644 --- a/compiler/rustc_resolve/Cargo.toml +++ b/compiler/rustc_resolve/Cargo.toml @@ -6,7 +6,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start bitflags = "2.4.1" -pulldown-cmark = { version = "0.9.6", default-features = false } +pulldown-cmark = { version = "0.11", features = ["html"], default-features = false } rustc_arena = { path = "../rustc_arena" } rustc_ast = { path = "../rustc_ast" } rustc_ast_pretty = { path = "../rustc_ast_pretty" } diff --git a/compiler/rustc_resolve/src/rustdoc.rs b/compiler/rustc_resolve/src/rustdoc.rs index 66b4981eb55ba..594608153211d 100644 --- a/compiler/rustc_resolve/src/rustdoc.rs +++ b/compiler/rustc_resolve/src/rustdoc.rs @@ -1,4 +1,6 @@ -use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag}; +use pulldown_cmark::{ + BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag, +}; use rustc_ast as ast; use rustc_ast::util::comments::beautify_doc_string; use 
rustc_data_structures::fx::FxHashMap; @@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { while let Some(event) = event_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ }) + if may_be_doc_link(link_type) => + { if matches!( link_type, LinkType::Inline @@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { } } - links.push(preprocess_link(&dest)); + links.push(preprocess_link(&dest_url)); } _ => {} } @@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec> { } /// Collects additional data of link. -fn collect_link_data<'input, 'callback>( - event_iter: &mut Parser<'input, 'callback>, +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + event_iter: &mut Parser<'input, F>, ) -> Option> { let mut display_text: Option = None; let mut append_text = |text: CowStr<'_>| { diff --git a/compiler/rustc_trait_selection/Cargo.toml b/compiler/rustc_trait_selection/Cargo.toml index 1f4fb57d996cc..f023a0eb53aeb 100644 --- a/compiler/rustc_trait_selection/Cargo.toml +++ b/compiler/rustc_trait_selection/Cargo.toml @@ -5,8 +5,6 @@ edition = "2021" [dependencies] # tidy-alphabetical-start -bitflags = "2.4.1" -derivative = "2.2.0" itertools = "0.12" rustc_ast = { path = "../rustc_ast" } rustc_ast_ir = { path = "../rustc_ast_ir" } @@ -15,7 +13,6 @@ rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } rustc_hir = { path = "../rustc_hir" } -rustc_index = { path = "../rustc_index" } rustc_infer = { path = "../rustc_infer" } rustc_macros = { path = "../rustc_macros" } rustc_middle = { path = "../rustc_middle" } @@ -28,7 +25,6 @@ rustc_span = { path = "../rustc_span" } rustc_target = { path = "../rustc_target" } rustc_transmute = { path = "../rustc_transmute", features = ["rustc"] } rustc_type_ir = { path = "../rustc_type_ir" } -rustc_type_ir_macros = { path = "../rustc_type_ir_macros" } smallvec = { version = "1.8.1", features = ["union", "may_dangle"] } tracing = "0.1" # tidy-alphabetical-end diff --git a/library/alloc/benches/str.rs b/library/alloc/benches/str.rs index c148ab6b220a5..92a48e0e6b5a6 100644 --- a/library/alloc/benches/str.rs +++ b/library/alloc/benches/str.rs @@ -347,3 +347,5 @@ make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count()); make_test!(split_space_str, s, s.split(" ").count()); make_test!(split_ad_str, s, s.split("ad").count()); + +make_test!(to_lowercase, s, s.to_lowercase()); diff --git a/library/alloc/src/str.rs b/library/alloc/src/str.rs index 3bb808a6c73ab..4be2c1b5ba618 100644 --- a/library/alloc/src/str.rs +++ b/library/alloc/src/str.rs @@ -10,6 +10,7 @@ use core::borrow::{Borrow, BorrowMut}; use core::iter::FusedIterator; use core::mem; +use core::mem::MaybeUninit; use core::ptr; use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; use core::unicode::conversions; @@ -367,14 +368,9 @@ impl str { without modifying the original"] #[stable(feature = "unicode_case_mapping", since = "1.2.0")] pub fn to_lowercase(&self) -> String { - let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_lowercase); + let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase); - // Safety: we know this is a valid char boundary since - // out.len() is only progressed if ascii bytes are found - let rest = unsafe { self.get_unchecked(out.len()..) 
}; - - // Safety: We have written only valid ASCII to our vec - let mut s = unsafe { String::from_utf8_unchecked(out) }; + let prefix_len = s.len(); for (i, c) in rest.char_indices() { if c == 'Σ' { @@ -383,8 +379,7 @@ impl str { // in `SpecialCasing.txt`, // so hard-code it rather than have a generic "condition" mechanism. // See https://github.com/rust-lang/rust/issues/26035 - let out_len = self.len() - rest.len(); - let sigma_lowercase = map_uppercase_sigma(&self, i + out_len); + let sigma_lowercase = map_uppercase_sigma(self, prefix_len + i); s.push(sigma_lowercase); } else { match conversions::to_lower(c) { @@ -460,14 +455,7 @@ impl str { without modifying the original"] #[stable(feature = "unicode_case_mapping", since = "1.2.0")] pub fn to_uppercase(&self) -> String { - let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_uppercase); - - // Safety: we know this is a valid char boundary since - // out.len() is only progressed if ascii bytes are found - let rest = unsafe { self.get_unchecked(out.len()..) }; - - // Safety: We have written only valid ASCII to our vec - let mut s = unsafe { String::from_utf8_unchecked(out) }; + let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_uppercase); for c in rest.chars() { match conversions::to_upper(c) { @@ -616,50 +604,83 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box { unsafe { Box::from_raw(Box::into_raw(v) as *mut str) } } -/// Converts the bytes while the bytes are still ascii. +/// Converts leading ascii bytes in `s` by calling the `convert` function. +/// /// For better average performance, this happens in chunks of `2*size_of::()`. -/// Returns a vec with the converted bytes. +/// +/// Returns a tuple of the converted prefix and the remainder starting from +/// the first non-ascii character. #[inline] #[cfg(not(test))] #[cfg(not(no_global_oom_handling))] -fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec { - let mut out = Vec::with_capacity(b.len()); +fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) { + // Process the input in chunks of 16 bytes to enable auto-vectorization. + // Previously the chunk size depended on the size of `usize`, + // but on 32-bit platforms with sse or neon is also the better choice. + // The only downside on other platforms would be a bit more loop-unrolling. + const N: usize = 16; + + let mut slice = s.as_bytes(); + let mut out = Vec::with_capacity(slice.len()); + let mut out_slice = out.spare_capacity_mut(); + + let mut ascii_prefix_len = 0_usize; + let mut is_ascii = [false; N]; + + while slice.len() >= N { + // Safety: checked in loop condition + let chunk = unsafe { slice.get_unchecked(..N) }; + // Safety: out_slice has at least same length as input slice and gets sliced with the same offsets + let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) }; + + for j in 0..N { + is_ascii[j] = chunk[j] <= 127; + } - const USIZE_SIZE: usize = mem::size_of::(); - const MAGIC_UNROLL: usize = 2; - const N: usize = USIZE_SIZE * MAGIC_UNROLL; - const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]); + // Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk + // size gives the best result, specifically a pmovmsk instruction on x86. + // There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should + // be updated when this method is changed. 
+ // See also https://github.com/llvm/llvm-project/issues/96395 + if is_ascii.iter().map(|x| *x as u8).sum::() as usize != N { + break; + } - let mut i = 0; - unsafe { - while i + N <= b.len() { - // Safety: we have checks the sizes `b` and `out` to know that our - let in_chunk = b.get_unchecked(i..i + N); - let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N); - - let mut bits = 0; - for j in 0..MAGIC_UNROLL { - // read the bytes 1 usize at a time (unaligned since we haven't checked the alignment) - // safety: in_chunk is valid bytes in the range - bits |= in_chunk.as_ptr().cast::().add(j).read_unaligned(); - } - // if our chunks aren't ascii, then return only the prior bytes as init - if bits & NONASCII_MASK != 0 { - break; - } + for j in 0..N { + out_chunk[j] = MaybeUninit::new(convert(&chunk[j])); + } - // perform the case conversions on N bytes (gets heavily autovec'd) - for j in 0..N { - // safety: in_chunk and out_chunk is valid bytes in the range - let out = out_chunk.get_unchecked_mut(j); - out.write(convert(in_chunk.get_unchecked(j))); - } + ascii_prefix_len += N; + slice = unsafe { slice.get_unchecked(N..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(N..) }; + } - // mark these bytes as initialised - i += N; + // handle the remainder as individual bytes + while slice.len() > 0 { + let byte = slice[0]; + if byte > 127 { + break; } - out.set_len(i); + // Safety: out_slice has same length as input slice and gets sliced with the same offsets + unsafe { + *out_slice.get_unchecked_mut(0) = MaybeUninit::new(convert(&byte)); + } + ascii_prefix_len += 1; + slice = unsafe { slice.get_unchecked(1..) }; + out_slice = unsafe { out_slice.get_unchecked_mut(1..) }; } - out + unsafe { + // SAFETY: ascii_prefix_len bytes have been initialized above + out.set_len(ascii_prefix_len); + + // SAFETY: We have written only valid ascii to the output vec + let ascii_string = String::from_utf8_unchecked(out); + + // SAFETY: we know this is a valid char boundary + // since we only skipped over leading ascii bytes + let rest = core::str::from_utf8_unchecked(slice); + + (ascii_string, rest) + } } diff --git a/library/alloc/tests/str.rs b/library/alloc/tests/str.rs index 0078f5eaa3d2b..103c400c49b06 100644 --- a/library/alloc/tests/str.rs +++ b/library/alloc/tests/str.rs @@ -1849,7 +1849,10 @@ fn to_lowercase() { assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α"); // https://github.com/rust-lang/rust/issues/124714 + // input lengths around the boundary of the chunk size used by the ascii prefix optimization + assert_eq!("abcdefghijklmnoΣ".to_lowercase(), "abcdefghijklmnoς"); assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς"); + assert_eq!("abcdefghijklmnopqΣ".to_lowercase(), "abcdefghijklmnopqς"); // a really long string that has it's lowercase form // even longer. this tests that implementations don't assume diff --git a/library/core/src/any.rs b/library/core/src/any.rs index 37cb8e7d303af..eab11ae288a95 100644 --- a/library/core/src/any.rs +++ b/library/core/src/any.rs @@ -602,7 +602,7 @@ impl dyn Any + Send + Sync { /// While `TypeId` implements `Hash`, `PartialOrd`, and `Ord`, it is worth /// noting that the hashes and ordering will vary between Rust releases. Beware /// of relying on them inside of your code! -#[derive(Clone, Copy, Debug, Eq, PartialOrd, Ord)] +#[derive(Clone, Copy, Eq, PartialOrd, Ord)] #[stable(feature = "rust1", since = "1.0.0")] pub struct TypeId { // We avoid using `u128` because that imposes higher alignment requirements on many platforms. 
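The `library/alloc/src/str.rs` hunks above change `convert_while_ascii` to return the already-converted ASCII prefix as a `String` together with the untouched remainder, rather than a bare `Vec<u8>` that the caller had to re-slice against. A minimal sketch of that contract, with a naive byte-wise split standing in for the chunked loop (`split_ascii_prefix` and `naive_to_uppercase` are illustrative names, not part of the patch):

```rust
// Naive stand-in for `convert_while_ascii`: split off the leading ASCII run,
// convert it cheaply, and hand back the rest for per-char Unicode handling.
fn split_ascii_prefix(s: &str) -> (String, &str) {
    let prefix_len = s.bytes().take_while(u8::is_ascii).count();
    // `prefix_len` is always a char boundary: ASCII bytes never occur inside
    // a multi-byte UTF-8 sequence.
    (s[..prefix_len].to_ascii_uppercase(), &s[prefix_len..])
}

fn naive_to_uppercase(s: &str) -> String {
    let (mut out, rest) = split_ascii_prefix(s);
    // Only the non-ASCII tail goes through the Unicode case-mapping tables,
    // which may expand a single char into several (e.g. 'ß' becomes "SS").
    out.extend(rest.chars().flat_map(char::to_uppercase));
    out
}

fn main() {
    assert_eq!(naive_to_uppercase("hello, wörld"), "HELLO, WÖRLD");
    assert_eq!(naive_to_uppercase("grüße"), "GRÜSSE");
}
```

The real implementation keeps this shape but converts the prefix in 16-byte chunks so the all-ASCII check autovectorizes, as exercised by the codegen test added further down.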
@@ -644,6 +644,10 @@ impl TypeId { let t2 = t as u64; TypeId { t: (t1, t2) } } + + fn as_u128(self) -> u128 { + u128::from(self.t.0) << 64 | u128::from(self.t.1) + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -666,6 +670,13 @@ impl hash::Hash for TypeId { } } +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for TypeId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_tuple("TypeId").field(&self.as_u128()).finish() + } +} + /// Returns the name of a type as a string slice. /// /// # Note diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index cfadfb0233ea7..128f1c818272b 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -34,12 +34,9 @@ //! Rust user code is to call the functions provided by this library instead (such as //! `ptr::copy`). //! -//! * `rust_begin_panic` - This function takes four arguments, a -//! `fmt::Arguments`, a `&'static str`, and two `u32`'s. These four arguments -//! dictate the panic message, the file at which panic was invoked, and the -//! line and column inside the file. It is up to consumers of this core +//! * Panic handler - This function takes one argument, a `&panic::PanicInfo`. It is up to consumers of this core //! library to define this panic function; it is only required to never -//! return. This requires a `lang` attribute named `panic_impl`. +//! return. You should mark your implementation using `#[panic_handler]`. //! //! * `rust_eh_personality` - is used by the failure mechanisms of the //! compiler. This is often mapped to GCC's personality function, but crates diff --git a/library/core/src/num/dec2flt/common.rs b/library/core/src/num/dec2flt/common.rs index 11a626485191c..c85727b493816 100644 --- a/library/core/src/num/dec2flt/common.rs +++ b/library/core/src/num/dec2flt/common.rs @@ -39,9 +39,7 @@ impl ByteSlice for [u8] { fn parse_digits(&self, mut func: impl FnMut(u8)) -> &Self { let mut s = self; - // FIXME: Can't use s.split_first() here yet, - // see https://github.com/rust-lang/rust/issues/109328 - while let [c, s_next @ ..] = s { + while let Some((c, s_next)) = s.split_first() { let c = c.wrapping_sub(b'0'); if c < 10 { func(c); diff --git a/library/core/src/num/dec2flt/parse.rs b/library/core/src/num/dec2flt/parse.rs index b0a23835c5bd4..975bb8ad6bc1f 100644 --- a/library/core/src/num/dec2flt/parse.rs +++ b/library/core/src/num/dec2flt/parse.rs @@ -51,9 +51,7 @@ fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) { let mut s = *s_ref; while *x < MIN_19DIGIT_INT { - // FIXME: Can't use s.split_first() here yet, - // see https://github.com/rust-lang/rust/issues/109328 - if let [c, s_next @ ..] 
= s { + if let Some((c, s_next)) = s.split_first() { let digit = c.wrapping_sub(b'0'); if digit < 10 { diff --git a/src/librustdoc/Cargo.toml b/src/librustdoc/Cargo.toml index 31222f213d800..51fb126cb3407 100644 --- a/src/librustdoc/Cargo.toml +++ b/src/librustdoc/Cargo.toml @@ -13,6 +13,7 @@ base64 = "0.21.7" itertools = "0.12" indexmap = "2" minifier = "0.3.0" +pulldown-cmark-old = { version = "0.9.6", package = "pulldown-cmark", default-features = false } regex = "1" rustdoc-json-types = { path = "../rustdoc-json-types" } serde_json = "1.0" diff --git a/src/librustdoc/html/markdown.rs b/src/librustdoc/html/markdown.rs index bae929c64eab2..a7f0df5afa98f 100644 --- a/src/librustdoc/html/markdown.rs +++ b/src/librustdoc/html/markdown.rs @@ -54,7 +54,8 @@ use crate::html::render::small_url_encode; use crate::html::toc::TocBuilder; use pulldown_cmark::{ - html, BrokenLink, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, Options, Parser, Tag, + html, BrokenLink, BrokenLinkCallback, CodeBlockKind, CowStr, Event, LinkType, OffsetIter, + Options, Parser, Tag, TagEnd, }; #[cfg(test)] @@ -230,7 +231,7 @@ impl<'a, I: Iterator>> Iterator for CodeBlocks<'_, 'a, I> { let mut original_text = String::new(); for event in &mut self.inner { match event { - Event::End(Tag::CodeBlock(..)) => break, + Event::End(TagEnd::CodeBlock) => break, Event::Text(ref s) => { original_text.push_str(s); } @@ -359,16 +360,17 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { match &mut event { // This is a shortcut link that was resolved by the broken_link_callback: `[fn@f]` // Remove any disambiguator. - Some(Event::Start(Tag::Link( + Some(Event::Start(Tag::Link { // [fn@f] or [fn@f][] - LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, - dest, + link_type: LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, + dest_url, title, - ))) => { - debug!("saw start of shortcut link to {dest} with title {title}"); + .. + })) => { + debug!("saw start of shortcut link to {dest_url} with title {title}"); // If this is a shortcut link, it was resolved by the broken_link_callback. // So the URL will already be updated properly. - let link = self.links.iter().find(|&link| *link.href == **dest); + let link = self.links.iter().find(|&link| *link.href == **dest_url); // Since this is an external iterator, we can't replace the inner text just yet. // Store that we saw a link so we know to replace it later. if let Some(link) = link { @@ -381,16 +383,9 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { } } // Now that we're done with the shortcut link, don't replace any more text. - Some(Event::End(Tag::Link( - LinkType::ShortcutUnknown | LinkType::CollapsedUnknown, - dest, - _, - ))) => { - debug!("saw end of shortcut link to {dest}"); - if self.links.iter().any(|link| *link.href == **dest) { - assert!(self.shortcut_link.is_some(), "saw closing link without opening tag"); - self.shortcut_link = None; - } + Some(Event::End(TagEnd::Link)) if self.shortcut_link.is_some() => { + debug!("saw end of shortcut link"); + self.shortcut_link = None; } // Handle backticks in inline code blocks, but only if we're in the middle of a shortcut link. // [`fn@f`] @@ -433,9 +428,11 @@ impl<'a, I: Iterator>> Iterator for LinkReplacer<'a, I> { } // If this is a link, but not a shortcut link, // replace the URL, since the broken_link_callback was not called. 
- Some(Event::Start(Tag::Link(_, dest, title))) => { - if let Some(link) = self.links.iter().find(|&link| *link.original_text == **dest) { - *dest = CowStr::Borrowed(link.href.as_ref()); + Some(Event::Start(Tag::Link { dest_url, title, .. })) => { + if let Some(link) = + self.links.iter().find(|&link| *link.original_text == **dest_url) + { + *dest_url = CowStr::Borrowed(link.href.as_ref()); if title.is_empty() && !link.tooltip.is_empty() { *title = CowStr::Borrowed(link.tooltip.as_ref()); } @@ -477,9 +474,9 @@ impl<'a, I: Iterator>> Iterator for TableWrapper<'a, I> { self.stored_events.push_back(Event::Start(Tag::Table(t))); Event::Html(CowStr::Borrowed("
")) } - Event::End(Tag::Table(t)) => { + Event::End(TagEnd::Table) => { self.stored_events.push_back(Event::Html(CowStr::Borrowed("
"))); - Event::End(Tag::Table(t)) + Event::End(TagEnd::Table) } e => e, }) @@ -519,11 +516,11 @@ impl<'a, 'b, 'ids, I: Iterator>> Iterator } let event = self.inner.next(); - if let Some((Event::Start(Tag::Heading(level, _, _)), _)) = event { + if let Some((Event::Start(Tag::Heading { level, .. }), _)) = event { let mut id = String::new(); for event in &mut self.inner { match &event.0 { - Event::End(Tag::Heading(..)) => break, + Event::End(TagEnd::Heading(_)) => break, Event::Text(text) | Event::Code(text) => { id.extend(text.chars().filter_map(slugify)); self.buf.push_back(event); @@ -566,27 +563,27 @@ impl<'a, I: Iterator>> SummaryLine<'a, I> { } } -fn check_if_allowed_tag(t: &Tag<'_>) -> bool { +fn check_if_allowed_tag(t: &TagEnd) -> bool { matches!( t, - Tag::Paragraph - | Tag::Emphasis - | Tag::Strong - | Tag::Strikethrough - | Tag::Link(..) - | Tag::BlockQuote + TagEnd::Paragraph + | TagEnd::Emphasis + | TagEnd::Strong + | TagEnd::Strikethrough + | TagEnd::Link + | TagEnd::BlockQuote ) } -fn is_forbidden_tag(t: &Tag<'_>) -> bool { +fn is_forbidden_tag(t: &TagEnd) -> bool { matches!( t, - Tag::CodeBlock(_) - | Tag::Table(_) - | Tag::TableHead - | Tag::TableRow - | Tag::TableCell - | Tag::FootnoteDefinition(_) + TagEnd::CodeBlock + | TagEnd::Table + | TagEnd::TableHead + | TagEnd::TableRow + | TagEnd::TableCell + | TagEnd::FootnoteDefinition ) } @@ -604,12 +601,12 @@ impl<'a, I: Iterator>> Iterator for SummaryLine<'a, I> { let mut is_start = true; let is_allowed_tag = match event { Event::Start(ref c) => { - if is_forbidden_tag(c) { + if is_forbidden_tag(&c.to_end()) { self.skipped_tags += 1; return None; } self.depth += 1; - check_if_allowed_tag(c) + check_if_allowed_tag(&c.to_end()) } Event::End(ref c) => { if is_forbidden_tag(c) { @@ -633,7 +630,7 @@ impl<'a, I: Iterator>> Iterator for SummaryLine<'a, I> { if is_start { Some(Event::Start(Tag::Paragraph)) } else { - Some(Event::End(Tag::Paragraph)) + Some(Event::End(TagEnd::Paragraph)) } } else { Some(event) @@ -679,7 +676,7 @@ impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => { let mut content = Vec::new(); for (event, _) in &mut self.inner { - if let Event::End(Tag::FootnoteDefinition(..)) = event { + if let Event::End(TagEnd::FootnoteDefinition) = event { break; } content.push(event); @@ -696,7 +693,7 @@ impl<'a, I: Iterator>> Iterator for Footnotes<'a, I> { for (mut content, id) in v { write!(ret, "
  • ").unwrap(); let mut is_paragraph = false; - if let Some(&Event::End(Tag::Paragraph)) = content.last() { + if let Some(&Event::End(TagEnd::Paragraph)) = content.last() { content.pop(); is_paragraph = true; } @@ -806,7 +803,7 @@ pub(crate) fn find_codes( tests.visit_test(text, block_info, line); prev_offset = offset.start; } - Event::Start(Tag::Heading(level, _, _)) => { + Event::Start(Tag::Heading { level, .. }) => { register_header = Some(level as u32); } Event::Text(ref s) if register_header.is_some() => { @@ -1432,7 +1429,7 @@ impl MarkdownItemInfo<'_> { // Treat inline HTML as plain text. let p = p.map(|event| match event.0 { - Event::Html(text) => (Event::Text(text), event.1), + Event::Html(text) | Event::InlineHtml(text) => (Event::Text(text), event.1), _ => event, }); @@ -1442,7 +1439,7 @@ impl MarkdownItemInfo<'_> { let p = Footnotes::new(p); let p = TableWrapper::new(p.map(|(ev, _)| ev)); let p = p.filter(|event| { - !matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) + !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) }); html::push_html(&mut s, p); @@ -1472,7 +1469,7 @@ impl MarkdownSummaryLine<'_> { let mut s = String::new(); let without_paragraphs = LinkReplacer::new(&mut summary, links).filter(|event| { - !matches!(event, Event::Start(Tag::Paragraph) | Event::End(Tag::Paragraph)) + !matches!(event, Event::Start(Tag::Paragraph) | Event::End(TagEnd::Paragraph)) }); html::push_html(&mut s, without_paragraphs); @@ -1544,8 +1541,8 @@ fn markdown_summary_with_limit( _ => {} }, Event::End(tag) => match tag { - Tag::Emphasis | Tag::Strong => buf.close_tag(), - Tag::Paragraph | Tag::Heading(..) => return ControlFlow::Break(()), + TagEnd::Emphasis | TagEnd::Strong => buf.close_tag(), + TagEnd::Paragraph | TagEnd::Heading(_) => return ControlFlow::Break(()), _ => {} }, Event::HardBreak | Event::SoftBreak => buf.push(" ")?, @@ -1605,8 +1602,8 @@ pub(crate) fn plain_text_summary(md: &str, link_names: &[RenderedLink]) -> Strin } Event::HardBreak | Event::SoftBreak => s.push(' '), Event::Start(Tag::CodeBlock(..)) => break, - Event::End(Tag::Paragraph) => break, - Event::End(Tag::Heading(..)) => break, + Event::End(TagEnd::Paragraph) => break, + Event::End(TagEnd::Heading(..)) => break, _ => (), } } @@ -1765,7 +1762,7 @@ pub(crate) fn markdown_links<'md, R>( while let Some((event, span)) = event_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => { + Event::Start(Tag::Link { link_type, dest_url, .. }) if may_be_doc_link(link_type) => { let range = match link_type { // Link is pulled from the link itself. LinkType::ReferenceUnknown | LinkType::ShortcutUnknown => { @@ -1775,7 +1772,7 @@ pub(crate) fn markdown_links<'md, R>( LinkType::Inline => span_for_offset_backward(span, b'(', b')'), // Link is pulled from elsewhere in the document. LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut => { - span_for_link(&dest, span) + span_for_link(&dest_url, span) } LinkType::Autolink | LinkType::Email => unreachable!(), }; @@ -1795,7 +1792,7 @@ pub(crate) fn markdown_links<'md, R>( if let Some(link) = preprocess_link(MarkdownLink { kind: link_type, - link: dest.into_string(), + link: dest_url.into_string(), display_text, range, }) { @@ -1810,8 +1807,8 @@ pub(crate) fn markdown_links<'md, R>( } /// Collects additional data of link. 
-fn collect_link_data<'input, 'callback>( - event_iter: &mut OffsetIter<'input, 'callback>, +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + event_iter: &mut OffsetIter<'input, F>, ) -> Option { let mut display_text: Option = None; let mut append_text = |text: CowStr<'_>| { diff --git a/src/librustdoc/lint.rs b/src/librustdoc/lint.rs index dd2bb47e5926b..8eaca70eaff48 100644 --- a/src/librustdoc/lint.rs +++ b/src/librustdoc/lint.rs @@ -196,6 +196,14 @@ declare_rustdoc_lint! { "detects redundant explicit links in doc comments" } +declare_rustdoc_lint! { + /// This compatibility lint checks for Markdown syntax that works in the old engine but not + /// the new one. + UNPORTABLE_MARKDOWN, + Warn, + "detects markdown that is interpreted differently in different parser" +} + pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { vec![ BROKEN_INTRA_DOC_LINKS, @@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy> = Lazy::new(|| { MISSING_CRATE_LEVEL_DOCS, UNESCAPED_BACKTICKS, REDUNDANT_EXPLICIT_LINKS, + UNPORTABLE_MARKDOWN, ] }); diff --git a/src/librustdoc/passes/lint.rs b/src/librustdoc/passes/lint.rs index c6d5b7bd346d4..bc804a340bf2c 100644 --- a/src/librustdoc/passes/lint.rs +++ b/src/librustdoc/passes/lint.rs @@ -6,6 +6,7 @@ mod check_code_block_syntax; mod html_tags; mod redundant_explicit_links; mod unescaped_backticks; +mod unportable_markdown; use super::Pass; use crate::clean::*; @@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> { html_tags::visit_item(self.cx, item); unescaped_backticks::visit_item(self.cx, item); redundant_explicit_links::visit_item(self.cx, item); + unportable_markdown::visit_item(self.cx, item); self.visit_item_recur(item) } diff --git a/src/librustdoc/passes/lint/bare_urls.rs b/src/librustdoc/passes/lint/bare_urls.rs index 8f68f6ff4764a..4b2d3092837e1 100644 --- a/src/librustdoc/passes/lint/bare_urls.rs +++ b/src/librustdoc/passes/lint/bare_urls.rs @@ -42,11 +42,11 @@ pub(super) fn visit_item(cx: &DocContext<'_>, item: &Item) { match event { Event::Text(s) => find_raw_urls(cx, &s, range, &report_diag), // We don't want to check the text inside code blocks or links. - Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link(..))) => { + Event::Start(tag @ (Tag::CodeBlock(_) | Tag::Link { .. 
})) => { while let Some((event, _)) = p.next() { match event { Event::End(end) - if mem::discriminant(&end) == mem::discriminant(&tag) => + if mem::discriminant(&end) == mem::discriminant(&tag.to_end()) => { break; } diff --git a/src/librustdoc/passes/lint/html_tags.rs b/src/librustdoc/passes/lint/html_tags.rs index a0064a9011254..87dfa5d5389d7 100644 --- a/src/librustdoc/passes/lint/html_tags.rs +++ b/src/librustdoc/passes/lint/html_tags.rs @@ -4,7 +4,7 @@ use crate::clean::*; use crate::core::DocContext; use crate::html::markdown::main_body_opts; -use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag}; +use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd}; use rustc_resolve::rustdoc::source_span_for_markdown_range; use std::iter::Peekable; @@ -140,10 +140,10 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { for (event, range) in p { match event { Event::Start(Tag::CodeBlock(_)) => in_code_block = true, - Event::Html(text) if !in_code_block => { + Event::Html(text) | Event::InlineHtml(text) if !in_code_block => { extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag) } - Event::End(Tag::CodeBlock(_)) => in_code_block = false, + Event::End(TagEnd::CodeBlock) => in_code_block = false, _ => {} } } diff --git a/src/librustdoc/passes/lint/redundant_explicit_links.rs b/src/librustdoc/passes/lint/redundant_explicit_links.rs index 7ab974046b9c7..b36b41c9f2d2e 100644 --- a/src/librustdoc/passes/lint/redundant_explicit_links.rs +++ b/src/librustdoc/passes/lint/redundant_explicit_links.rs @@ -1,6 +1,8 @@ use std::ops::Range; -use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, OffsetIter, Parser, Tag}; +use pulldown_cmark::{ + BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, OffsetIter, Parser, Tag, +}; use rustc_ast::NodeId; use rustc_errors::SuggestionStyle; use rustc_hir::def::{DefKind, DocLinkResMap, Namespace, Res}; @@ -95,7 +97,7 @@ fn check_redundant_explicit_link<'md>( while let Some((event, link_range)) = offset_iter.next() { match event { - Event::Start(Tag::Link(link_type, dest, _)) => { + Event::Start(Tag::Link { link_type, dest_url, .. }) => { let link_data = collect_link_data(&mut offset_iter); if let Some(resolvable_link) = link_data.resolvable_link.as_ref() { @@ -108,7 +110,7 @@ fn check_redundant_explicit_link<'md>( } } - let explicit_link = dest.to_string(); + let explicit_link = dest_url.to_string(); let display_link = link_data.resolvable_link.clone()?; if explicit_link.ends_with(&display_link) || display_link.ends_with(&explicit_link) @@ -122,7 +124,7 @@ fn check_redundant_explicit_link<'md>( doc, resolutions, link_range, - dest.to_string(), + dest_url.to_string(), link_data, if link_type == LinkType::Inline { (b'(', b')') @@ -139,7 +141,7 @@ fn check_redundant_explicit_link<'md>( doc, resolutions, link_range, - &dest, + &dest_url, link_data, ); } @@ -259,7 +261,9 @@ fn find_resolution(resolutions: &DocLinkResMap, path: &str) -> Option) -> LinkData { +fn collect_link_data<'input, F: BrokenLinkCallback<'input>>( + offset_iter: &mut OffsetIter<'input, F>, +) -> LinkData { let mut resolvable_link = None; let mut resolvable_link_range = None; let mut display_link = String::new(); diff --git a/src/librustdoc/passes/lint/unportable_markdown.rs b/src/librustdoc/passes/lint/unportable_markdown.rs new file mode 100644 index 0000000000000..1db0cc2db60ef --- /dev/null +++ b/src/librustdoc/passes/lint/unportable_markdown.rs @@ -0,0 +1,152 @@ +//! 
Detects specific markdown syntax that's different between pulldown-cmark +//! 0.9 and 0.11. +//! +//! This is a mitigation for old parser bugs that affected some +//! real crates' docs. The old parser claimed to comply with CommonMark, +//! but it did not. These warnings will eventually be removed, +//! though some of them may become Clippy lints. +//! +//! https://github.com/rust-lang/rust/pull/121659#issuecomment-1992752820 +//! +//! https://rustc-dev-guide.rust-lang.org/bug-fix-procedure.html#add-the-lint-to-the-list-of-removed-lists + +use crate::clean::Item; +use crate::core::DocContext; +use pulldown_cmark as cmarkn; +use pulldown_cmark_old as cmarko; +use rustc_lint_defs::Applicability; +use rustc_resolve::rustdoc::source_span_for_markdown_range; +use std::collections::{BTreeMap, BTreeSet}; + +pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { + let tcx = cx.tcx; + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { + // If non-local, no need to check anything. + return; + }; + + let dox = item.doc_value(); + if dox.is_empty() { + return; + } + + // P1: unintended strikethrough was fixed by requiring single-tildes to flank + // the same way underscores do, so nothing is done here + + // P2: block quotes without following space parsed wrong + // + // This is the set of starting points for block quotes with no space after + // the `>`. It is populated by the new parser, and if the old parser fails to + // clear it out, it'll produce a warning. + let mut spaceless_block_quotes = BTreeSet::new(); + + // P3: missing footnote references + // + // This is populated by listening for FootnoteReference from + // the new parser and old parser. + let mut missing_footnote_references = BTreeMap::new(); + let mut found_footnote_references = BTreeSet::new(); + + // populate problem cases from new parser + { + pub fn main_body_opts_new() -> cmarkn::Options { + cmarkn::Options::ENABLE_TABLES + | cmarkn::Options::ENABLE_FOOTNOTES + | cmarkn::Options::ENABLE_STRIKETHROUGH + | cmarkn::Options::ENABLE_TASKLISTS + | cmarkn::Options::ENABLE_SMART_PUNCTUATION + } + let mut parser_new = cmarkn::Parser::new_ext(&dox, main_body_opts_new()).into_offset_iter(); + while let Some((event, span)) = parser_new.next() { + if let cmarkn::Event::Start(cmarkn::Tag::BlockQuote(_)) = event { + if !dox[span.clone()].starts_with("> ") { + spaceless_block_quotes.insert(span.start); + } + } + if let cmarkn::Event::FootnoteReference(_) = event { + found_footnote_references.insert(span.start + 1); + } + } + } + + // remove cases where they don't actually differ + { + pub fn main_body_opts_old() -> cmarko::Options { + cmarko::Options::ENABLE_TABLES + | cmarko::Options::ENABLE_FOOTNOTES + | cmarko::Options::ENABLE_STRIKETHROUGH + | cmarko::Options::ENABLE_TASKLISTS + | cmarko::Options::ENABLE_SMART_PUNCTUATION + } + let mut parser_old = cmarko::Parser::new_ext(&dox, main_body_opts_old()).into_offset_iter(); + while let Some((event, span)) = parser_old.next() { + if let cmarko::Event::Start(cmarko::Tag::BlockQuote) = event { + if !dox[span.clone()].starts_with("> ") { + spaceless_block_quotes.remove(&span.start); + } + } + if let cmarko::Event::FootnoteReference(_) = event { + if !found_footnote_references.contains(&(span.start + 1)) { + missing_footnote_references.insert(span.start + 1, span); + } + } + } + } + + for start in spaceless_block_quotes { + let (span, precise) = + source_span_for_markdown_range(tcx, &dox, &(start..start + 1), &item.attrs.doc_strings) + .map(|span| (span, true)) + 
.unwrap_or_else(|| (item.attr_span(tcx), false)); + + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, |lint| { + lint.primary_message("unportable markdown"); + lint.help(format!("confusing block quote with no space after the `>` marker")); + if precise { + lint.span_suggestion( + span.shrink_to_hi(), + "if the quote is intended, add a space", + " ", + Applicability::MaybeIncorrect, + ); + lint.span_suggestion( + span.shrink_to_lo(), + "if it should not be a quote, escape it", + "\\", + Applicability::MaybeIncorrect, + ); + } + }); + } + for (_caret, span) in missing_footnote_references { + let (ref_span, precise) = + source_span_for_markdown_range(tcx, &dox, &span, &item.attrs.doc_strings) + .map(|span| (span, true)) + .unwrap_or_else(|| (item.attr_span(tcx), false)); + + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, ref_span, |lint| { + lint.primary_message("unportable markdown"); + if precise { + lint.span_suggestion( + ref_span.shrink_to_lo(), + "if it should not be a footnote, escape it", + "\\", + Applicability::MaybeIncorrect, + ); + } + if dox.as_bytes().get(span.end) == Some(&b'[') { + lint.help("confusing footnote reference and link"); + if precise { + lint.span_suggestion( + ref_span.shrink_to_hi(), + "if the footnote is intended, add a space", + " ", + Applicability::MaybeIncorrect, + ); + } else { + lint.help("there should be a space between the link and the footnote"); + } + } + }); + } +} diff --git a/src/tools/clippy/clippy_lints/src/doc/mod.rs b/src/tools/clippy/clippy_lints/src/doc/mod.rs index 3d875e7ac2d3f..3e210fd153bf5 100644 --- a/src/tools/clippy/clippy_lints/src/doc/mod.rs +++ b/src/tools/clippy/clippy_lints/src/doc/mod.rs @@ -6,10 +6,10 @@ use clippy_utils::ty::is_type_diagnostic_item; use clippy_utils::visitors::Visitable; use clippy_utils::{in_constant, is_entrypoint_fn, is_trait_impl_item, method_chain_args}; use pulldown_cmark::Event::{ - Code, End, FootnoteReference, HardBreak, Html, Rule, SoftBreak, Start, TaskListMarker, Text, + Code, DisplayMath, End, FootnoteReference, HardBreak, Html, InlineHtml, InlineMath, Rule, SoftBreak, Start, TaskListMarker, Text, }; use pulldown_cmark::Tag::{BlockQuote, CodeBlock, FootnoteDefinition, Heading, Item, Link, Paragraph}; -use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options}; +use pulldown_cmark::{BrokenLink, CodeBlockKind, CowStr, Options, TagEnd}; use rustc_ast::ast::Attribute; use rustc_data_structures::fx::FxHashSet; use rustc_hir::intravisit::{self, Visitor}; @@ -659,7 +659,7 @@ fn check_doc<'a, Events: Iterator, Range { + Html(tag) | InlineHtml(tag) => { if tag.starts_with(", Range { + Start(BlockQuote(_)) => { blockquote_level += 1; containers.push(Container::Blockquote); }, - End(BlockQuote) => { + End(TagEnd::BlockQuote) => { blockquote_level -= 1; containers.pop(); }, @@ -699,15 +699,15 @@ fn check_doc<'a, Events: Iterator, Range { + End(TagEnd::CodeBlock) => { in_code = false; is_rust = false; ignore = false; }, - Start(Link(_, url, _)) => in_link = Some(url), - End(Link(..)) => in_link = None, - Start(Heading(_, _, _) | Paragraph | Item) => { - if let Start(Heading(_, _, _)) = event { + Start(Link { dest_url, .. }) => in_link = Some(dest_url), + End(TagEnd::Link) => in_link = None, + Start(Heading { .. } | Paragraph | Item) => { + if let Start(Heading { .. 
}) = event { in_heading = true; } if let Start(Item) = event { @@ -720,11 +720,11 @@ fn check_doc<'a, Events: Iterator, Range { - if let End(Heading(_, _, _)) = event { + End(TagEnd::Heading(_) | TagEnd::Paragraph | TagEnd::Item) => { + if let End(TagEnd::Heading(_)) = event { in_heading = false; } - if let End(Item) = event { + if let End(TagEnd::Item) = event { containers.pop(); } if ticks_unbalanced && let Some(span) = fragments.span(cx, paragraph_range.clone()) { @@ -746,8 +746,8 @@ fn check_doc<'a, Events: Iterator, Range in_footnote_definition = true, - End(FootnoteDefinition(..)) => in_footnote_definition = false, - Start(_tag) | End(_tag) => (), // We don't care about other tags + End(TagEnd::FootnoteDefinition) => in_footnote_definition = false, + Start(_) | End(_) => (), // We don't care about other tags SoftBreak | HardBreak => { if !containers.is_empty() && let Some((next_event, next_range)) = events.peek() @@ -765,7 +765,7 @@ fn check_doc<'a, Events: Iterator, Range (), + TaskListMarker(_) | Code(_) | Rule | InlineMath(..) | DisplayMath(..) => (), FootnoteReference(text) | Text(text) => { paragraph_range.end = range.end; ticks_unbalanced |= text.contains('`') && !in_code; diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index aa119819aaa26..82fa43f581fde 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -335,6 +335,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[ "proc-macro2", "psm", "pulldown-cmark", + "pulldown-cmark-escape", "punycode", "quote", "r-efi", diff --git a/tests/codegen/issues/issue-123712-str-to-lower-autovectorization.rs b/tests/codegen/issues/issue-123712-str-to-lower-autovectorization.rs new file mode 100644 index 0000000000000..e490114bf07f5 --- /dev/null +++ b/tests/codegen/issues/issue-123712-str-to-lower-autovectorization.rs @@ -0,0 +1,46 @@ +//@ compile-flags: -Copt-level=3 +#![crate_type = "lib"] + +/// Ensure that the ascii-prefix loop for `str::to_lowercase` and `str::to_uppercase` uses vector +/// instructions. Since these methods do not get inlined, the relevant code is duplicated here and +/// should be updated when the implementation changes. +// CHECK-LABEL: @lower_while_ascii +// CHECK: [[A:%[0-9]]] = load <16 x i8> +// CHECK-NEXT: [[B:%[0-9]]] = icmp slt <16 x i8> [[A]], zeroinitializer +// CHECK-NEXT: [[C:%[0-9]]] = bitcast <16 x i1> [[B]] to i16 +#[no_mangle] +pub fn lower_while_ascii(mut input: &[u8], mut output: &mut [u8]) -> usize { + // Process the input in chunks to enable auto-vectorization. + const N: usize = 16; + + output = &mut output[..input.len()]; + + let mut ascii_prefix_len = 0_usize; + let mut is_ascii = [false; N]; + + while input.len() >= N { + let chunk = unsafe { input.get_unchecked(..N) }; + let out_chunk = unsafe { output.get_unchecked_mut(..N) }; + + for j in 0..N { + is_ascii[j] = chunk[j] <= 127; + } + + // auto-vectorization for this check is a bit fragile, + // sum and comparing against the chunk size gives the best result, + // specifically a pmovmsk instruction on x86. + if is_ascii.iter().map(|x| *x as u8).sum::() as usize != N { + break; + } + + for j in 0..N { + out_chunk[j] = chunk[j].to_ascii_lowercase(); + } + + ascii_prefix_len += N; + input = unsafe { input.get_unchecked(N..) }; + output = unsafe { output.get_unchecked_mut(N..) 
}; + } + + ascii_prefix_len +} diff --git a/tests/rustdoc-ui/unportable-markdown.rs b/tests/rustdoc-ui/unportable-markdown.rs new file mode 100644 index 0000000000000..8035e680f3cf4 --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.rs @@ -0,0 +1,63 @@ +// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929 +// +// A series of test cases for CommonMark corner cases that pulldown-cmark 0.11 fixes. +// +// This version of the lint is targeted at two especially-common cases where docs got broken. +// Other differences in parsing should not warn. +#![allow(rustdoc::broken_intra_doc_links)] +#![deny(rustdoc::unportable_markdown)] + +/// +/// +/// Test footnote [^foot]. +/// +/// [^foot]: This is nested within the footnote now, but didn't used to be. +/// +/// This is a multi-paragraph footnote. +pub struct GfmFootnotes; + +/// +/// +/// test [^foo][^bar] +//~^ ERROR unportable markdown +/// +/// [^foo]: test +/// [^bar]: test2 +pub struct FootnoteSmashedName; + +/// +/// +/// - _t +/// # test +/// t_ +pub struct NestingCornerCase; + +/// +/// +/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ +pub struct Emphasis1; + +/// +/// +/// | +/// | +pub struct NotEnoughTable; + +/// +/// +/// foo +/// >bar +//~^ ERROR unportable markdown +pub struct BlockQuoteNoSpace; + +/// Negative test. +/// +/// foo +/// > bar +pub struct BlockQuoteSpace; + +/// Negative test. +/// +/// >bar +/// baz +pub struct BlockQuoteNoSpaceStart; diff --git a/tests/rustdoc-ui/unportable-markdown.stderr b/tests/rustdoc-ui/unportable-markdown.stderr new file mode 100644 index 0000000000000..b524aca25aef9 --- /dev/null +++ b/tests/rustdoc-ui/unportable-markdown.stderr @@ -0,0 +1,39 @@ +error: unportable markdown + --> $DIR/unportable-markdown.rs:21:10 + | +LL | /// test [^foo][^bar] + | ^^^^^^ + | + = help: confusing footnote reference and link +note: the lint level is defined here + --> $DIR/unportable-markdown.rs:8:9 + | +LL | #![deny(rustdoc::unportable_markdown)] + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +help: if it should not be a footnote, escape it + | +LL | /// test \[^foo][^bar] + | + +help: if the footnote is intended, add a space + | +LL | /// test [^foo] [^bar] + | + + +error: unportable markdown + --> $DIR/unportable-markdown.rs:49:5 + | +LL | /// >bar + | ^ + | + = help: confusing block quote with no space after the `>` marker +help: if the quote is intended, add a space + | +LL | /// > bar + | + +help: if it should not be a quote, escape it + | +LL | /// \>bar + | + + +error: aborting due to 2 previous errors +