From 7cb6912f466bf113336c27b6ee4c73165f265105 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sat, 7 Mar 2026 21:15:11 +1100 Subject: [PATCH 1/9] Remove a flaky `got_timeout` assert from two channel tests In CI, the receiver thread can be descheduled for a surprisingly long time, so there's no guarantee that a timeout actually occurs. --- library/std/tests/sync/mpmc.rs | 3 --- library/std/tests/sync/mpsc.rs | 3 --- 2 files changed, 6 deletions(-) diff --git a/library/std/tests/sync/mpmc.rs b/library/std/tests/sync/mpmc.rs index bf80ab96a88bd..db221ff15890e 100644 --- a/library/std/tests/sync/mpmc.rs +++ b/library/std/tests/sync/mpmc.rs @@ -475,7 +475,6 @@ fn stress_recv_timeout_two_threads() { }); let mut recv_count = 0; - let mut got_timeout = false; loop { match rx.recv_timeout(timeout) { Ok(n) => { @@ -483,7 +482,6 @@ fn stress_recv_timeout_two_threads() { recv_count += 1; } Err(RecvTimeoutError::Timeout) => { - got_timeout = true; continue; } Err(RecvTimeoutError::Disconnected) => break, @@ -491,7 +489,6 @@ fn stress_recv_timeout_two_threads() { } assert_eq!(recv_count, stress); - assert!(got_timeout); } #[test] diff --git a/library/std/tests/sync/mpsc.rs b/library/std/tests/sync/mpsc.rs index 9de4a71987b8e..4dc4b955da7c2 100644 --- a/library/std/tests/sync/mpsc.rs +++ b/library/std/tests/sync/mpsc.rs @@ -438,7 +438,6 @@ fn stress_recv_timeout_two_threads() { }); let mut recv_count = 0; - let mut got_timeout = false; loop { match rx.recv_timeout(timeout) { Ok(n) => { @@ -446,7 +445,6 @@ fn stress_recv_timeout_two_threads() { recv_count += 1; } Err(RecvTimeoutError::Timeout) => { - got_timeout = true; continue; } Err(RecvTimeoutError::Disconnected) => break, @@ -454,7 +452,6 @@ fn stress_recv_timeout_two_threads() { } assert_eq!(recv_count, stress); - assert!(got_timeout); } #[test] From 4eb9e661ec4bcaae73d084ea24add520c83a7c5e Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Sat, 16 Mar 2024 14:53:41 -0400 Subject: [PATCH 2/9] Extend ASCII fast paths of 
`char` methods beyond ASCII --- library/core/src/char/methods.rs | 33 ++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 87b328c912878..284a3eeb75dfb 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -777,8 +777,9 @@ impl char { #[inline] pub fn is_alphabetic(self) -> bool { match self { - 'a'..='z' | 'A'..='Z' => true, - c => c > '\x7f' && unicode::Alphabetic(c), + 'A'..='Z' | 'a'..='z' => true, + '\0'..='\u{A9}' => false, + _ => unicode::Alphabetic(self), } } @@ -819,7 +820,8 @@ impl char { pub const fn is_lowercase(self) -> bool { match self { 'a'..='z' => true, - c => c > '\x7f' && unicode::Lowercase(c), + '\0'..='\u{A9}' => false, + _ => unicode::Lowercase(self), } } @@ -860,7 +862,8 @@ impl char { pub const fn is_uppercase(self) -> bool { match self { 'A'..='Z' => true, - c => c > '\x7f' && unicode::Uppercase(c), + '\0'..='\u{BF}' => false, + _ => unicode::Uppercase(self), } } @@ -893,7 +896,8 @@ impl char { pub const fn is_whitespace(self) -> bool { match self { ' ' | '\x09'..='\x0d' => true, - c => c > '\x7f' && unicode::White_Space(c), + '\0'..='\u{84}' => false, + _ => unicode::White_Space(self), } } @@ -920,10 +924,10 @@ impl char { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn is_alphanumeric(self) -> bool { - if self.is_ascii() { - self.is_ascii_alphanumeric() - } else { - unicode::Alphabetic(self) || unicode::N(self) + match self { + '0'..='9' | 'A'..='Z' | 'a'..='z' => true, + '\0'..='\u{A9}' => false, + _ => unicode::Alphabetic(self) || unicode::N(self), } } @@ -969,7 +973,7 @@ impl char { #[must_use] #[inline] pub(crate) fn is_grapheme_extended(self) -> bool { - !self.is_ascii() && unicode::Grapheme_Extend(self) + self > '\u{02FF}' && unicode::Grapheme_Extend(self) } /// Returns `true` if this `char` has the `Cased` property. 
@@ -985,7 +989,11 @@ impl char { #[doc(hidden)] #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] pub fn is_cased(self) -> bool { - if self.is_ascii() { self.is_ascii_alphabetic() } else { unicode::Cased(self) } + match self { + 'A'..='Z' | 'a'..='z' => true, + '\0'..='\u{A9}' => false, + _ => unicode::Cased(self), + } } /// Returns `true` if this `char` has the `Case_Ignorable` property. @@ -1047,7 +1055,8 @@ impl char { pub fn is_numeric(self) -> bool { match self { '0'..='9' => true, - c => c > '\x7f' && unicode::N(c), + '\0'..='\u{B1}' => false, + _ => unicode::N(self), } } From e844c24122d10c6a4a944ffb1a85c8f36d20a41e Mon Sep 17 00:00:00 2001 From: b-naber Date: Tue, 3 Feb 2026 15:29:55 +0000 Subject: [PATCH 3/9] resolve namespaced crates with open modules --- compiler/rustc_hir/src/def.rs | 19 ++- .../src/hir_ty_lowering/mod.rs | 1 + compiler/rustc_passes/src/dead.rs | 2 +- .../rustc_resolve/src/build_reduced_graph.rs | 1 + compiler/rustc_resolve/src/diagnostics.rs | 8 +- compiler/rustc_resolve/src/ident.rs | 16 +- compiler/rustc_resolve/src/imports.rs | 2 +- compiler/rustc_resolve/src/lib.rs | 144 ++++++++++++------ compiler/rustc_session/src/config/externs.rs | 7 + .../resolve/auxiliary/open-ns-mod-my_api.rs | 9 ++ tests/ui/resolve/auxiliary/open-ns-my_api.rs | 3 + .../resolve/auxiliary/open-ns-my_api_core.rs | 15 ++ .../resolve/auxiliary/open-ns-my_api_utils.rs | 13 ++ tests/ui/resolve/open-ns-1.rs | 19 +++ tests/ui/resolve/open-ns-1.stderr | 22 +++ tests/ui/resolve/open-ns-10.rs | 8 + tests/ui/resolve/open-ns-10.stderr | 2 + tests/ui/resolve/open-ns-11.rs | 12 ++ tests/ui/resolve/open-ns-11.stderr | 9 ++ tests/ui/resolve/open-ns-2.rs | 18 +++ tests/ui/resolve/open-ns-2.stderr | 15 ++ tests/ui/resolve/open-ns-3.rs | 14 ++ tests/ui/resolve/open-ns-3.stderr | 19 +++ tests/ui/resolve/open-ns-4.rs | 12 ++ tests/ui/resolve/open-ns-4.stderr | 9 ++ tests/ui/resolve/open-ns-5.rs | 18 +++ tests/ui/resolve/open-ns-6.rs | 13 
++ tests/ui/resolve/open-ns-7.rs | 14 ++ tests/ui/resolve/open-ns-7.stderr | 16 ++ tests/ui/resolve/open-ns-8.rs | 23 +++ tests/ui/resolve/open-ns-9.rs | 25 +++ tests/ui/resolve/open-ns-9.stderr | 27 ++++ 32 files changed, 481 insertions(+), 54 deletions(-) create mode 100644 tests/ui/resolve/auxiliary/open-ns-mod-my_api.rs create mode 100644 tests/ui/resolve/auxiliary/open-ns-my_api.rs create mode 100644 tests/ui/resolve/auxiliary/open-ns-my_api_core.rs create mode 100644 tests/ui/resolve/auxiliary/open-ns-my_api_utils.rs create mode 100644 tests/ui/resolve/open-ns-1.rs create mode 100644 tests/ui/resolve/open-ns-1.stderr create mode 100644 tests/ui/resolve/open-ns-10.rs create mode 100644 tests/ui/resolve/open-ns-10.stderr create mode 100644 tests/ui/resolve/open-ns-11.rs create mode 100644 tests/ui/resolve/open-ns-11.stderr create mode 100644 tests/ui/resolve/open-ns-2.rs create mode 100644 tests/ui/resolve/open-ns-2.stderr create mode 100644 tests/ui/resolve/open-ns-3.rs create mode 100644 tests/ui/resolve/open-ns-3.stderr create mode 100644 tests/ui/resolve/open-ns-4.rs create mode 100644 tests/ui/resolve/open-ns-4.stderr create mode 100644 tests/ui/resolve/open-ns-5.rs create mode 100644 tests/ui/resolve/open-ns-6.rs create mode 100644 tests/ui/resolve/open-ns-7.rs create mode 100644 tests/ui/resolve/open-ns-7.stderr create mode 100644 tests/ui/resolve/open-ns-8.rs create mode 100644 tests/ui/resolve/open-ns-9.rs create mode 100644 tests/ui/resolve/open-ns-9.stderr diff --git a/compiler/rustc_hir/src/def.rs b/compiler/rustc_hir/src/def.rs index 3959ee7f94128..76bab70e5c0df 100644 --- a/compiler/rustc_hir/src/def.rs +++ b/compiler/rustc_hir/src/def.rs @@ -590,6 +590,13 @@ pub enum Res { /// **Belongs to the type namespace.** ToolMod, + /// The resolution for an open module in a namespaced crate. E.g. `my_api` + /// in the namespaced crate `my_api::utils` when `my_api` isn't part of the + /// extern prelude. 
+ /// + /// **Belongs to the type namespace.** + OpenMod(Symbol), + // Macro namespace /// An attribute that is *not* implemented via macro. /// E.g., `#[inline]` and `#[rustfmt::skip]`, which are essentially directives, @@ -838,6 +845,7 @@ impl Res { | Res::SelfTyAlias { .. } | Res::SelfCtor(..) | Res::ToolMod + | Res::OpenMod(..) | Res::NonMacroAttr(..) | Res::Err => None, } @@ -869,6 +877,7 @@ impl Res { Res::Local(..) => "local variable", Res::SelfTyParam { .. } | Res::SelfTyAlias { .. } => "self type", Res::ToolMod => "tool module", + Res::OpenMod(..) => "namespaced crate", Res::NonMacroAttr(attr_kind) => attr_kind.descr(), Res::Err => "unresolved item", } @@ -895,6 +904,7 @@ impl Res { Res::SelfTyAlias { alias_to, is_trait_impl } } Res::ToolMod => Res::ToolMod, + Res::OpenMod(sym) => Res::OpenMod(sym), Res::NonMacroAttr(attr_kind) => Res::NonMacroAttr(attr_kind), Res::Err => Res::Err, } @@ -911,6 +921,7 @@ impl Res { Res::SelfTyAlias { alias_to, is_trait_impl } } Res::ToolMod => Res::ToolMod, + Res::OpenMod(sym) => Res::OpenMod(sym), Res::NonMacroAttr(attr_kind) => Res::NonMacroAttr(attr_kind), Res::Err => Res::Err, }) @@ -936,9 +947,11 @@ impl Res { pub fn ns(&self) -> Option { match self { Res::Def(kind, ..) => kind.ns(), - Res::PrimTy(..) | Res::SelfTyParam { .. } | Res::SelfTyAlias { .. } | Res::ToolMod => { - Some(Namespace::TypeNS) - } + Res::PrimTy(..) + | Res::SelfTyParam { .. } + | Res::SelfTyAlias { .. } + | Res::ToolMod + | Res::OpenMod(..) => Some(Namespace::TypeNS), Res::SelfCtor(..) | Res::Local(..) => Some(Namespace::ValueNS), Res::NonMacroAttr(..) 
=> Some(Namespace::MacroNS), Res::Err => None, diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs index 8b1dad9a65471..c6829a941bbec 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs @@ -2786,6 +2786,7 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { | Res::SelfCtor(_) | Res::Local(_) | Res::ToolMod + | Res::OpenMod(..) | Res::NonMacroAttr(_) | Res::Err) => Const::new_error_with_message( tcx, diff --git a/compiler/rustc_passes/src/dead.rs b/compiler/rustc_passes/src/dead.rs index 15c91deef247d..30634c800e819 100644 --- a/compiler/rustc_passes/src/dead.rs +++ b/compiler/rustc_passes/src/dead.rs @@ -157,7 +157,7 @@ impl<'tcx> MarkSymbolVisitor<'tcx> { Res::Def(_, def_id) => self.check_def_id(def_id), Res::SelfTyParam { trait_: t } => self.check_def_id(t), Res::SelfTyAlias { alias_to: i, .. } => self.check_def_id(i), - Res::ToolMod | Res::NonMacroAttr(..) | Res::Err => {} + Res::ToolMod | Res::NonMacroAttr(..) | Res::OpenMod(..) | Res::Err => {} } } diff --git a/compiler/rustc_resolve/src/build_reduced_graph.rs b/compiler/rustc_resolve/src/build_reduced_graph.rs index a280acc0d51df..af9680ac8888d 100644 --- a/compiler/rustc_resolve/src/build_reduced_graph.rs +++ b/compiler/rustc_resolve/src/build_reduced_graph.rs @@ -357,6 +357,7 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { | Res::SelfTyParam { .. } | Res::SelfTyAlias { .. } | Res::SelfCtor(..) + | Res::OpenMod(..) 
| Res::Err => bug!("unexpected resolution: {:?}", res), } } diff --git a/compiler/rustc_resolve/src/diagnostics.rs b/compiler/rustc_resolve/src/diagnostics.rs index c776020c21274..fd5bf2da5fc6f 100644 --- a/compiler/rustc_resolve/src/diagnostics.rs +++ b/compiler/rustc_resolve/src/diagnostics.rs @@ -1,3 +1,4 @@ +// ignore-tidy-filelength use std::ops::ControlFlow; use itertools::Itertools as _; @@ -1735,8 +1736,8 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { Res::Def(DefKind::Macro(kinds), _) => { format!("{} {}", kinds.article(), kinds.descr()) } - Res::ToolMod => { - // Don't confuse the user with tool modules. + Res::ToolMod | Res::OpenMod(..) => { + // Don't confuse the user with tool modules or open modules. continue; } Res::Def(DefKind::Trait, _) if macro_kind == MacroKind::Derive => { @@ -1973,7 +1974,8 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { let (built_in, from) = match scope { Scope::StdLibPrelude | Scope::MacroUsePrelude => ("", " from prelude"), Scope::ExternPreludeFlags - if self.tcx.sess.opts.externs.get(ident.as_str()).is_some() => + if self.tcx.sess.opts.externs.get(ident.as_str()).is_some() + || matches!(res, Res::OpenMod(..)) => { ("", " passed with `--extern`") } diff --git a/compiler/rustc_resolve/src/ident.rs b/compiler/rustc_resolve/src/ident.rs index 7cfd5b5f861a4..cd3f63304f02d 100644 --- a/compiler/rustc_resolve/src/ident.rs +++ b/compiler/rustc_resolve/src/ident.rs @@ -26,7 +26,7 @@ use crate::{ AmbiguityError, AmbiguityKind, AmbiguityWarning, BindingKey, CmResolver, Decl, DeclKind, Determinacy, Finalize, IdentKey, ImportKind, LateDecl, Module, ModuleKind, ModuleOrUniformRoot, ParentScope, PathResult, PrivacyError, Res, ResolutionError, Resolver, Scope, ScopeSet, - Segment, Stage, Used, errors, + Segment, Stage, Symbol, Used, errors, }; #[derive(Copy, Clone)] @@ -386,7 +386,6 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { } /// Resolve an identifier in the specified set of scopes. 
- #[instrument(level = "debug", skip(self))] pub(crate) fn resolve_ident_in_scope_set<'r>( self: CmResolver<'r, 'ra, 'tcx>, orig_ident: Ident, @@ -976,6 +975,14 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { ignore_import, ) } + ModuleOrUniformRoot::OpenModule(sym) => { + let open_ns_name = format!("{}::{}", sym.as_str(), ident.name); + let ns_ident = IdentKey::with_root_ctxt(Symbol::intern(&open_ns_name)); + match self.extern_prelude_get_flag(ns_ident, ident.span, finalize.is_some()) { + Some(decl) => Ok(decl), + None => Err(Determinacy::Determined), + } + } ModuleOrUniformRoot::ModuleAndExternPrelude(module) => self.resolve_ident_in_scope_set( ident, ScopeSet::ModuleAndExternPrelude(ns, module), @@ -1962,7 +1969,10 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { } let maybe_assoc = opt_ns != Some(MacroNS) && PathSource::Type.is_expected(res); - if let Some(def_id) = binding.res().module_like_def_id() { + if let Res::OpenMod(sym) = binding.res() { + module = Some(ModuleOrUniformRoot::OpenModule(sym)); + record_segment_res(self.reborrow(), finalize, res, id); + } else if let Some(def_id) = binding.res().module_like_def_id() { if self.mods_with_parse_errors.contains(&def_id) { module_had_parse_errors = true; } diff --git a/compiler/rustc_resolve/src/imports.rs b/compiler/rustc_resolve/src/imports.rs index 7696b4b220d6c..8f534de0f76dd 100644 --- a/compiler/rustc_resolve/src/imports.rs +++ b/compiler/rustc_resolve/src/imports.rs @@ -41,7 +41,7 @@ type Res = def::Res; /// A potential import declaration in the process of being planted into a module. /// Also used for lazily planting names from `--extern` flags to extern prelude. 
-#[derive(Clone, Copy, Default, PartialEq)] +#[derive(Clone, Copy, Default, PartialEq, Debug)] pub(crate) enum PendingDecl<'ra> { Ready(Option>), #[default] diff --git a/compiler/rustc_resolve/src/lib.rs b/compiler/rustc_resolve/src/lib.rs index 6b3b5e2ec45f0..9aa96ce27e084 100644 --- a/compiler/rustc_resolve/src/lib.rs +++ b/compiler/rustc_resolve/src/lib.rs @@ -64,6 +64,7 @@ use rustc_hir::definitions::DisambiguatorState; use rustc_hir::{PrimTy, TraitCandidate, find_attr}; use rustc_index::bit_set::DenseBitSet; use rustc_metadata::creader::CStore; +use rustc_middle::bug; use rustc_middle::metadata::{AmbigModChild, ModChild, Reexport}; use rustc_middle::middle::privacy::EffectiveVisibilities; use rustc_middle::query::Providers; @@ -448,6 +449,11 @@ enum ModuleOrUniformRoot<'ra> { /// Used only for resolving single-segment imports. The reason it exists is that import paths /// are always split into two parts, the first of which should be some kind of module. CurrentScope, + + /// Virtual module for the resolution of base names of namespaced crates, + /// where the base name doesn't correspond to a module in the extern prelude. + /// E.g. `my_api::utils` is in the prelude, but `my_api` is not. + OpenModule(Symbol), } #[derive(Debug)] @@ -1108,13 +1114,20 @@ impl<'ra> DeclData<'ra> { } } +#[derive(Debug)] struct ExternPreludeEntry<'ra> { /// Name declaration from an `extern crate` item. /// The boolean flag is true is `item_decl` is non-redundant, happens either when /// `flag_decl` is `None`, or when `extern crate` introducing `item_decl` used renaming. item_decl: Option<(Decl<'ra>, Span, /* introduced by item */ bool)>, /// Name declaration from an `--extern` flag, lazily populated on first use. 
- flag_decl: Option, /* finalized */ bool)>>, + flag_decl: Option< + CacheCell<( + PendingDecl<'ra>, + /* finalized */ bool, + /* open flag (namespaced crate) */ bool, + )>, + >, } impl ExternPreludeEntry<'_> { @@ -1125,7 +1138,14 @@ impl ExternPreludeEntry<'_> { fn flag() -> Self { ExternPreludeEntry { item_decl: None, - flag_decl: Some(CacheCell::new((PendingDecl::Pending, false))), + flag_decl: Some(CacheCell::new((PendingDecl::Pending, false, false))), + } + } + + fn open_flag() -> Self { + ExternPreludeEntry { + item_decl: None, + flag_decl: Some(CacheCell::new((PendingDecl::Pending, false, true))), } } @@ -1637,35 +1657,7 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { let mut invocation_parents = FxHashMap::default(); invocation_parents.insert(LocalExpnId::ROOT, InvocationParent::ROOT); - let mut extern_prelude: FxIndexMap<_, _> = tcx - .sess - .opts - .externs - .iter() - .filter_map(|(name, entry)| { - // Make sure `self`, `super`, `_` etc do not get into extern prelude. - // FIXME: reject `--extern self` and similar in option parsing instead. 
- if entry.add_prelude - && let name = Symbol::intern(name) - && name.can_be_raw() - { - let ident = IdentKey::with_root_ctxt(name); - Some((ident, ExternPreludeEntry::flag())) - } else { - None - } - }) - .collect(); - - if !attr::contains_name(attrs, sym::no_core) { - let ident = IdentKey::with_root_ctxt(sym::core); - extern_prelude.insert(ident, ExternPreludeEntry::flag()); - if !attr::contains_name(attrs, sym::no_std) { - let ident = IdentKey::with_root_ctxt(sym::std); - extern_prelude.insert(ident, ExternPreludeEntry::flag()); - } - } - + let extern_prelude = build_extern_prelude(tcx, attrs); let registered_tools = tcx.registered_tools(()); let edition = tcx.sess.edition(); @@ -2320,10 +2312,10 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { ) -> Option> { let entry = self.extern_prelude.get(&ident); entry.and_then(|entry| entry.flag_decl.as_ref()).and_then(|flag_decl| { - let (pending_decl, finalized) = flag_decl.get(); + let (pending_decl, finalized, is_open) = flag_decl.get(); let decl = match pending_decl { PendingDecl::Ready(decl) => { - if finalize && !finalized { + if finalize && !finalized && !is_open { self.cstore_mut().process_path_extern( self.tcx, ident.name, @@ -2334,18 +2326,28 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { } PendingDecl::Pending => { debug_assert!(!finalized); - let crate_id = if finalize { - self.cstore_mut().process_path_extern(self.tcx, ident.name, orig_ident_span) + if is_open { + let res = Res::OpenMod(ident.name); + Some(self.arenas.new_pub_def_decl(res, DUMMY_SP, LocalExpnId::ROOT)) } else { - self.cstore_mut().maybe_process_path_extern(self.tcx, ident.name) - }; - crate_id.map(|crate_id| { - let res = Res::Def(DefKind::Mod, crate_id.as_def_id()); - self.arenas.new_pub_def_decl(res, DUMMY_SP, LocalExpnId::ROOT) - }) + let crate_id = if finalize { + self.cstore_mut().process_path_extern( + self.tcx, + ident.name, + orig_ident_span, + ) + } else { + self.cstore_mut().maybe_process_path_extern(self.tcx, ident.name) + }; + 
crate_id.map(|crate_id| { + let def_id = crate_id.as_def_id(); + let res = Res::Def(DefKind::Mod, def_id); + self.arenas.new_pub_def_decl(res, DUMMY_SP, LocalExpnId::ROOT) + }) + } } }; - flag_decl.set((PendingDecl::Ready(decl), finalize || finalized)); + flag_decl.set((PendingDecl::Ready(decl), finalize || finalized, is_open)); decl.or_else(|| finalize.then_some(self.dummy_decl)) }) } @@ -2387,7 +2389,9 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { PathResult::Module(ModuleOrUniformRoot::ExternPrelude) | PathResult::Failed { .. } => { None } - PathResult::Module(..) | PathResult::Indeterminate => unreachable!(), + path_result @ (PathResult::Module(..) | PathResult::Indeterminate) => { + bug!("got invalid path_result: {path_result:?}") + } } } @@ -2505,6 +2509,60 @@ impl<'ra, 'tcx> Resolver<'ra, 'tcx> { } } +fn build_extern_prelude<'tcx, 'ra>( + tcx: TyCtxt<'tcx>, + attrs: &[ast::Attribute], +) -> FxIndexMap> { + let mut extern_prelude: FxIndexMap> = tcx + .sess + .opts + .externs + .iter() + .filter_map(|(name, entry)| { + // Make sure `self`, `super`, `_` etc do not get into extern prelude. + // FIXME: reject `--extern self` and similar in option parsing instead. + if entry.add_prelude + && let sym = Symbol::intern(name) + && sym.can_be_raw() + { + Some((IdentKey::with_root_ctxt(sym), ExternPreludeEntry::flag())) + } else { + None + } + }) + .collect(); + + // Add open base entries for namespaced crates whose base segment + // is missing from the prelude (e.g. `foo::bar` without `foo`). + // These are necessary in order to resolve the open modules, whereas + // the namespaced names are necessary in `extern_prelude` for actually + // resolving the namespaced crates. 
+ let missing_open_bases: Vec = extern_prelude + .keys() + .filter_map(|ident| { + let (base, _) = ident.name.as_str().split_once("::")?; + let base_sym = Symbol::intern(base); + base_sym.can_be_raw().then(|| IdentKey::with_root_ctxt(base_sym)) + }) + .filter(|base_ident| !extern_prelude.contains_key(base_ident)) + .collect(); + + extern_prelude.extend( + missing_open_bases.into_iter().map(|ident| (ident, ExternPreludeEntry::open_flag())), + ); + + // Inject `core` / `std` unless suppressed by attributes. + if !attr::contains_name(attrs, sym::no_core) { + extern_prelude.insert(IdentKey::with_root_ctxt(sym::core), ExternPreludeEntry::flag()); + + if !attr::contains_name(attrs, sym::no_std) { + extern_prelude.insert(IdentKey::with_root_ctxt(sym::std), ExternPreludeEntry::flag()); + } + } + + extern_prelude +} + fn names_to_string(names: impl Iterator) -> String { let mut result = String::new(); for (i, name) in names.enumerate().filter(|(_, name)| *name != kw::PathRoot) { diff --git a/compiler/rustc_session/src/config/externs.rs b/compiler/rustc_session/src/config/externs.rs index d668d8b4203db..ff76eaaeaf4fc 100644 --- a/compiler/rustc_session/src/config/externs.rs +++ b/compiler/rustc_session/src/config/externs.rs @@ -43,6 +43,13 @@ pub(crate) fn split_extern_opt<'a>( } }; + // Reject paths with more than two segments. + if unstable_opts.namespaced_crates && crate_name.split("::").count() > 2 { + return Err(early_dcx.early_struct_fatal(format!( + "crate name `{crate_name}` passed to `--extern` can have at most two segments." 
+ ))); + } + if !valid_crate_name(&crate_name, unstable_opts) { let mut error = early_dcx.early_struct_fatal(format!( "crate name `{crate_name}` passed to `--extern` is not a valid ASCII identifier" diff --git a/tests/ui/resolve/auxiliary/open-ns-mod-my_api.rs b/tests/ui/resolve/auxiliary/open-ns-mod-my_api.rs new file mode 100644 index 0000000000000..dc8b5720c0c10 --- /dev/null +++ b/tests/ui/resolve/auxiliary/open-ns-mod-my_api.rs @@ -0,0 +1,9 @@ +pub mod utils { + pub fn root_helper() { + println!("root_helper"); + } +} + +pub fn root_function() -> String { + "my_api root!".to_string() +} diff --git a/tests/ui/resolve/auxiliary/open-ns-my_api.rs b/tests/ui/resolve/auxiliary/open-ns-my_api.rs new file mode 100644 index 0000000000000..be4bf31f0fbcd --- /dev/null +++ b/tests/ui/resolve/auxiliary/open-ns-my_api.rs @@ -0,0 +1,3 @@ +pub fn root_function() -> String { + "my_api root!".to_string() +} diff --git a/tests/ui/resolve/auxiliary/open-ns-my_api_core.rs b/tests/ui/resolve/auxiliary/open-ns-my_api_core.rs new file mode 100644 index 0000000000000..41418f1516f60 --- /dev/null +++ b/tests/ui/resolve/auxiliary/open-ns-my_api_core.rs @@ -0,0 +1,15 @@ +// #![crate_name = "my_api::core"] + +pub mod util { + pub fn core_mod_fn() -> String { + format!("core_fn from my_api::core::util",) + } +} + +pub fn core_fn() -> String { + format!("core_fn from my_api::core!",) +} + +pub fn core_fn2() -> String { + format!("core_fn2 from my_api::core!",) +} diff --git a/tests/ui/resolve/auxiliary/open-ns-my_api_utils.rs b/tests/ui/resolve/auxiliary/open-ns-my_api_utils.rs new file mode 100644 index 0000000000000..d2af20728bd5e --- /dev/null +++ b/tests/ui/resolve/auxiliary/open-ns-my_api_utils.rs @@ -0,0 +1,13 @@ +pub mod util { + pub fn util_mod_helper() -> String { + format!("Helper from my_api::utils::util",) + } +} + +pub fn utils_helper() -> String { + format!("Helper from my_api::utils!",) +} + +pub fn get_u32() -> u32 { + 1 +} diff --git a/tests/ui/resolve/open-ns-1.rs 
b/tests/ui/resolve/open-ns-1.rs new file mode 100644 index 0000000000000..e77ddbe58122d --- /dev/null +++ b/tests/ui/resolve/open-ns-1.rs @@ -0,0 +1,19 @@ +//@ aux-crate:my_api=open-ns-my_api.rs +//@ aux-crate:my_api::utils=open-ns-my_api_utils.rs +//@ aux-crate:my_api::core=open-ns-my_api_core.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 + +use my_api::root_function; +use my_api::utils::util; +//~^ ERROR unresolved import `my_api::utils` + +fn main() { + let _ = root_function(); + let _ = my_api::root_function(); + let _ = my_api::utils::utils_helper(); + //~^ ERROR cannot find `utils` in `my_api` [E0433] + let _ = util::util_mod_helper(); + let _ = my_api::core::core_fn(); + //~^ ERROR cannot find `core` in `my_api` [E0433] +} diff --git a/tests/ui/resolve/open-ns-1.stderr b/tests/ui/resolve/open-ns-1.stderr new file mode 100644 index 0000000000000..65b9c6a355ce0 --- /dev/null +++ b/tests/ui/resolve/open-ns-1.stderr @@ -0,0 +1,22 @@ +error[E0432]: unresolved import `my_api::utils` + --> $DIR/open-ns-1.rs:8:13 + | +LL | use my_api::utils::util; + | ^^^^^ could not find `utils` in `my_api` + +error[E0433]: cannot find `utils` in `my_api` + --> $DIR/open-ns-1.rs:14:21 + | +LL | let _ = my_api::utils::utils_helper(); + | ^^^^^ could not find `utils` in `my_api` + +error[E0433]: cannot find `core` in `my_api` + --> $DIR/open-ns-1.rs:17:21 + | +LL | let _ = my_api::core::core_fn(); + | ^^^^ could not find `core` in `my_api` + +error: aborting due to 3 previous errors + +Some errors have detailed explanations: E0432, E0433. +For more information about an error, try `rustc --explain E0432`. 
diff --git a/tests/ui/resolve/open-ns-10.rs b/tests/ui/resolve/open-ns-10.rs new file mode 100644 index 0000000000000..b05a0ab270df0 --- /dev/null +++ b/tests/ui/resolve/open-ns-10.rs @@ -0,0 +1,8 @@ +// Tests that namespaced crate names are limited to two segments + +//@ aux-crate: nscrate::three::segments=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 +//~? ERROR crate name `nscrate::three::segments` passed to `--extern` can have at most two segments. + +fn main() {} diff --git a/tests/ui/resolve/open-ns-10.stderr b/tests/ui/resolve/open-ns-10.stderr new file mode 100644 index 0000000000000..fdef748c6fa7d --- /dev/null +++ b/tests/ui/resolve/open-ns-10.stderr @@ -0,0 +1,2 @@ +error: crate name `nscrate::three::segments` passed to `--extern` can have at most two segments. + diff --git a/tests/ui/resolve/open-ns-11.rs b/tests/ui/resolve/open-ns-11.rs new file mode 100644 index 0000000000000..90e85a9ffc043 --- /dev/null +++ b/tests/ui/resolve/open-ns-11.rs @@ -0,0 +1,12 @@ +// Tests that std has higher precedence than an open module with the same name. + +//@ aux-crate: std::utils=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 + +use std::utils::get_u32; +//~^ ERROR unresolved import `std::utils` + +fn main() { + let _ = get_u32(); +} diff --git a/tests/ui/resolve/open-ns-11.stderr b/tests/ui/resolve/open-ns-11.stderr new file mode 100644 index 0000000000000..cb073bc985a9e --- /dev/null +++ b/tests/ui/resolve/open-ns-11.stderr @@ -0,0 +1,9 @@ +error[E0432]: unresolved import `std::utils` + --> $DIR/open-ns-11.rs:7:10 + | +LL | use std::utils::get_u32; + | ^^^^^ could not find `utils` in `std` + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0432`. 
diff --git a/tests/ui/resolve/open-ns-2.rs b/tests/ui/resolve/open-ns-2.rs new file mode 100644 index 0000000000000..6165a4102be07 --- /dev/null +++ b/tests/ui/resolve/open-ns-2.rs @@ -0,0 +1,18 @@ +//@ aux-crate: my_api=open-ns-my_api.rs +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ aux-crate: my_api::core=open-ns-my_api_core.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 + +use my_api::core::{core_fn, core_fn2}; +//~^ ERROR unresolved import `my_api::core` [E0432] +use my_api::utils::*; +//~^ ERROR unresolved import `my_api::utils` [E0432] +use my_api::*; + +fn main() { + let _ = root_function(); + let _ = utils_helper(); + let _ = core_fn(); + let _ = core_fn2(); +} diff --git a/tests/ui/resolve/open-ns-2.stderr b/tests/ui/resolve/open-ns-2.stderr new file mode 100644 index 0000000000000..0e221234c5179 --- /dev/null +++ b/tests/ui/resolve/open-ns-2.stderr @@ -0,0 +1,15 @@ +error[E0432]: unresolved import `my_api::core` + --> $DIR/open-ns-2.rs:7:13 + | +LL | use my_api::core::{core_fn, core_fn2}; + | ^^^^ could not find `core` in `my_api` + +error[E0432]: unresolved import `my_api::utils` + --> $DIR/open-ns-2.rs:9:13 + | +LL | use my_api::utils::*; + | ^^^^^ could not find `utils` in `my_api` + +error: aborting due to 2 previous errors + +For more information about this error, try `rustc --explain E0432`. diff --git a/tests/ui/resolve/open-ns-3.rs b/tests/ui/resolve/open-ns-3.rs new file mode 100644 index 0000000000000..9c78999fe3687 --- /dev/null +++ b/tests/ui/resolve/open-ns-3.rs @@ -0,0 +1,14 @@ +// This test should fail with `utils_helper` being unresolvable in `my_api::utils`. +// If a crate contains a module that overlaps with a namespaced crate name, then +// the namespaced crate will not be used in name resolution. 
+ +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ aux-crate: my_api=open-ns-mod-my_api.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 + +fn main() { + let _ = my_api::utils::root_helper(); + let _ = my_api::utils::utils_helper(); + //~^ ERROR cannot find function `utils_helper` in module `my_api::utils` [E0425] +} diff --git a/tests/ui/resolve/open-ns-3.stderr b/tests/ui/resolve/open-ns-3.stderr new file mode 100644 index 0000000000000..8ae261af01429 --- /dev/null +++ b/tests/ui/resolve/open-ns-3.stderr @@ -0,0 +1,19 @@ +error[E0425]: cannot find function `utils_helper` in module `my_api::utils` + --> $DIR/open-ns-3.rs:12:28 + | +LL | let _ = my_api::utils::utils_helper(); + | ^^^^^^^^^^^^ not found in `my_api::utils` + | +help: consider importing this function + | +LL + use my_api::utils::utils_helper; + | +help: if you import `utils_helper`, refer to it directly + | +LL - let _ = my_api::utils::utils_helper(); +LL + let _ = utils_helper(); + | + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0425`. diff --git a/tests/ui/resolve/open-ns-4.rs b/tests/ui/resolve/open-ns-4.rs new file mode 100644 index 0000000000000..4db3ad4c80a1d --- /dev/null +++ b/tests/ui/resolve/open-ns-4.rs @@ -0,0 +1,12 @@ +// This tests that namespaced crates are shadowed. 
+ +//@ aux-crate: my_api=open-ns-my_api.rs +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 + +fn main() { + let _ = my_api::root_function(); + let _ = my_api::utils::utils_helper(); + //~^ ERROR cannot find `utils` in `my_api` [E0433] +} diff --git a/tests/ui/resolve/open-ns-4.stderr b/tests/ui/resolve/open-ns-4.stderr new file mode 100644 index 0000000000000..2e6872c57986f --- /dev/null +++ b/tests/ui/resolve/open-ns-4.stderr @@ -0,0 +1,9 @@ +error[E0433]: cannot find `utils` in `my_api` + --> $DIR/open-ns-4.rs:10:21 + | +LL | let _ = my_api::utils::utils_helper(); + | ^^^^^ could not find `utils` in `my_api` + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0433`. diff --git a/tests/ui/resolve/open-ns-5.rs b/tests/ui/resolve/open-ns-5.rs new file mode 100644 index 0000000000000..8776da3106cf8 --- /dev/null +++ b/tests/ui/resolve/open-ns-5.rs @@ -0,0 +1,18 @@ +// Tests that namespaced crate names work inside macros. + +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 +//@ check-pass + +macro_rules! import_and_call { + ($import_path:path, $fn_name:ident) => {{ + use $import_path; + $fn_name(); + }}; +} + +fn main() { + import_and_call!(my_api::utils::utils_helper, utils_helper); + let _x = 4 + 5; +} diff --git a/tests/ui/resolve/open-ns-6.rs b/tests/ui/resolve/open-ns-6.rs new file mode 100644 index 0000000000000..856858aac43a5 --- /dev/null +++ b/tests/ui/resolve/open-ns-6.rs @@ -0,0 +1,13 @@ +// Tests that open modules are resolvable. 
+ +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 +//@ check-pass + +use my_api; +use my_api::utils::utils_helper; + +fn main() { + let _ = utils_helper(); +} diff --git a/tests/ui/resolve/open-ns-7.rs b/tests/ui/resolve/open-ns-7.rs new file mode 100644 index 0000000000000..cf16c594fa40b --- /dev/null +++ b/tests/ui/resolve/open-ns-7.rs @@ -0,0 +1,14 @@ +// Tests that namespaced crates cannot be resolved if shadowed. + +//@ aux-crate: my_api=open-ns-my_api.rs +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 + +use my_api::utils::utils_helper; +//~^ ERROR unresolved import `my_api::utils` [E0432] + +fn main() { + let _ = my_api::utils::utils_helper(); + //~^ ERROR cannot find `utils` in `my_api` [E0433] +} diff --git a/tests/ui/resolve/open-ns-7.stderr b/tests/ui/resolve/open-ns-7.stderr new file mode 100644 index 0000000000000..b008547539831 --- /dev/null +++ b/tests/ui/resolve/open-ns-7.stderr @@ -0,0 +1,16 @@ +error[E0432]: unresolved import `my_api::utils` + --> $DIR/open-ns-7.rs:8:13 + | +LL | use my_api::utils::utils_helper; + | ^^^^^ could not find `utils` in `my_api` + +error[E0433]: cannot find `utils` in `my_api` + --> $DIR/open-ns-7.rs:12:21 + | +LL | let _ = my_api::utils::utils_helper(); + | ^^^^^ could not find `utils` in `my_api` + +error: aborting due to 2 previous errors + +Some errors have detailed explanations: E0432, E0433. +For more information about an error, try `rustc --explain E0432`. diff --git a/tests/ui/resolve/open-ns-8.rs b/tests/ui/resolve/open-ns-8.rs new file mode 100644 index 0000000000000..46aafb66e63a6 --- /dev/null +++ b/tests/ui/resolve/open-ns-8.rs @@ -0,0 +1,23 @@ +// Tests that a macro-generated item has higher precedence than a namespaced crate +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 +//@ check-pass + +macro_rules!
define { + () => { + pub mod my_api { + pub mod utils { + pub fn get_u32() -> u32 { + 2 + } + } + } + }; +} + +fn main() { + define!(); + let res = my_api::utils::get_u32(); + assert_eq!(res, 2); +} diff --git a/tests/ui/resolve/open-ns-9.rs b/tests/ui/resolve/open-ns-9.rs new file mode 100644 index 0000000000000..7ded0b383d8d4 --- /dev/null +++ b/tests/ui/resolve/open-ns-9.rs @@ -0,0 +1,25 @@ +//@ aux-crate: my_api::utils=open-ns-my_api_utils.rs +//@ compile-flags: -Z namespaced-crates +//@ edition: 2024 + +use my_api::utils::get_u32; +//~^ ERROR `my_api` is ambiguous [E0659] + +macro_rules! define { + () => { + pub mod my_api { + pub mod utils { + pub fn get_u32() -> u32 { + 2 + } + } + } + }; +} + +define!(); + +fn main() { + let val = get_u32(); + assert_eq!(val, 2); +} diff --git a/tests/ui/resolve/open-ns-9.stderr b/tests/ui/resolve/open-ns-9.stderr new file mode 100644 index 0000000000000..675f487823e55 --- /dev/null +++ b/tests/ui/resolve/open-ns-9.stderr @@ -0,0 +1,27 @@ +error[E0659]: `my_api` is ambiguous + --> $DIR/open-ns-9.rs:5:5 + | +LL | use my_api::utils::get_u32; + | ^^^^^^ ambiguous name + | + = note: ambiguous because of a conflict between a macro-expanded name and a less macro-expanded name from outer scope during import or macro resolution + = note: `my_api` could refer to a namespaced crate passed with `--extern` +note: `my_api` could also refer to the module defined here + --> $DIR/open-ns-9.rs:10:9 + | +LL | / pub mod my_api { +LL | | pub mod utils { +LL | | pub fn get_u32() -> u32 { +LL | | 2 +... | +LL | | } + | |_________^ +... +LL | define!(); + | --------- in this macro invocation + = help: use `crate::my_api` to refer to this module unambiguously + = note: this error originates in the macro `define` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0659`. 
From 9a4b38c916ca758e05a020a550048d4ef47f6a74 Mon Sep 17 00:00:00 2001 From: Guillaume Gomez Date: Sat, 21 Mar 2026 14:13:00 +0100 Subject: [PATCH 4/9] Add new alias for Guillaume Gomez email address --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index c0333b49f53b2..17232083679c5 100644 --- a/.mailmap +++ b/.mailmap @@ -262,6 +262,7 @@ Guillaume Gomez Guillaume Gomez ggomez Guillaume Gomez Guillaume Gomez Guillaume Gomez Guillaume Gomez +Guillaume Gomez Guillaume Gomez gnzlbg hamidreza kalbasi Hanna Kruppe From b31dc4ab495c0297c4f09324094e9c8e62d52977 Mon Sep 17 00:00:00 2001 From: Usman Akinyemi Date: Sat, 21 Mar 2026 21:45:57 +0530 Subject: [PATCH 5/9] diagnostics: avoid ICE for undeclared generic parameter in impl Avoid an ICE for: struct A; impl A<B> {} The compiler no longer panics and can proceed to emit existing diagnostics. Adds `tests/ui/missing/undeclared-generic-parameter.rs`. Signed-off-by: Usman Akinyemi --- .../rustc_resolve/src/late/diagnostics.rs | 2 +- .../missing/undeclared-generic-parameter.rs | 5 +++ .../undeclared-generic-parameter.stderr | 36 +++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 tests/ui/missing/undeclared-generic-parameter.rs create mode 100644 tests/ui/missing/undeclared-generic-parameter.stderr diff --git a/compiler/rustc_resolve/src/late/diagnostics.rs b/compiler/rustc_resolve/src/late/diagnostics.rs index e24389e0d74df..713d6eb74bcba 100644 --- a/compiler/rustc_resolve/src/late/diagnostics.rs +++ b/compiler/rustc_resolve/src/late/diagnostics.rs @@ -3381,7 +3381,7 @@ impl<'ast, 'ra, 'tcx> LateResolutionVisitor<'_, 'ast, 'ra, 'tcx> { && def_id.is_local() && let Some(local_def_id) = def_id.as_local() && let Some(struct_generics) = self.r.struct_generics.get(&local_def_id) - && let target_param = &struct_generics.params[idx] + && let Some(target_param) = &struct_generics.params.get(idx) && let GenericParamKind::Const { ty, ..
} = &target_param.kind && let TyKind::Path(_, path) = &ty.kind { diff --git a/tests/ui/missing/undeclared-generic-parameter.rs b/tests/ui/missing/undeclared-generic-parameter.rs new file mode 100644 index 0000000000000..eebae215c8813 --- /dev/null +++ b/tests/ui/missing/undeclared-generic-parameter.rs @@ -0,0 +1,5 @@ +struct A; +impl A<B> {} +//~^ ERROR cannot find type `B` in this scope +//~| ERROR struct takes 0 generic arguments but 1 generic argument was supplied +fn main() {} diff --git a/tests/ui/missing/undeclared-generic-parameter.stderr b/tests/ui/missing/undeclared-generic-parameter.stderr new file mode 100644 index 0000000000000..101a5790ed4e6 --- /dev/null +++ b/tests/ui/missing/undeclared-generic-parameter.stderr @@ -0,0 +1,36 @@ +error[E0425]: cannot find type `B` in this scope + --> $DIR/undeclared-generic-parameter.rs:2:8 + | +LL | struct A; + | --------- similarly named struct `A` defined here +LL | impl A<B> {} + | ^ + | +help: a struct with a similar name exists + | +LL - impl A<B> {} +LL + impl A<A> {} + | +help: you might be missing a type parameter + | +LL | impl<B> A<B> {} + | +++ + +error[E0107]: struct takes 0 generic arguments but 1 generic argument was supplied + --> $DIR/undeclared-generic-parameter.rs:2:6 + | +LL | impl A<B> {} + | ^--- help: remove the unnecessary generics + | | + | expected 0 generic arguments + | +note: struct defined here, with 0 generic parameters + --> $DIR/undeclared-generic-parameter.rs:1:8 + | +LL | struct A; + | ^ + +error: aborting due to 2 previous errors + +Some errors have detailed explanations: E0107, E0425. +For more information about an error, try `rustc --explain E0107`. From e0aed9c5f665ff7ee4881e8db68382b454fb994a Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Sat, 21 Mar 2026 19:18:20 +0100 Subject: [PATCH 6/9] Point to the tracking issue for #[diagnostic::on_move] Now that the tracking issue has been opened, point to it.
--- compiler/rustc_feature/src/unstable.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_feature/src/unstable.rs b/compiler/rustc_feature/src/unstable.rs index e8ca20d7f5f44..a559211e5d467 100644 --- a/compiler/rustc_feature/src/unstable.rs +++ b/compiler/rustc_feature/src/unstable.rs @@ -473,7 +473,7 @@ declare_features! ( /// Allows giving non-const impls custom diagnostic messages if attempted to be used as const (unstable, diagnostic_on_const, "1.93.0", Some(143874)), /// Allows giving on-move borrowck custom diagnostic messages for a type - (unstable, diagnostic_on_move, "CURRENT_RUSTC_VERSION", Some(150935)), + (unstable, diagnostic_on_move, "CURRENT_RUSTC_VERSION", Some(154181)), /// Allows `#[doc(cfg(...))]`. (unstable, doc_cfg, "1.21.0", Some(43781)), /// Allows `#[doc(masked)]`. From 8d072616a5d0c484e9761b0958ece68e8d235fe3 Mon Sep 17 00:00:00 2001 From: Jules Bertholet Date: Sat, 16 Mar 2024 15:56:18 -0400 Subject: [PATCH 7/9] Add APIs for dealing with titlecase - `char::is_cased` - `char::is_titlecase` - `char::case` - `char::to_titlecase` --- library/alloc/src/lib.rs | 1 + library/core/src/char/methods.rs | 240 ++++++++++++++++-- library/core/src/char/mod.rs | 83 ++++-- library/core/src/unicode/mod.rs | 4 +- library/core/src/unicode/unicode_data.rs | 82 +++++- library/coretests/tests/char.rs | 57 ++++- library/coretests/tests/lib.rs | 1 + library/coretests/tests/unicode.rs | 34 ++- library/coretests/tests/unicode/test_data.rs | 78 ++++++ .../src/case_mapping.rs | 45 +++- src/tools/unicode-table-generator/src/main.rs | 58 +++-- 11 files changed, 581 insertions(+), 102 deletions(-) diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 7ac9cdc3833d3..bcd9e092a310f 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -148,6 +148,7 @@ #![feature(slice_range)] #![feature(std_internals)] #![feature(temporary_niche_types)] +#![feature(titlecase)] #![feature(transmutability)] 
#![feature(trivial_clone)] #![feature(trusted_fused)] diff --git a/library/core/src/char/methods.rs b/library/core/src/char/methods.rs index 284a3eeb75dfb..e9c3b040dc50b 100644 --- a/library/core/src/char/methods.rs +++ b/library/core/src/char/methods.rs @@ -777,12 +777,76 @@ impl char { #[inline] pub fn is_alphabetic(self) -> bool { match self { - 'A'..='Z' | 'a'..='z' => true, + 'a'..='z' | 'A'..='Z' => true, '\0'..='\u{A9}' => false, _ => unicode::Alphabetic(self), } } + /// Returns `true` if this `char` has the `Cased` property. + /// A character is cased if and only if it is uppercase, lowercase, or titlecase. + /// + /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and + /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. + /// + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(titlecase)] + /// assert!('A'.is_cased()); + /// assert!('a'.is_cased()); + /// assert!(!'京'.is_cased()); + /// ``` + #[must_use] + #[unstable(feature = "titlecase", issue = "153892")] + #[inline] + pub fn is_cased(self) -> bool { + match self { + 'a'..='z' | 'A'..='Z' => true, + '\0'..='\u{A9}' => false, + _ => unicode::Cased(self), + } + } + + /// Returns the case of this character: + /// [`Some(CharCase::Upper)`][`CharCase::Upper`] if [`self.is_uppercase()`][`char::is_uppercase`], + /// [`Some(CharCase::Lower)`][`CharCase::Lower`] if [`self.is_lowercase()`][`char::is_lowercase`], + /// [`Some(CharCase::Title)`][`CharCase::Title`] if [`self.is_titlecase()`][`char::is_titlecase`], and + /// `None` if [`!self.is_cased()`][`char::is_cased`]. 
+ /// + /// # Examples + /// + /// ``` + /// #![feature(titlecase)] + /// use core::char::CharCase; + /// assert_eq!('a'.case(), Some(CharCase::Lower)); + /// assert_eq!('δ'.case(), Some(CharCase::Lower)); + /// assert_eq!('A'.case(), Some(CharCase::Upper)); + /// assert_eq!('Δ'.case(), Some(CharCase::Upper)); + /// assert_eq!('Dž'.case(), Some(CharCase::Title)); + /// assert_eq!('中'.case(), None); + /// ``` + #[must_use] + #[unstable(feature = "titlecase", issue = "153892")] + #[inline] + pub fn case(self) -> Option<CharCase> { + match self { + 'a'..='z' => Some(CharCase::Lower), + 'A'..='Z' => Some(CharCase::Upper), + '\0'..='\u{A9}' => None, + _ if !unicode::Cased(self) => None, + _ if unicode::Lowercase(self) => Some(CharCase::Lower), + _ if unicode::Uppercase(self) => Some(CharCase::Upper), + _ => Some(CharCase::Title), + } + } + /// Returns `true` if this `char` has the `Lowercase` property. /// /// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and @@ -825,6 +889,40 @@ impl char { } } + /// Returns `true` if this `char` has the general category for titlecase letters. + /// Conceptually, these characters consist of an uppercase portion followed by a lowercase portion. + /// + /// Titlecase letters (code points with the general category of `Lt`) are described in Chapter 4 + /// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character + /// Database][ucd] [`UnicodeData.txt`]. + /// + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(titlecase)] + /// assert!('Dž'.is_titlecase()); + /// assert!('ῼ'.is_titlecase()); + /// assert!(!'D'.is_titlecase()); + /// assert!(!'z'.is_titlecase()); + /// assert!(!'中'.is_titlecase()); + /// assert!(!'
'.is_titlecase()); + /// ``` + #[must_use] + #[unstable(feature = "titlecase", issue = "153892")] + #[inline] + pub fn is_titlecase(self) -> bool { + match self { + '\0'..='\u{01C4}' => false, + _ => self.is_cased() && !self.is_lowercase() && !self.is_uppercase(), + } + } + /// Returns `true` if this `char` has the `Uppercase` property. /// /// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and @@ -925,7 +1023,7 @@ impl char { #[inline] pub fn is_alphanumeric(self) -> bool { match self { - '0'..='9' | 'A'..='Z' | 'a'..='z' => true, + 'a'..='z' | 'A'..='Z' | '0'..='9' => true, '\0'..='\u{A9}' => false, _ => unicode::Alphabetic(self) || unicode::N(self), } @@ -976,26 +1074,6 @@ impl char { self > '\u{02FF}' && unicode::Grapheme_Extend(self) } - /// Returns `true` if this `char` has the `Cased` property. - /// - /// `Cased` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and - /// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`]. - /// - /// [Unicode Standard]: https://www.unicode.org/versions/latest/ - /// [ucd]: https://www.unicode.org/reports/tr44/ - /// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt - #[must_use] - #[inline] - #[doc(hidden)] - #[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")] - pub fn is_cased(self) -> bool { - match self { - 'A'..='Z' | 'a'..='z' => true, - '\0'..='\u{A9}' => false, - _ => unicode::Cased(self), - } - } - /// Returns `true` if this `char` has the `Case_Ignorable` property. /// /// `Case_Ignorable` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and @@ -1119,7 +1197,7 @@ impl char { /// // convert into themselves. 
/// assert_eq!('山'.to_lowercase().to_string(), "山"); /// ``` - #[must_use = "this returns the lowercase character as a new iterator, \ + #[must_use = "this returns the lowercased character as a new iterator, \ without modifying the original"] #[stable(feature = "rust1", since = "1.0.0")] #[inline] @@ -1127,9 +1205,115 @@ impl char { ToLowercase(CaseMappingIter::new(conversions::to_lower(self))) } + /// Returns an iterator that yields the titlecase mapping of this `char` as one or more + /// `char`s. + /// + /// This is usually, but not always, equivalent to the uppercase mapping + /// returned by [`Self::to_uppercase`]. Prefer this method when seeking to capitalize + /// Only The First Letter of a word, but use [`Self::to_uppercase`] for ALL CAPS. + /// + /// If this `char` does not have a titlecase mapping, the iterator yields the same `char`. + /// + /// If this `char` has a one-to-one titlecase mapping given by the [Unicode Character + /// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`. + /// + /// [ucd]: https://www.unicode.org/reports/tr44/ + /// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt + /// + /// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields + /// the `char`(s) given by [`SpecialCasing.txt`]. + /// + /// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt + /// + /// This operation performs an unconditional mapping without tailoring. That is, the conversion + /// is independent of context and language. + /// + /// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in + /// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
+ /// + /// [Unicode Standard]: https://www.unicode.org/versions/latest/ + /// + /// # Examples + /// + /// As an iterator: + /// + /// ``` + /// #![feature(titlecase)] + /// for c in 'ß'.to_titlecase() { + /// print!("{c}"); + /// } + /// println!(); + /// ``` + /// + /// Using `println!` directly: + /// + /// ``` + /// #![feature(titlecase)] + /// println!("{}", 'ß'.to_titlecase()); + /// ``` + /// + /// Both are equivalent to: + /// + /// ``` + /// println!("Ss"); + /// ``` + /// + /// Using [`to_string`](../std/string/trait.ToString.html#tymethod.to_string): + /// + /// ``` + /// #![feature(titlecase)] + /// assert_eq!('c'.to_titlecase().to_string(), "C"); + /// assert_eq!('dž'.to_titlecase().to_string(), "Dž"); + /// assert_eq!('ῼ'.to_titlecase().to_string(), "ῼ"); + /// + /// // Sometimes the result is more than one character: + /// assert_eq!('ß'.to_titlecase().to_string(), "Ss"); + /// + /// // Characters that do not have separate cased forms + /// // convert into themselves. + /// assert_eq!('山'.to_titlecase().to_string(), "山"); + /// ``` + /// + /// # Note on locale + /// + /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two: + /// + /// * 'Dotless': I / ı, sometimes written ï + /// * 'Dotted': İ / i + /// + /// Note that the lowercase dotted 'i' is the same as the Latin. Therefore: + /// + /// ``` + /// #![feature(titlecase)] + /// let upper_i = 'i'.to_titlecase().to_string(); + /// ``` + /// + /// The value of `upper_i` here relies on the language of the text: if we're + /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should + /// be `"İ"`. `to_titlecase()` does not take this into account, and so: + /// + /// ``` + /// #![feature(titlecase)] + /// let upper_i = 'i'.to_titlecase().to_string(); + /// + /// assert_eq!(upper_i, "I"); + /// ``` + /// + /// holds across languages. 
+ #[must_use = "this returns the titlecased character as a new iterator, \ + without modifying the original"] + #[unstable(feature = "titlecase", issue = "153892")] + #[inline] + pub fn to_titlecase(self) -> ToTitlecase { + ToTitlecase(CaseMappingIter::new(conversions::to_title(self))) + } + /// Returns an iterator that yields the uppercase mapping of this `char` as one or more /// `char`s. /// + /// Prefer this method when converting a word into ALL CAPS, but consider [`Self::to_titlecase`] + /// instead if you seek to capitalize Only The First Letter. + /// /// If this `char` does not have an uppercase mapping, the iterator yields the same `char`. /// /// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character @@ -1179,9 +1363,11 @@ impl char { /// /// ``` /// assert_eq!('c'.to_uppercase().to_string(), "C"); + /// assert_eq!('dž'.to_uppercase().to_string(), "DŽ"); /// /// // Sometimes the result is more than one character: /// assert_eq!('ſt'.to_uppercase().to_string(), "ST"); + /// assert_eq!('ῼ'.to_uppercase().to_string(), "ΩΙ"); /// /// // Characters that do not have both uppercase and lowercase /// // convert into themselves. @@ -1190,7 +1376,7 @@ impl char { /// /// # Note on locale /// - /// In Turkish, the equivalent of 'i' in Latin has five forms instead of two: + /// In Turkish and Azeri, the equivalent of 'i' in Latin has five forms instead of two: /// /// * 'Dotless': I / ı, sometimes written ï /// * 'Dotted': İ / i @@ -1202,7 +1388,7 @@ impl char { /// ``` /// /// The value of `upper_i` here relies on the language of the text: if we're - /// in `en-US`, it should be `"I"`, but if we're in `tr_TR`, it should + /// in `en-US`, it should be `"I"`, but if we're in `tr-TR` or `az-AZ`, it should /// be `"İ"`. `to_uppercase()` does not take this into account, and so: /// /// ``` @@ -1212,7 +1398,7 @@ impl char { /// ``` /// /// holds across languages. 
- #[must_use = "this returns the uppercase character as a new iterator, \ + #[must_use = "this returns the uppercased character as a new iterator, \ without modifying the original"] #[stable(feature = "rust1", since = "1.0.0")] #[inline] @@ -1455,7 +1641,7 @@ impl char { #[rustc_const_stable(feature = "const_ascii_ctype_on_intrinsics", since = "1.47.0")] #[inline] pub const fn is_ascii_alphabetic(&self) -> bool { - matches!(*self, 'A'..='Z' | 'a'..='z') + matches!(*self, 'a'..='z' | 'A'..='Z') } /// Checks if the value is an ASCII uppercase character: diff --git a/library/core/src/char/mod.rs b/library/core/src/char/mod.rs index 82a3f6f916be3..3231c4193064c 100644 --- a/library/core/src/char/mod.rs +++ b/library/core/src/char/mod.rs @@ -363,13 +363,21 @@ impl fmt::Display for EscapeDebug { } macro_rules! casemappingiter_impls { - ($(#[$attr:meta])* $ITER_NAME:ident) => { + ( + #[$stab:meta] + #[$dendstab:meta] + #[$fusedstab:meta] + #[$exactstab:meta] + #[$displaystab:meta] + $(#[$attr:meta])* + $ITER_NAME:ident + ) => { $(#[$attr])* - #[stable(feature = "rust1", since = "1.0.0")] + #[$stab] #[derive(Debug, Clone)] pub struct $ITER_NAME(CaseMappingIter); - #[stable(feature = "rust1", since = "1.0.0")] + #[$stab] impl Iterator for $ITER_NAME { type Item = char; fn next(&mut self) -> Option { @@ -405,7 +413,7 @@ macro_rules! casemappingiter_impls { } } - #[stable(feature = "case_mapping_double_ended", since = "1.59.0")] + #[$dendstab] impl DoubleEndedIterator for $ITER_NAME { fn next_back(&mut self) -> Option { self.0.next_back() @@ -423,10 +431,10 @@ macro_rules! casemappingiter_impls { } } - #[stable(feature = "fused", since = "1.26.0")] + #[$fusedstab] impl FusedIterator for $ITER_NAME {} - #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")] + #[$exactstab] impl ExactSizeIterator for $ITER_NAME { fn len(&self) -> usize { self.0.len() @@ -453,7 +461,7 @@ macro_rules! 
casemappingiter_impls { #[unstable(feature = "std_internals", issue = "none")] unsafe impl TrustedRandomAccess for $ITER_NAME {} - #[stable(feature = "char_struct_display", since = "1.16.0")] + #[$displaystab] impl fmt::Display for $ITER_NAME { #[inline] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -464,23 +472,48 @@ macro_rules! casemappingiter_impls { } casemappingiter_impls! { - /// Returns an iterator that yields the lowercase equivalent of a `char`. + #[stable(feature = "rust1", since = "1.0.0")] + #[stable(feature = "case_mapping_double_ended", since = "1.59.0")] + #[stable(feature = "fused", since = "1.26.0")] + #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")] + #[stable(feature = "char_struct_display", since = "1.16.0")] + /// Returns an iterator that yields the uppercase equivalent of a `char`. /// - /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See + /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See /// its documentation for more. /// - /// [`to_lowercase`]: char::to_lowercase - ToLowercase + /// [`to_uppercase`]: char::to_uppercase + ToUppercase } casemappingiter_impls! { - /// Returns an iterator that yields the uppercase equivalent of a `char`. + #[unstable(feature = "titlecase", issue = "153892")] + #[unstable(feature = "titlecase", issue = "153892")] + #[unstable(feature = "titlecase", issue = "153892")] + #[unstable(feature = "titlecase", issue = "153892")] + #[unstable(feature = "titlecase", issue = "153892")] + /// Returns an iterator that yields the titlecase equivalent of a `char`. /// - /// This `struct` is created by the [`to_uppercase`] method on [`char`]. See + /// This `struct` is created by the [`to_titlecase`] method on [`char`]. See /// its documentation for more. /// - /// [`to_uppercase`]: char::to_uppercase - ToUppercase + /// [`to_titlecase`]: char::to_titlecase + ToTitlecase +} + +casemappingiter_impls! 
{ + #[stable(feature = "rust1", since = "1.0.0")] + #[stable(feature = "case_mapping_double_ended", since = "1.59.0")] + #[stable(feature = "fused", since = "1.26.0")] + #[stable(feature = "exact_size_case_mapping_iter", since = "1.35.0")] + #[stable(feature = "char_struct_display", since = "1.16.0")] + /// Returns an iterator that yields the lowercase equivalent of a `char`. + /// + /// This `struct` is created by the [`to_lowercase`] method on [`char`]. See + /// its documentation for more. + /// + /// [`to_lowercase`]: char::to_lowercase + ToLowercase } #[derive(Debug, Clone)] @@ -603,3 +636,23 @@ impl fmt::Display for TryFromCharError { #[stable(feature = "u8_from_char", since = "1.59.0")] impl Error for TryFromCharError {} + +/// The case of a cased character, +/// as returned by [`char::case`]. +/// +/// Titlecase characters conceptually are composed of an uppercase portion +/// followed by a lowercase portion. +/// The variant discriminants represent this: +/// the most significant bit represents whether the case +/// conceptually starts as uppercase, while the least significant bit +/// represents whether it conceptually ends as uppercase. +#[unstable(feature = "titlecase", issue = "153892")] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum CharCase { + /// Lowercase. Corresponds to the `Lowercase` Unicode property. + Lower = 0b00, + /// Titlecase. Corresponds to the `Titlecase_Letter` Unicode general category. + Title = 0b10, + /// Uppercase. Corresponds to the `Uppercase` Unicode property. + Upper = 0b11, +} diff --git a/library/core/src/unicode/mod.rs b/library/core/src/unicode/mod.rs index 4c220e3ea0129..22a1166fdf168 100644 --- a/library/core/src/unicode/mod.rs +++ b/library/core/src/unicode/mod.rs @@ -4,12 +4,12 @@ // for use in alloc, not re-exported in std. 
#[rustfmt::skip] -pub use unicode_data::case_ignorable::lookup as Case_Ignorable; -pub use unicode_data::cased::lookup as Cased; pub use unicode_data::conversions; #[rustfmt::skip] pub(crate) use unicode_data::alphabetic::lookup as Alphabetic; +pub(crate) use unicode_data::case_ignorable::lookup as Case_Ignorable; +pub(crate) use unicode_data::cased::lookup as Cased; pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend; pub(crate) use unicode_data::lowercase::lookup as Lowercase; pub(crate) use unicode_data::n::lookup as N; diff --git a/library/core/src/unicode/unicode_data.rs b/library/core/src/unicode/unicode_data.rs index dd3712669e500..f602cd5c5b6b3 100644 --- a/library/core/src/unicode/unicode_data.rs +++ b/library/core/src/unicode/unicode_data.rs @@ -9,7 +9,8 @@ // White_Space : 256 bytes, 19 codepoints in 8 ranges (U+000085 - U+003001) using cascading // to_lower : 1112 bytes, 1462 codepoints in 185 ranges (U+0000C0 - U+01E921) using 2-level LUT // to_upper : 1998 bytes, 1554 codepoints in 299 ranges (U+0000B5 - U+01E943) using 2-level LUT -// Total : 9657 bytes +// to_title : 340 bytes, 135 codepoints in 49 ranges (U+0000DF - U+00FB17) using 2-level LUT +// Total : 9997 bytes #[inline(always)] const fn bitset_search< @@ -823,14 +824,10 @@ pub mod conversions { unsafe { char::from_u32_unchecked(((plane as u32) << 16) | (low as u32)) } } - fn lookup(input: char, ascii: char, l1_lut: &L1Lut) -> [char; 3] { - if input.is_ascii() { - return [ascii, '\0', '\0']; - } - + fn lookup(input: char, l1_lut: &L1Lut) -> Option<[char; 3]> { let (input_high, input_low) = deconstruct(input); let Some(l2_lut) = l1_lut.l2_luts.get(input_high as usize) else { - return [input, '\0', '\0']; + return None; }; let idx = l2_lut.singles.binary_search_by(|(range, _)| { @@ -844,6 +841,7 @@ pub mod conversions { Ordering::Equal } }); + if let Ok(idx) = idx { // SAFETY: binary search guarantees that the index is in bounds. 
let &(range, output_delta) = unsafe { l2_lut.singles.get_unchecked(idx) }; @@ -852,7 +850,7 @@ pub mod conversions { let output_low = input_low.wrapping_add_signed(output_delta); // SAFETY: Table data are guaranteed to be valid Unicode. let output = unsafe { reconstruct(input_high, output_low) }; - return [output, '\0', '\0']; + return Some([output, '\0', '\0']); } }; @@ -861,18 +859,37 @@ pub mod conversions { let &(_, output_lows) = unsafe { l2_lut.multis.get_unchecked(idx) }; // SAFETY: Table data are guaranteed to be valid Unicode. let output = output_lows.map(|output_low| unsafe { reconstruct(input_high, output_low) }); - return output; + return Some(output); }; - [input, '\0', '\0'] + None } pub fn to_lower(c: char) -> [char; 3] { - lookup(c, c.to_ascii_lowercase(), &LOWERCASE_LUT) + // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Lowercased%253A%5D-%5B%253AASCII%253A%5D&abb=on + if c < '\u{C0}' { + return [c.to_ascii_lowercase(), '\0', '\0']; + } + + lookup(c, &LOWERCASE_LUT).unwrap_or([c, '\0', '\0']) } pub fn to_upper(c: char) -> [char; 3] { - lookup(c, c.to_ascii_uppercase(), &UPPERCASE_LUT) + // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Uppercased%253A%5D-%5B%253AASCII%253A%5D&abb=on + if c < '\u{B5}' { + return [c.to_ascii_uppercase(), '\0', '\0']; + } + + lookup(c, &UPPERCASE_LUT).unwrap_or([c, '\0', '\0']) + } + + pub fn to_title(c: char) -> [char; 3] { + // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Titlecased%253A%5D-%5B%253AASCII%253A%5D&abb=on + if c < '\u{B5}' { + return [c.to_ascii_uppercase(), '\0', '\0']; + } + + lookup(c, &TITLECASE_LUT).or_else(|| lookup(c, &UPPERCASE_LUT)).unwrap_or([c, '\0', '\0']) } static LOWERCASE_LUT: L1Lut = L1Lut { @@ -1150,4 +1167,45 @@ pub mod conversions { }, ], }; + + static TITLECASE_LUT: L1Lut = L1Lut { + l2_luts: [ + L2Lut { + singles: &[ // 26 entries, 156 bytes + (Range::singleton(0x01c4), 1), 
(Range::singleton(0x01c5), 0), + (Range::singleton(0x01c6), -1), (Range::singleton(0x01c7), 1), + (Range::singleton(0x01c8), 0), (Range::singleton(0x01c9), -1), + (Range::singleton(0x01ca), 1), (Range::singleton(0x01cb), 0), + (Range::singleton(0x01cc), -1), (Range::singleton(0x01f1), 1), + (Range::singleton(0x01f2), 0), (Range::singleton(0x01f3), -1), + (Range::step_by_1(0x10d0..=0x10fa), 0), (Range::step_by_1(0x10fd..=0x10ff), 0), + (Range::step_by_1(0x1f80..=0x1f87), 8), (Range::step_by_1(0x1f88..=0x1f8f), 0), + (Range::step_by_1(0x1f90..=0x1f97), 8), (Range::step_by_1(0x1f98..=0x1f9f), 0), + (Range::step_by_1(0x1fa0..=0x1fa7), 8), (Range::step_by_1(0x1fa8..=0x1faf), 0), + (Range::singleton(0x1fb3), 9), (Range::singleton(0x1fbc), 0), (Range::singleton(0x1fc3), 9), + (Range::singleton(0x1fcc), 0), (Range::singleton(0x1ff3), 9), (Range::singleton(0x1ffc), 0), + ], + multis: &[ // 23 entries, 184 bytes + (0x00df, [0x0053, 0x0073, 0x0000]), (0x0587, [0x0535, 0x0582, 0x0000]), + (0x1fb2, [0x1fba, 0x0345, 0x0000]), (0x1fb4, [0x0386, 0x0345, 0x0000]), + (0x1fb7, [0x0391, 0x0342, 0x0345]), (0x1fc2, [0x1fca, 0x0345, 0x0000]), + (0x1fc4, [0x0389, 0x0345, 0x0000]), (0x1fc7, [0x0397, 0x0342, 0x0345]), + (0x1ff2, [0x1ffa, 0x0345, 0x0000]), (0x1ff4, [0x038f, 0x0345, 0x0000]), + (0x1ff7, [0x03a9, 0x0342, 0x0345]), (0xfb00, [0x0046, 0x0066, 0x0000]), + (0xfb01, [0x0046, 0x0069, 0x0000]), (0xfb02, [0x0046, 0x006c, 0x0000]), + (0xfb03, [0x0046, 0x0066, 0x0069]), (0xfb04, [0x0046, 0x0066, 0x006c]), + (0xfb05, [0x0053, 0x0074, 0x0000]), (0xfb06, [0x0053, 0x0074, 0x0000]), + (0xfb13, [0x0544, 0x0576, 0x0000]), (0xfb14, [0x0544, 0x0565, 0x0000]), + (0xfb15, [0x0544, 0x056b, 0x0000]), (0xfb16, [0x054e, 0x0576, 0x0000]), + (0xfb17, [0x0544, 0x056d, 0x0000]), + ], + }, + L2Lut { + singles: &[ // 0 entries, 0 bytes + ], + multis: &[ // 0 entries, 0 bytes + ], + }, + ], + }; } diff --git a/library/coretests/tests/char.rs b/library/coretests/tests/char.rs index f0f6a24429284..aa20585953b7c 
100644 --- a/library/coretests/tests/char.rs +++ b/library/coretests/tests/char.rs @@ -1,5 +1,6 @@ +use std::char::{self, CharCase}; +use std::str; use std::str::FromStr; -use std::{char, str}; #[test] fn test_convert() { @@ -39,6 +40,29 @@ fn test_from_str() { assert!(char::from_str("abc").is_err()); } +#[test] +fn test_is_cased() { + assert!('a'.is_cased()); + assert!('ö'.is_cased()); + assert!('ß'.is_cased()); + assert!('Ü'.is_cased()); + assert!('P'.is_cased()); + assert!('ª'.is_cased()); + assert!(!'攂'.is_cased()); +} + +#[test] +fn test_char_case() { + for c in '\0'..='\u{10FFFF}' { + match c.case() { + None => assert!(!c.is_cased()), + Some(CharCase::Lower) => assert!(c.is_lowercase()), + Some(CharCase::Upper) => assert!(c.is_uppercase()), + Some(CharCase::Title) => assert!(c.is_titlecase()), + } + } +} + #[test] fn test_is_lowercase() { assert!('a'.is_lowercase()); @@ -48,6 +72,17 @@ fn test_is_lowercase() { assert!(!'P'.is_lowercase()); } +#[test] +fn test_is_titlecase() { + assert!('Dž'.is_titlecase()); + assert!('ᾨ'.is_titlecase()); + assert!(!'h'.is_titlecase()); + assert!(!'ä'.is_titlecase()); + assert!(!'ß'.is_titlecase()); + assert!(!'Ö'.is_titlecase()); + assert!(!'T'.is_titlecase()); +} + #[test] fn test_is_uppercase() { assert!(!'h'.is_uppercase()); @@ -57,6 +92,26 @@ fn test_is_uppercase() { assert!('T'.is_uppercase()); } +#[test] +fn titlecase_fast_path() { + for c in '\0'..='\u{01C4}' { + assert!(!(c.is_cased() && !c.is_lowercase() && !c.is_uppercase())) + } +} + +#[test] +fn at_most_one_case() { + for c in '\0'..='\u{10FFFF}' { + assert_eq!( + !c.is_cased() as u8 + + c.is_lowercase() as u8 + + c.is_uppercase() as u8 + + c.is_titlecase() as u8, + 1 + ); + } +} + #[test] fn test_is_whitespace() { assert!(' '.is_whitespace()); diff --git a/library/coretests/tests/lib.rs b/library/coretests/tests/lib.rs index 72112f8b01133..5f7039641dae3 100644 --- a/library/coretests/tests/lib.rs +++ b/library/coretests/tests/lib.rs @@ -111,6 +111,7 @@ 
#![feature(step_trait)] #![feature(str_internals)] #![feature(strict_provenance_lints)] +#![feature(titlecase)] #![feature(trusted_len)] #![feature(trusted_random_access)] #![feature(try_blocks)] diff --git a/library/coretests/tests/unicode.rs b/library/coretests/tests/unicode.rs index 1fae74f0f11ef..a8a221db8f955 100644 --- a/library/coretests/tests/unicode.rs +++ b/library/coretests/tests/unicode.rs @@ -27,17 +27,21 @@ fn test_boolean_property(ranges: &[RangeInclusive], lookup: fn(char) -> bo } #[track_caller] -fn test_case_mapping(ranges: &[(char, [char; 3])], lookup: fn(char) -> [char; 3]) { +fn test_case_mapping( + ranges: &[(char, [char; 3])], + lookup: fn(char) -> [char; 3], + fallback: fn(char) -> [char; 3], +) { let mut start = '\u{80}'; for &(key, val) in ranges { for c in start..key { - assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}"); + assert_eq!(lookup(c), fallback(c), "{c:?}"); } assert_eq!(lookup(key), val, "{key:?}"); start = char::from_u32(key as u32 + 1).unwrap(); } for c in start..=char::MAX { - assert_eq!(lookup(c), [c, '\0', '\0'], "{c:?}"); + assert_eq!(lookup(c), fallback(c), "{c:?}"); } } @@ -45,6 +49,7 @@ fn test_case_mapping(ranges: &[(char, [char; 3])], lookup: fn(char) -> [char; 3] #[cfg_attr(miri, ignore)] // Miri is too slow fn alphabetic() { test_boolean_property(test_data::ALPHABETIC, unicode_data::alphabetic::lookup); + test_boolean_property(test_data::ALPHABETIC, char::is_alphabetic); } #[test] @@ -57,6 +62,7 @@ fn case_ignorable() { #[cfg_attr(miri, ignore)] // Miri is too slow fn cased() { test_boolean_property(test_data::CASED, unicode_data::cased::lookup); + test_boolean_property(test_data::CASED, char::is_cased); } #[test] @@ -69,34 +75,52 @@ fn grapheme_extend() { #[cfg_attr(miri, ignore)] // Miri is too slow fn lowercase() { test_boolean_property(test_data::LOWERCASE, unicode_data::lowercase::lookup); + test_boolean_property(test_data::LOWERCASE, char::is_lowercase); } #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn 
n() { test_boolean_property(test_data::N, unicode_data::n::lookup); + test_boolean_property(test_data::N, char::is_numeric); } #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn uppercase() { test_boolean_property(test_data::UPPERCASE, unicode_data::uppercase::lookup); + test_boolean_property(test_data::UPPERCASE, char::is_uppercase); } #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn white_space() { test_boolean_property(test_data::WHITE_SPACE, unicode_data::white_space::lookup); + test_boolean_property(test_data::WHITE_SPACE, char::is_whitespace); } #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn to_lowercase() { - test_case_mapping(test_data::TO_LOWER, unicode_data::conversions::to_lower); + test_case_mapping(test_data::TO_LOWER, unicode_data::conversions::to_lower, |c| { + [c, '\0', '\0'] + }); } #[test] #[cfg_attr(miri, ignore)] // Miri is too slow fn to_uppercase() { - test_case_mapping(test_data::TO_UPPER, unicode_data::conversions::to_upper); + test_case_mapping(test_data::TO_UPPER, unicode_data::conversions::to_upper, |c| { + [c, '\0', '\0'] + }); +} + +#[test] +#[cfg_attr(miri, ignore)] // Miri is too slow +fn to_titlecase() { + test_case_mapping( + test_data::TO_TITLE, + unicode_data::conversions::to_title, + unicode_data::conversions::to_upper, + ); } diff --git a/library/coretests/tests/unicode/test_data.rs b/library/coretests/tests/unicode/test_data.rs index cfc695475b1f8..3071aedcae07a 100644 --- a/library/coretests/tests/unicode/test_data.rs +++ b/library/coretests/tests/unicode/test_data.rs @@ -2900,3 +2900,81 @@ pub(super) static TO_UPPER: &[(char, [char; 3]); 1554] = &[ ('\u{1e942}', ['\u{1e920}', '\u{0}', '\u{0}']), ('\u{1e943}', ['\u{1e921}', '\u{0}', '\u{0}']), ]; + +#[rustfmt::skip] +pub(super) static TO_TITLE: &[(char, [char; 3]); 135] = &[ + ('\u{df}', ['S', 's', '\u{0}']), ('\u{1c4}', ['\u{1c5}', '\u{0}', '\u{0}']), + ('\u{1c5}', ['\u{1c5}', '\u{0}', '\u{0}']), ('\u{1c6}', ['\u{1c5}', '\u{0}', '\u{0}']), + 
('\u{1c7}', ['\u{1c8}', '\u{0}', '\u{0}']), ('\u{1c8}', ['\u{1c8}', '\u{0}', '\u{0}']), + ('\u{1c9}', ['\u{1c8}', '\u{0}', '\u{0}']), ('\u{1ca}', ['\u{1cb}', '\u{0}', '\u{0}']), + ('\u{1cb}', ['\u{1cb}', '\u{0}', '\u{0}']), ('\u{1cc}', ['\u{1cb}', '\u{0}', '\u{0}']), + ('\u{1f1}', ['\u{1f2}', '\u{0}', '\u{0}']), ('\u{1f2}', ['\u{1f2}', '\u{0}', '\u{0}']), + ('\u{1f3}', ['\u{1f2}', '\u{0}', '\u{0}']), ('\u{587}', ['\u{535}', '\u{582}', '\u{0}']), + ('\u{10d0}', ['\u{10d0}', '\u{0}', '\u{0}']), ('\u{10d1}', ['\u{10d1}', '\u{0}', '\u{0}']), + ('\u{10d2}', ['\u{10d2}', '\u{0}', '\u{0}']), ('\u{10d3}', ['\u{10d3}', '\u{0}', '\u{0}']), + ('\u{10d4}', ['\u{10d4}', '\u{0}', '\u{0}']), ('\u{10d5}', ['\u{10d5}', '\u{0}', '\u{0}']), + ('\u{10d6}', ['\u{10d6}', '\u{0}', '\u{0}']), ('\u{10d7}', ['\u{10d7}', '\u{0}', '\u{0}']), + ('\u{10d8}', ['\u{10d8}', '\u{0}', '\u{0}']), ('\u{10d9}', ['\u{10d9}', '\u{0}', '\u{0}']), + ('\u{10da}', ['\u{10da}', '\u{0}', '\u{0}']), ('\u{10db}', ['\u{10db}', '\u{0}', '\u{0}']), + ('\u{10dc}', ['\u{10dc}', '\u{0}', '\u{0}']), ('\u{10dd}', ['\u{10dd}', '\u{0}', '\u{0}']), + ('\u{10de}', ['\u{10de}', '\u{0}', '\u{0}']), ('\u{10df}', ['\u{10df}', '\u{0}', '\u{0}']), + ('\u{10e0}', ['\u{10e0}', '\u{0}', '\u{0}']), ('\u{10e1}', ['\u{10e1}', '\u{0}', '\u{0}']), + ('\u{10e2}', ['\u{10e2}', '\u{0}', '\u{0}']), ('\u{10e3}', ['\u{10e3}', '\u{0}', '\u{0}']), + ('\u{10e4}', ['\u{10e4}', '\u{0}', '\u{0}']), ('\u{10e5}', ['\u{10e5}', '\u{0}', '\u{0}']), + ('\u{10e6}', ['\u{10e6}', '\u{0}', '\u{0}']), ('\u{10e7}', ['\u{10e7}', '\u{0}', '\u{0}']), + ('\u{10e8}', ['\u{10e8}', '\u{0}', '\u{0}']), ('\u{10e9}', ['\u{10e9}', '\u{0}', '\u{0}']), + ('\u{10ea}', ['\u{10ea}', '\u{0}', '\u{0}']), ('\u{10eb}', ['\u{10eb}', '\u{0}', '\u{0}']), + ('\u{10ec}', ['\u{10ec}', '\u{0}', '\u{0}']), ('\u{10ed}', ['\u{10ed}', '\u{0}', '\u{0}']), + ('\u{10ee}', ['\u{10ee}', '\u{0}', '\u{0}']), ('\u{10ef}', ['\u{10ef}', '\u{0}', '\u{0}']), + ('\u{10f0}', ['\u{10f0}', '\u{0}', 
'\u{0}']), ('\u{10f1}', ['\u{10f1}', '\u{0}', '\u{0}']), + ('\u{10f2}', ['\u{10f2}', '\u{0}', '\u{0}']), ('\u{10f3}', ['\u{10f3}', '\u{0}', '\u{0}']), + ('\u{10f4}', ['\u{10f4}', '\u{0}', '\u{0}']), ('\u{10f5}', ['\u{10f5}', '\u{0}', '\u{0}']), + ('\u{10f6}', ['\u{10f6}', '\u{0}', '\u{0}']), ('\u{10f7}', ['\u{10f7}', '\u{0}', '\u{0}']), + ('\u{10f8}', ['\u{10f8}', '\u{0}', '\u{0}']), ('\u{10f9}', ['\u{10f9}', '\u{0}', '\u{0}']), + ('\u{10fa}', ['\u{10fa}', '\u{0}', '\u{0}']), ('\u{10fd}', ['\u{10fd}', '\u{0}', '\u{0}']), + ('\u{10fe}', ['\u{10fe}', '\u{0}', '\u{0}']), ('\u{10ff}', ['\u{10ff}', '\u{0}', '\u{0}']), + ('\u{1f80}', ['\u{1f88}', '\u{0}', '\u{0}']), ('\u{1f81}', ['\u{1f89}', '\u{0}', '\u{0}']), + ('\u{1f82}', ['\u{1f8a}', '\u{0}', '\u{0}']), ('\u{1f83}', ['\u{1f8b}', '\u{0}', '\u{0}']), + ('\u{1f84}', ['\u{1f8c}', '\u{0}', '\u{0}']), ('\u{1f85}', ['\u{1f8d}', '\u{0}', '\u{0}']), + ('\u{1f86}', ['\u{1f8e}', '\u{0}', '\u{0}']), ('\u{1f87}', ['\u{1f8f}', '\u{0}', '\u{0}']), + ('\u{1f88}', ['\u{1f88}', '\u{0}', '\u{0}']), ('\u{1f89}', ['\u{1f89}', '\u{0}', '\u{0}']), + ('\u{1f8a}', ['\u{1f8a}', '\u{0}', '\u{0}']), ('\u{1f8b}', ['\u{1f8b}', '\u{0}', '\u{0}']), + ('\u{1f8c}', ['\u{1f8c}', '\u{0}', '\u{0}']), ('\u{1f8d}', ['\u{1f8d}', '\u{0}', '\u{0}']), + ('\u{1f8e}', ['\u{1f8e}', '\u{0}', '\u{0}']), ('\u{1f8f}', ['\u{1f8f}', '\u{0}', '\u{0}']), + ('\u{1f90}', ['\u{1f98}', '\u{0}', '\u{0}']), ('\u{1f91}', ['\u{1f99}', '\u{0}', '\u{0}']), + ('\u{1f92}', ['\u{1f9a}', '\u{0}', '\u{0}']), ('\u{1f93}', ['\u{1f9b}', '\u{0}', '\u{0}']), + ('\u{1f94}', ['\u{1f9c}', '\u{0}', '\u{0}']), ('\u{1f95}', ['\u{1f9d}', '\u{0}', '\u{0}']), + ('\u{1f96}', ['\u{1f9e}', '\u{0}', '\u{0}']), ('\u{1f97}', ['\u{1f9f}', '\u{0}', '\u{0}']), + ('\u{1f98}', ['\u{1f98}', '\u{0}', '\u{0}']), ('\u{1f99}', ['\u{1f99}', '\u{0}', '\u{0}']), + ('\u{1f9a}', ['\u{1f9a}', '\u{0}', '\u{0}']), ('\u{1f9b}', ['\u{1f9b}', '\u{0}', '\u{0}']), + ('\u{1f9c}', ['\u{1f9c}', '\u{0}', '\u{0}']), ('\u{1f9d}', 
['\u{1f9d}', '\u{0}', '\u{0}']), + ('\u{1f9e}', ['\u{1f9e}', '\u{0}', '\u{0}']), ('\u{1f9f}', ['\u{1f9f}', '\u{0}', '\u{0}']), + ('\u{1fa0}', ['\u{1fa8}', '\u{0}', '\u{0}']), ('\u{1fa1}', ['\u{1fa9}', '\u{0}', '\u{0}']), + ('\u{1fa2}', ['\u{1faa}', '\u{0}', '\u{0}']), ('\u{1fa3}', ['\u{1fab}', '\u{0}', '\u{0}']), + ('\u{1fa4}', ['\u{1fac}', '\u{0}', '\u{0}']), ('\u{1fa5}', ['\u{1fad}', '\u{0}', '\u{0}']), + ('\u{1fa6}', ['\u{1fae}', '\u{0}', '\u{0}']), ('\u{1fa7}', ['\u{1faf}', '\u{0}', '\u{0}']), + ('\u{1fa8}', ['\u{1fa8}', '\u{0}', '\u{0}']), ('\u{1fa9}', ['\u{1fa9}', '\u{0}', '\u{0}']), + ('\u{1faa}', ['\u{1faa}', '\u{0}', '\u{0}']), ('\u{1fab}', ['\u{1fab}', '\u{0}', '\u{0}']), + ('\u{1fac}', ['\u{1fac}', '\u{0}', '\u{0}']), ('\u{1fad}', ['\u{1fad}', '\u{0}', '\u{0}']), + ('\u{1fae}', ['\u{1fae}', '\u{0}', '\u{0}']), ('\u{1faf}', ['\u{1faf}', '\u{0}', '\u{0}']), + ('\u{1fb2}', ['\u{1fba}', '\u{345}', '\u{0}']), + ('\u{1fb3}', ['\u{1fbc}', '\u{0}', '\u{0}']), ('\u{1fb4}', ['\u{386}', '\u{345}', '\u{0}']), + ('\u{1fb7}', ['\u{391}', '\u{342}', '\u{345}']), + ('\u{1fbc}', ['\u{1fbc}', '\u{0}', '\u{0}']), + ('\u{1fc2}', ['\u{1fca}', '\u{345}', '\u{0}']), + ('\u{1fc3}', ['\u{1fcc}', '\u{0}', '\u{0}']), ('\u{1fc4}', ['\u{389}', '\u{345}', '\u{0}']), + ('\u{1fc7}', ['\u{397}', '\u{342}', '\u{345}']), + ('\u{1fcc}', ['\u{1fcc}', '\u{0}', '\u{0}']), + ('\u{1ff2}', ['\u{1ffa}', '\u{345}', '\u{0}']), + ('\u{1ff3}', ['\u{1ffc}', '\u{0}', '\u{0}']), ('\u{1ff4}', ['\u{38f}', '\u{345}', '\u{0}']), + ('\u{1ff7}', ['\u{3a9}', '\u{342}', '\u{345}']), + ('\u{1ffc}', ['\u{1ffc}', '\u{0}', '\u{0}']), ('\u{fb00}', ['F', 'f', '\u{0}']), + ('\u{fb01}', ['F', 'i', '\u{0}']), ('\u{fb02}', ['F', 'l', '\u{0}']), + ('\u{fb03}', ['F', 'f', 'i']), ('\u{fb04}', ['F', 'f', 'l']), + ('\u{fb05}', ['S', 't', '\u{0}']), ('\u{fb06}', ['S', 't', '\u{0}']), + ('\u{fb13}', ['\u{544}', '\u{576}', '\u{0}']), + ('\u{fb14}', ['\u{544}', '\u{565}', '\u{0}']), + ('\u{fb15}', ['\u{544}', '\u{56b}', 
'\u{0}']), + ('\u{fb16}', ['\u{54e}', '\u{576}', '\u{0}']), + ('\u{fb17}', ['\u{544}', '\u{56d}', '\u{0}']), +]; diff --git a/src/tools/unicode-table-generator/src/case_mapping.rs b/src/tools/unicode-table-generator/src/case_mapping.rs index 6345cb8ef2532..b7b385542ef53 100644 --- a/src/tools/unicode-table-generator/src/case_mapping.rs +++ b/src/tools/unicode-table-generator/src/case_mapping.rs @@ -48,7 +48,7 @@ use std::ops::RangeInclusive; use crate::fmt_helpers::Hex; use crate::{UnicodeData, fmt_list}; -pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [(String, usize); 2]) { +pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [(String, usize); 3]) { let mut file = String::new(); file.push_str("\n\n"); @@ -59,7 +59,10 @@ pub(crate) fn generate_case_mapping(data: &UnicodeData) -> (String, [(String, us file.push_str("\n\n"); let (upper_tables, upper_desc, upper_size) = generate_tables("UPPER", &data.to_upper); file.push_str(&upper_tables); - (file, [(lower_desc, lower_size), (upper_desc, upper_size)]) + file.push_str("\n\n"); + let (title_tables, title_desc, title_size) = generate_tables("TITLE", &data.to_title); + file.push_str(&title_tables); + (file, [(lower_desc, lower_size), (upper_desc, upper_size), (title_desc, title_size)]) } // So far, only planes 0 and 1 (Basic Multilingual Plane and Supplementary @@ -336,14 +339,10 @@ unsafe fn reconstruct(plane: u16, low: u16) -> char { unsafe { char::from_u32_unchecked(((plane as u32) << 16) | (low as u32)) } } -fn lookup(input: char, ascii: char, l1_lut: &L1Lut) -> [char; 3] { - if input.is_ascii() { - return [ascii, '\0', '\0']; - } - +fn lookup(input: char, l1_lut: &L1Lut) -> Option<[char; 3]> { let (input_high, input_low) = deconstruct(input); let Some(l2_lut) = l1_lut.l2_luts.get(input_high as usize) else { - return [input, '\0', '\0']; + return None; }; let idx = l2_lut.singles.binary_search_by(|(range, _)| { @@ -357,6 +356,7 @@ fn lookup(input: char, ascii: char, l1_lut: 
&L1Lut) -> [char; 3] { Ordering::Equal } }); + if let Ok(idx) = idx { // SAFETY: binary search guarantees that the index is in bounds. let &(range, output_delta) = unsafe { l2_lut.singles.get_unchecked(idx) }; @@ -365,7 +365,7 @@ fn lookup(input: char, ascii: char, l1_lut: &L1Lut) -> [char; 3] { let output_low = input_low.wrapping_add_signed(output_delta); // SAFETY: Table data are guaranteed to be valid Unicode. let output = unsafe { reconstruct(input_high, output_low) }; - return [output, '\0', '\0']; + return Some([output, '\0', '\0']); } }; @@ -374,17 +374,36 @@ fn lookup(input: char, ascii: char, l1_lut: &L1Lut) -> [char; 3] { let &(_, output_lows) = unsafe { l2_lut.multis.get_unchecked(idx) }; // SAFETY: Table data are guaranteed to be valid Unicode. let output = output_lows.map(|output_low| unsafe { reconstruct(input_high, output_low) }); - return output; + return Some(output); }; - [input, '\0', '\0'] + None } pub fn to_lower(c: char) -> [char; 3] { - lookup(c, c.to_ascii_lowercase(), &LOWERCASE_LUT) + // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Lowercased%253A%5D-%5B%253AASCII%253A%5D&abb=on + if c < '\u{C0}' { + return [c.to_ascii_lowercase(), '\0', '\0']; + } + + lookup(c, &LOWERCASE_LUT).unwrap_or([c, '\0', '\0']) } pub fn to_upper(c: char) -> [char; 3] { - lookup(c, c.to_ascii_uppercase(), &UPPERCASE_LUT) + // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Uppercased%253A%5D-%5B%253AASCII%253A%5D&abb=on + if c < '\u{B5}' { + return [c.to_ascii_uppercase(), '\0', '\0']; + } + + lookup(c, &UPPERCASE_LUT).unwrap_or([c, '\0', '\0']) +} + +pub fn to_title(c: char) -> [char; 3] { + // https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%253AChanges_When_Titlecased%253A%5D-%5B%253AASCII%253A%5D&abb=on + if c < '\u{B5}' { + return [c.to_ascii_uppercase(), '\0', '\0']; + } + + lookup(c, &TITLECASE_LUT).or_else(|| lookup(c, &UPPERCASE_LUT)).unwrap_or([c, '\0', '\0']) } "; diff 
--git a/src/tools/unicode-table-generator/src/main.rs b/src/tools/unicode-table-generator/src/main.rs index 4e7c40efc1234..cdd137ff56a52 100644 --- a/src/tools/unicode-table-generator/src/main.rs +++ b/src/tools/unicode-table-generator/src/main.rs @@ -100,32 +100,25 @@ static PROPERTIES: &[&str] = &[ struct UnicodeData { ranges: Vec<(&'static str, Vec>)>, + /// Only stores mappings that are not to self to_upper: BTreeMap, + /// Only stores mappings that differ from `to_upper` + to_title: BTreeMap, + /// Only stores mappings that are not to self to_lower: BTreeMap, } -fn to_mapping(origin: u32, codepoints: Vec) -> Option<[u32; 3]> { - let mut a = None; - let mut b = None; - let mut c = None; - - for codepoint in codepoints { - if origin == codepoint.value() { - return None; - } - - if a.is_none() { - a = Some(codepoint.value()); - } else if b.is_none() { - b = Some(codepoint.value()); - } else if c.is_none() { - c = Some(codepoint.value()); - } else { - panic!("more than 3 mapped codepoints") - } +fn to_mapping( + if_different_from: &[ucd_parse::Codepoint], + codepoints: &[ucd_parse::Codepoint], +) -> Option<[u32; 3]> { + if codepoints == if_different_from { + return None; } - Some([a.unwrap(), b.unwrap_or(0), c.unwrap_or(0)]) + let mut ret = [ucd_parse::Codepoint::default(); 3]; + ret[0..codepoints.len()].copy_from_slice(codepoints); + Some(ret.map(ucd_parse::Codepoint::value)) } static UNICODE_DIRECTORY: &str = "unicode-downloads"; @@ -145,8 +138,7 @@ fn load_data() -> UnicodeData { } } - let mut to_lower = BTreeMap::new(); - let mut to_upper = BTreeMap::new(); + let [mut to_lower, mut to_upper, mut to_title] = [const { BTreeMap::new() }; 3]; for row in ucd_parse::UnicodeDataExpander::new( ucd_parse::parse::<_, ucd_parse::UnicodeData>(&UNICODE_DIRECTORY).unwrap(), ) { @@ -172,6 +164,11 @@ fn load_data() -> UnicodeData { { to_upper.insert(row.codepoint.value(), [mapped.value(), 0, 0]); } + if let Some(mapped) = row.simple_titlecase_mapping + && Some(mapped) != 
row.simple_uppercase_mapping + { + to_title.insert(row.codepoint.value(), [mapped.value(), 0, 0]); + } } for row in ucd_parse::parse::<_, ucd_parse::SpecialCaseMapping>(&UNICODE_DIRECTORY).unwrap() { @@ -181,12 +178,15 @@ fn load_data() -> UnicodeData { } let key = row.codepoint.value(); - if let Some(lower) = to_mapping(key, row.lowercase) { + if let Some(lower) = to_mapping(&[row.codepoint], &row.lowercase) { to_lower.insert(key, lower); } - if let Some(upper) = to_mapping(key, row.uppercase) { + if let Some(upper) = to_mapping(&[row.codepoint], &row.uppercase) { to_upper.insert(key, upper); } + if let Some(title) = to_mapping(&row.uppercase, &row.titlecase) { + to_title.insert(key, title); + } } // Filter out ASCII codepoints. @@ -207,7 +207,7 @@ fn load_data() -> UnicodeData { .collect(); properties.sort_by_key(|p| p.0); - UnicodeData { ranges: properties, to_lower, to_upper } + UnicodeData { ranges: properties, to_lower, to_title, to_upper } } fn main() { @@ -259,7 +259,7 @@ fn main() { total_bytes += emitter.bytes_used; } let (conversions, sizes) = case_mapping::generate_case_mapping(&unicode_data); - for (name, (desc, size)) in ["to_lower", "to_upper"].iter().zip(sizes) { + for (name, (desc, size)) in ["to_lower", "to_upper", "to_title"].iter().zip(sizes) { table_file.push_str(&format!("// {:16}: {:5} bytes, {desc}\n", name, size,)); total_bytes += size; } @@ -369,7 +369,11 @@ pub(super) static {prop_upper}: &[RangeInclusive; {is_true_len}] = &[{is_t .unwrap(); } - for (name, lut) in ["TO_LOWER", "TO_UPPER"].iter().zip([&data.to_lower, &data.to_upper]) { + for (name, lut) in ["TO_LOWER", "TO_UPPER", "TO_TITLE"].iter().zip([ + &data.to_lower, + &data.to_upper, + &data.to_title, + ]) { let lut = lut .iter() .map(|(key, values)| { From e19ad08c6dff5116ae99dd1bed43f151757640f8 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Fri, 20 Mar 2026 20:39:28 +1100 Subject: [PATCH 8/9] Use enums to clarify `DepNodeColorMap` color marking When a function's documentation has 
to explain the meaning of nested results and options, then it is often a good candidate for using a custom result enum instead. This commit also renames `DepNodeColorMap::try_mark` to `try_set_color`, to make it more distinct from the similarly-named `DepGraph::try_mark_green`. The difference is that `try_mark_green` is a higher-level operation that tries to determine whether a node _can_ be marked green, whereas `try_set_color` is a lower-level operation that actually records a color for the node. --- compiler/rustc_middle/src/dep_graph/graph.rs | 51 ++++++++++++++----- .../rustc_middle/src/dep_graph/serialized.rs | 25 +++++---- typos.toml | 1 + 3 files changed, 52 insertions(+), 25 deletions(-) diff --git a/compiler/rustc_middle/src/dep_graph/graph.rs b/compiler/rustc_middle/src/dep_graph/graph.rs index d0d7d581b4395..7823e9d62f1ff 100644 --- a/compiler/rustc_middle/src/dep_graph/graph.rs +++ b/compiler/rustc_middle/src/dep_graph/graph.rs @@ -1415,28 +1415,29 @@ impl DepNodeColorMap { if value <= DepNodeIndex::MAX_AS_U32 { Some(DepNodeIndex::from_u32(value)) } else { None } } - /// This tries to atomically mark a node green and assign `index` as the new - /// index if `green` is true, otherwise it will try to atomicaly mark it red. + /// Atomically sets the color of a previous-session dep node to either green + /// or red, if it has not already been colored. /// - /// This returns `Ok` if `index` gets assigned or the node is marked red, otherwise it returns - /// the already allocated index in `Err` if it is green already. If it was already - /// red, `Err(None)` is returned. + /// If the node already has a color, the new color is ignored, and the + /// return value indicates the existing color. 
#[inline(always)] - pub(super) fn try_mark( + pub(super) fn try_set_color( &self, prev_index: SerializedDepNodeIndex, - index: DepNodeIndex, - green: bool, - ) -> Result<(), Option> { - let value = &self.values[prev_index]; - match value.compare_exchange( + color: DesiredColor, + ) -> TrySetColorResult { + match self.values[prev_index].compare_exchange( COMPRESSED_UNKNOWN, - if green { index.as_u32() } else { COMPRESSED_RED }, + match color { + DesiredColor::Red => COMPRESSED_RED, + DesiredColor::Green { index } => index.as_u32(), + }, Ordering::Relaxed, Ordering::Relaxed, ) { - Ok(_) => Ok(()), - Err(v) => Err(if v == COMPRESSED_RED { None } else { Some(DepNodeIndex::from_u32(v)) }), + Ok(_) => TrySetColorResult::Success, + Err(COMPRESSED_RED) => TrySetColorResult::AlreadyRed, + Err(index) => TrySetColorResult::AlreadyGreen { index: DepNodeIndex::from_u32(index) }, } } @@ -1463,6 +1464,28 @@ impl DepNodeColorMap { } } +/// The color that [`DepNodeColorMap::try_set_color`] should try to apply to a node. +#[derive(Clone, Copy, Debug)] +pub(super) enum DesiredColor { + /// Try to mark the node red. + Red, + /// Try to mark the node green, associating it with a current-session node index. + Green { index: DepNodeIndex }, +} + +/// Return value of [`DepNodeColorMap::try_set_color`], indicating success or failure, +/// and (on failure) what the existing color is. +#[derive(Clone, Copy, Debug)] +pub(super) enum TrySetColorResult { + /// The [`DesiredColor`] was freshly applied to the node. + Success, + /// Coloring failed because the node was already marked red. + AlreadyRed, + /// Coloring failed because the node was already marked green, + /// and corresponds to node `index` in the current-session dep graph. 
+ AlreadyGreen { index: DepNodeIndex }, +} + #[inline(never)] #[cold] pub(crate) fn print_markframe_trace(graph: &DepGraph, frame: &MarkFrame<'_>) { diff --git a/compiler/rustc_middle/src/dep_graph/serialized.rs b/compiler/rustc_middle/src/dep_graph/serialized.rs index 8a4ac4b5e5acd..7a8d25d367447 100644 --- a/compiler/rustc_middle/src/dep_graph/serialized.rs +++ b/compiler/rustc_middle/src/dep_graph/serialized.rs @@ -58,7 +58,7 @@ use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use rustc_session::Session; use tracing::{debug, instrument}; -use super::graph::{CurrentDepGraph, DepNodeColorMap}; +use super::graph::{CurrentDepGraph, DepNodeColorMap, DesiredColor, TrySetColorResult}; use super::retained::RetainedDepGraph; use super::{DepKind, DepNode, DepNodeIndex}; use crate::dep_graph::edges::EdgesVec; @@ -905,13 +905,14 @@ impl GraphEncoder { let mut local = self.status.local.borrow_mut(); let index = self.status.next_index(&mut *local); + let color = if is_green { DesiredColor::Green { index } } else { DesiredColor::Red }; - // Use `try_mark` to avoid racing when `send_promoted` is called concurrently + // Use `try_set_color` to avoid racing when `send_promoted` is called concurrently // on the same index. - match colors.try_mark(prev_index, index, is_green) { - Ok(()) => (), - Err(None) => panic!("dep node {:?} is unexpectedly red", prev_index), - Err(Some(dep_node_index)) => return dep_node_index, + match colors.try_set_color(prev_index, color) { + TrySetColorResult::Success => {} + TrySetColorResult::AlreadyRed => panic!("dep node {prev_index:?} is unexpectedly red"), + TrySetColorResult::AlreadyGreen { index } => return index, } self.status.bump_index(&mut *local); @@ -923,7 +924,8 @@ impl GraphEncoder { /// from the previous dep graph and expects all edges to already have a new dep node index /// assigned. /// - /// This will also ensure the dep node is marked green if `Some` is returned. 
+ /// Tries to mark the dep node green, and returns Some if it is now green, + /// or None if it had already been concurrently marked red. #[inline] pub(crate) fn send_promoted( &self, @@ -935,10 +937,10 @@ impl GraphEncoder { let mut local = self.status.local.borrow_mut(); let index = self.status.next_index(&mut *local); - // Use `try_mark_green` to avoid racing when `send_promoted` or `send_and_color` + // Use `try_set_color` to avoid racing when `send_promoted` or `send_and_color` // is called concurrently on the same index. - match colors.try_mark(prev_index, index, true) { - Ok(()) => { + match colors.try_set_color(prev_index, DesiredColor::Green { index }) { + TrySetColorResult::Success => { self.status.bump_index(&mut *local); self.status.encode_promoted_node( index, @@ -949,7 +951,8 @@ impl GraphEncoder { ); Some(index) } - Err(dep_node_index) => dep_node_index, + TrySetColorResult::AlreadyRed => None, + TrySetColorResult::AlreadyGreen { index } => Some(index), } } diff --git a/typos.toml b/typos.toml index 82e2b98f2e49c..8e14ce58dcb4f 100644 --- a/typos.toml +++ b/typos.toml @@ -48,6 +48,7 @@ unstalled = "unstalled" # short for un-stalled # the non-empty form can be automatically fixed by `--bless`. # # tidy-alphabetical-start +atomicaly = "atomically" definitinon = "definition" dependy = "" similarlty = "similarity" From 1fec51c446893ab97a04ba04ea896d29c88e421d Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sun, 22 Mar 2026 15:10:46 +1100 Subject: [PATCH 9/9] Remove `DepNodeColorMap::insert_red` This method is only used to initialize the always-red node, which can be done with `try_set_color` instead.
--- compiler/rustc_middle/src/dep_graph/graph.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_middle/src/dep_graph/graph.rs b/compiler/rustc_middle/src/dep_graph/graph.rs index 7823e9d62f1ff..0f50abb827de2 100644 --- a/compiler/rustc_middle/src/dep_graph/graph.rs +++ b/compiler/rustc_middle/src/dep_graph/graph.rs @@ -164,9 +164,10 @@ impl DepGraph { ); assert_eq!(red_node_index, DepNodeIndex::FOREVER_RED_NODE); if prev_graph_node_count > 0 { - colors.insert_red(SerializedDepNodeIndex::from_u32( - DepNodeIndex::FOREVER_RED_NODE.as_u32(), - )); + let prev_index = + const { SerializedDepNodeIndex::from_u32(DepNodeIndex::FOREVER_RED_NODE.as_u32()) }; + let result = colors.try_set_color(prev_index, DesiredColor::Red); + assert_matches!(result, TrySetColorResult::Success); } DepGraph { @@ -1455,13 +1456,6 @@ impl DepNodeColorMap { DepNodeColor::Unknown } } - - #[inline] - pub(super) fn insert_red(&self, index: SerializedDepNodeIndex) { - let value = self.values[index].swap(COMPRESSED_RED, Ordering::Release); - // Sanity check for duplicate nodes - assert_eq!(value, COMPRESSED_UNKNOWN, "tried to color an already colored node as red"); - } } /// The color that [`DepNodeColorMap::try_set_color`] should try to apply to a node.