From 05bc0ef24d9b69cc4fd058afe4e80290a5307911 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Fri, 2 Jun 2017 11:28:48 -0700 Subject: [PATCH 01/17] [GSB] Eliminate PotentialArchetype::NestedTypeUpdate. NestedTypeUpdate was mostly just the internal name for ArchetypeResolutionKind, but the translation was a bit lossy and there was no point in having separate enums. Standardize on ArchetypeResolutionKind, adding a new case (WellFormed) to capture the idea that we can create a new potential archetype only when we know there is a nested type with that name---and avoid creating unresolved potential archetypes. (cherry picked from commit fafeec0037691b11c1082da6e4c400a6cad4bf8d) --- include/swift/AST/GenericSignatureBuilder.h | 23 ++---- lib/AST/GenericSignatureBuilder.cpp | 86 ++++++++++----------- 2 files changed, 49 insertions(+), 60 deletions(-) diff --git a/include/swift/AST/GenericSignatureBuilder.h b/include/swift/AST/GenericSignatureBuilder.h index 86063c70ac0b5..e2d2582f6938c 100644 --- a/include/swift/AST/GenericSignatureBuilder.h +++ b/include/swift/AST/GenericSignatureBuilder.h @@ -74,6 +74,11 @@ enum class ArchetypeResolutionKind { /// Only create a new potential archetype to describe this dependent type /// if it is already known. AlreadyKnown, + + /// Only create a potential archetype when it is well-formed (i.e., we know + /// that there is a nested type with that name), but (unlike \c AlreadyKnown) + /// allow the creation of a new potential archetype. + WellFormed, }; /// \brief Collects a set of requirements of generic parameters, both explicitly @@ -1577,18 +1582,6 @@ class GenericSignatureBuilder::PotentialArchetype { PotentialArchetype *getNestedType(TypeDecl *concreteDecl, GenericSignatureBuilder &builder); - /// Describes the kind of update that is performed. - enum class NestedTypeUpdate { - /// Resolve an existing potential archetype, but don't create a new - /// one if not present. 
- ResolveExisting, - /// If this potential archetype is missing, create it. - AddIfMissing, - /// If this potential archetype is missing and would be a better anchor, - /// create it. - AddIfBetterAnchor, - }; - /// \brief Retrieve (or create) a nested type that is the current best /// nested archetype anchor (locally) with the given name. /// @@ -1597,7 +1590,7 @@ class GenericSignatureBuilder::PotentialArchetype { PotentialArchetype *getNestedArchetypeAnchor( Identifier name, GenericSignatureBuilder &builder, - NestedTypeUpdate kind = NestedTypeUpdate::AddIfMissing); + ArchetypeResolutionKind kind); /// Update the named nested type when we know this type conforms to the given /// protocol. @@ -1607,7 +1600,7 @@ class GenericSignatureBuilder::PotentialArchetype { /// a potential archetype should not be created if it's missing. PotentialArchetype *updateNestedTypeForConformance( PointerUnion type, - NestedTypeUpdate kind); + ArchetypeResolutionKind kind); /// Update the named nested type when we know this type conforms to the given /// protocol. @@ -1618,7 +1611,7 @@ class GenericSignatureBuilder::PotentialArchetype { PotentialArchetype *updateNestedTypeForConformance( Identifier name, ProtocolDecl *protocol, - NestedTypeUpdate kind); + ArchetypeResolutionKind kind); /// \brief Retrieve (or build) the type corresponding to the potential /// archetype within the given generic environment. diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 3b653df07e976..f6935aad4b702 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1385,7 +1385,7 @@ bool PotentialArchetype::addConformance(ProtocolDecl *proto, // Resolve any existing nested types that need it. 
for (auto &nested : NestedTypes) { (void)updateNestedTypeForConformance(nested.first, proto, - NestedTypeUpdate::ResolveExisting); + ArchetypeResolutionKind::AlreadyKnown); } return true; @@ -1544,7 +1544,7 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( auto parentAnchor = parent->getArchetypeAnchor(builder); anchor = parentAnchor->getNestedArchetypeAnchor( getNestedName(), builder, - NestedTypeUpdate::ResolveExisting); + ArchetypeResolutionKind::AlreadyKnown); // FIXME: Hack for cases where we couldn't resolve the nested type. if (!anchor) @@ -1669,27 +1669,28 @@ PotentialArchetype *PotentialArchetype::getNestedType( // Retrieve the nested archetype anchor, which is the best choice (so far) // for this nested type. - return getNestedArchetypeAnchor(nestedName, builder); + return getNestedArchetypeAnchor(nestedName, builder, + ArchetypeResolutionKind::AlwaysPartial); } PotentialArchetype *PotentialArchetype::getNestedType( AssociatedTypeDecl *assocType, GenericSignatureBuilder &builder) { return updateNestedTypeForConformance(assocType, - NestedTypeUpdate::AddIfMissing); + ArchetypeResolutionKind::WellFormed); } PotentialArchetype *PotentialArchetype::getNestedType( TypeDecl *getConcreteTypeDecl, GenericSignatureBuilder &builder) { return updateNestedTypeForConformance(getConcreteTypeDecl, - NestedTypeUpdate::AddIfMissing); + ArchetypeResolutionKind::WellFormed); } PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( Identifier name, GenericSignatureBuilder &builder, - NestedTypeUpdate kind) { + ArchetypeResolutionKind kind) { // Look for the best associated type or concrete type within the protocols // we know about. AssociatedTypeDecl *bestAssocType = nullptr; @@ -1729,8 +1730,9 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // If we found an associated type, use it. 
PotentialArchetype *resultPA = nullptr; if (bestAssocType) { - resultPA = updateNestedTypeForConformance(bestAssocType, - NestedTypeUpdate::AddIfMissing); + resultPA = updateNestedTypeForConformance( + bestAssocType, + ArchetypeResolutionKind::WellFormed); } // If we have an associated type, drop any concrete decls that aren't in @@ -1770,8 +1772,9 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // Update for all of the concrete decls with this name, which will introduce // various same-type constraints. for (auto concreteDecl : concreteDecls) { - auto concreteDeclPA = updateNestedTypeForConformance(concreteDecl, - NestedTypeUpdate::AddIfMissing); + auto concreteDeclPA = updateNestedTypeForConformance( + concreteDecl, + ArchetypeResolutionKind::WellFormed); if (!resultPA && concreteDecl == bestConcreteDecl) resultPA = concreteDeclPA; } @@ -1781,11 +1784,17 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // Check whether we can add a missing nested type for this case. switch (kind) { - case NestedTypeUpdate::AddIfBetterAnchor: - case NestedTypeUpdate::AddIfMissing: + case ArchetypeResolutionKind::AlwaysPartial: + case ArchetypeResolutionKind::CompleteWellFormed: + // FIXME: CompleteWellFormed should operate the same as WellFormed here. break; - case NestedTypeUpdate::ResolveExisting: + case ArchetypeResolutionKind::WellFormed: + if (!bestAssocType && !bestConcreteDecl) + return nullptr; + break; + + case ArchetypeResolutionKind::AlreadyKnown: // Don't add a new type; return nullptr; } @@ -1812,9 +1821,9 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( - Identifier name, - ProtocolDecl *proto, - NestedTypeUpdate kind) { + Identifier name, + ProtocolDecl *proto, + ArchetypeResolutionKind kind) { /// Determine whether there is an associated type or concrete type with this /// name in this protocol. If not, there's nothing to do. 
AssociatedTypeDecl *assocType = nullptr; @@ -1843,7 +1852,7 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( PointerUnion type, - NestedTypeUpdate kind) { + ArchetypeResolutionKind kind) { auto *assocType = type.dyn_cast(); auto *concreteDecl = type.dyn_cast(); if (!assocType && !concreteDecl) @@ -1895,13 +1904,9 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( // If we don't have a result potential archetype yet, we may need to add one. if (!resultPA) { switch (kind) { - case NestedTypeUpdate::AddIfBetterAnchor: - // FIXME: The loop above should have kept track of whether this type - // would make a better anchor, so we can bail out here if the answer is - // "no". - LLVM_FALLTHROUGH; - - case NestedTypeUpdate::AddIfMissing: { + case ArchetypeResolutionKind::AlwaysPartial: + case ArchetypeResolutionKind::CompleteWellFormed: + case ArchetypeResolutionKind::WellFormed: { if (assocType) resultPA = new PotentialArchetype(this, assocType); else @@ -1937,7 +1942,7 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( break; } - case NestedTypeUpdate::ResolveExisting: + case ArchetypeResolutionKind::AlreadyKnown: break; } } @@ -2398,25 +2403,12 @@ PotentialArchetype *GenericSignatureBuilder::resolveArchetype( if (!base) return nullptr; - // Figure out what kind of nested type update we want. - typedef PotentialArchetype::NestedTypeUpdate NestedTypeUpdate; - NestedTypeUpdate updateKind; - switch (resolutionKind) { - case ArchetypeResolutionKind::AlreadyKnown: - updateKind = NestedTypeUpdate::ResolveExisting; - break; - - case ArchetypeResolutionKind::AlwaysPartial: - case ArchetypeResolutionKind::CompleteWellFormed: - updateKind = NestedTypeUpdate::AddIfMissing; - break; - } - // If we know the associated type already, get that specific type. 
if (auto assocType = dependentMember->getAssocType()) - return base->updateNestedTypeForConformance(assocType, updateKind); + return base->updateNestedTypeForConformance(assocType, resolutionKind); // Resolve based on name alone. + // FIXME: Pass through the resolution kind? auto name = dependentMember->getName(); switch (resolutionKind) { case ArchetypeResolutionKind::AlreadyKnown: { @@ -2429,7 +2421,8 @@ PotentialArchetype *GenericSignatureBuilder::resolveArchetype( case ArchetypeResolutionKind::AlwaysPartial: case ArchetypeResolutionKind::CompleteWellFormed: - return base->getNestedArchetypeAnchor(name, *this, updateKind); + case ArchetypeResolutionKind::WellFormed: + return base->getNestedArchetypeAnchor(name, *this, resolutionKind); } } @@ -2874,7 +2867,7 @@ ConstraintResult GenericSignatureBuilder::resolveUnresolvedType( parentPA->getNestedArchetypeAnchor( pa->getNestedName(), *this, - PotentialArchetype::NestedTypeUpdate::ResolveExisting); + ArchetypeResolutionKind::WellFormed); if (resolvedPA) { assert(!pa->isUnresolved() && "This type must have been resolved"); return ConstraintResult::Resolved; @@ -2997,8 +2990,9 @@ void GenericSignatureBuilder::updateSuperclass( for (auto &nested : T->getNestedTypes()) { if (nested.second.empty()) continue; if (nested.second.front()->isUnresolved()) { - (void)T->getNestedArchetypeAnchor(nested.first, *this, - PotentialArchetype::NestedTypeUpdate::ResolveExisting); + (void)T->getNestedArchetypeAnchor( + nested.first, *this, + ArchetypeResolutionKind::AlreadyKnown); } } }; @@ -4536,7 +4530,9 @@ static PotentialArchetype *getLocalAnchor(PotentialArchetype *pa, if (!parent) return pa; auto parentAnchor = getLocalAnchor(parent, builder); - return parentAnchor->getNestedArchetypeAnchor(pa->getNestedName(), builder); + return parentAnchor->getNestedArchetypeAnchor( + pa->getNestedName(), builder, + ArchetypeResolutionKind::AlwaysPartial); } /// Computes the ordered set of archetype anchors required to form a minimum From 
830da217c29704aeb894752c765ab692573953df Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Tue, 6 Jun 2017 11:03:47 -0700 Subject: [PATCH 02/17] [GSB] Separate out a "structurally derived" requirement source kind. Rather than abusing the "superclass" requirement source with a null protocol conformance, introduce a separate "structurally derived" requirement source kind for structurally-derived requirements that don't need any additional information, e.g., the class layout requirement derived from a superclass requirement. (cherry picked from commit ffea1b35ca27f78ebd1e11391b2474a4165a8ee7) --- include/swift/AST/GenericSignatureBuilder.h | 21 ++++++++++ lib/AST/GenericSignatureBuilder.cpp | 45 ++++++++++++++++++++- test/Generics/superclass_constraint.swift | 4 +- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/include/swift/AST/GenericSignatureBuilder.h b/include/swift/AST/GenericSignatureBuilder.h index e2d2582f6938c..aa408da080f2a 100644 --- a/include/swift/AST/GenericSignatureBuilder.h +++ b/include/swift/AST/GenericSignatureBuilder.h @@ -789,6 +789,10 @@ class GenericSignatureBuilder::RequirementSource final /// This stores the \c ProtocolConformance* used to resolve the /// requirement. Concrete, + + /// A requirement that was resolved based on structural derivation from + /// another requirement. + Derived, }; /// The kind of requirement source. @@ -797,6 +801,7 @@ class GenericSignatureBuilder::RequirementSource final private: /// The kind of storage we have. 
enum class StorageKind : uint8_t { + None, RootArchetype, StoredType, ProtocolConformance, @@ -844,6 +849,7 @@ class GenericSignatureBuilder::RequirementSource final case Superclass: case Parent: case Concrete: + case Derived: return 0; } @@ -887,6 +893,7 @@ class GenericSignatureBuilder::RequirementSource final case Superclass: case Parent: case Concrete: + case Derived: return false; } @@ -961,6 +968,16 @@ class GenericSignatureBuilder::RequirementSource final storage.assocType = assocType; } + RequirementSource(Kind kind, const RequirementSource *parent) + : kind(kind), storageKind(StorageKind::None), + hasTrailingWrittenRequirementLoc(false), + usesRequirementSignature(false), parent(parent) { + assert((static_cast(parent) != isRootKind(kind)) && + "Root RequirementSource should not have parent (or vice versa)"); + assert(isAcceptableStorageKind(kind, storageKind) && + "RequirementSource kind/storageKind mismatch"); + } + public: /// Retrieve an abstract requirement source. static const RequirementSource *forAbstract(PotentialArchetype *root); @@ -1017,6 +1034,10 @@ class GenericSignatureBuilder::RequirementSource final const RequirementSource *viaParent(GenericSignatureBuilder &builder, AssociatedTypeDecl *assocType) const; + /// A constraint source that describes a constraint that is structurally + /// derived from another constraint but does not require further information. + const RequirementSource *viaDerived(GenericSignatureBuilder &builder) const; + /// Retrieve the root requirement source. 
const RequirementSource *getRoot() const; diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index f6935aad4b702..523486a52cc94 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -118,6 +118,7 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::StoredType: case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: + case StorageKind::None: return false; } @@ -129,6 +130,7 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::RootArchetype: case StorageKind::StoredType: case StorageKind::ProtocolConformance: + case StorageKind::None: return false; } @@ -141,6 +143,7 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::RootArchetype: case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: + case StorageKind::None: return false; } @@ -152,6 +155,19 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::RootArchetype: case StorageKind::StoredType: + case StorageKind::AssociatedTypeDecl: + case StorageKind::None: + return false; + } + + case Derived: + switch (storageKind) { + case StorageKind::None: + return true; + + case StorageKind::RootArchetype: + case StorageKind::StoredType: + case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: return false; } @@ -163,6 +179,9 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, const void *RequirementSource::getOpaqueStorage1() const { switch (storageKind) { + case StorageKind::None: + return nullptr; + case StorageKind::RootArchetype: return storage.rootArchetype; @@ -213,6 +232,7 @@ bool RequirementSource::isInferredRequirement(bool includeQuietInferred) const { case ProtocolRequirement: case RequirementSignatureSelf: case Superclass: + case Derived: break; } } @@ -238,6 +258,7 @@ bool RequirementSource::isDerivedRequirement() const { case Superclass: case Concrete: case 
RequirementSignatureSelf: + case Derived: return true; case ProtocolRequirement: @@ -286,6 +307,7 @@ bool RequirementSource::isSelfDerivedSource(PotentialArchetype *pa, case RequirementSource::NestedTypeNameMatch: case RequirementSource::Concrete: case RequirementSource::Superclass: + case RequirementSource::Derived: return false; } }) == nullptr; @@ -395,6 +417,7 @@ bool RequirementSource::isSelfDerivedConformance( case Concrete: case Superclass: case Parent: + case Derived: return false; case Explicit: case Inferred: @@ -548,6 +571,14 @@ const RequirementSource *RequirementSource::viaParent( 0, WrittenRequirementLoc()); } +const RequirementSource *RequirementSource::viaDerived( + GenericSignatureBuilder &builder) const { + REQUIREMENT_SOURCE_FACTORY_BODY( + (nodeID, Derived, this, nullptr, nullptr, nullptr), + (Derived, this), + 0, WrittenRequirementLoc()); +} + #undef REQUIREMENT_SOURCE_FACTORY_BODY const RequirementSource *RequirementSource::getRoot() const { @@ -602,6 +633,7 @@ RequirementSource::visitPotentialArchetypesAlongPath( case RequirementSource::Concrete: case RequirementSource::Superclass: + case RequirementSource::Derived: return parent->visitPotentialArchetypesAlongPath(visitor); case RequirementSource::ProtocolRequirement: @@ -618,6 +650,7 @@ RequirementSource::visitPotentialArchetypesAlongPath( Type RequirementSource::getStoredType() const { switch (storageKind) { + case StorageKind::None: case StorageKind::RootArchetype: case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: @@ -632,6 +665,9 @@ Type RequirementSource::getStoredType() const { ProtocolDecl *RequirementSource::getProtocolDecl() const { switch (storageKind) { + case StorageKind::None: + return nullptr; + case StorageKind::RootArchetype: if (kind == RequirementSignatureSelf) return getTrailingObjects()[0]; @@ -808,6 +844,10 @@ void RequirementSource::print(llvm::raw_ostream &out, case Superclass: out << "Superclass"; break; + + case Derived: + out << "Derived"; 
+ break; } // Local function to dump a source location, if we can. @@ -822,6 +862,7 @@ void RequirementSource::print(llvm::raw_ostream &out, }; switch (storageKind) { + case StorageKind::None: case StorageKind::RootArchetype: break; @@ -961,6 +1002,7 @@ bool FloatingRequirementSource::isExplicit() const { case RequirementSource::ProtocolRequirement: case RequirementSource::InferredProtocolRequirement: case RequirementSource::Superclass: + case RequirementSource::Derived: return false; } @@ -981,6 +1023,7 @@ bool FloatingRequirementSource::isExplicit() const { case RequirementSource::NestedTypeNameMatch: case RequirementSource::Parent: case RequirementSource::Superclass: + case RequirementSource::Derived: return false; } } @@ -3006,7 +3049,7 @@ void GenericSignatureBuilder::updateSuperclass( // Presence of a superclass constraint implies a _Class layout // constraint. - auto layoutReqSource = source->viaSuperclass(*this, nullptr); + auto layoutReqSource = source->viaDerived(*this); addLayoutRequirementDirect(T, LayoutConstraint::getLayoutConstraint( superclass->getClassOrBoundGenericClass()->isObjC() diff --git a/test/Generics/superclass_constraint.swift b/test/Generics/superclass_constraint.swift index 94a37e081eda6..23fbbd688d16e 100644 --- a/test/Generics/superclass_constraint.swift +++ b/test/Generics/superclass_constraint.swift @@ -76,7 +76,7 @@ extension P2 where Self.T : C { // CHECK: superclassConformance1 // CHECK: Requirements: // CHECK-NEXT: τ_0_0 : C [τ_0_0: Explicit @ {{.*}}:11] -// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:11 -> Superclass] +// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:11 -> Derived] // CHECK-NEXT: τ_0_0 : P3 [τ_0_0: Explicit @ {{.*}}:11 -> Superclass (C: P3)] // CHECK: Canonical generic signature: <τ_0_0 where τ_0_0 : C> func superclassConformance1(t: T) @@ -88,7 +88,7 @@ func superclassConformance1(t: T) // CHECK: superclassConformance2 // CHECK: Requirements: // CHECK-NEXT: τ_0_0 : C [τ_0_0: 
Explicit @ {{.*}}:11] -// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:11 -> Superclass] +// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:11 -> Derived] // CHECK-NEXT: τ_0_0 : P3 [τ_0_0: Explicit @ {{.*}}:11 -> Superclass (C: P3)] // CHECK: Canonical generic signature: <τ_0_0 where τ_0_0 : C> func superclassConformance2(t: T) From 50a7ad5be0e04cde5fec6d4ac8c0495580903d54 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Tue, 6 Jun 2017 11:13:14 -0700 Subject: [PATCH 03/17] [GSB] Improve handling of conformances resolved by concrete types. Centralize and simplify the handling of conformance requirements resolved by same-type-to-concrete requirements in a few ways: * Always store a ProtocolConformanceRef in via-superclass and via-concrete requirement sources, so we never lose this information. * When concretizing a nested type based on its parent, use the via-concrete conformance information rather than performing lookup again, simplifying this operation considerably and avoiding redundant lookups. * When adding a conformance requirement to a potential archetype that is equivalent to a concrete type, attempt to find and record the conformance. Fixes SR-4295 / rdar://problem/31372308. 
(cherry picked from commit 52e52b564be02396a70be548542ba53bf6cc8688) --- include/swift/AST/GenericSignatureBuilder.h | 38 +++-- lib/AST/GenericSignatureBuilder.cpp | 159 +++++++++--------- .../0100-sr4295.swift | 2 +- ...nd-pas-conformance-to-known-protocol.swift | 2 +- 4 files changed, 110 insertions(+), 91 deletions(-) rename validation-test/{compiler_crashers_2 => compiler_crashers_2_fixed}/0100-sr4295.swift (87%) rename validation-test/{compiler_crashers => compiler_crashers_fixed}/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift (89%) diff --git a/include/swift/AST/GenericSignatureBuilder.h b/include/swift/AST/GenericSignatureBuilder.h index aa408da080f2a..d89c7765ab738 100644 --- a/include/swift/AST/GenericSignatureBuilder.h +++ b/include/swift/AST/GenericSignatureBuilder.h @@ -23,6 +23,7 @@ #include "swift/AST/Decl.h" #include "swift/AST/DiagnosticEngine.h" #include "swift/AST/Identifier.h" +#include "swift/AST/ProtocolConformanceRef.h" #include "swift/AST/Types.h" #include "swift/AST/TypeLoc.h" #include "swift/AST/TypeRepr.h" @@ -286,6 +287,15 @@ class GenericSignatureBuilder { FloatingRequirementSource source, UnresolvedHandlingKind unresolvedHandling); + /// Resolve the conformance of the given potential archetype to + /// the given protocol when the potential archetype is known to be equivalent + /// to a concrete type. + /// + /// \returns the requirement source for the resolved conformance, or nullptr + /// if the conformance could not be resolved. + const RequirementSource *resolveConcreteConformance(PotentialArchetype *pa, + ProtocolDecl *proto); + /// Retrieve the constraint source conformance for the superclass constraint /// of the given potential archetype (if present) to the given protocol. /// @@ -293,9 +303,8 @@ class GenericSignatureBuilder { /// queried. /// /// \param proto The protocol to which we are establishing conformance. 
- const RequirementSource *resolveSuperConformance( - GenericSignatureBuilder::PotentialArchetype *pa, - ProtocolDecl *proto); + const RequirementSource *resolveSuperConformance(PotentialArchetype *pa, + ProtocolDecl *proto); /// \brief Add a new conformance requirement specifying that the given /// potential archetype conforms to the given protocol. @@ -775,7 +784,7 @@ class GenericSignatureBuilder::RequirementSource final /// A requirement that was resolved via a superclass requirement. /// - /// This stores the \c ProtocolConformance* used to resolve the + /// This stores the \c ProtocolConformanceRef used to resolve the /// requirement. Superclass, @@ -826,7 +835,7 @@ class GenericSignatureBuilder::RequirementSource final TypeBase *type; /// A protocol conformance used to satisfy the requirement. - ProtocolConformance *conformance; + void *conformance; /// An associated type to which a requirement is being applied. AssociatedTypeDecl *assocType; @@ -943,7 +952,7 @@ class GenericSignatureBuilder::RequirementSource final } RequirementSource(Kind kind, const RequirementSource *parent, - ProtocolConformance *conformance) + ProtocolConformanceRef conformance) : kind(kind), storageKind(StorageKind::ProtocolConformance), hasTrailingWrittenRequirementLoc(false), usesRequirementSignature(false), parent(parent) { @@ -952,7 +961,7 @@ class GenericSignatureBuilder::RequirementSource final assert(isAcceptableStorageKind(kind, storageKind) && "RequirementSource kind/storageKind mismatch"); - storage.conformance = conformance; + storage.conformance = conformance.getOpaqueValue(); } RequirementSource(Kind kind, const RequirementSource *parent, @@ -1019,13 +1028,14 @@ class GenericSignatureBuilder::RequirementSource final /// A requirement source that describes that a requirement that is resolved /// via a superclass requirement. 
const RequirementSource *viaSuperclass( - GenericSignatureBuilder &builder, - ProtocolConformance *conformance) const; + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const; /// A requirement source that describes that a requirement that is resolved /// via a same-type-to-concrete requirement. - const RequirementSource *viaConcrete(GenericSignatureBuilder &builder, - ProtocolConformance *conformance) const; + const RequirementSource *viaConcrete( + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const; /// A constraint source that describes that a constraint that is resolved /// for a nested type via a constraint on its parent. @@ -1126,9 +1136,9 @@ class GenericSignatureBuilder::RequirementSource final ProtocolDecl *getProtocolDecl() const; /// Retrieve the protocol conformance for this requirement, if there is one. - ProtocolConformance *getProtocolConformance() const { - if (storageKind != StorageKind::ProtocolConformance) return nullptr; - return storage.conformance; + ProtocolConformanceRef getProtocolConformance() const { + assert(storageKind == StorageKind::ProtocolConformance); + return ProtocolConformanceRef::getFromOpaqueValue(storage.conformance); } /// Retrieve the associated type declaration for this requirement, if there diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 523486a52cc94..d1e36036903dd 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -544,20 +544,21 @@ const RequirementSource *RequirementSource::viaProtocolRequirement( } const RequirementSource *RequirementSource::viaSuperclass( - GenericSignatureBuilder &builder, - ProtocolConformance *conformance) const { + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const { REQUIREMENT_SOURCE_FACTORY_BODY( - (nodeID, Superclass, this, conformance, + (nodeID, Superclass, this, conformance.getOpaqueValue(), nullptr, nullptr), (Superclass, 
this, conformance), 0, WrittenRequirementLoc()); } const RequirementSource *RequirementSource::viaConcrete( - GenericSignatureBuilder &builder, - ProtocolConformance *conformance) const { + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const { REQUIREMENT_SOURCE_FACTORY_BODY( - (nodeID, Concrete, this, conformance, nullptr, nullptr), + (nodeID, Concrete, this, conformance.getOpaqueValue(), + nullptr, nullptr), (Concrete, this, conformance), 0, WrittenRequirementLoc()); } @@ -679,10 +680,7 @@ ProtocolDecl *RequirementSource::getProtocolDecl() const { return nullptr; case StorageKind::ProtocolConformance: - if (storage.conformance) - return storage.conformance->getProtocol(); - - return nullptr; + return getProtocolConformance().getRequirement(); case StorageKind::AssociatedTypeDecl: return storage.assocType->getProtocol(); @@ -873,12 +871,16 @@ void RequirementSource::print(llvm::raw_ostream &out, } break; - case StorageKind::ProtocolConformance: - if (storage.conformance) { - out << " (" << storage.conformance->getType()->getString() << ": " - << storage.conformance->getProtocol()->getName() << ")"; + case StorageKind::ProtocolConformance: { + auto conformance = getProtocolConformance(); + if (conformance.isConcrete()) { + out << " (" << conformance.getConcrete()->getType()->getString() << ": " + << conformance.getConcrete()->getProtocol()->getName() << ")"; + } else { + out << " (abstract " << conformance.getRequirement()->getName() << ")"; } break; + } case StorageKind::AssociatedTypeDecl: out << " (" << storage.assocType->getProtocol()->getName() @@ -1305,9 +1307,40 @@ ConstraintResult GenericSignatureBuilder::handleUnresolvedRequirement( } } +const RequirementSource * +GenericSignatureBuilder::resolveConcreteConformance(PotentialArchetype *pa, + ProtocolDecl *proto) { + auto concrete = pa->getConcreteType(); + if (!concrete) return nullptr; + + // Lookup the conformance of the concrete type to this protocol. 
+ auto conformance = + getLookupConformanceFn()(pa->getDependentType({ }, /*allowUnresolved=*/true) + ->getCanonicalType(), + concrete, + proto->getDeclaredInterfaceType() + ->castTo()); + if (!conformance) return nullptr; + + // Conformance to this protocol is redundant; update the requirement source + // appropriately. + auto paEquivClass = pa->getOrCreateEquivalenceClass(); + const RequirementSource *concreteSource; + if (auto writtenSource = + paEquivClass->findAnyConcreteConstraintAsWritten(pa)) + concreteSource = writtenSource->source; + else + concreteSource = paEquivClass->concreteTypeConstraints.front().source; + + concreteSource = concreteSource->viaConcrete(*this, *conformance); + paEquivClass->conformsTo[proto].push_back({pa, proto, concreteSource}); + ++NumConformanceConstraints; + return concreteSource; +} + const RequirementSource *GenericSignatureBuilder::resolveSuperConformance( - GenericSignatureBuilder::PotentialArchetype *pa, - ProtocolDecl *proto) { + PotentialArchetype *pa, + ProtocolDecl *proto) { // Get the superclass constraint. Type superclass = pa->getSuperclass(); if (!superclass) return nullptr; @@ -1332,7 +1365,7 @@ const RequirementSource *GenericSignatureBuilder::resolveSuperConformance( superclassSource = paEquivClass->superclassConstraints.front().source; superclassSource = - superclassSource->viaSuperclass(*this, conformance->getConcrete()); + superclassSource->viaSuperclass(*this, *conformance); paEquivClass->conformsTo[proto].push_back({pa, proto, superclassSource}); ++NumConformanceConstraints; return superclassSource; @@ -1376,7 +1409,7 @@ static void maybeAddSameTypeRequirementForNestedType( if (!assocType) return; // Dig out the type witness. 
- auto superConformance = superSource->getProtocolConformance(); + auto superConformance = superSource->getProtocolConformance().getConcrete(); auto concreteType = superConformance->getTypeWitness(assocType, builder.getLazyResolver()); if (!concreteType) return; @@ -1421,9 +1454,13 @@ bool PotentialArchetype::addConformance(ProtocolDecl *proto, ++NumConformanceConstraints; ++NumConformances; - // Determine whether there is a superclass constraint where the - // superclass conforms to this protocol. - (void)getBuilder()->resolveSuperConformance(this, proto); + // If there is a concrete type that resolves this conformance requirement, + // record the conformance. + if (!builder.resolveConcreteConformance(this, proto)) { + // Otherwise, determine whether there is a superclass constraint where the + // superclass conforms to this protocol. + (void)builder.resolveSuperConformance(this, proto); + } // Resolve any existing nested types that need it. for (auto &nested : NestedTypes) { @@ -1664,12 +1701,11 @@ namespace { // parent PA that has a concrete type. 
static void concretizeNestedTypeFromConcreteParent( GenericSignatureBuilder::PotentialArchetype *parent, - const RequirementSource *parentConcreteSource, GenericSignatureBuilder::PotentialArchetype *nestedPA, - GenericSignatureBuilder &builder, - llvm::function_ref - lookupConformance) { - auto concreteParent = parent->getConcreteType(); + GenericSignatureBuilder &builder) { + auto parentEquiv = parent->getEquivalenceClassIfPresent(); + assert(parentEquiv && "can't have a concrete type without an equiv class"); + auto concreteParent = parentEquiv->concreteType; assert(concreteParent && "attempting to resolve concrete nested type of non-concrete PA"); @@ -1678,11 +1714,21 @@ static void concretizeNestedTypeFromConcreteParent( auto assocType = nestedPA->getResolvedAssociatedType(); if (!assocType) return; - auto source = parentConcreteSource->viaConcrete(builder, /*FIXME: */nullptr) - ->viaParent(builder, assocType); + auto proto = assocType->getProtocol(); + assert(parentEquiv->conformsTo.count(proto) > 0 && + "No conformance requirement"); + const RequirementSource *parentConcreteSource = nullptr; + for (const auto &constraint : parentEquiv->conformsTo.find(proto)->second) { + if (constraint.source->kind == RequirementSource::Concrete) { + parentConcreteSource = constraint.source; + } + } - // FIXME: Get the conformance from the parent. - auto conformance = lookupConformance(assocType->getProtocol()); + // Error condition: parent did not conform to this protocol, so they + if (!parentConcreteSource) return; + + auto source = parentConcreteSource->viaParent(builder, assocType); + auto conformance = parentConcreteSource->getProtocolConformance(); Type witnessType; if (conformance.isConcrete()) { @@ -2051,21 +2097,7 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( // FIXME: This feels like massive overkill. Why do we have to loop? 
if (isConcreteType()) { for (auto equivT : getRepresentative()->getEquivalenceClassMembers()) { - concretizeNestedTypeFromConcreteParent( - equivT, RequirementSource::forNestedTypeNameMatch(this), - resultPA, builder, - [&](ProtocolDecl *proto) -> ProtocolConformanceRef { - auto depTy = resultPA->getDependentType({}, - /*allowUnresolved=*/true) - ->getCanonicalType(); - auto protocolTy = - proto->getDeclaredInterfaceType()->castTo(); - auto conformance = builder.getLookupConformanceFn()( - depTy, getConcreteType(), protocolTy); - assert(conformance && - "failed to find PA's conformance to known protocol"); - return *conformance; - }); + concretizeNestedTypeFromConcreteParent(equivT, resultPA, builder); } } } @@ -3375,49 +3407,26 @@ ConstraintResult GenericSignatureBuilder::addSameTypeRequirementToConcrete( // Record the requirement. equivClass->concreteType = Concrete; - // Make sure the concrete type fulfills the requirements on the archetype. - // FIXME: Move later... - DenseMap conformances; - CanType depTy = rep->getDependentType({ }, /*allowUnresolved=*/true) - ->getCanonicalType(); + // Make sure the concrete type fulfills the conformance requirements of + // this equivalence class. for (auto protocol : rep->getConformsTo()) { - auto conformance = - getLookupConformanceFn()(depTy, Concrete, - protocol->getDeclaredInterfaceType() - ->castTo()); - if (!conformance) { - if (!Concrete->hasError()) { + if (!resolveConcreteConformance(rep, protocol)) { + if (!Concrete->hasError() && Source->getLoc().isValid()) { Diags.diagnose(Source->getLoc(), diag::requires_generic_param_same_type_does_not_conform, Concrete, protocol->getName()); } + return ConstraintResult::Conflicting; } - - conformances.insert({protocol, *conformance}); - - // Abstract conformances are acceptable for existential types. - assert(conformance->isConcrete() || Concrete->isExistentialType()); - - // Update the requirement source now that we know it's concrete. - // FIXME: Bad concrete source info. 
- auto concreteSource = Source->viaConcrete(*this, - conformance->isConcrete() - ? conformance->getConcrete() - : nullptr); - equivClass->conformsTo[protocol].push_back({T, protocol, concreteSource}); - ++NumConformanceConstraints; } // Eagerly resolve any existing nested types to their concrete forms (others // will be "concretized" as they are constructed, in getNestedType). for (auto equivT : rep->getEquivalenceClassMembers()) { for (auto nested : equivT->getNestedTypes()) { - concretizeNestedTypeFromConcreteParent( - equivT, Source, nested.second.front(), *this, - [&](ProtocolDecl *proto) -> ProtocolConformanceRef { - return conformances.find(proto)->second; - }); + concretizeNestedTypeFromConcreteParent(equivT, nested.second.front(), + *this); } } diff --git a/validation-test/compiler_crashers_2/0100-sr4295.swift b/validation-test/compiler_crashers_2_fixed/0100-sr4295.swift similarity index 87% rename from validation-test/compiler_crashers_2/0100-sr4295.swift rename to validation-test/compiler_crashers_2_fixed/0100-sr4295.swift index c3952ee27b187..699a1ccea49ec 100644 --- a/validation-test/compiler_crashers_2/0100-sr4295.swift +++ b/validation-test/compiler_crashers_2_fixed/0100-sr4295.swift @@ -1,4 +1,4 @@ -// RUN: not --crash %target-swift-frontend -emit-ir -primary-file %s +// RUN: not %target-swift-frontend -emit-ir -primary-file %s // REQUIRES: asserts diff --git a/validation-test/compiler_crashers/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift b/validation-test/compiler_crashers_fixed/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift similarity index 89% rename from validation-test/compiler_crashers/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift rename to validation-test/compiler_crashers_fixed/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift index 8df4f0e359b98..8be1db13a9f99 100644 --- 
a/validation-test/compiler_crashers/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift +++ b/validation-test/compiler_crashers_fixed/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift @@ -6,5 +6,5 @@ // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // REQUIRES: asserts -// RUN: not --crash %target-swift-frontend %s -emit-ir +// RUN: not %target-swift-frontend %s -emit-ir protocol P{let c{}typealias e:RangeReplaceableCollection}extension P{typealias e:a From 7c68366b3ba092ae9f6cba8cc877fcca23c039d0 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Wed, 7 Jun 2017 15:08:23 -0700 Subject: [PATCH 04/17] [GSB] Remove unused form of addSameTypeRequirementDirect() (cherry picked from commit a4e35ed151af59feb1c1c8cdd009b39dd19cca57) --- include/swift/AST/GenericSignatureBuilder.h | 9 --------- lib/AST/GenericSignatureBuilder.cpp | 11 ----------- test/Constraints/same_types.swift | 3 +++ test/Generics/superclass_constraint.swift | 2 +- ...ance-isconcrete-concrete-isexistentialtype.swift | 13 +++++++++++++ ...e-didnt-find-the-associated-type-we-wanted.swift | 13 +++++++++++++ 6 files changed, 30 insertions(+), 21 deletions(-) create mode 100644 validation-test/compiler_crashers_fixed/28788-conformance-isconcrete-concrete-isexistentialtype.swift create mode 100644 validation-test/compiler_crashers_fixed/28793-nestedpabyname-didnt-find-the-associated-type-we-wanted.swift diff --git a/include/swift/AST/GenericSignatureBuilder.h b/include/swift/AST/GenericSignatureBuilder.h index d89c7765ab738..6d5dc37bdeca0 100644 --- a/include/swift/AST/GenericSignatureBuilder.h +++ b/include/swift/AST/GenericSignatureBuilder.h @@ -330,15 +330,6 @@ class GenericSignatureBuilder { FloatingRequirementSource Source, llvm::function_ref diagnoseMismatch); - /// \brief Add a new same-type requirement between two fully resolved types - /// (output of GenericSignatureBuilder::resolve). 
- /// - /// The two types must not be incompatible concrete types. - ConstraintResult addSameTypeRequirementDirect( - ResolvedType paOrT1, - ResolvedType paOrT2, - FloatingRequirementSource Source); - /// \brief Add a new same-type requirement between two unresolved types. /// /// The types are resolved with \c GenericSignatureBuilder::resolve, and must diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index d1e36036903dd..89f5ab81ecbb1 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -3510,17 +3510,6 @@ ConstraintResult GenericSignatureBuilder::addSameTypeRequirement( diagnoseMismatch); } -ConstraintResult GenericSignatureBuilder::addSameTypeRequirementDirect( - ResolvedType paOrT1, - ResolvedType paOrT2, - FloatingRequirementSource source) { - return addSameTypeRequirementDirect(paOrT1, paOrT2, source, - [&](Type type1, Type type2) { - Diags.diagnose(source.getLoc(), diag::requires_same_concrete_type, - type1, type2); - }); -} - ConstraintResult GenericSignatureBuilder::addSameTypeRequirementDirect( ResolvedType paOrT1, ResolvedType paOrT2, FloatingRequirementSource source, llvm::function_ref diagnoseMismatch) { diff --git a/test/Constraints/same_types.swift b/test/Constraints/same_types.swift index ff56973fa2758..f37666c9e3a60 100644 --- a/test/Constraints/same_types.swift +++ b/test/Constraints/same_types.swift @@ -88,6 +88,7 @@ func test6(_ t: T) -> (Y, X) where T.Bar == Y { func test7(_ t: T) -> (Y, X) where T.Bar == Y, T.Bar.Foo == X { // expected-warning@-1{{redundant same-type constraint 'T.Bar.Foo' == 'X'}} + // expected-note@-2{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} return (t.bar, t.bar.foo) } @@ -119,12 +120,14 @@ func fail6(_ t: T) -> Int where T == Int { // expected-error{{same-type requi func test8(_ t: T, u: U) -> (Y, Y, X, X) where T.Bar == Y, U.Bar.Foo == X, T.Bar == U.Bar { // expected-warning{{redundant same-type constraint 
'U.Bar.Foo' == 'X'}} + // expected-note@-1{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} return (t.bar, u.bar, t.bar.foo, u.bar.foo) } func test8a(_ t: T, u: U) -> (Y, Y, X, X) where T.Bar == Y, U.Bar.Foo == X, U.Bar == T.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} + // expected-note@-1{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} return (t.bar, u.bar, t.bar.foo, u.bar.foo) } diff --git a/test/Generics/superclass_constraint.swift b/test/Generics/superclass_constraint.swift index 23fbbd688d16e..16ed23b689a6f 100644 --- a/test/Generics/superclass_constraint.swift +++ b/test/Generics/superclass_constraint.swift @@ -102,7 +102,7 @@ class C2 : C, P4 { } // CHECK: superclassConformance3 // CHECK: Requirements: // CHECK-NEXT: τ_0_0 : C2 [τ_0_0: Explicit @ {{.*}}:61] -// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:46 -> Superclass] +// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:46 -> Derived] // CHECK-NEXT: τ_0_0 : P4 [τ_0_0: Explicit @ {{.*}}:61 -> Superclass (C2: P4)] // CHECK: Canonical generic signature: <τ_0_0 where τ_0_0 : C2> func superclassConformance3(t: T) where T : C, T : P4, T : C2 {} diff --git a/validation-test/compiler_crashers_fixed/28788-conformance-isconcrete-concrete-isexistentialtype.swift b/validation-test/compiler_crashers_fixed/28788-conformance-isconcrete-concrete-isexistentialtype.swift new file mode 100644 index 0000000000000..f0e4bf627a347 --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28788-conformance-isconcrete-concrete-isexistentialtype.swift @@ -0,0 +1,13 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// REQUIRES: asserts +// RUN: not %target-swift-frontend %s -emit-ir +protocol P{ +typealias e:RangeReplaceableCollection +}{}extension P{{}func e +typealias e:FlattenCollection diff --git a/validation-test/compiler_crashers_fixed/28793-nestedpabyname-didnt-find-the-associated-type-we-wanted.swift b/validation-test/compiler_crashers_fixed/28793-nestedpabyname-didnt-find-the-associated-type-we-wanted.swift new file mode 100644 index 0000000000000..efc3af830ae0b --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28793-nestedpabyname-didnt-find-the-associated-type-we-wanted.swift @@ -0,0 +1,13 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// REQUIRES: asserts +// RUN: not %target-swift-frontend %s -emit-ir +protocol A:RangeReplaceableCollection +protocol P{ +protocol A +class a:A{}typealias a:A{}typealias a:RangeReplaceableCollection From 2167a3f27a81155516d0beb3d58ff5d4e2927d1d Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Wed, 7 Jun 2017 13:49:01 -0700 Subject: [PATCH 05/17] =?UTF-8?q?[GSB]=20Break=20recursion=20based=20on=20?= =?UTF-8?q?the=20=E2=80=9Croot=E2=80=9D=20type=20we=E2=80=99re=20creating?= =?UTF-8?q?=20requirements=20for.=20(cherry=20picked=20from=20commit=20b09?= =?UTF-8?q?5c8af2aae04d5524f807ff7f38bafd5b3efd1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/AST/GenericSignatureBuilder.cpp | 13 +++++++++++++ 1 file changed, 13 
insertions(+) diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 89f5ab81ecbb1..a11b033c1b026 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1082,6 +1082,19 @@ bool FloatingRequirementSource::isRecursive( pa = parent; } + + // Also check the root type. + grossCount = 0; + for (Type type = rootType; + auto depTy = type->getAs(); + type = depTy->getBase()) { + if (depTy->getName() == nestedName) { + if (++grossCount > 4) { + ++NumRecursive; + return true; + } + } + } } return false; From 398b99a8012f1678d77d75bb1dd81e43a000607b Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Fri, 23 Jun 2017 16:58:56 -0700 Subject: [PATCH 06/17] [GSB] Avoid recursively growing increasingly-nested potential archetypes. In some circumstances, we could end up growing increasingly-nested potential archetypes due to a poor choice of representatives and anchors. Address this in two places: * Always prefer to use the potential archetype with a lower nesting depth (== number of nested types) to one with a greater nesting depth, so we don't accumulate more nested types onto the already-longer potential archetypes, and * Prefer archetype anchors with a lower nesting depth *except* that we always prefer archetype anchors comprised of a sequence of associated types (i.e., no concrete type declarations), which is important for canonicalization. Fixes SR-4757 / rdar://problem/31912838, as well as a regression involving infinitely-recursive potential archetypes caused by the previous commit. 
(cherry picked from commit a72a2bf730bd7a5387e986ab2b6c20b4fca5aab0) --- lib/AST/GenericSignatureBuilder.cpp | 69 ++++++++++++++----- test/Generics/requirement_inference.swift | 4 +- ...ef-swift-protocolconformanceref-swif.swift | 9 +++ 3 files changed, 63 insertions(+), 19 deletions(-) create mode 100644 validation-test/compiler_crashers_fixed/28764-swift-protocolconformanceref-llvm-function-ref-swift-protocolconformanceref-swif.swift diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index a11b033c1b026..d3a43a0dea10a 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1546,26 +1546,35 @@ static int compareAssociatedTypes(AssociatedTypeDecl *assocType1, return 0; } +/// Whether there are any concrete type declarations in the potential archetype. +static bool hasConcreteDecls(const PotentialArchetype *pa) { + auto parent = pa->getParent(); + if (!parent) return false; + + if (pa->getConcreteTypeDecl()) + return true; + + return hasConcreteDecls(parent); +} + /// Canonical ordering for dependent types in generic signatures. static int compareDependentTypes(PotentialArchetype * const* pa, - PotentialArchetype * const* pb) { + PotentialArchetype * const* pb, + bool outermost) { auto a = *pa, b = *pb; // Fast-path check for equality. if (a == b) return 0; - // Concrete types must be ordered *after* everything else, to ensure they - // don't become representatives in the case where a concrete type is equated - // with an associated type. - if (a->getParent() && b->getParent() && - !!a->getConcreteTypeDecl() != !!b->getConcreteTypeDecl()) - return a->getConcreteTypeDecl() ? +1 : -1; - - // Types that are equivalent to concrete types follow types that are still - // type parameters. - if (a->isConcreteType() != b->isConcreteType()) - return a->isConcreteType() ? +1 : -1; + // If one has concrete declarations somewhere but the other does not, + // prefer the one without concrete declarations. 
+ if (outermost) { + bool aHasConcreteDecls = hasConcreteDecls(a); + bool bHasConcreteDecls = hasConcreteDecls(b); + if (aHasConcreteDecls != bHasConcreteDecls) + return aHasConcreteDecls ? +1 : -1; + } // Ordering is as follows: // - Generic params @@ -1581,9 +1590,21 @@ static int compareDependentTypes(PotentialArchetype * const* pa, auto ppb = b->getParent(); // - by base, so t_0_n.`P.T` < t_1_m.`P.T` - if (int compareBases = compareDependentTypes(&ppa, &ppb)) + if (int compareBases = compareDependentTypes(&ppa, &ppb, /*outermost=*/false)) return compareBases; + // Types that are equivalent to concrete types follow types that are still + // type parameters. + if (a->isConcreteType() != b->isConcreteType()) + return a->isConcreteType() ? +1 : -1; + + // Concrete types must be ordered *after* everything else, to ensure they + // don't become representatives in the case where a concrete type is equated + // with an associated type. + if (a->getParent() && b->getParent() && + !!a->getConcreteTypeDecl() != !!b->getConcreteTypeDecl()) + return a->getConcreteTypeDecl() ? +1 : -1; + // - by name, so t_n_m.`P.T` < t_n_m.`P.U` if (int compareNames = a->getNestedName().str().compare( b->getNestedName().str())) @@ -1627,6 +1648,11 @@ static int compareDependentTypes(PotentialArchetype * const* pa, llvm_unreachable("potential archetype total order failure"); } +static int compareDependentTypes(PotentialArchetype * const* pa, + PotentialArchetype * const* pb) { + return compareDependentTypes(pa, pb, /*outermost=*/true); +} + PotentialArchetype *PotentialArchetype::getArchetypeAnchor( GenericSignatureBuilder &builder) { // Find the best archetype within this equivalence class. @@ -1635,6 +1661,7 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( if (auto parent = getParent()) { // For a nested type, retrieve the parent archetype anchor first. 
auto parentAnchor = parent->getArchetypeAnchor(builder); + assert(parentAnchor->getNestingDepth() <= parent->getNestingDepth()); anchor = parentAnchor->getNestedArchetypeAnchor( getNestedName(), builder, ArchetypeResolutionKind::AlreadyKnown); @@ -1654,7 +1681,8 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( equivClass->archetypeAnchorCache.numMembers == equivClass->members.size()) { ++NumArchetypeAnchorCacheHits; - + assert(equivClass->archetypeAnchorCache.anchor->getNestingDepth() + <= rep->getNestingDepth()); return equivClass->archetypeAnchorCache.anchor; } @@ -1673,6 +1701,8 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( } #endif + assert(anchor->getNestingDepth() <= rep->getNestingDepth()); + // Record the cache miss and update the cache. ++NumArchetypeAnchorCacheMisses; equivClass->archetypeAnchorCache.anchor = anchor; @@ -3300,10 +3330,15 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( if (T1 == T2) return ConstraintResult::Resolved; + unsigned nestingDepth1 = T1->getNestingDepth(); + unsigned nestingDepth2 = T2->getNestingDepth(); + // Decide which potential archetype is to be considered the representative. - // It doesn't specifically matter which we use, but it's a minor optimization - // to prefer the canonical type. - if (compareDependentTypes(&T2, &T1) < 0) { + // We prefer potential archetypes with lower nesting depths (because it + // prevents us from unnecessarily building deeply nested potential archetypes) + // and prefer anchors because it's a minor optimization. 
+ if (nestingDepth2 < nestingDepth1 || + compareDependentTypes(&T2, &T1) < 0) { std::swap(T1, T2); std::swap(OrigT1, OrigT2); } diff --git a/test/Generics/requirement_inference.swift b/test/Generics/requirement_inference.swift index 45904ce1df0ba..df14ea8716d32 100644 --- a/test/Generics/requirement_inference.swift +++ b/test/Generics/requirement_inference.swift @@ -224,8 +224,8 @@ struct X8 : P12 { struct X9 where T.B == U.B { // CHECK-LABEL: X9.upperSameTypeConstraint - // CHECK: Generic signature: - // CHECK: Canonical generic signature: <τ_0_0, τ_0_1, τ_1_0 where τ_0_1 : P12, τ_0_0 == X8, τ_0_1.B == X7> + // CHECK: Generic signature: + // CHECK: Canonical generic signature: <τ_0_0, τ_0_1, τ_1_0 where τ_0_0 == X8, τ_0_1 : P12, τ_0_1.B == X7> func upperSameTypeConstraint(_: V) where T == X8 { } } diff --git a/validation-test/compiler_crashers_fixed/28764-swift-protocolconformanceref-llvm-function-ref-swift-protocolconformanceref-swif.swift b/validation-test/compiler_crashers_fixed/28764-swift-protocolconformanceref-llvm-function-ref-swift-protocolconformanceref-swif.swift new file mode 100644 index 0000000000000..f254454afec43 --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28764-swift-protocolconformanceref-llvm-function-ref-swift-protocolconformanceref-swif.swift @@ -0,0 +1,9 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// RUN: not %target-swift-frontend %s -emit-ir +protocol P{typealias a}{protocol A:P{{}class a{{}}typealias a:RangeReplaceableCollection From 075a4c30115b7f0065afcc7a467f7ca992b5f5b0 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Fri, 2 Jun 2017 13:59:18 -0700 Subject: [PATCH 07/17] [GSB] Clean up the meaning of ArchetypeResolutionKind::(Complete)WellFormed. (cherry picked from commit 791ac7fad4ce5c6763b8653ba600cee06aeb4562) --- lib/AST/GenericSignature.cpp | 2 +- lib/AST/GenericSignatureBuilder.cpp | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/lib/AST/GenericSignature.cpp b/lib/AST/GenericSignature.cpp index 880c4d8ca3eb9..5c5a50f2d21ce 100644 --- a/lib/AST/GenericSignature.cpp +++ b/lib/AST/GenericSignature.cpp @@ -872,7 +872,7 @@ ConformanceAccessPath GenericSignature::getConformanceAccessPath( auto pa = reqSigBuilder.resolveArchetype( storedType, - ArchetypeResolutionKind::CompleteWellFormed); + ArchetypeResolutionKind::AlwaysPartial); auto equivClass = pa->getOrCreateEquivalenceClass(); // Find the conformance of this potential archetype to the protocol in diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index d3a43a0dea10a..9e44e0b903cf5 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1862,9 +1862,7 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // If we found an associated type, use it. 
PotentialArchetype *resultPA = nullptr; if (bestAssocType) { - resultPA = updateNestedTypeForConformance( - bestAssocType, - ArchetypeResolutionKind::WellFormed); + resultPA = updateNestedTypeForConformance(bestAssocType, kind); } // If we have an associated type, drop any concrete decls that aren't in @@ -1917,17 +1915,11 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // Check whether we can add a missing nested type for this case. switch (kind) { case ArchetypeResolutionKind::AlwaysPartial: - case ArchetypeResolutionKind::CompleteWellFormed: - // FIXME: CompleteWellFormed should operate the same as WellFormed here. break; case ArchetypeResolutionKind::WellFormed: - if (!bestAssocType && !bestConcreteDecl) - return nullptr; - break; - + case ArchetypeResolutionKind::CompleteWellFormed: case ArchetypeResolutionKind::AlreadyKnown: - // Don't add a new type; return nullptr; } @@ -5224,8 +5216,11 @@ void GenericSignatureBuilder::enumerateRequirements(llvm::function_ref< auto equivClass = rep->getOrCreateEquivalenceClass(); // If we didn't compute the derived same-type components yet, do so now. - if (equivClass->derivedSameTypeComponents.empty()) + if (equivClass->derivedSameTypeComponents.empty()) { checkSameTypeConstraints(Impl->GenericParams, rep); + rep = archetype->getRepresentative(); + equivClass = rep->getOrCreateEquivalenceClass(); + } assert(!equivClass->derivedSameTypeComponents.empty() && "Didn't compute derived same-type components?"); From 6c4e4fdd89c8ea69e8b2ffea4ca514eb7bb1aa4b Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Tue, 6 Jun 2017 11:56:23 -0700 Subject: [PATCH 08/17] [GSB] Centralize diagnosis of concrete types and conformance requirements. Ensures that we don't admit invalid cases where the concrete type does not conform to the required protocol. 
(cherry picked from commit c879b95917dfb8733df039371dc920271090b78d) --- lib/AST/GenericSignatureBuilder.cpp | 35 +++++++++++++++-------------- test/Constraints/same_types.swift | 11 ++++++++- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 9e44e0b903cf5..eb2d17a8613a0 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1326,15 +1326,6 @@ GenericSignatureBuilder::resolveConcreteConformance(PotentialArchetype *pa, auto concrete = pa->getConcreteType(); if (!concrete) return nullptr; - // Lookup the conformance of the concrete type to this protocol. - auto conformance = - getLookupConformanceFn()(pa->getDependentType({ }, /*allowUnresolved=*/true) - ->getCanonicalType(), - concrete, - proto->getDeclaredInterfaceType() - ->castTo()); - if (!conformance) return nullptr; - // Conformance to this protocol is redundant; update the requirement source // appropriately. auto paEquivClass = pa->getOrCreateEquivalenceClass(); @@ -1345,6 +1336,23 @@ GenericSignatureBuilder::resolveConcreteConformance(PotentialArchetype *pa, else concreteSource = paEquivClass->concreteTypeConstraints.front().source; + // Lookup the conformance of the concrete type to this protocol. 
+ auto conformance = + getLookupConformanceFn()(pa->getDependentType({ }, /*allowUnresolved=*/true) + ->getCanonicalType(), + concrete, + proto->getDeclaredInterfaceType() + ->castTo()); + if (!conformance) { + if (!concrete->hasError() && concreteSource->getLoc().isValid()) { + Diags.diagnose(concreteSource->getLoc(), + diag::requires_generic_param_same_type_does_not_conform, + concrete, proto->getName()); + } + + return nullptr; + } + concreteSource = concreteSource->viaConcrete(*this, *conformance); paEquivClass->conformsTo[proto].push_back({pa, proto, concreteSource}); ++NumConformanceConstraints; @@ -3450,15 +3458,8 @@ ConstraintResult GenericSignatureBuilder::addSameTypeRequirementToConcrete( // Make sure the concrete type fulfills the conformance requirements of // this equivalence class. for (auto protocol : rep->getConformsTo()) { - if (!resolveConcreteConformance(rep, protocol)) { - if (!Concrete->hasError() && Source->getLoc().isValid()) { - Diags.diagnose(Source->getLoc(), - diag::requires_generic_param_same_type_does_not_conform, - Concrete, protocol->getName()); - } - + if (!resolveConcreteConformance(rep, protocol)) return ConstraintResult::Conflicting; - } } // Eagerly resolve any existing nested types to their concrete forms (others diff --git a/test/Constraints/same_types.swift b/test/Constraints/same_types.swift index f37666c9e3a60..8621db911ade2 100644 --- a/test/Constraints/same_types.swift +++ b/test/Constraints/same_types.swift @@ -88,7 +88,7 @@ func test6(_ t: T) -> (Y, X) where T.Bar == Y { func test7(_ t: T) -> (Y, X) where T.Bar == Y, T.Bar.Foo == X { // expected-warning@-1{{redundant same-type constraint 'T.Bar.Foo' == 'X'}} - // expected-note@-2{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + // expected-note@-2{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} return (t.bar, t.bar.foo) } @@ -241,5 +241,14 @@ func structuralSameTypeRecursive1(_: T, _: U) { } +protocol P3 { +} + +protocol P4 { 
+ associatedtype A +} + +func test9(_: T) where T.A == X, T: P4, T.A: P3 { } // expected-error{{same-type constraint type 'X' does not conform to required protocol 'P3'}} + // FIXME: Remove -verify-ignore-unknown. // :0: error: unexpected error produced: generic parameter τ_0_0.Bar.Foo cannot be equal to both 'Y.Foo' (aka 'X') and 'Z' From 8295561558df1d9151530a96fa299e130923ad36 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Wed, 7 Jun 2017 14:35:15 -0700 Subject: [PATCH 09/17] [GSB] Concretize nested types when merging two potential archetypes. When two potential archetypes are merged and only one of them was a concrete type beforehand, concretize the nested types in the equivalence class of the non-concrete potential archetype. Otherwise, we're liable to miss redundancies. This feels like an ad hoc extension to an ad hoc mechanism, but gets us back to parity with this patch series. (cherry picked from commit bf730fff0871f7660da00702ffc36c50971e2a8e) --- lib/AST/GenericSignatureBuilder.cpp | 25 +++++++++++++++++++++++-- test/Constraints/same_types.swift | 14 ++++++++++---- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index eb2d17a8613a0..98e7263a19ec9 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -3345,6 +3345,7 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( // Merge the equivalence classes. auto equivClass = T1->getOrCreateEquivalenceClass(); + auto equivClass1Members = equivClass->members; auto equivClass2Members = T2->getEquivalenceClassMembers(); for (auto equiv : equivClass2Members) equivClass->members.push_back(equiv); @@ -3366,8 +3367,10 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( } // Same-type-to-concrete requirements. 
- if (equivClass2 && equivClass2->concreteType) { - if (equivClass->concreteType) { + bool t1IsConcrete = !equivClass->concreteType.isNull(); + bool t2IsConcrete = equivClass2 && !equivClass2->concreteType.isNull(); + if (t2IsConcrete) { + if (t1IsConcrete) { (void)addSameTypeRequirement(equivClass->concreteType, equivClass2->concreteType, Source, UnresolvedHandlingKind::GenerateConstraints, @@ -3418,6 +3421,14 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( auto dependentT1 = T1->getDependentType({ }, /*allowUnresolved=*/true); for (auto equivT2 : equivClass2Members) { for (auto T2Nested : equivT2->NestedTypes) { + // If T1 is concrete but T2 is not, concretize the nested types of T2. + if (t1IsConcrete && !t2IsConcrete) { + concretizeNestedTypeFromConcreteParent(T1, T2Nested.second.front(), + *this); + continue; + } + + // Otherwise, make the nested types equivalent. Type nestedT1 = DependentMemberType::get(dependentT1, T2Nested.first); if (isErrorResult( addSameTypeRequirement( @@ -3429,6 +3440,16 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( } } + // If T2 is concrete but T1 was not, concretize the nested types of T1. 
+ if (t2IsConcrete && !t1IsConcrete) { + for (auto equivT1 : equivClass1Members) { + for (auto T1Nested : equivT1->NestedTypes) { + concretizeNestedTypeFromConcreteParent(T2, T1Nested.second.front(), + *this); + } + } + } + return ConstraintResult::Resolved; } diff --git a/test/Constraints/same_types.swift b/test/Constraints/same_types.swift index 8621db911ade2..53edf482b74c0 100644 --- a/test/Constraints/same_types.swift +++ b/test/Constraints/same_types.swift @@ -119,18 +119,24 @@ func fail6(_ t: T) -> Int where T == Int { // expected-error{{same-type requi } func test8(_ t: T, u: U) -> (Y, Y, X, X) - where T.Bar == Y, U.Bar.Foo == X, T.Bar == U.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} - // expected-note@-1{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + where T.Bar == Y, // expected-note{{same-type constraint 'U.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + U.Bar.Foo == X, T.Bar == U.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} return (t.bar, u.bar, t.bar.foo, u.bar.foo) } func test8a(_ t: T, u: U) -> (Y, Y, X, X) where - T.Bar == Y, U.Bar.Foo == X, U.Bar == T.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} - // expected-note@-1{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + T.Bar == Y, // expected-note{{same-type constraint 'U.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + U.Bar.Foo == X, U.Bar == T.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} return (t.bar, u.bar, t.bar.foo, u.bar.foo) } +func test8b(_ t: T, u: U) + where U.Bar.Foo == X, // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} + T.Bar == Y, // expected-note{{same-type constraint 'U.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + T.Bar == U.Bar { +} + // rdar://problem/19137463 func rdar19137463(_ t: T) where T.a == T {} // expected-error{{'a' is not a member type of 'T'}} rdar19137463(1) From 
9d681fa933e93a263b007d5a2771e2b331a19fb4 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Mon, 5 Jun 2017 16:30:36 -0700 Subject: [PATCH 10/17] [GSB] Diagnose all same-type-to-concrete conflicts consistently. (cherry picked from commit b51529f9e7adaf3efcda990940f9c172f816484c) --- include/swift/AST/DiagnosticsSema.def | 3 ++ lib/AST/GenericSignatureBuilder.cpp | 23 +++++++-------- test/Constraints/same_types.swift | 41 +++++++++++++++++++++------ 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/include/swift/AST/DiagnosticsSema.def b/include/swift/AST/DiagnosticsSema.def index 22c0a8139a767..4adc5df58bbe1 100644 --- a/include/swift/AST/DiagnosticsSema.def +++ b/include/swift/AST/DiagnosticsSema.def @@ -1692,6 +1692,9 @@ NOTE(redundant_conformance_here,none, "inferred from type here}0", (unsigned, Type, ProtocolDecl *)) +ERROR(same_type_conflict,none, + "%select{generic parameter |protocol |}0%1 cannot be equal to both " + "%2 and %3", (unsigned, Type, Type, Type)) WARNING(redundant_same_type_to_concrete,none, "redundant same-type constraint %0 == %1", (Type, Type)) NOTE(same_type_redundancy_here,none, diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 98e7263a19ec9..beb2e31d0597b 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1728,6 +1728,7 @@ namespace { PotentialArchetype *pa; void operator()(Type type1, Type type2) const { + // FIXME: Shouldn't need this! 
if (pa->getParent() && pa->getConcreteTypeDecl() && source->getLoc().isInvalid()) { diags.diagnose(pa->getConcreteTypeDecl()->getLoc(), @@ -1736,14 +1737,6 @@ namespace { type1, type2); return; } - - if (source->getLoc().isValid()) { - diags.diagnose(source->getLoc(), - diag::requires_same_type_conflict, - pa->isGenericParam(), - pa->getDependentType(/*FIXME: */{ }, true), - type1, type2); - } } }; } // end anonymous namespace @@ -5006,8 +4999,8 @@ void GenericSignatureBuilder::checkConcreteTypeConstraints( checkConstraintList( genericParams, equivClass->concreteTypeConstraints, - [](const ConcreteConstraint &constraint) { - return true; + [&](const ConcreteConstraint &constraint) { + return constraint.value->isEqual(equivClass->concreteType); }, [&](Type concreteType) { // If the concrete type is equivalent, the constraint is redundant. @@ -5016,10 +5009,14 @@ void GenericSignatureBuilder::checkConcreteTypeConstraints( if (concreteType->isEqual(equivClass->concreteType)) return ConstraintRelation::Redundant; - // Call this unrelated. - return ConstraintRelation::Unrelated; + // If either has a type parameter, call them unrelated. 
+ if (concreteType->hasTypeParameter() || + equivClass->concreteType->hasTypeParameter()) + return ConstraintRelation::Unrelated; + + return ConstraintRelation::Conflicting; }, - None, + diag::same_type_conflict, diag::redundant_same_type_to_concrete, diag::same_type_redundancy_here); diff --git a/test/Constraints/same_types.swift b/test/Constraints/same_types.swift index 53edf482b74c0..9b3885504014b 100644 --- a/test/Constraints/same_types.swift +++ b/test/Constraints/same_types.swift @@ -58,14 +58,16 @@ func test3(_ t: T, u: U) -> (X, X) func fail1< T: Fooable, U: Fooable >(_ t: T, u: U) -> (X, Y) - where T.Foo == X, U.Foo == Y, T.Foo == U.Foo { // expected-error{{associated type 'T.Foo' cannot be equal to both 'X' and 'Y'}} + where T.Foo == X, U.Foo == Y, T.Foo == U.Foo { // expected-error{{'U.Foo' cannot be equal to both 'Y' and 'X'}} + // expected-note@-1{{same-type constraint 'T.Foo' == 'X' written here}} return (t.foo, u.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Y'}} } func fail2< T: Fooable, U: Fooable >(_ t: T, u: U) -> (X, Y) - where T.Foo == U.Foo, T.Foo == X, U.Foo == Y { // expected-error{{associated type 'U.Foo' cannot be equal to both 'X' and 'Y'}} + where T.Foo == U.Foo, T.Foo == X, U.Foo == Y { // expected-error{{'U.Foo' cannot be equal to both 'Y' and 'X'}} + // expected-note@-1{{same-type constraint 'T.Foo' == 'X' written here}} return (t.foo, u.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Y'}} } @@ -94,20 +96,22 @@ func test7(_ t: T) -> (Y, X) where T.Bar == Y, T.Bar.Foo == X { func fail4(_ t: T) -> (Y, Z) where - T.Bar == Y, - T.Bar.Foo == Z { // expected-error{{associated type 'T.Bar.Foo' cannot be equal to both 'Y.Foo' (aka 'X') and 'Z'}} + T.Bar == Y, // expected-note{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + T.Bar.Foo == Z { // expected-error{{'T.Bar.Foo' cannot be equal to both 'Z' and 'Y.Foo' (aka 'X')}} return (t.bar, 
t.bar.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Z'}} } func fail5(_ t: T) -> (Y, Z) where - T.Bar.Foo == Z, // expected-warning{{redundant same-type constraint 'T.Bar.Foo' == 'Z'}} - T.Bar == Y { // expected-error{{associated type 'T.Bar.Foo' cannot be equal to both 'Z' and 'X'}} - // expected-note@-1{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + T.Bar.Foo == Z, // expected-note{{same-type constraint 'T.Bar.Foo' == 'Z' written here}} + T.Bar == Y { // expected-error{{'T.Bar.Foo' cannot be equal to both 'Y.Foo' (aka 'X') and 'Z'}} return (t.bar, t.bar.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Z'}} } -func test8(_ t: T) where T.Foo == X, T.Foo == Y {} // expected-error{{associated type 'T.Foo' cannot be equal to both 'X' and 'Y'}} +func test8(_ t: T) + where T.Foo == X, // expected-note{{same-type constraint 'T.Foo' == 'X' written here}} + T.Foo == Y {} // expected-error{{'T.Foo' cannot be equal to both 'Y' and 'X'}} + func testAssocTypeEquivalence(_ fooable: T) -> X.Type where T.Foo == X { @@ -246,7 +250,6 @@ func structuralSameTypeRecursive1(_: T, _: U) where T.Assoc1 == Tuple2 // expected-error{{same-type constraint 'T.Assoc1' == '(T.Assoc1, U)' is recursive}} { } - protocol P3 { } @@ -256,5 +259,25 @@ protocol P4 { func test9(_: T) where T.A == X, T: P4, T.A: P3 { } // expected-error{{same-type constraint type 'X' does not conform to required protocol 'P3'}} +// Same-type constraint conflict through protocol where clauses. 
+protocol P5 where Foo1 == Foo2 { + associatedtype Foo1 + associatedtype Foo2 +} + +protocol P6 { + associatedtype Bar: P5 +} + +struct X5a {} + +struct X5b { } + +func test9(_ t: T, u: U) + where T.Bar.Foo1 == X5a, // expected-note{{same-type constraint 'T.Bar.Foo1' == 'X5a' written here}} + U.Bar.Foo2 == X5b, // expected-error{{'U.Bar.Foo2' cannot be equal to both 'X5b' and 'X5a'}} + T.Bar == U.Bar { +} + // FIXME: Remove -verify-ignore-unknown. // :0: error: unexpected error produced: generic parameter τ_0_0.Bar.Foo cannot be equal to both 'Y.Foo' (aka 'X') and 'Z' From e01e30200dac411513b3553e0c7e591ed5196a68 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Thu, 8 Jun 2017 14:33:28 -0700 Subject: [PATCH 11/17] [GSB] Cope with typealiases within protocol hierarchies. When we see two type(aliase)s with the same name in a protocol hierarchy, make them equal with an implied same-type requirement. This detects inconsistencies in typealiases across different protocols, and eliminates the need for ad hoc consistency checking. This is a step toward simplifying away the need for direct-diagnosis operations involving concrete type mismatches. While here, warn when we see an associated type with the same name as a typealias from an inherited protocol; in this case, the associated type is basically useless, because it's going to be equivalent to the typealias.
(cherry picked from commit c47aea7150379a24f280e30e00b02d76449f4c1a) --- include/swift/AST/DiagnosticsSema.def | 6 +-- lib/AST/GenericSignatureBuilder.cpp | 49 +++++++++---------- test/Generics/protocol_type_aliases.swift | 18 ++++--- test/attr/attr_specialize.swift | 2 +- .../0042-rdar21775089.swift | 4 +- 5 files changed, 40 insertions(+), 39 deletions(-) diff --git a/include/swift/AST/DiagnosticsSema.def b/include/swift/AST/DiagnosticsSema.def index 4adc5df58bbe1..2b1fd0903c578 100644 --- a/include/swift/AST/DiagnosticsSema.def +++ b/include/swift/AST/DiagnosticsSema.def @@ -1682,9 +1682,6 @@ ERROR(requires_generic_param_same_type_does_not_conform,none, (Type, Identifier)) ERROR(requires_same_concrete_type,none, "generic signature requires types %0 and %1 to be the same", (Type, Type)) -ERROR(protocol_typealias_conflict, none, - "type alias %0 requires types %1 and %2 to be the same", - (Identifier, Type, Type)) WARNING(redundant_conformance_constraint,none, "redundant conformance constraint %0: %1", (Type, ProtocolDecl *)) NOTE(redundant_conformance_here,none, @@ -1735,6 +1732,9 @@ WARNING(inherited_associated_type_redecl,none, WARNING(typealias_override_associated_type,none, "typealias overriding associated type %0 from protocol %1 is better " "expressed as same-type constraint on the protocol", (DeclName, Type)) +WARNING(associated_type_override_typealias,none, + "associated type %0 is redundant with type %0 declared in inherited " + "%1 %2", (DeclName, DescriptiveDeclKind, Type)) ERROR(generic_param_access,none, "%0 %select{must be declared %select{" diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index beb2e31d0597b..7dc43c0afedd0 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1720,24 +1720,10 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( } namespace { - /// Function object to diagnose a conflict in same-type constraints for a - /// given potential 
archetype. - struct DiagnoseSameTypeConflict { - DiagnosticEngine &diags; - const RequirementSource *source; - PotentialArchetype *pa; - - void operator()(Type type1, Type type2) const { - // FIXME: Shouldn't need this! - if (pa->getParent() && pa->getConcreteTypeDecl() && - source->getLoc().isInvalid()) { - diags.diagnose(pa->getConcreteTypeDecl()->getLoc(), - diag::protocol_typealias_conflict, - pa->getConcreteTypeDecl()->getName(), - type1, type2); - return; - } - } + /// Function object used to suppress conflict diagnoses when we know we'll + /// see them again later. + struct SameTypeConflictCheckedLater { + void operator()(Type type1, Type type2) const { } }; } // end anonymous namespace @@ -1786,10 +1772,7 @@ static void concretizeNestedTypeFromConcreteParent( builder.addSameTypeRequirement( nestedPA, witnessType, source, GenericSignatureBuilder::UnresolvedHandlingKind::GenerateConstraints, - DiagnoseSameTypeConflict{ - builder.getASTContext().Diags, - source, nestedPA - }); + SameTypeConflictCheckedLater()); } PotentialArchetype *PotentialArchetype::getNestedType( @@ -2847,7 +2830,20 @@ ConstraintResult GenericSignatureBuilder::addConformanceRequirement( continue; } - // FIXME: this is a weird situation. + // We inherited a type; this associated type will be identical + // to that typealias. + if (Source->kind == RequirementSource::RequirementSignatureSelf) { + auto inheritedOwningDecl = + inheritedType->getDeclContext() + ->getAsNominalTypeOrNominalTypeExtensionContext(); + Diags.diagnose(assocTypeDecl, + diag::associated_type_override_typealias, + assocTypeDecl->getFullName(), + inheritedOwningDecl->getDescriptiveKind(), + inheritedOwningDecl->getDeclaredInterfaceType()); + } + + addInferredSameTypeReq(assocTypeDecl, inheritedType); } inheritedTypeDecls.erase(knownInherited); @@ -2890,7 +2886,8 @@ ConstraintResult GenericSignatureBuilder::addConformanceRequirement( continue; } - // FIXME: More typealiases + // Two typealiases that should be the same. 
+ addInferredSameTypeReq(inheritedType, typealias); } inheritedTypeDecls.erase(knownInherited); @@ -3367,7 +3364,7 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( (void)addSameTypeRequirement(equivClass->concreteType, equivClass2->concreteType, Source, UnresolvedHandlingKind::GenerateConstraints, - DiagnoseSameTypeConflict{Diags, Source, T1}); + SameTypeConflictCheckedLater()); } else { equivClass->concreteType = equivClass2->concreteType; } @@ -3462,7 +3459,7 @@ ConstraintResult GenericSignatureBuilder::addSameTypeRequirementToConcrete( if (equivClass->concreteType) { return addSameTypeRequirement(equivClass->concreteType, Concrete, Source, UnresolvedHandlingKind::GenerateConstraints, - DiagnoseSameTypeConflict{ Diags, Source, T}); + SameTypeConflictCheckedLater()); } diff --git a/test/Generics/protocol_type_aliases.swift b/test/Generics/protocol_type_aliases.swift index cf0cfbd168c27..4626349314569 100644 --- a/test/Generics/protocol_type_aliases.swift +++ b/test/Generics/protocol_type_aliases.swift @@ -53,21 +53,17 @@ func concreteRequirementOnConcreteNestedTypeAlias(_: T) where T: Q2, S = // Incompatible concrete typealias types are flagged as such protocol P3 { - typealias T = Int // expected-error{{type alias 'T' requires types 'Q3.T' (aka 'Float') and 'Int' to be the same}} + typealias T = Int } -protocol Q3: P3 { +protocol Q3: P3 { // expected-error{{generic signature requires types 'Int'}} typealias T = Float } protocol P3_1 { - typealias T = Float // expected-error{{type alias 'T' requires types 'P3.T' (aka 'Int') and 'Float' to be the same}} + typealias T = Float } protocol Q3_1: P3, P3_1 {} // expected-error{{generic signature requires types 'Float'}} -// FIXME: these shouldn't be necessary to trigger the errors above, but are, due to -// the 'recursive decl validation' FIXME in GenericSignatureBuilder.cpp. 
-func useTypealias(_: T, _: T.T) {} -func useTypealias1(_: T, _: T.T) {} // Subprotocols can force associated types in their parents to be concrete, and // this should be understood for types constrained by the subprotocols. @@ -114,3 +110,11 @@ func checkQ6(x: T.Type) { sameType(getP6_1_A(x), getP6_2_B(x)) } +protocol P7 { + typealias A = Int +} + +protocol P7a : P7 { + associatedtype A // expected-warning{{associated type 'A' is redundant with type 'A' declared in inherited protocol 'P7'}} +} + diff --git a/test/attr/attr_specialize.swift b/test/attr/attr_specialize.swift index 5fc979d15e97b..a3cea524d0a88 100644 --- a/test/attr/attr_specialize.swift +++ b/test/attr/attr_specialize.swift @@ -87,7 +87,7 @@ struct FloatElement : HasElt { typealias Element = Float } @_specialize(where T == FloatElement) -@_specialize(where T == IntElement) // expected-error{{associated type 'T.Element' cannot be equal to both 'Float' and 'Int'}} +@_specialize(where T == IntElement) // expected-error{{'T.Element' cannot be equal to both 'IntElement.Element' (aka 'Int') and 'Float'}} func sameTypeRequirement(_ t: T) where T.Element == Float {} @_specialize(where T == Sub) diff --git a/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift b/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift index 065d92159e7c7..790285361c1e9 100644 --- a/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift +++ b/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift @@ -7,7 +7,7 @@ protocol MySequenceType {} protocol MyIndexableType {} protocol MyCollectionType : MySequenceType, MyIndexableType { - typealias SubSequence = MySlice + associatedtype SubSequence = MySlice func makeSubSequence() -> SubSequence } extension MyCollectionType { @@ -18,7 +18,7 @@ extension MyCollectionType { } protocol MyMutableCollectionType : MyCollectionType { - typealias SubSequence = MyMutableSlice + associatedtype SubSequence = MyMutableSlice } extension 
MyMutableCollectionType { func makeSubSequence() -> MyMutableSlice { From 9fdd79f1936ad5ec56a40fb59ae7cd3e21332d50 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Tue, 13 Jun 2017 11:29:15 -0700 Subject: [PATCH 12/17] [GSB] When retrieving the archetype anchor, allow partial results. Specifically, we need to be able to add a new potential archetype for the anchor. This API might need refinement. (cherry picked from commit aeb5b0172b740ef3499cd8a261c1cd88949ac59b) --- lib/AST/GenericSignatureBuilder.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 7dc43c0afedd0..474882400d87e 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1671,8 +1671,8 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( auto parentAnchor = parent->getArchetypeAnchor(builder); assert(parentAnchor->getNestingDepth() <= parent->getNestingDepth()); anchor = parentAnchor->getNestedArchetypeAnchor( - getNestedName(), builder, - ArchetypeResolutionKind::AlreadyKnown); + getNestedName(), builder, + ArchetypeResolutionKind::AlwaysPartial); // FIXME: Hack for cases where we couldn't resolve the nested type. if (!anchor) From efe8fdea4b9a19f9074502bad0186d5e9c982e0a Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Fri, 23 Jun 2017 22:17:58 -0700 Subject: [PATCH 13/17] Add formerly-crashing test case for SR-4786 / rdar://problem/31955862. 
(cherry picked from commit 2f00a08b85d9fe0f31f5d0b120f4df8d5548ffa8) --- .../compiler_crashers_2_fixed/0110-sr4786.swift | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 validation-test/compiler_crashers_2_fixed/0110-sr4786.swift diff --git a/validation-test/compiler_crashers_2_fixed/0110-sr4786.swift b/validation-test/compiler_crashers_2_fixed/0110-sr4786.swift new file mode 100644 index 0000000000000..7712938bfb220 --- /dev/null +++ b/validation-test/compiler_crashers_2_fixed/0110-sr4786.swift @@ -0,0 +1,13 @@ +// RUN: not %target-swift-frontend %s -typecheck + +public protocol _UTFEncoding { + associatedtype EncodedScalar where EncodedScalar == Int +} + +public protocol UnicodeEncoding { + associatedtype EncodedScalar: BidirectionalCollection +} + +public protocol _UTFParser { + associatedtype Encoding: UnicodeEncoding, _UTFEncoding +} From e1395ea06f91a2522aa9713dd432bde4904c14f3 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Fri, 23 Jun 2017 22:18:08 -0700 Subject: [PATCH 14/17] [GSB] Don't add invalid concrete requirements. When a concrete requirement is invalid due to the concrete type lacking a conformance to a particular, required protocol, don't emit that incorrect requirement---it causes invalid states further down the line. Fixes SR-5014 / rdar://problem/32402482. While here, fix a comment that Huon noticed trailed off into oblivion. 
(cherry picked from commit dd3869739e5c6aca76ebf66ae71421f01ecabc7c) --- include/swift/AST/GenericSignatureBuilder.h | 3 +++ lib/AST/GenericSignatureBuilder.cpp | 13 +++++++++---- test/Constraints/same_types.swift | 2 +- .../0101-sr5014.swift | 4 +--- 4 files changed, 14 insertions(+), 8 deletions(-) rename validation-test/{compiler_crashers_2 => compiler_crashers_2_fixed}/0101-sr5014.swift (61%) diff --git a/include/swift/AST/GenericSignatureBuilder.h b/include/swift/AST/GenericSignatureBuilder.h index 6d5dc37bdeca0..a27e9f602cb80 100644 --- a/include/swift/AST/GenericSignatureBuilder.h +++ b/include/swift/AST/GenericSignatureBuilder.h @@ -182,6 +182,9 @@ class GenericSignatureBuilder { /// the concrete type. unsigned recursiveConcreteType : 1; + /// Whether we have an invalid concrete type. + unsigned invalidConcreteType : 1; + /// Whether we have detected recursion during the substitution of /// the superclass type. unsigned recursiveSuperclassType : 1; diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 474882400d87e..855499e94607b 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -1350,6 +1350,7 @@ GenericSignatureBuilder::resolveConcreteConformance(PotentialArchetype *pa, concrete, proto->getName()); } + paEquivClass->invalidConcreteType = true; return nullptr; } @@ -1754,7 +1755,8 @@ static void concretizeNestedTypeFromConcreteParent( } } - // Error condition: parent did not conform to this protocol, so they + // Error condition: parent did not conform to this protocol, so there is no + // way to resolve the nested type via concrete conformance. 
if (!parentConcreteSource) return; auto source = parentConcreteSource->viaParent(builder, assocType); @@ -2440,7 +2442,8 @@ void GenericSignatureBuilder::PotentialArchetype::dump(llvm::raw_ostream &Out, #pragma mark Equivalence classes EquivalenceClass::EquivalenceClass(PotentialArchetype *representative) - : recursiveConcreteType(false), recursiveSuperclassType(false) + : recursiveConcreteType(false), invalidConcreteType(false), + recursiveSuperclassType(false) { members.push_back(representative); } @@ -3367,6 +3370,7 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( SameTypeConflictCheckedLater()); } else { equivClass->concreteType = equivClass2->concreteType; + equivClass->invalidConcreteType = equivClass2->invalidConcreteType; } equivClass->concreteTypeConstraints.insert( @@ -5263,8 +5267,9 @@ void GenericSignatureBuilder::enumerateRequirements(llvm::function_ref< ? knownAnchor->concreteTypeSource : RequirementSource::forAbstract(archetype); - // Drop recursive concrete-type constraints. - if (equivClass->recursiveConcreteType) + // Drop recursive and invalid concrete-type constraints. 
+ if (equivClass->recursiveConcreteType || + equivClass->invalidConcreteType) continue; f(RequirementKind::SameType, archetype, concreteType, source); diff --git a/test/Constraints/same_types.swift b/test/Constraints/same_types.swift index 9b3885504014b..b22ae3bfb69bd 100644 --- a/test/Constraints/same_types.swift +++ b/test/Constraints/same_types.swift @@ -77,7 +77,7 @@ func test4(_ t: T) -> Y where T.Bar == Y { func fail3(_ t: T) -> X where T.Bar == X { // expected-error {{'X' does not conform to required protocol 'Fooable'}} - return t.bar + return t.bar // expected-error{{cannot convert return expression of type 'T.Bar' }} } func test5(_ t: T) -> X where T.Bar.Foo == X { diff --git a/validation-test/compiler_crashers_2/0101-sr5014.swift b/validation-test/compiler_crashers_2_fixed/0101-sr5014.swift similarity index 61% rename from validation-test/compiler_crashers_2/0101-sr5014.swift rename to validation-test/compiler_crashers_2_fixed/0101-sr5014.swift index cf6b121ed538d..ced11e6174d82 100644 --- a/validation-test/compiler_crashers_2/0101-sr5014.swift +++ b/validation-test/compiler_crashers_2_fixed/0101-sr5014.swift @@ -1,6 +1,4 @@ -// RUN: not --crash %target-swift-frontend -emit-ir -primary-file %s - -// REQUIRES: asserts +// RUN: not %target-swift-frontend -emit-ir -primary-file %s struct Version { } From 3b8b73be27d44283aca9c05376538adc4b539fa3 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Fri, 23 Jun 2017 22:29:47 -0700 Subject: [PATCH 15/17] Add recently-fixed test case from SR-4737 / rdar://problem/31905232. 
(cherry picked from commit 6862ef10ac7fdf603cb40c057e4032e1aaf03578) --- .../0109-sr4737.swift | 2952 +++++++++++++++++ 1 file changed, 2952 insertions(+) create mode 100644 validation-test/compiler_crashers_2_fixed/0109-sr4737.swift diff --git a/validation-test/compiler_crashers_2_fixed/0109-sr4737.swift b/validation-test/compiler_crashers_2_fixed/0109-sr4737.swift new file mode 100644 index 0000000000000..ad7b1be72f2f7 --- /dev/null +++ b/validation-test/compiler_crashers_2_fixed/0109-sr4737.swift @@ -0,0 +1,2952 @@ +// RUN: not %target-swift-frontend %s -typecheck + +// REQUIRES: long_test + +//===----------------------------------------------------------------------===// +extension UnicodeScalar { + // Hack providing an efficient API that is available to the standard library + @_versioned + @inline(__always) + init(_unchecked x: UInt32) { self = unsafeBitCast(x, to: UnicodeScalar.self) } + + static var replacementCharacter: UnicodeScalar { + return UnicodeScalar(_unchecked: 0xfffd) + } +} +//===----------------------------------------------------------------------===// +@_fixed_layout +public struct _UIntBuffer< + Storage: UnsignedInteger & FixedWidthInteger, + Element: UnsignedInteger & FixedWidthInteger +> { + @_versioned + var _storage: Storage + @_versioned + var _bitCount: UInt8 + + @inline(__always) + @_versioned + internal init(_storage: Storage, _bitCount: UInt8) { + self._storage = _storage + self._bitCount = _bitCount + } + + @inline(__always) + public init(containing e: Element) { + _storage = Storage(extendingOrTruncating: e) + _bitCount = UInt8(extendingOrTruncating: Element.bitWidth) + } +} + +extension _UIntBuffer : Sequence { + @_fixed_layout + public struct Iterator : IteratorProtocol, Sequence { + @inline(__always) + public init(_ x: _UIntBuffer) { _impl = x } + + @inline(__always) + public mutating func next() -> Element? 
{ + if _impl._bitCount == 0 { return nil } + defer { + _impl._storage = _impl._storage &>> Element.bitWidth + _impl._bitCount = _impl._bitCount &- _impl._elementWidth + } + return Element(extendingOrTruncating: _impl._storage) + } + @_versioned + var _impl: _UIntBuffer + } + + @inline(__always) + public func makeIterator() -> Iterator { + return Iterator(self) + } + + @inline(__always) + public func reversed() -> _UIntBuffer { + if Element.bitWidth == 8 { + return _UIntBuffer( + _storage: + storage.byteSwapped &>> (Storage.bitWidth &- numericCast(_bitCount)), + _bitCount: _bitCount) + } + else { + var s: Storage = 0 + for x in self { + s <<= Element.bitWidth + s |= Storage(extendingOrTruncating: x) + } + return Self(_storage: s, _bitCount: _bitCount) + } + } +} + +extension _UIntBuffer : Collection { + public typealias _Element = Element + + public struct Index : Comparable { + @_versioned + var bitOffset: UInt8 + + @_versioned + init(bitOffset: UInt8) { self.bitOffset = bitOffset } + + public static func == (lhs: Index, rhs: Index) -> Bool { + return lhs.bitOffset == rhs.bitOffset + } + public static func < (lhs: Index, rhs: Index) -> Bool { + return lhs.bitOffset < rhs.bitOffset + } + } + + public var startIndex : Index { + @inline(__always) + get { return Index(bitOffset: 0) } + } + + public var endIndex : Index { + @inline(__always) + get { return Index(bitOffset: _bitCount) } + } + + @inline(__always) + public func index(after i: Index) -> Index { + return Index(bitOffset: i.bitOffset &+ _elementWidth) + } + + @_versioned + internal var _elementWidth : UInt8 { + return UInt8(extendingOrTruncating: Element.bitWidth) + } + + public subscript(i: Index) -> Element { + @inline(__always) + get { + return Element(extendingOrTruncating: _storage &>> i.bitOffset) + } + } +} + +extension _UIntBuffer : BidirectionalCollection { + @inline(__always) + public func index(before i: Index) -> Index { + return Index(bitOffset: i.bitOffset &- _elementWidth) + } +} + +extension 
_UIntBuffer : RandomAccessCollection { + public typealias Indices = DefaultRandomAccessIndices<_UIntBuffer> + public typealias IndexDistance = Int + + @inline(__always) + public func index(_ i: Index, offsetBy n: IndexDistance) -> Index { + let x = IndexDistance(i.bitOffset) &+ n &* Element.bitWidth + return Index(bitOffset: UInt8(extendingOrTruncating: x)) + } + + @inline(__always) + public func distance(from i: Index, to j: Index) -> IndexDistance { + return (Int(j.bitOffset) &- Int(i.bitOffset)) / Element.bitWidth + } +} + +extension FixedWidthInteger { + @inline(__always) + @_versioned + func _fullShiftLeft(_ n: N) -> Self { + return (self &<< ((n &+ 1) &>> 1)) &<< (n &>> 1) + } + @inline(__always) + @_versioned + func _fullShiftRight(_ n: N) -> Self { + return (self &>> ((n &+ 1) &>> 1)) &>> (n &>> 1) + } + @inline(__always) + @_versioned + static func _lowBits(_ n: N) -> Self { + return ~((~0 as Self)._fullShiftLeft(n)) + } +} + +extension Range { + @inline(__always) + @_versioned + func _contains_(_ other: Range) -> Bool { + return other.clamped(to: self) == other + } +} + +extension _UIntBuffer : RangeReplaceableCollection { + @inline(__always) + public init() { + _storage = 0 + _bitCount = 0 + } + + public var capacity: Int { + return Storage.bitWidth / Element.bitWidth + } + + @inline(__always) + public mutating func append(_ newElement: Element) { + _debugPrecondition(count < capacity) + _storage |= Storage(newElement) &<< _bitCount + _bitCount = _bitCount &+ _elementWidth + } + + @inline(__always) + public mutating func replaceSubrange( + _ target: Range, with replacement: C + ) where C._Element == Element { + _debugPrecondition( + (0..<_bitCount)._contains_( + target.lowerBound.bitOffset.. 
= (T, consumedCodeUnits: UInt8, isValid: Bool) +} + +public protocol UnicodeDecoder { + associatedtype CodeUnit : UnsignedInteger, FixedWidthInteger + associatedtype EncodedScalar : BidirectionalCollection + where EncodedScalar.Iterator.Element == CodeUnit + + init() + + mutating func parseOne( + _ input: inout I + ) -> Unicode.ParseResult where I.Element == CodeUnit +} + +extension UnicodeDecoder { + @inline(__always) + @discardableResult + public static func decode( + _ input: inout I, + repairingIllFormedSequences makeRepairs: Bool, + into output: (UnicodeScalar)->Void + ) -> Int + where I.Element == CodeUnit + { + var errors = 0 + var d = Self() + while true { + switch d.parseOne(&input) { + case let .valid(scalarContent): + output(decodeOne(scalarContent)) + case .invalid: + if !makeRepairs { return 1 } + errors += 1 + output(UnicodeScalar(_unchecked: 0xFFFD)) + case .emptyInput: + return errors + } + } + } +} + + +extension Unicode { + struct ParsingIterator< + CodeUnitIterator : IteratorProtocol, + Encoding: UnicodeEncoding, + Decoder: UnicodeDecoder + > where Decoder.CodeUnit == CodeUnitIterator.Element, + Encoding.EncodedScalar == Decoder.EncodedScalar { + var codeUnits: CodeUnitIterator + var decoder: Decoder + } +} +extension Unicode.ParsingIterator : IteratorProtocol, Sequence { + mutating func next() -> Decoder.EncodedScalar? 
{ + switch decoder.parseOne(&codeUnits) { + case let .valid(scalarContent): return scalarContent + case .invalid: return Encoding.encodedReplacementScalar + case .emptyInput: return nil + } + } +} + +extension Unicode { + struct DefaultScalarView< + CodeUnits: BidirectionalCollection, + Encoding: UnicodeEncoding + > where CodeUnits.Iterator.Element == Encoding.CodeUnit { + var codeUnits: CodeUnits + init( + _ codeUnits: CodeUnits, + fromEncoding _: Encoding.Type = Encoding.self) { + self.codeUnits = codeUnits + } + } +} + +extension Unicode.DefaultScalarView : Sequence { + struct Iterator { + var parsing: Unicode.ParsingIterator< + CodeUnits.Iterator, Encoding, Encoding.ForwardDecoder + > + } + + func makeIterator() -> Iterator { + return Iterator( + parsing: Unicode.ParsingIterator( + codeUnits: codeUnits.makeIterator(), + decoder: Encoding.ForwardDecoder() + )) + } +} + +extension Unicode.DefaultScalarView.Iterator : IteratorProtocol, Sequence { + mutating func next() -> UnicodeScalar? 
{ + return parsing.next().map { + Encoding.ForwardDecoder.decodeOne($0) + } + } +} + +extension Unicode.DefaultScalarView { + struct Index { + var codeUnitIndex: CodeUnits.Index + var scalar: UnicodeScalar + var stride: UInt8 + } +} + +extension Unicode.DefaultScalarView.Index : Comparable { + @inline(__always) + public static func < ( + lhs: Unicode.DefaultScalarView.Index, + rhs: Unicode.DefaultScalarView.Index + ) -> Bool { + return lhs.codeUnitIndex < rhs.codeUnitIndex + } + + @inline(__always) + public static func == ( + lhs: Unicode.DefaultScalarView.Index, + rhs: Unicode.DefaultScalarView.Index + ) -> Bool { + return lhs.codeUnitIndex == rhs.codeUnitIndex + } +} + +extension Unicode.DefaultScalarView : Collection { + public var startIndex: Index { + @inline(__always) + get { + return index( + after: Index( + codeUnitIndex: codeUnits.startIndex, + scalar: UnicodeScalar(_unchecked: 0), + stride: 0) + ) + } + } + + public var endIndex: Index { + @inline(__always) + get { + return Index( + codeUnitIndex: codeUnits.endIndex, + scalar: UnicodeScalar(_unchecked: 0), + stride: 0) + } + } + + public subscript(i: Index) -> UnicodeScalar { + @inline(__always) get { return i.scalar } + } + + @inline(__always) + public func index(after i: Index) -> Index { + let nextPosition = codeUnits.index( + i.codeUnitIndex, offsetBy: numericCast(i.stride)) + var i = IndexingIterator( + _elements: codeUnits, _position: nextPosition + ) + var d = Encoding.ForwardDecoder() + switch d.parseOne(&i) { + case .valid(let scalarContent): + return Index( + codeUnitIndex: nextPosition, + scalar: Encoding.ForwardDecoder.decodeOne(scalarContent), + stride: numericCast(scalarContent.count)) + case .invalid(let stride): + return Index( + codeUnitIndex: nextPosition, + scalar: UnicodeScalar(_unchecked: 0xfffd), + stride: numericCast(stride)) + case .emptyInput: + return endIndex + } + } +} + +// This should go in the standard library; see +// https://github.com/apple/swift/pull/9074 and +// 
https://bugs.swift.org/browse/SR-4721 +@_fixed_layout +public struct ReverseIndexingIterator< + Elements : BidirectionalCollection +> : IteratorProtocol, Sequence { + + @_inlineable + @inline(__always) + /// Creates an iterator over the given collection. + public /// @testable + init(_elements: Elements, _position: Elements.Index) { + self._elements = _elements + self._position = _position + } + + @_inlineable + @inline(__always) + public mutating func next() -> Elements._Element? { + guard _fastPath(_position != _elements.startIndex) else { return nil } + _position = _elements.index(before: _position) + return _elements[_position] + } + + @_versioned + internal let _elements: Elements + @_versioned + internal var _position: Elements.Index +} + +extension Unicode.DefaultScalarView : BidirectionalCollection { + @inline(__always) + public func index(before i: Index) -> Index { + var d = Encoding.ReverseDecoder() + + var more = ReverseIndexingIterator( + _elements: codeUnits, _position: i.codeUnitIndex) + + switch d.parseOne(&more) { + case .valid(let scalarContent): + let d: CodeUnits.IndexDistance = -numericCast(scalarContent.count) + return Index( + codeUnitIndex: codeUnits.index(i.codeUnitIndex, offsetBy: d), + scalar: Encoding.ReverseDecoder.decodeOne(scalarContent), + stride: numericCast(scalarContent.count)) + case .invalid(let stride): + let d: CodeUnits.IndexDistance = -numericCast(stride) + return Index( + codeUnitIndex: codeUnits.index(i.codeUnitIndex, offsetBy: d) , + scalar: UnicodeScalar(_unchecked: 0xfffd), + stride: numericCast(stride)) + case .emptyInput: fatalError("index out of bounds.") + } + } +} + +public protocol UnicodeEncoding { + associatedtype CodeUnit + + associatedtype EncodedScalar + where CodeUnit == EncodedScalar.Iterator.Element + + static var encodedReplacementScalar : EncodedScalar { get } + static func decode(_ content: EncodedScalar) -> UnicodeScalar + + associatedtype ForwardDecoder : UnicodeDecoder + where EncodedScalar == 
ForwardDecoder.EncodedScalar + + associatedtype ReverseDecoder : UnicodeDecoder + where EncodedScalar == ReverseDecoder.EncodedScalar +} + +internal protocol _UTFEncoding : UnicodeEncoding { + static func _isScalar(_: CodeUnit) -> Bool +} + +public protocol _UTFDecoderBase : UnicodeDecoder { + + associatedtype Buffer : RangeReplaceableCollection = EncodedScalar + var buffer: Buffer { get set } + + associatedtype BufferStorage : UnsignedInteger, FixedWidthInteger = UInt32 +} + +public protocol _UTFDecoder : _UTFDecoderBase +where Buffer == _UIntBuffer, Buffer == EncodedScalar { + static func _isScalar(_: CodeUnit) -> Bool + func _parseMultipleCodeUnits() -> Unicode.ParseResult +} + +extension _UTFEncoding { + public mutating func parseScalar( + from input: inout I, with decoder: inout Decoder + ) -> Unicode.ParseResult + where I.Element == CodeUnit { + + // Bufferless single-scalar fastpath. + if _fastPath(buffer.isEmpty) { + guard let codeUnit = input.next() else { return .emptyInput } + // ASCII, return immediately. + if Self._isScalar(codeUnit) { + return ( + EncodedScalar(containing: codeUnit), + consumedCodeUnits: 1, isValid: true) + } + // Non-ASCII, proceed to buffering mode. + buffer.append(codeUnit) + } else if Self._isScalar(CodeUnit(extendingOrTruncating: buffer._storage)) { + // ASCII in buffer. We don't refill the buffer so we can return + // to bufferless mode once we've exhausted it. + let codeUnit = CodeUnit(extendingOrTruncating: buffer._storage) + buffer.remove(at: buffer.startIndex) + return ( + EncodedScalar(containing: codeUnit), + consumedCodeUnits: 1, isValid: true) + } + // Buffering mode. + // Fill buffer back to 4 bytes (or as many as are left in the iterator). + _sanityCheck(buffer._bitCount < BufferStorage.bitWidth) + repeat { + if let codeUnit = input.next() { + buffer.append(codeUnit) + } else { + if buffer.isEmpty { return .emptyInput } + break // We still have some bytes left in our buffer. 
+ } + } while buffer._bitCount < BufferStorage.bitWidth + + // Find one unicode scalar. + return _parseMultipleCodeUnits() + } +} + +//===----------------------------------------------------------------------===// +//===--- UTF8 Decoders ----------------------------------------------------===// +//===----------------------------------------------------------------------===// + +public protocol _UTF8Decoder : _UTFDecoder {} + +extension _UTF8Decoder { + public static func _isScalar(_ x: CodeUnit) -> Bool { return x & 0x80 == 0 } +} + +extension Unicode.UTF8 : UnicodeEncoding { + public typealias EncodedScalar = _UIntBuffer + public static var encodedReplacementScalar : EncodedScalar { + return EncodedScalar(_storage: 0xbdbfef, _bitCount: 24) + } + + public struct ForwardDecoder { + public typealias Buffer = _UIntBuffer + public typealias EncodedScalar = _UIntBuffer + public init() { } + public var buffer = Buffer() + } + + public struct ReverseDecoder { + public typealias Buffer = _UIntBuffer + public typealias EncodedScalar = _UIntBuffer + public init() { } + public var buffer = Buffer() + } + + public static func decode(_ source: EncodedScalar) -> UnicodeScalar { + let bits = source._storage + switch source._bitCount { + case 8: + return UnicodeScalar(_unchecked: bits) + case 16: + var value = (bits & 0b0_______________________11_1111__0000_0000) &>> 8 + value |= (bits & 0b0________________________________0001_1111) &<< 6 + return UnicodeScalar(_unchecked: value) + case 24: + var value = (bits & 0b0____________11_1111__0000_0000__0000_0000) &>> 16 + value |= (bits & 0b0_______________________11_1111__0000_0000) &>> 2 + value |= (bits & 0b0________________________________0000_1111) &<< 12 + return UnicodeScalar(_unchecked: value) + default: + _sanityCheck(source.count == 4) + var value = (bits & 0b0_11_1111__0000_0000__0000_0000__0000_0000) &>> 24 + value |= (bits & 0b0____________11_1111__0000_0000__0000_0000) &>> 10 + value |= (bits & 
0b0_______________________11_1111__0000_0000) &<< 4 + value |= (bits & 0b0________________________________0000_0111) &<< 18 + return UnicodeScalar(_unchecked: value) + } + } +} + +extension Unicode.UTF8.ReverseDecoder : _UTF8Decoder { + public typealias CodeUnit = UInt8 + + @inline(__always) + @_versioned + internal mutating func _consumeCodeUnits(_ n: UInt8) -> EncodedScalar { + let s = buffer._storage + let bitCount = n &* UInt8(CodeUnit.bitWidth) + buffer._storage >>= bitCount + buffer._bitCount -= bitCount + return EncodedScalar( + _storage: s.byteSwapped >> (type(of: s).bitWidth - bitCount), + _bitCount: bitCount) + } + + @inline(__always) + @_versioned + internal mutating func _consumeValidCodeUnits( + _ n: UInt8 + ) -> Unicode.ParseResult { + return ParseResult( + _consumeCodeUnits(n), consumedCodeUnits: n, isValid: true) + } + + @inline(__always) + @_versioned + internal mutating func _consumeInvalidCodeUnits( + _ n: UInt8 + ) -> Unicode.ParseResult { + _ = _consumeCodeUnits(n) + return ParseResult( + UTF8.encodedReplacementScalar, consumedCodeUnits: n, isValid: false) + } + + public // @testable + func _parseMultipleCodeUnits() -> Unicode.ParseResult { + _sanityCheck(buffer._storage & 0x80 != 0) // this case handled elsewhere + + if buffer._storage & 0b0__1110_0000__1100_0000 + == 0b0__1100_0000__1000_0000 { + // 2-byte sequence. Top 4 bits of decoded result must be nonzero + let top4Bits = buffer._storage & 0b0__0001_1110__0000_0000 + if _fastPath(top4Bits != 0) { + return _consumeValidCodeUnits(2) + } + } + else if buffer._storage & 0b0__1111_0000__1100_0000__1100_0000 + == 0b0__1110_0000__1000_0000__1000_0000 { + // 3-byte sequence. 
The top 5 bits of the decoded result must be nonzero + // and not a surrogate + let top5Bits = buffer._storage & 0b0__1111__0010_0000__0000_0000 + if _fastPath( + top5Bits != 0 && top5Bits != 0b0__1101__0010_0000__0000_0000) { + return _consumeValidCodeUnits(3) + } + } + else if buffer._storage & 0b0__1111_1000__1100_0000__1100_0000__1100_0000 + == 0b0__1111_0000__1000_0000__1000_0000__1000_0000 { + // Make sure the top 5 bits of the decoded result would be in range + let top5bits = buffer._storage + & 0b0__0111__0011_0000__0000_0000__0000_0000 + if _fastPath( + top5bits != 0 + && top5bits <= 0b0__0100__0000_0000__0000_0000__0000_0000 + ) { + return _consumeValidCodeUnits(4) + } + } + return _parseInvalid() + } + + @inline(never) + mutating func _parseInvalid() -> Unicode.ParseResult { + if buffer._storage & 0b0__1111_0000__1100_0000 + == 0b0__1110_0000__1000_0000 { + // 2-byte prefix of 3-byte sequence. The top 5 bits of the decoded result + // must be nonzero and not a surrogate + let top5Bits = buffer._storage & 0b0__1111__0010_0000 + if top5Bits != 0 && top5Bits != 0b0__1101__0010_0000 { + return invalid(codeUnitCount: 2) + } + } + else if buffer._storage & 0b0__1111_1000__1100_0000 + == 0b0__1111_0000__1000_0000 + { + // 2-byte prefix of 4-byte sequence + // Make sure the top 5 bits of the decoded result would be in range + let top5bits = buffer._storage & 0b0__0111__0011_0000 + if top5bits != 0 && top5bits <= 0b0__0100__0000_0000 { + return invalid(codeUnitCount: 2) + } + } + else if buffer._storage & 0b0__1111_1000__1100_0000__1100_0000 + == 0b0__1111_0000__1000_0000__1000_0000 { + // 3-byte prefix of 4-byte sequence + // Make sure the top 5 bits of the decoded result would be in range + let top5bits = buffer._storage & 0b0__0111__0011_0000__0000_0000 + if top5bits != 0 && top5bits <= 0b0__0100__0000_0000__0000_0000 { + return invalid(codeUnitCount: 3) + } + } + return invalid(codeUnitCount: 1) + } +} + +extension Unicode.UTF8.ForwardDecoder : _UTF8Decoder { 
+ public typealias CodeUnit = UInt8 + + @inline(__always) + @_versioned + internal mutating func _consumeCodeUnits(_ n: UInt8) -> EncodedScalar { + let s = buffer._storage + let bitCount = n &* UInt8(CodeUnit.bitWidth) + buffer._storage >>= bitCount + buffer._bitCount -= bitCount + return EncodedScalar(_storage: s, _bitCount: bitCount) + } + + @inline(__always) + @_versioned + internal mutating func _consumeValidCodeUnits( + _ n: UInt8 + ) -> Unicode.ParseResult { + return ParseResult( + _consumeCodeUnits(codeUnitCount, consumedCodeUnits: n, isValid: true)) + } + + @inline(__always) + @_versioned + internal func _consumeInvalidCodeUnits( + codeUnitCount n: UInt8 + ) -> Unicode.ParseResult { + _ = _consumeCodeUnits(n) + return ParseResult( + UTF8.encodedReplacementScalar, consumedCodeUnits: n, isValid: false) + } + + public // @testable + func _parseMultipleCodeUnits() -> Unicode.ParseResult { + _sanityCheck(buffer._storage & 0x80 != 0) // this case handled elsewhere + + if buffer._storage & 0b0__1100_0000__1110_0000 + == 0b0__1000_0000__1100_0000 { + // 2-byte sequence. At least one of the top 4 bits of the decoded result + // must be nonzero. + if _fastPath(buffer._storage & 0b0_0001_1110 != 0) { + return _consumeValidCodeUnits(2) + } + } + else if buffer._storage & 0b0__1100_0000__1100_0000__1111_0000 + == 0b0__1000_0000__1000_0000__1110_0000 { + // 3-byte sequence. The top 5 bits of the decoded result must be nonzero + // and not a surrogate + let top5Bits = buffer._storage & 0b0___0010_0000__0000_1111 + if _fastPath(top5Bits != 0 && top5Bits != 0b0___0010_0000__0000_1101) { + return _consumeValidCodeUnits(3) + } + } + else if buffer._storage & 0b0__1100_0000__1100_0000__1100_0000__1111_1000 + == 0b0__1000_0000__1000_0000__1000_0000__1111_0000 { + // 4-byte sequence. 
The top 5 bits of the decoded result must be nonzero + // and no greater than 0b0__0100_0000 + let top5bits = UInt16(buffer._storage & 0b0__0011_0000__0000_0111) + if _fastPath( + top5bits != 0 && top5bits.byteSwapped <= 0b0__0000_0100__0000_0000 + ) { + return _consumeValidCodeUnits(4) + } + } + return _parseInvalid() + } + + @inline(never) + mutating func _parseInvalid() -> Unicode.ParseResult { + + if buffer._storage & 0b0__1100_0000__1111_0000 + == 0b0__1000_0000__1110_0000 { + // 2-byte prefix of 3-byte sequence. The top 5 bits of the decoded result + // must be nonzero and not a surrogate + let top5Bits = buffer._storage & 0b0__0010_0000__0000_1111 + if top5Bits != 0 && top5Bits != 0b0__0010_0000__0000_1101 { + return _consumeInvalidCodeUnits(2) + } + } + else if buffer._storage & 0b0__1100_0000__1111_1000 + == 0b0__1000_0000__1111_0000 + { + // Prefix of 4-byte sequence. The top 5 bits of the decoded result + // must be nonzero and no greater than 0b0__0100_0000 + let top5bits = UInt16(buffer._storage & 0b0__0011_0000__0000_0111) + if top5bits != 0 && top5bits.byteSwapped <= 0b0__0000_0100__0000_0000 { + return _consumeInvalidCodeUnits( + buffer._storage & 0b0__1100_0000__0000_0000__0000_0000 + == 0b0__1000_0000__0000_0000__0000_0000 ? 
3 : 2) + } + } + return 1 + } +} + +//===----------------------------------------------------------------------===// +//===--- UTF-16 Decoders --------------------------------------------------===// +//===----------------------------------------------------------------------===// + +public protocol _UTF16Decoder : _UTFDecoder where CodeUnit == UTF16.CodeUnit { + var buffer: Buffer { get set } + static var _surrogatePattern : UInt32 { get } +} + +extension _UTF16Decoder { + public static func _isScalar(_ x: CodeUnit) -> Bool { + return x & 0xf800 != 0xd800 + } + + internal mutating func _consume(bitCount: UInt8) -> EncodedScalar { + _sanityCheck(bitCount == 16) + let s = buffer._storage + buffer._storage = 0 + buffer._bitCount = 0 + return EncodedScalar(_storage: s, _bitCount: bitCount) + } + + public // @testable + func _parseMultipleCodeUnits() -> (isValid: Bool, bitCount: UInt8) { + _sanityCheck( // this case handled elsewhere + !Self._isScalar(UInt16(extendingOrTruncating: buffer._storage))) + + if _fastPath(buffer._storage & 0xFC00_FC00 == Self._surrogatePattern) { + return (true, 2*16) + } + return (false, 1*16) + } +} + +extension Unicode.UTF16 : UnicodeEncoding { + public typealias EncodedScalar = _UIntBuffer + public static var encodedReplacementScalar : EncodedScalar { + return EncodedScalar(_storage: 0xFFFD, _bitCount: 16) + } + + public struct ForwardDecoder { + public typealias Buffer = _UIntBuffer + public init() { buffer = Buffer() } + public var buffer: Buffer + } + + public struct ReverseDecoder { + public typealias Buffer = _UIntBuffer + public init() { buffer = Buffer() } + public var buffer: Buffer + } + + public static func decode(_ source: EncodedScalar) -> UnicodeScalar { + let bits = source._storage + if _fastPath(source._bitCount == 16) { + return UnicodeScalar(_unchecked: bits & 0xffff) + } + _sanityCheck(source._bitCount == 32) + let value = 0x10000 + (bits >> 16 & 0x03ff | (bits & 0x03ff) << 10) + return UnicodeScalar(_unchecked: value) + 
} +} + +extension UTF16.ReverseDecoder : _UTF16Decoder { + public typealias CodeUnit = UInt16 + public typealias EncodedScalar = Buffer + + public static var _surrogatePattern : UInt32 { return 0xD800_DC00 } +} + +extension Unicode.UTF16.ForwardDecoder : _UTF16Decoder { + public typealias CodeUnit = UInt16 + public typealias EncodedScalar = Buffer + + public static var _surrogatePattern : UInt32 { return 0xDC00_D800 } +} + +#if !BENCHMARK +//===--- testing ----------------------------------------------------------===// +import StdlibUnittest +import SwiftPrivate + +func checkDecodeUTF( + _ codec: Codec.Type, _ expectedHead: [UInt32], + _ expectedRepairedTail: [UInt32], _ utfStr: [Codec.CodeUnit] +) -> AssertionResult { + var decoded = [UInt32]() + var expected = expectedHead + func output(_ scalar: UInt32) { decoded.append(scalar) } + func output1(_ scalar: UnicodeScalar) { decoded.append(scalar.value) } + + var result = assertionSuccess() + + func check(_ expected: C, _ description: String) + where C.Iterator.Element == UInt32 + { + if !expected.elementsEqual(decoded) { + if result.description == "" { result = assertionFailure() } + result = result.withDescription(" [\(description)]\n") + .withDescription("expected: \(asHex(expectedHead))\n") + .withDescription("actual: \(asHex(decoded))") + } + decoded.removeAll(keepingCapacity: true) + } + + //===--- Tests without repairs ------------------------------------------===// + do { + let iterator = utfStr.makeIterator() + _ = transcode( + iterator, from: codec, to: UTF32.self, + stoppingOnError: true, into: output) + } + check(expected, "legacy, repairing: false") + + do { + var iterator = utfStr.makeIterator() + let errorCount = Codec.ForwardDecoder.decode( + &iterator, repairingIllFormedSequences: false, into: output1) + expectEqual(expectedRepairedTail.isEmpty ? 
0 : 1, errorCount) + } + check(expected, "forward, repairing: false") + + do { + var iterator = utfStr.reversed().makeIterator() + let errorCount = Codec.ReverseDecoder.decode( + &iterator, repairingIllFormedSequences: false, into: output1) + if expectedRepairedTail.isEmpty { + expectEqual(0, errorCount) + check(expected.reversed(), "reverse, repairing: false") + } + else { + expectEqual(1, errorCount) + let x = (expected + expectedRepairedTail).reversed() + expectTrue( + x.starts(with: decoded), + "reverse, repairing: false\n\t\(Array(x)) does not start with \(decoded)") + decoded.removeAll(keepingCapacity: true) + } + } + + //===--- Tests with repairs ------------------------------------------===// + expected += expectedRepairedTail + do { + let iterator = utfStr.makeIterator() + _ = transcode(iterator, from: codec, to: UTF32.self, + stoppingOnError: false, into: output) + } + check(expected, "legacy, repairing: true") + do { + var iterator = utfStr.makeIterator() + let errorCount = Codec.ForwardDecoder.decode( + &iterator, repairingIllFormedSequences: true, into: output1) + + if expectedRepairedTail.isEmpty { expectEqual(0, errorCount) } + else { expectNotEqual(0, errorCount) } + } + check(expected, "forward, repairing: true") + do { + var iterator = utfStr.reversed().makeIterator() + let errorCount = Codec.ReverseDecoder.decode( + &iterator, repairingIllFormedSequences: true, into: output1) + if expectedRepairedTail.isEmpty { expectEqual(0, errorCount) } + else { expectNotEqual(0, errorCount) } + } + check(expected.reversed(), "reverse, repairing: true") + + let scalars = Unicode.DefaultScalarView(utfStr, fromEncoding: Codec.self) + expectEqualSequence(expected, scalars.map { $0.value }) + expectEqualSequence( + expected.reversed(), + scalars.reversed().map { $0.value }) + + do { + var x = scalars.makeIterator() + var j = scalars.startIndex + while (j != scalars.endIndex) { + expectEqual(x.next()!, scalars[j]) + j = scalars.index(after: j) + } + 
expectNil(x.next()) + } + return result +} + +func checkDecodeUTF8( + _ expectedHead: [UInt32], + _ expectedRepairedTail: [UInt32], _ utf8Str: [UInt8] +) -> AssertionResult { + return checkDecodeUTF(UTF8.self, expectedHead, expectedRepairedTail, utf8Str) +} + +func checkDecodeUTF16( + _ expectedHead: [UInt32], + _ expectedRepairedTail: [UInt32], _ utf16Str: [UInt16] +) -> AssertionResult { + return checkDecodeUTF(UTF16.self, expectedHead, expectedRepairedTail, + utf16Str) +} + +/* +func checkDecodeUTF32( + _ expectedHead: [UInt32], + _ expectedRepairedTail: [UInt32], _ utf32Str: [UInt32] +) -> AssertionResult { + return checkDecodeUTF(UTF32.self, expectedHead, expectedRepairedTail, + utf32Str) +} +*/ + +func checkEncodeUTF8(_ expected: [UInt8], + _ scalars: [UInt32]) -> AssertionResult { + var encoded = [UInt8]() + let output: (UInt8) -> Void = { encoded.append($0) } + let iterator = scalars.makeIterator() + let hadError = transcode( + iterator, + from: UTF32.self, + to: UTF8.self, + stoppingOnError: true, + into: output) + expectFalse(hadError) + if expected != encoded { + return assertionFailure() + .withDescription("\n") + .withDescription("expected: \(asHex(expected))\n") + .withDescription("actual: \(asHex(encoded))") + } + + return assertionSuccess() +} + +var UTF8Decoder = TestSuite("UTF8Decoder") + +//===----------------------------------------------------------------------===// +public struct UTFTest { + public struct Flags : OptionSet { + public let rawValue: Int + + public init(rawValue: Int) { + self.rawValue = rawValue + } + + public static let utf8IsInvalid = Flags(rawValue: 1 << 0) + public static let utf16IsInvalid = Flags(rawValue: 1 << 1) + } + + public let string: String + public let utf8: [UInt8] + public let utf16: [UInt16] + public let unicodeScalars: [UnicodeScalar] + public let unicodeScalarsRepairedTail: [UnicodeScalar] + public let flags: Flags + public let loc: SourceLoc + + public var utf32: [UInt32] { + return 
unicodeScalars.map(UInt32.init) + } + + public var utf32RepairedTail: [UInt32] { + return unicodeScalarsRepairedTail.map(UInt32.init) + } + + public init( + string: String, + utf8: [UInt8], + utf16: [UInt16], + scalars: [UInt32], + scalarsRepairedTail: [UInt32] = [], + flags: Flags = [], + file: String = #file, line: UInt = #line + ) { + self.string = string + self.utf8 = utf8 + self.utf16 = utf16 + self.unicodeScalars = scalars.map { UnicodeScalar($0)! } + self.unicodeScalarsRepairedTail = + scalarsRepairedTail.map { UnicodeScalar($0)! } + self.flags = flags + self.loc = SourceLoc(file, line, comment: "test data") + } +} + +public var utfTests: [UTFTest] = [] + // + // Empty sequence. + // + +utfTests.append( + UTFTest( + string: "", + utf8: [], + utf16: [], + scalars: [])) + + // + // 1-byte sequences. + // + + // U+0000 NULL +utfTests.append( + UTFTest( + string: "\u{0000}", + utf8: [ 0x00 ], + utf16: [ 0x00 ], + scalars: [ 0x00 ])) + + // U+0041 LATIN CAPITAL LETTER A +utfTests.append( + UTFTest( + string: "A", + utf8: [ 0x41 ], + utf16: [ 0x41 ], + scalars: [ 0x41 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B +utfTests.append( + UTFTest( + string: "AB", + utf8: [ 0x41, 0x42 ], + utf16: [ 0x41, 0x42 ], + scalars: [ 0x41, 0x42 ])) + + // U+0061 LATIN SMALL LETTER A + // U+0062 LATIN SMALL LETTER B + // U+0063 LATIN SMALL LETTER C +utfTests.append( + UTFTest( + string: "ABC", + utf8: [ 0x41, 0x42, 0x43 ], + utf16: [ 0x41, 0x42, 0x43 ], + scalars: [ 0x41, 0x42, 0x43 ])) + + // U+0000 NULL + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+0000 NULL +utfTests.append( + UTFTest( + string: "\u{0000}AB\u{0000}", + utf8: [ 0x00, 0x41, 0x42, 0x00 ], + utf16: [ 0x00, 0x41, 0x42, 0x00 ], + scalars: [ 0x00, 0x41, 0x42, 0x00 ])) + + // U+007F DELETE +utfTests.append( + UTFTest( + string: "\u{007F}", + utf8: [ 0x7F ], + utf16: [ 0x7F ], + scalars: [ 0x7F ])) + + // + // 2-byte sequences. 
+ // + + // U+0283 LATIN SMALL LETTER ESH +utfTests.append( + UTFTest( + string: "\u{0283}", + utf8: [ 0xCA, 0x83 ], + utf16: [ 0x0283 ], + scalars: [ 0x0283 ])) + + // U+03BA GREEK SMALL LETTER KAPPA + // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA + // U+03C3 GREEK SMALL LETTER SIGMA + // U+03BC GREEK SMALL LETTER MU + // U+03B5 GREEK SMALL LETTER EPSILON +utfTests.append( + UTFTest( + string: "\u{03BA}\u{1F79}\u{03C3}\u{03BC}\u{03B5}", + utf8: [ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 ], + utf16: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ], + scalars: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ])) + + // U+0430 CYRILLIC SMALL LETTER A + // U+0431 CYRILLIC SMALL LETTER BE + // U+0432 CYRILLIC SMALL LETTER VE +utfTests.append( + UTFTest( + string: "\u{0430}\u{0431}\u{0432}", + utf8: [ 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2 ], + utf16: [ 0x0430, 0x0431, 0x0432 ], + scalars: [ 0x0430, 0x0431, 0x0432 ])) + + // + // 3-byte sequences. + // + + // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B + // U+6587 CJK UNIFIED IDEOGRAPH-6587 +utfTests.append( + UTFTest( + string: "\u{4F8b}\u{6587}", + utf8: [ 0xE4, 0xBE, 0x8B, 0xE6, 0x96, 0x87 ], + utf16: [ 0x4F8B, 0x6587 ], + scalars: [ 0x4F8B, 0x6587 ])) + + // U+D55C HANGUL SYLLABLE HAN + // U+AE00 HANGUL SYLLABLE GEUL +utfTests.append( + UTFTest( + string: "\u{d55c}\u{ae00}", + utf8: [ 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80 ], + utf16: [ 0xD55C, 0xAE00 ], + scalars: [ 0xD55C, 0xAE00 ])) + + // U+1112 HANGUL CHOSEONG HIEUH + // U+1161 HANGUL JUNGSEONG A + // U+11AB HANGUL JONGSEONG NIEUN + // U+1100 HANGUL CHOSEONG KIYEOK + // U+1173 HANGUL JUNGSEONG EU + // U+11AF HANGUL JONGSEONG RIEUL +utfTests.append( + UTFTest( + string: "\u{1112}\u{1161}\u{11ab}\u{1100}\u{1173}\u{11af}", + utf8: + [ 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB, + 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF ], + utf16: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ], + scalars: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ])) + + 
// U+3042 HIRAGANA LETTER A + // U+3044 HIRAGANA LETTER I + // U+3046 HIRAGANA LETTER U + // U+3048 HIRAGANA LETTER E + // U+304A HIRAGANA LETTER O +utfTests.append( + UTFTest( + string: "\u{3042}\u{3044}\u{3046}\u{3048}\u{304a}", + utf8: + [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84, 0xE3, 0x81, 0x86, + 0xE3, 0x81, 0x88, 0xE3, 0x81, 0x8A ], + utf16: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ], + scalars: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ])) + + // U+D7FF (unassigned) +utfTests.append( + UTFTest( + string: "\u{D7FF}", + utf8: [ 0xED, 0x9F, 0xBF ], + utf16: [ 0xD7FF ], + scalars: [ 0xD7FF ])) + + // U+E000 (private use) +utfTests.append( + UTFTest( + string: "\u{E000}", + utf8: [ 0xEE, 0x80, 0x80 ], + utf16: [ 0xE000 ], + scalars: [ 0xE000 ])) + + // U+FFFD REPLACEMENT CHARACTER +utfTests.append( + UTFTest( + string: "\u{FFFD}", + utf8: [ 0xEF, 0xBF, 0xBD ], + utf16: [ 0xFFFD ], + scalars: [ 0xFFFD ])) + + // U+FFFF (noncharacter) +utfTests.append( + UTFTest( + string: "\u{FFFF}", + utf8: [ 0xEF, 0xBF, 0xBF ], + utf16: [ 0xFFFF ], + scalars: [ 0xFFFF ])) + + // + // 4-byte sequences. 
+ // + + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "\u{1F425}", + utf8: [ 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0xD83D, 0xDC25 ], + scalars: [ 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "A\u{1F425}", + utf8: [ 0x41, 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0x41, 0xD83D, 0xDC25 ], + scalars: [ 0x41, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "AB\u{1F425}", + utf8: [ 0x41, 0x42, 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0x41, 0x42, 0xD83D, 0xDC25 ], + scalars: [ 0x41, 0x42, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+0043 LATIN CAPITAL LETTER C + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "ABC\u{1F425}", + utf8: [ 0x41, 0x42, 0x43, 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0x41, 0x42, 0x43, 0xD83D, 0xDC25 ], + scalars: [ 0x41, 0x42, 0x43, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+0043 LATIN CAPITAL LETTER C + // U+0044 LATIN CAPITAL LETTER D + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "ABCD\u{1F425}", + utf8: [ 0x41, 0x42, 0x43, 0x44, 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0x41, 0x42, 0x43, 0x44, 0xD83D, 0xDC25 ], + scalars: [ 0x41, 0x42, 0x43, 0x44, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+0043 LATIN CAPITAL LETTER C + // U+0044 LATIN CAPITAL LETTER D + // U+0045 LATIN CAPITAL LETTER E + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "ABCDE\u{1F425}", + utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xD83D, 0xDC25 ], + scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + 
// U+0043 LATIN CAPITAL LETTER C + // U+0044 LATIN CAPITAL LETTER D + // U+0045 LATIN CAPITAL LETTER E + // U+0046 LATIN CAPITAL LETTER F + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "ABCDEF\u{1F425}", + utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xD83D, 0xDC25 ], + scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+0043 LATIN CAPITAL LETTER C + // U+0044 LATIN CAPITAL LETTER D + // U+0045 LATIN CAPITAL LETTER E + // U+0046 LATIN CAPITAL LETTER F + // U+0047 LATIN CAPITAL LETTER G + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "ABCDEFG\u{1F425}", + utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xD83D, 0xDC25 ], + scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+0043 LATIN CAPITAL LETTER C + // U+0044 LATIN CAPITAL LETTER D + // U+0045 LATIN CAPITAL LETTER E + // U+0046 LATIN CAPITAL LETTER F + // U+0047 LATIN CAPITAL LETTER G + // U+0048 LATIN CAPITAL LETTER H + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + UTFTest( + string: "ABCDEFGH\u{1F425}", + utf8: + [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: + [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0xD83D, 0xDC25 ], + scalars: + [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x0001_F425 ])) + + // U+0041 LATIN CAPITAL LETTER A + // U+0042 LATIN CAPITAL LETTER B + // U+0043 LATIN CAPITAL LETTER C + // U+0044 LATIN CAPITAL LETTER D + // U+0045 LATIN CAPITAL LETTER E + // U+0046 LATIN CAPITAL LETTER F + // U+0047 LATIN CAPITAL LETTER G + // U+0048 LATIN CAPITAL LETTER H + // U+0049 LATIN CAPITAL LETTER I + // U+1F425 FRONT-FACING BABY CHICK +utfTests.append( + 
UTFTest( + string: "ABCDEFGHI\u{1F425}", + utf8: + [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0xF0, 0x9F, 0x90, 0xA5 ], + utf16: + [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0xD83D, 0xDC25 ], + scalars: + [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x0001_F425 ])) + + // U+10000 LINEAR B SYLLABLE B008 A +utfTests.append( + UTFTest( + string: "\u{10000}", + utf8: [ 0xF0, 0x90, 0x80, 0x80 ], + utf16: [ 0xD800, 0xDC00 ], + scalars: [ 0x0001_0000 ])) + + // U+10100 AEGEAN WORD SEPARATOR LINE +utfTests.append( + UTFTest( + string: "\u{10100}", + utf8: [ 0xF0, 0x90, 0x84, 0x80 ], + utf16: [ 0xD800, 0xDD00 ], + scalars: [ 0x0001_0100 ])) + + // U+103FF (unassigned) +utfTests.append( + UTFTest( + string: "\u{103FF}", + utf8: [ 0xF0, 0x90, 0x8F, 0xBF ], + utf16: [ 0xD800, 0xDFFF ], + scalars: [ 0x0001_03FF ])) + + // U+E0000 (unassigned) +utfTests.append( + UTFTest( + string: "\u{E0000}", + utf8: [ 0xF3, 0xA0, 0x80, 0x80 ], + utf16: [ 0xDB40, 0xDC00 ], + scalars: [ 0x000E_0000 ])) + + // U+E0100 VARIATION SELECTOR-17 +utfTests.append( + UTFTest( + string: "\u{E0100}", + utf8: [ 0xF3, 0xA0, 0x84, 0x80 ], + utf16: [ 0xDB40, 0xDD00 ], + scalars: [ 0x000E_0100 ])) + + // U+E03FF (unassigned) +utfTests.append( + UTFTest( + string: "\u{E03FF}", + utf8: [ 0xF3, 0xA0, 0x8F, 0xBF ], + utf16: [ 0xDB40, 0xDFFF ], + scalars: [ 0x000E_03FF ])) + + // U+10FC00 (private use) +utfTests.append( + UTFTest( + string: "\u{10FC00}", + utf8: [ 0xF4, 0x8F, 0xB0, 0x80 ], + utf16: [ 0xDBFF, 0xDC00 ], + scalars: [ 0x0010_FC00 ])) + + // U+10FD00 (private use) +utfTests.append( + UTFTest( + string: "\u{10FD00}", + utf8: [ 0xF4, 0x8F, 0xB4, 0x80 ], + utf16: [ 0xDBFF, 0xDD00 ], + scalars: [ 0x0010_FD00 ])) + + // U+10FFFF (private use, noncharacter) +utfTests.append( + UTFTest( + string: "\u{10FFFF}", + utf8: [ 0xF4, 0x8F, 0xBF, 0xBF ], + utf16: [ 0xDBFF, 0xDFFF ], + scalars: [ 0x0010_FFFF ])) 
+//===----------------------------------------------------------------------===// + +UTF8Decoder.test("SmokeTest").forEach(in: utfTests) { + test in + + expectTrue( + checkDecodeUTF8(test.utf32, [], test.utf8), + stackTrace: test.loc.withCurrentLoc()) + return () +} + +UTF8Decoder.test("FirstPossibleSequence") { + // + // First possible sequence of a certain length + // + + // U+0000 NULL + expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ])) + + // U+0080 PADDING CHARACTER + expectTrue(checkDecodeUTF8([ 0x0080 ], [], [ 0xc2, 0x80 ])) + + // U+0800 SAMARITAN LETTER ALAF + expectTrue(checkDecodeUTF8( + [ 0x0800 ], [], + [ 0xe0, 0xa0, 0x80 ])) + + // U+10000 LINEAR B SYLLABLE B008 A + expectTrue(checkDecodeUTF8( + [ 0x10000 ], [], + [ 0xf0, 0x90, 0x80, 0x80 ])) + + // U+200000 (invalid) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x88, 0x80, 0x80, 0x80 ])) + + // U+4000000 (invalid) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 ])) +} + +UTF8Decoder.test("LastPossibleSequence") { + // + // Last possible sequence of a certain length + // + + // U+007F DELETE + expectTrue(checkDecodeUTF8([ 0x007f ], [], [ 0x7f ])) + + // U+07FF (unassigned) + expectTrue(checkDecodeUTF8([ 0x07ff ], [], [ 0xdf, 0xbf ])) + + // U+FFFF (noncharacter) + expectTrue(checkDecodeUTF8( + [ 0xffff ], [], + [ 0xef, 0xbf, 0xbf ])) + + // U+1FFFFF (invalid) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf7, 0xbf, 0xbf, 0xbf ])) + + // U+3FFFFFF (invalid) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf ])) + + // U+7FFFFFFF (invalid) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("CodeSpaceBoundaryConditions") { + // + // Other boundary conditions + // + + // U+D7FF 
(unassigned) + expectTrue(checkDecodeUTF8([ 0xd7ff ], [], [ 0xed, 0x9f, 0xbf ])) + + // U+E000 (private use) + expectTrue(checkDecodeUTF8([ 0xe000 ], [], [ 0xee, 0x80, 0x80 ])) + + // U+FFFD REPLACEMENT CHARACTER + expectTrue(checkDecodeUTF8([ 0xfffd ], [], [ 0xef, 0xbf, 0xbd ])) + + // U+10FFFF (noncharacter) + expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ])) + + // U+110000 (invalid) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf4, 0x90, 0x80, 0x80 ])) +} + +UTF8Decoder.test("UnexpectedContinuationBytes") { + // + // Unexpected continuation bytes + // + + // A sequence of unexpected continuation bytes that don't follow a first + // byte, every byte is a maximal subpart. + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xbf, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd ], + [ 0x80, 0xbf, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0x80, 0xbf, 0x80, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0x80, 0xbf, 0x82, 0xbf, 0xaa ])) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0 ])) + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0, 0x8f ])) + + // All continuation bytes (0x80--0xbf). 
+ expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf ])) +} + +UTF8Decoder.test("LonelyStartBytes") { + // + // Lonely start bytes + // + + // Start bytes of 2-byte sequences (0xc0--0xdf). 
+ expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], + [ 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20, 0xc3, 0x20, + 0xc4, 0x20, 0xc5, 0x20, 0xc6, 0x20, 0xc7, 0x20, + 0xc8, 0x20, 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20, + 0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20, 0xcf, 0x20, + 0xd0, 0x20, 0xd1, 0x20, 0xd2, 0x20, 0xd3, 0x20, + 0xd4, 0x20, 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20, + 0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20, 0xdb, 0x20, + 0xdc, 0x20, 0xdd, 0x20, 0xde, 0x20, 0xdf, 0x20 ])) + + // Start bytes of 3-byte sequences (0xe0--0xef). 
+ expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], + [ 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20, 0xe3, 0x20, + 0xe4, 0x20, 0xe5, 0x20, 0xe6, 0x20, 0xe7, 0x20, + 0xe8, 0x20, 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20, + 0xec, 0x20, 0xed, 0x20, 0xee, 0x20, 0xef, 0x20 ])) + + // Start bytes of 4-byte sequences (0xf0--0xf7). + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, + 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], + [ 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20, 0xf3, 0x20, + 0xf4, 0x20, 0xf5, 0x20, 0xf6, 0x20, 0xf7, 0x20 ])) + + // Start bytes of 5-byte sequences (0xf8--0xfb). + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0xf9, 0xfa, 0xfb ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], + [ 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20, 0xfb, 0x20 ])) + + // Start bytes of 6-byte sequences (0xfc--0xfd). + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xfd ])) + + expectTrue(checkDecodeUTF8( + [], [ 0xfffd, 0x0020, 0xfffd, 0x0020 ], + [ 0xfc, 0x20, 0xfd, 0x20 ])) +} + +UTF8Decoder.test("InvalidStartBytes") { + // + // Other bytes (0xc0--0xc1, 0xfe--0xff). 
+ // + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfe ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xff ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xc0, 0xc1, 0xfe, 0xff ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfe, 0xfe, 0xff, 0xff ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfe, 0x80, 0x80, 0x80, 0x80, 0x80 ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xff, 0x80, 0x80, 0x80, 0x80, 0x80 ])) + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ], + [ 0xc0, 0x20, 0xc1, 0x20, 0xfe, 0x20, 0xff, 0x20 ])) +} + +UTF8Decoder.test("MissingContinuationBytes") { + // + // Sequences with one continuation byte missing + // + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc2 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xdf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xc2, 0x41 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xdf, 0x41 ])) + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xa0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xa0, 0x41 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xbf, 0x41 ])) + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe1, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xec, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe1, 0x80, 0x41 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xec, 0xbf, 0x41 ])) + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x9f ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x80, 0x41 
])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x9f, 0x41 ])) + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xee, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xef, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xee, 0x80, 0x41 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xef, 0xbf, 0x41 ])) + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0x90, 0x80, 0x41 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0xbf, 0xbf, 0x41 ])) + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf1, 0x80, 0x80, 0x41 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf3, 0xbf, 0xbf, 0x41 ])) + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x80, 0x80, 0x41 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x8f, 0xbf, 0x41 ])) + + // Overlong sequences with one trailing byte missing. 
+ expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x9f ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x8f, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x80, 0x80, 0x80, 0x80 ])) + + // Sequences that represent surrogates with one trailing byte missing. + // High-surrogates + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xa0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xac ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xaf ])) + // Low-surrogates + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb4 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xbf ])) + + // Ill-formed 4-byte sequences. + // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+1100xx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf4, 0x90, 0x80 ])) + // U+13FBxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf4, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf5, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf6, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf7, 0x80, 0x80 ])) + // U+1FFBxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf7, 0xbf, 0xbf ])) + + // Ill-formed 5-byte sequences. 
+ // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+2000xx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x88, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0xbf, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf9, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfa, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfb, 0x80, 0x80, 0x80 ])) + // U+3FFFFxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfb, 0xbf, 0xbf, 0xbf ])) + + // Ill-formed 6-byte sequences. + // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+40000xx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x84, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0xbf, 0xbf, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfd, 0x80, 0x80, 0x80, 0x80 ])) + // U+7FFFFFxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ])) + + // + // Sequences with two continuation bytes missing + // + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f ])) + + // Overlong sequences with two trailing bytes missing. 
+ expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x8f ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x80, 0x80, 0x80 ])) + + // Sequences that represent surrogates with two trailing bytes missing. + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed ])) + + // Ill-formed 4-byte sequences. + // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+110yxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0x90 ])) + // U+13Fyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf5, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf6, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0x80 ])) + // U+1FFyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0xbf ])) + + // Ill-formed 5-byte sequences. + // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+200yxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x88, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xf9, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfa, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfb, 0x80, 0x80 ])) + // U+3FFFyxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfb, 0xbf, 0xbf ])) + + // Ill-formed 6-byte sequences. 
+ // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+4000yxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x84, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0xbf, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfd, 0x80, 0x80, 0x80 ])) + // U+7FFFFyxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfd, 0xbf, 0xbf, 0xbf ])) + + // + // Sequences with three continuation bytes missing + // + + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf2 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4 ])) + + // Broken overlong sequences. + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x80, 0x80 ])) + + // Ill-formed 4-byte sequences. + // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+14yyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf5 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf6 ])) + // U+1Cyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf7 ])) + + // Ill-formed 5-byte sequences. 
+ // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+20yyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x88 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf9, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfa, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0x80 ])) + // U+3FCyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0xbf ])) + + // Ill-formed 6-byte sequences. + // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+400yyxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x84, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfd, 0x80, 0x80 ])) + // U+7FFCyyxx (invalid) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xfd, 0xbf, 0xbf ])) + + // + // Sequences with four continuation bytes missing + // + + // Ill-formed 5-byte sequences. + // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+uzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf9 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfa ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ])) + // U+3zyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ])) + + // Broken overlong sequences. + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x80 ])) + + // Ill-formed 6-byte sequences. 
+ // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+uzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x84 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0x80 ])) + // U+7Fzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0xbf ])) + + // + // Sequences with five continuation bytes missing + // + + // Ill-formed 6-byte sequences. + // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+uzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfc ])) + // U+uuzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfd ])) + + // + // Consecutive sequences with trailing bytes missing + // + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xc0, /**/ 0xe0, 0x80, /**/ 0xf0, 0x80, 0x80, + 0xf8, 0x80, 0x80, 0x80, + 0xfc, 0x80, 0x80, 0x80, 0x80, + 0xdf, /**/ 0xef, 0xbf, /**/ 0xf7, 0xbf, 0xbf, + 0xfb, 0xbf, 0xbf, 0xbf, + 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("OverlongSequences") { + // + // Overlong UTF-8 sequences + // + + // U+002F SOLIDUS + expectTrue(checkDecodeUTF8([ 0x002f ], [], [ 0x2f ])) + + // Overlong sequences of the above. 
+ expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xe0, 0x80, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x80, 0x80, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x80, 0x80, 0x80, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf ])) + + // U+0000 NULL + expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ])) + + // Overlong sequences of the above. + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xe0, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x80, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 ])) + + // Other overlong and ill-formed sequences. 
+ expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xe0, 0x9f, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x8f, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x8f, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x87, 0xbf, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("IsolatedSurrogates") { + // Unicode 6.3.0: + // + // D71. High-surrogate code point: A Unicode code point in the range + // U+D800 to U+DBFF. + // + // D73. Low-surrogate code point: A Unicode code point in the range + // U+DC00 to U+DFFF. + + // Note: U+E0100 is 0xDB40 0xDD00 in UTF-16. 
+ + // High-surrogates + + // U+D800 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80 ])) + expectTrue(checkDecodeUTF8( + [ 0x0041 ], + [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ], + [ 0x41, 0xed, 0xa0, 0x80, 0x41 ])) + + // U+DB40 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xac, 0xa0 ])) + + // U+DBFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf ])) + + // Low-surrogates + + // U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xb0, 0x80 ])) + + // U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xb4, 0x80 ])) + + // U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("SurrogatePairs") { + // Surrogate pairs + + // U+D800 U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 ])) + + // U+D800 U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80, 0xed, 0xb4, 0x80 ])) + + // U+D800 U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf ])) + + // U+DB40 U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xac, 0xa0, 0xed, 0xb0, 0x80 ])) + + // U+DB40 U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xac, 0xa0, 0xed, 0xb4, 0x80 ])) + + // U+DB40 U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xac, 0xa0, 0xed, 0xbf, 0xbf ])) + + // U+DBFF U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 ])) + + // U+DBFF U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf, 0xed, 0xb4, 0x80 ])) + + // U+DBFF U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("Noncharacters") { + // + // Noncharacters + // + + // Unicode 6.3.0: + // + // D14. Noncharacter: A code point that is permanently reserved for + // internal use and that should never be interchanged. Noncharacters + // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10) + // and the values U+FDD0..U+FDEF. + + // U+FFFE + expectTrue(checkDecodeUTF8([ 0xfffe ], [], [ 0xef, 0xbf, 0xbe ])) + + // U+FFFF + expectTrue(checkDecodeUTF8([ 0xffff ], [], [ 0xef, 0xbf, 0xbf ])) + + // U+1FFFE + expectTrue(checkDecodeUTF8([ 0x1fffe ], [], [ 0xf0, 0x9f, 0xbf, 0xbe ])) + + // U+1FFFF + expectTrue(checkDecodeUTF8([ 0x1ffff ], [], [ 0xf0, 0x9f, 0xbf, 0xbf ])) + + // U+2FFFE + expectTrue(checkDecodeUTF8([ 0x2fffe ], [], [ 0xf0, 0xaf, 0xbf, 0xbe ])) + + // U+2FFFF + expectTrue(checkDecodeUTF8([ 0x2ffff ], [], [ 0xf0, 0xaf, 0xbf, 0xbf ])) + + // U+3FFFE + expectTrue(checkDecodeUTF8([ 0x3fffe ], [], [ 0xf0, 0xbf, 0xbf, 0xbe ])) + + // U+3FFFF + expectTrue(checkDecodeUTF8([ 0x3ffff ], [], [ 0xf0, 0xbf, 0xbf, 0xbf ])) + + // U+4FFFE + expectTrue(checkDecodeUTF8([ 0x4fffe ], [], [ 0xf1, 0x8f, 0xbf, 0xbe ])) + + // U+4FFFF + expectTrue(checkDecodeUTF8([ 0x4ffff ], [], [ 0xf1, 0x8f, 0xbf, 0xbf ])) + + // U+5FFFE + expectTrue(checkDecodeUTF8([ 0x5fffe ], [], [ 0xf1, 0x9f, 0xbf, 0xbe ])) + + // U+5FFFF + expectTrue(checkDecodeUTF8([ 0x5ffff ], [], [ 0xf1, 0x9f, 0xbf, 0xbf ])) + + // U+6FFFE + expectTrue(checkDecodeUTF8([ 0x6fffe ], [], [ 0xf1, 0xaf, 0xbf, 0xbe ])) + + // U+6FFFF + expectTrue(checkDecodeUTF8([ 0x6ffff ], [], [ 0xf1, 0xaf, 0xbf, 0xbf ])) + + // U+7FFFE + expectTrue(checkDecodeUTF8([ 0x7fffe ], [], [ 0xf1, 0xbf, 0xbf, 0xbe ])) + + // U+7FFFF + expectTrue(checkDecodeUTF8([ 0x7ffff ], [], [ 0xf1, 
0xbf, 0xbf, 0xbf ])) + + // U+8FFFE + expectTrue(checkDecodeUTF8([ 0x8fffe ], [], [ 0xf2, 0x8f, 0xbf, 0xbe ])) + + // U+8FFFF + expectTrue(checkDecodeUTF8([ 0x8ffff ], [], [ 0xf2, 0x8f, 0xbf, 0xbf ])) + + // U+9FFFE + expectTrue(checkDecodeUTF8([ 0x9fffe ], [], [ 0xf2, 0x9f, 0xbf, 0xbe ])) + + // U+9FFFF + expectTrue(checkDecodeUTF8([ 0x9ffff ], [], [ 0xf2, 0x9f, 0xbf, 0xbf ])) + + // U+AFFFE + expectTrue(checkDecodeUTF8([ 0xafffe ], [], [ 0xf2, 0xaf, 0xbf, 0xbe ])) + + // U+AFFFF + expectTrue(checkDecodeUTF8([ 0xaffff ], [], [ 0xf2, 0xaf, 0xbf, 0xbf ])) + + // U+BFFFE + expectTrue(checkDecodeUTF8([ 0xbfffe ], [], [ 0xf2, 0xbf, 0xbf, 0xbe ])) + + // U+BFFFF + expectTrue(checkDecodeUTF8([ 0xbffff ], [], [ 0xf2, 0xbf, 0xbf, 0xbf ])) + + // U+CFFFE + expectTrue(checkDecodeUTF8([ 0xcfffe ], [], [ 0xf3, 0x8f, 0xbf, 0xbe ])) + + // U+CFFFF + expectTrue(checkDecodeUTF8([ 0xcfffF ], [], [ 0xf3, 0x8f, 0xbf, 0xbf ])) + + // U+DFFFE + expectTrue(checkDecodeUTF8([ 0xdfffe ], [], [ 0xf3, 0x9f, 0xbf, 0xbe ])) + + // U+DFFFF + expectTrue(checkDecodeUTF8([ 0xdffff ], [], [ 0xf3, 0x9f, 0xbf, 0xbf ])) + + // U+EFFFE + expectTrue(checkDecodeUTF8([ 0xefffe ], [], [ 0xf3, 0xaf, 0xbf, 0xbe ])) + + // U+EFFFF + expectTrue(checkDecodeUTF8([ 0xeffff ], [], [ 0xf3, 0xaf, 0xbf, 0xbf ])) + + // U+FFFFE + expectTrue(checkDecodeUTF8([ 0xffffe ], [], [ 0xf3, 0xbf, 0xbf, 0xbe ])) + + // U+FFFFF + expectTrue(checkDecodeUTF8([ 0xfffff ], [], [ 0xf3, 0xbf, 0xbf, 0xbf ])) + + // U+10FFFE + expectTrue(checkDecodeUTF8([ 0x10fffe ], [], [ 0xf4, 0x8f, 0xbf, 0xbe ])) + + // U+10FFFF + expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ])) + + // U+FDD0 + expectTrue(checkDecodeUTF8([ 0xfdd0 ], [], [ 0xef, 0xb7, 0x90 ])) + + // U+FDD1 + expectTrue(checkDecodeUTF8([ 0xfdd1 ], [], [ 0xef, 0xb7, 0x91 ])) + + // U+FDD2 + expectTrue(checkDecodeUTF8([ 0xfdd2 ], [], [ 0xef, 0xb7, 0x92 ])) + + // U+FDD3 + expectTrue(checkDecodeUTF8([ 0xfdd3 ], [], [ 0xef, 0xb7, 0x93 ])) + + // U+FDD4 + 
expectTrue(checkDecodeUTF8([ 0xfdd4 ], [], [ 0xef, 0xb7, 0x94 ])) + + // U+FDD5 + expectTrue(checkDecodeUTF8([ 0xfdd5 ], [], [ 0xef, 0xb7, 0x95 ])) + + // U+FDD6 + expectTrue(checkDecodeUTF8([ 0xfdd6 ], [], [ 0xef, 0xb7, 0x96 ])) + + // U+FDD7 + expectTrue(checkDecodeUTF8([ 0xfdd7 ], [], [ 0xef, 0xb7, 0x97 ])) + + // U+FDD8 + expectTrue(checkDecodeUTF8([ 0xfdd8 ], [], [ 0xef, 0xb7, 0x98 ])) + + // U+FDD9 + expectTrue(checkDecodeUTF8([ 0xfdd9 ], [], [ 0xef, 0xb7, 0x99 ])) + + // U+FDDA + expectTrue(checkDecodeUTF8([ 0xfdda ], [], [ 0xef, 0xb7, 0x9a ])) + + // U+FDDB + expectTrue(checkDecodeUTF8([ 0xfddb ], [], [ 0xef, 0xb7, 0x9b ])) + + // U+FDDC + expectTrue(checkDecodeUTF8([ 0xfddc ], [], [ 0xef, 0xb7, 0x9c ])) + + // U+FDDD + expectTrue(checkDecodeUTF8([ 0xfddd ], [], [ 0xef, 0xb7, 0x9d ])) + + // U+FDDE + expectTrue(checkDecodeUTF8([ 0xfdde ], [], [ 0xef, 0xb7, 0x9e ])) + + // U+FDDF + expectTrue(checkDecodeUTF8([ 0xfddf ], [], [ 0xef, 0xb7, 0x9f ])) + + // U+FDE0 + expectTrue(checkDecodeUTF8([ 0xfde0 ], [], [ 0xef, 0xb7, 0xa0 ])) + + // U+FDE1 + expectTrue(checkDecodeUTF8([ 0xfde1 ], [], [ 0xef, 0xb7, 0xa1 ])) + + // U+FDE2 + expectTrue(checkDecodeUTF8([ 0xfde2 ], [], [ 0xef, 0xb7, 0xa2 ])) + + // U+FDE3 + expectTrue(checkDecodeUTF8([ 0xfde3 ], [], [ 0xef, 0xb7, 0xa3 ])) + + // U+FDE4 + expectTrue(checkDecodeUTF8([ 0xfde4 ], [], [ 0xef, 0xb7, 0xa4 ])) + + // U+FDE5 + expectTrue(checkDecodeUTF8([ 0xfde5 ], [], [ 0xef, 0xb7, 0xa5 ])) + + // U+FDE6 + expectTrue(checkDecodeUTF8([ 0xfde6 ], [], [ 0xef, 0xb7, 0xa6 ])) + + // U+FDE7 + expectTrue(checkDecodeUTF8([ 0xfde7 ], [], [ 0xef, 0xb7, 0xa7 ])) + + // U+FDE8 + expectTrue(checkDecodeUTF8([ 0xfde8 ], [], [ 0xef, 0xb7, 0xa8 ])) + + // U+FDE9 + expectTrue(checkDecodeUTF8([ 0xfde9 ], [], [ 0xef, 0xb7, 0xa9 ])) + + // U+FDEA + expectTrue(checkDecodeUTF8([ 0xfdea ], [], [ 0xef, 0xb7, 0xaa ])) + + // U+FDEB + expectTrue(checkDecodeUTF8([ 0xfdeb ], [], [ 0xef, 0xb7, 0xab ])) + + // U+FDEC + expectTrue(checkDecodeUTF8([ 
0xfdec ], [], [ 0xef, 0xb7, 0xac ])) + + // U+FDED + expectTrue(checkDecodeUTF8([ 0xfded ], [], [ 0xef, 0xb7, 0xad ])) + + // U+FDEE + expectTrue(checkDecodeUTF8([ 0xfdee ], [], [ 0xef, 0xb7, 0xae ])) + + // U+FDEF + expectTrue(checkDecodeUTF8([ 0xfdef ], [], [ 0xef, 0xb7, 0xaf ])) + + // U+FDF0 + expectTrue(checkDecodeUTF8([ 0xfdf0 ], [], [ 0xef, 0xb7, 0xb0 ])) + + // U+FDF1 + expectTrue(checkDecodeUTF8([ 0xfdf1 ], [], [ 0xef, 0xb7, 0xb1 ])) + + // U+FDF2 + expectTrue(checkDecodeUTF8([ 0xfdf2 ], [], [ 0xef, 0xb7, 0xb2 ])) + + // U+FDF3 + expectTrue(checkDecodeUTF8([ 0xfdf3 ], [], [ 0xef, 0xb7, 0xb3 ])) + + // U+FDF4 + expectTrue(checkDecodeUTF8([ 0xfdf4 ], [], [ 0xef, 0xb7, 0xb4 ])) + + // U+FDF5 + expectTrue(checkDecodeUTF8([ 0xfdf5 ], [], [ 0xef, 0xb7, 0xb5 ])) + + // U+FDF6 + expectTrue(checkDecodeUTF8([ 0xfdf6 ], [], [ 0xef, 0xb7, 0xb6 ])) + + // U+FDF7 + expectTrue(checkDecodeUTF8([ 0xfdf7 ], [], [ 0xef, 0xb7, 0xb7 ])) + + // U+FDF8 + expectTrue(checkDecodeUTF8([ 0xfdf8 ], [], [ 0xef, 0xb7, 0xb8 ])) + + // U+FDF9 + expectTrue(checkDecodeUTF8([ 0xfdf9 ], [], [ 0xef, 0xb7, 0xb9 ])) + + // U+FDFA + expectTrue(checkDecodeUTF8([ 0xfdfa ], [], [ 0xef, 0xb7, 0xba ])) + + // U+FDFB + expectTrue(checkDecodeUTF8([ 0xfdfb ], [], [ 0xef, 0xb7, 0xbb ])) + + // U+FDFC + expectTrue(checkDecodeUTF8([ 0xfdfc ], [], [ 0xef, 0xb7, 0xbc ])) + + // U+FDFD + expectTrue(checkDecodeUTF8([ 0xfdfd ], [], [ 0xef, 0xb7, 0xbd ])) + + // U+FDFE + expectTrue(checkDecodeUTF8([ 0xfdfe ], [], [ 0xef, 0xb7, 0xbe ])) + + // U+FDFF + expectTrue(checkDecodeUTF8([ 0xfdff ], [], [ 0xef, 0xb7, 0xbf ])) +} + +var UTF16Decoder = TestSuite("UTF16Decoder") + +UTF16Decoder.test("UTF16.transcodedLength") { + do { + let u8: [UTF8.CodeUnit] = [ 0, 1, 2, 3, 4, 5 ] + let (count, isASCII) = UTF16.transcodedLength( + of: u8.makeIterator(), + decodedAs: UTF8.self, + repairingIllFormedSequences: false)! + expectEqual(6, count) + expectTrue(isASCII) + } + + do { + // "𤭢" == U+24B62.
+ let u8: [UTF8.CodeUnit] = [ 0xF0, 0xA4, 0xAD, 0xA2 ] + let (count, isASCII) = UTF16.transcodedLength( + of: u8.makeIterator(), + decodedAs: UTF8.self, + repairingIllFormedSequences: false)! + expectEqual(2, count) + expectFalse(isASCII) + } + + do { + let u16: [UTF16.CodeUnit] = [ 6, 7, 8, 9, 10, 11 ] + let (count, isASCII) = UTF16.transcodedLength( + of: u16.makeIterator(), + decodedAs: UTF16.self, + repairingIllFormedSequences: false)! + expectEqual(6, count) + expectTrue(isASCII) + } +} + +UTF16Decoder.test("Decoding1").forEach(in: utfTests) { + test in + + expectTrue( + checkDecodeUTF16( + test.utf32, test.utf32RepairedTail, test.utf16), + stackTrace: test.loc.withCurrentLoc()) + return () +} + +UTF16Decoder.test("Decoding2") { + for (name, batch) in utf16Tests { + print("Batch: \(name)") + for test in batch { + expectTrue(checkDecodeUTF16(test.scalarsHead, test.scalarsRepairedTail, + test.encoded), stackTrace: test.loc.withCurrentLoc()) + } + } +} + +public struct UTF16Test { + public let scalarsHead: [UInt32] + public let scalarsRepairedTail: [UInt32] + public let encoded: [UInt16] + public let loc: SourceLoc + + public init( + _ scalarsHead: [UInt32], _ scalarsRepairedTail: [UInt32], + _ encoded: [UInt16], + file: String = #file, line: UInt = #line + ) { + self.scalarsHead = scalarsHead + self.scalarsRepairedTail = scalarsRepairedTail + self.encoded = encoded + self.loc = SourceLoc(file, line, comment: "test data") + } +} + +public let utf16Tests = [ + "Incomplete": [ + // + // Incomplete sequences that end right before EOF. 
+ // + + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD ], [ 0xD800 ]), + + // U+D800 (high-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xD800, 0xD800 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xD800 ]), + + // U+10000 LINEAR B SYLLABLE B008 A + // U+D800 (high-surrogate) + UTF16Test( + [ 0x0001_0000 ], [ 0xFFFD ], + [ 0xD800, 0xDC00, 0xD800 ]), + + // + // Incomplete sequences with more code units following them. + // + + // U+D800 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xD800, 0x0041 ]), + + // U+D800 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [], [ 0xFFFD, 0x0001_0000 ], + [ 0xD800, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0041 ], + [ 0x0041, 0xD800, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xD800, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xD800, 0xDB40, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xD800, 0xDB40, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 (high-surrogate) + // U+DBFF (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 
(high-surrogate) + // U+DBFF (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0xD800, 0xDC00 ]), + ], + + "IllFormed": [ + // + // Low-surrogate right before EOF. + // + + // U+DC00 (low-surrogate) + UTF16Test([], [ 0xFFFD ], [ 0xDC00 ]), + + // U+DC00 (low-surrogate) + // U+DC00 (low-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xDC00 ]), + + // U+10000 LINEAR B SYLLABLE B008 A + // U+DC00 (low-surrogate) + UTF16Test( + [ 0x0001_0000 ], [ 0xFFFD ], + [ 0xD800, 0xDC00, 0xDC00 ]), + + // + // Low-surrogate with more code units following it. + // + + // U+DC00 (low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xDC00, 0x0041 ]), + + // U+DC00 (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [], [ 0xFFFD, 0x0001_0000 ], + [ 0xDC00, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0041 ], + [ 0x0041, 0xDC00, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xDC00, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xDC00, 0xDD00, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xDC00, 0xDD00, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+DFFF 
(low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+DFFF (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0xD800, 0xDC00 ]), + + // + // Low-surrogate followed by high-surrogate. + // + + // U+DC00 (low-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xD800 ]), + + // U+DC00 (low-surrogate) + // U+DB40 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDB40 ]), + + // U+DC00 (low-surrogate) + // U+DBFF (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDBFF ]), + + + // U+DD00 (low-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xD800 ]), + + // U+DD00 (low-surrogate) + // U+DB40 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDB40 ]), + + // U+DD00 (low-surrogate) + // U+DBFF (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDBFF ]), + + + // U+DFFF (low-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xD800 ]), + + // U+DFFF (low-surrogate) + // U+DB40 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDB40 ]), + + // U+DFFF (low-surrogate) + // U+DBFF (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDBFF ]), + + + // U+DC00 (low-surrogate) + // U+D800 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [], [ 0xFFFD, 0xFFFD, 0x0041 ], + [ 0xDC00, 0xD800, 0x0041 ]), + + // U+DC00 (low-surrogate) + // U+D800 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [], [ 0xFFFD, 0xFFFD, 0x10000 ], + [ 0xDC00, 0xD800, 0xD800, 0xDC00 ]), + ], +] + +runAllTests() + +#else +//===--- benchmarking 
-----------------------------------------------------===// + +@inline(never) +public func run_UTF8Decode(_ N: Int) { + // 1-byte sequences + // This test case is the longest as it's the most performance sensitive. + let ascii = "Swift is a multi-paradigm, compiled programming language created for iOS, OS X, watchOS, tvOS and Linux development by Apple Inc. Swift is designed to work with Apple's Cocoa and Cocoa Touch frameworks and the large body of existing Objective-C code written for Apple products. Swift is intended to be more resilient to erroneous code (\"safer\") than Objective-C and also more concise. It is built with the LLVM compiler framework included in Xcode 6 and later and uses the Objective-C runtime, which allows C, Objective-C, C++ and Swift code to run within a single program." + // 2-byte sequences + let russian = "Ру́сский язы́к один из восточнославянских языков, национальный язык русского народа." + // 3-byte sequences + let japanese = "日本語(にほんご、にっぽんご)は、主に日本国内や日本人同士の間で使われている言語である。" + // 4-byte sequences + // Most commonly emoji, which are usually mixed with other text. + let emoji = "Panda 🐼, Dog 🐶, Cat 🐱, Mouse 🐭." 
+ + let strings = [ascii, russian, japanese, emoji].map { Array($0.utf8) } + + func isEmpty(_ result: UnicodeDecodingResult) -> Bool { + switch result { + case .emptyInput: + return true + default: + return false + } + } + + var total: UInt32 = 0 + + for _ in 1...200*N { + for string in strings { +#if BASELINE + _ = transcode( + string.makeIterator(), from: UTF8.self, to: UTF32.self, + stoppingOnError: false + ) { + total = total &+ $0 + } +#else + #if FORWARD + var it = string.makeIterator() + typealias D = UTF8.ForwardDecoder + D.decode(&it, repairingIllFormedSequences: true) { total = total &+ $0.value } + #elseif REVERSE + var it = string.reversed().makeIterator() + typealias D = UTF8.ReverseDecoder + D.decode(&it, repairingIllFormedSequences: true) { total = total &+ $0.value } + #elseif SEQUENCE + for s in Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) { + total = total &+ s.value + } + #elseif COLLECTION + let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) + var i = scalars.startIndex + while i != scalars.endIndex { + total = total &+ scalars[i].value + i = scalars.index(after: i) + } +#elseif REVERSE_COLLECTION + let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) + var i = scalars.endIndex + while i != scalars.startIndex { + i = scalars.index(before: i) + total = total &+ scalars[i].value + } + #else + Error_Unknown_Benchmark() + #endif +#endif + } + } + if CommandLine.arguments.count > 1000 { print(total) } +} + +run_UTF8Decode(10000) +#endif + From f72fec0c52ae786ae53dfb50ca474f3a2e578e85 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Mon, 26 Jun 2017 08:39:31 -0700 Subject: [PATCH 16/17] [GSB] Ensure that we have superclass constraints from merged equiv class. Fixes one recently-found crasher. 
(cherry picked from commit e256a9d8dfa376c86dc4600f2391ebcc8208bf70) --- lib/AST/GenericSignatureBuilder.cpp | 5 +++-- ...-resolvesuperconformance-swift-genericsignaturebuil.swift | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) rename validation-test/{compiler_crashers => compiler_crashers_fixed}/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift (88%) diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 855499e94607b..a4c76d551e4f2 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -3391,12 +3391,13 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( else source2 = equivClass2->superclassConstraints.front().source; - (void)updateSuperclass(T1, equivClass2->superclass, source2); - + // Add the superclass constraints from the second equivalence class. equivClass->superclassConstraints.insert( equivClass->superclassConstraints.end(), equivClass2->superclassConstraints.begin(), equivClass2->superclassConstraints.end()); + + (void)updateSuperclass(T1, equivClass2->superclass, source2); } // Add all of the protocol conformance requirements of T2 to T1. 
diff --git a/validation-test/compiler_crashers/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift b/validation-test/compiler_crashers_fixed/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift similarity index 88% rename from validation-test/compiler_crashers/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift rename to validation-test/compiler_crashers_fixed/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift index 456a3b45861ad..68538284edaf0 100644 --- a/validation-test/compiler_crashers/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift +++ b/validation-test/compiler_crashers_fixed/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift @@ -5,5 +5,5 @@ // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors -// RUN: not --crash %target-swift-frontend %s -emit-ir +// RUN: not %target-swift-frontend %s -emit-ir protocol P{{}typealias e:a{}}class a:P=extension P{typealias e:Self From 19f4ee716c005c3cea6b3927acc32b84225d3715 Mon Sep 17 00:00:00 2001 From: Doug Gregor Date: Mon, 26 Jun 2017 09:43:13 -0700 Subject: [PATCH 17/17] [GSB] Don't crash when substitution fails to produce a type. Fixes two more recently-found GSB crashers. 
--- lib/AST/GenericSignatureBuilder.cpp | 2 +- ...8802-constrainttype-missing-constraint-type.swift | 10 ++++++++++ .../compiler_crashers_fixed/28804-second.swift | 12 ++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 validation-test/compiler_crashers_fixed/28802-constrainttype-missing-constraint-type.swift create mode 100644 validation-test/compiler_crashers_fixed/28804-second.swift diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index a4c76d551e4f2..11a4f96579cf9 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -3704,7 +3704,7 @@ ConstraintResult GenericSignatureBuilder::addRequirement( ModuleDecl *inferForModule) { auto subst = [&](Type t) { if (subMap) - return t.subst(*subMap); + return t.subst(*subMap, SubstFlags::UseErrorType); return t; }; diff --git a/validation-test/compiler_crashers_fixed/28802-constrainttype-missing-constraint-type.swift b/validation-test/compiler_crashers_fixed/28802-constrainttype-missing-constraint-type.swift new file mode 100644 index 0000000000000..df54499ba8b37 --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28802-constrainttype-missing-constraint-type.swift @@ -0,0 +1,10 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// REQUIRES: asserts +// RUN: not %target-swift-frontend %s -emit-ir +class a