diff --git a/include/swift/AST/DiagnosticsSema.def b/include/swift/AST/DiagnosticsSema.def index 22c0a8139a767..2b1fd0903c578 100644 --- a/include/swift/AST/DiagnosticsSema.def +++ b/include/swift/AST/DiagnosticsSema.def @@ -1682,9 +1682,6 @@ ERROR(requires_generic_param_same_type_does_not_conform,none, (Type, Identifier)) ERROR(requires_same_concrete_type,none, "generic signature requires types %0 and %1 to be the same", (Type, Type)) -ERROR(protocol_typealias_conflict, none, - "type alias %0 requires types %1 and %2 to be the same", - (Identifier, Type, Type)) WARNING(redundant_conformance_constraint,none, "redundant conformance constraint %0: %1", (Type, ProtocolDecl *)) NOTE(redundant_conformance_here,none, @@ -1692,6 +1689,9 @@ NOTE(redundant_conformance_here,none, "inferred from type here}0", (unsigned, Type, ProtocolDecl *)) +ERROR(same_type_conflict,none, + "%select{generic parameter |protocol |}0%1 cannot be equal to both " + "%2 and %3", (unsigned, Type, Type, Type)) WARNING(redundant_same_type_to_concrete,none, "redundant same-type constraint %0 == %1", (Type, Type)) NOTE(same_type_redundancy_here,none, @@ -1732,6 +1732,9 @@ WARNING(inherited_associated_type_redecl,none, WARNING(typealias_override_associated_type,none, "typealias overriding associated type %0 from protocol %1 is better " "expressed as same-type constraint on the protocol", (DeclName, Type)) +WARNING(associated_type_override_typealias,none, + "associated type %0 is redundant with type %0 declared in inherited " + "%1 %2", (DeclName, DescriptiveDeclKind, Type)) ERROR(generic_param_access,none, "%0 %select{must be declared %select{" diff --git a/include/swift/AST/GenericSignatureBuilder.h b/include/swift/AST/GenericSignatureBuilder.h index 86063c70ac0b5..a27e9f602cb80 100644 --- a/include/swift/AST/GenericSignatureBuilder.h +++ b/include/swift/AST/GenericSignatureBuilder.h @@ -23,6 +23,7 @@ #include "swift/AST/Decl.h" #include "swift/AST/DiagnosticEngine.h" #include "swift/AST/Identifier.h" 
+#include "swift/AST/ProtocolConformanceRef.h" #include "swift/AST/Types.h" #include "swift/AST/TypeLoc.h" #include "swift/AST/TypeRepr.h" @@ -74,6 +75,11 @@ enum class ArchetypeResolutionKind { /// Only create a new potential archetype to describe this dependent type /// if it is already known. AlreadyKnown, + + /// Only create a potential archetype when it is well-formed (i.e., we know + /// that there is a nested type with that name), but (unlike \c AlreadyKnown) + /// allow the creation of a new potential archetype. + WellFormed, }; /// \brief Collects a set of requirements of generic parameters, both explicitly @@ -176,6 +182,9 @@ class GenericSignatureBuilder { /// the concrete type. unsigned recursiveConcreteType : 1; + /// Whether we have an invalid concrete type. + unsigned invalidConcreteType : 1; + /// Whether we have detected recursion during the substitution of /// the superclass type. unsigned recursiveSuperclassType : 1; @@ -281,6 +290,15 @@ class GenericSignatureBuilder { FloatingRequirementSource source, UnresolvedHandlingKind unresolvedHandling); + /// Resolve the conformance of the given potential archetype to + /// the given protocol when the potential archetype is known to be equivalent + /// to a concrete type. + /// + /// \returns the requirement source for the resolved conformance, or nullptr + /// if the conformance could not be resolved. + const RequirementSource *resolveConcreteConformance(PotentialArchetype *pa, + ProtocolDecl *proto); + /// Retrieve the constraint source conformance for the superclass constraint /// of the given potential archetype (if present) to the given protocol. /// @@ -288,9 +306,8 @@ class GenericSignatureBuilder { /// queried. /// /// \param proto The protocol to which we are establishing conformance. 
- const RequirementSource *resolveSuperConformance( - GenericSignatureBuilder::PotentialArchetype *pa, - ProtocolDecl *proto); + const RequirementSource *resolveSuperConformance(PotentialArchetype *pa, + ProtocolDecl *proto); /// \brief Add a new conformance requirement specifying that the given /// potential archetype conforms to the given protocol. @@ -316,15 +333,6 @@ class GenericSignatureBuilder { FloatingRequirementSource Source, llvm::function_ref diagnoseMismatch); - /// \brief Add a new same-type requirement between two fully resolved types - /// (output of GenericSignatureBuilder::resolve). - /// - /// The two types must not be incompatible concrete types. - ConstraintResult addSameTypeRequirementDirect( - ResolvedType paOrT1, - ResolvedType paOrT2, - FloatingRequirementSource Source); - /// \brief Add a new same-type requirement between two unresolved types. /// /// The types are resolved with \c GenericSignatureBuilder::resolve, and must @@ -770,7 +778,7 @@ class GenericSignatureBuilder::RequirementSource final /// A requirement that was resolved via a superclass requirement. /// - /// This stores the \c ProtocolConformance* used to resolve the + /// This stores the \c ProtocolConformanceRef used to resolve the /// requirement. Superclass, @@ -784,6 +792,10 @@ class GenericSignatureBuilder::RequirementSource final /// This stores the \c ProtocolConformance* used to resolve the /// requirement. Concrete, + + /// A requirement that was resolved based on structural derivation from + /// another requirement. + Derived, }; /// The kind of requirement source. @@ -792,6 +804,7 @@ class GenericSignatureBuilder::RequirementSource final private: /// The kind of storage we have. enum class StorageKind : uint8_t { + None, RootArchetype, StoredType, ProtocolConformance, @@ -816,7 +829,7 @@ class GenericSignatureBuilder::RequirementSource final TypeBase *type; /// A protocol conformance used to satisfy the requirement. 
- ProtocolConformance *conformance; + void *conformance; /// An associated type to which a requirement is being applied. AssociatedTypeDecl *assocType; @@ -839,6 +852,7 @@ class GenericSignatureBuilder::RequirementSource final case Superclass: case Parent: case Concrete: + case Derived: return 0; } @@ -882,6 +896,7 @@ class GenericSignatureBuilder::RequirementSource final case Superclass: case Parent: case Concrete: + case Derived: return false; } @@ -931,7 +946,7 @@ class GenericSignatureBuilder::RequirementSource final } RequirementSource(Kind kind, const RequirementSource *parent, - ProtocolConformance *conformance) + ProtocolConformanceRef conformance) : kind(kind), storageKind(StorageKind::ProtocolConformance), hasTrailingWrittenRequirementLoc(false), usesRequirementSignature(false), parent(parent) { @@ -940,7 +955,7 @@ class GenericSignatureBuilder::RequirementSource final assert(isAcceptableStorageKind(kind, storageKind) && "RequirementSource kind/storageKind mismatch"); - storage.conformance = conformance; + storage.conformance = conformance.getOpaqueValue(); } RequirementSource(Kind kind, const RequirementSource *parent, @@ -956,6 +971,16 @@ class GenericSignatureBuilder::RequirementSource final storage.assocType = assocType; } + RequirementSource(Kind kind, const RequirementSource *parent) + : kind(kind), storageKind(StorageKind::None), + hasTrailingWrittenRequirementLoc(false), + usesRequirementSignature(false), parent(parent) { + assert((static_cast(parent) != isRootKind(kind)) && + "Root RequirementSource should not have parent (or vice versa)"); + assert(isAcceptableStorageKind(kind, storageKind) && + "RequirementSource kind/storageKind mismatch"); + } + public: /// Retrieve an abstract requirement source. 
static const RequirementSource *forAbstract(PotentialArchetype *root); @@ -997,13 +1022,14 @@ class GenericSignatureBuilder::RequirementSource final /// A requirement source that describes that a requirement that is resolved /// via a superclass requirement. const RequirementSource *viaSuperclass( - GenericSignatureBuilder &builder, - ProtocolConformance *conformance) const; + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const; /// A requirement source that describes that a requirement that is resolved /// via a same-type-to-concrete requirement. - const RequirementSource *viaConcrete(GenericSignatureBuilder &builder, - ProtocolConformance *conformance) const; + const RequirementSource *viaConcrete( + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const; /// A constraint source that describes that a constraint that is resolved /// for a nested type via a constraint on its parent. @@ -1012,6 +1038,10 @@ class GenericSignatureBuilder::RequirementSource final const RequirementSource *viaParent(GenericSignatureBuilder &builder, AssociatedTypeDecl *assocType) const; + /// A constraint source that describes a constraint that is structurally + /// derived from another constraint but does not require further information. + const RequirementSource *viaDerived(GenericSignatureBuilder &builder) const; + /// Retrieve the root requirement source. const RequirementSource *getRoot() const; @@ -1100,9 +1130,9 @@ class GenericSignatureBuilder::RequirementSource final ProtocolDecl *getProtocolDecl() const; /// Retrieve the protocol conformance for this requirement, if there is one. 
- ProtocolConformance *getProtocolConformance() const { - if (storageKind != StorageKind::ProtocolConformance) return nullptr; - return storage.conformance; + ProtocolConformanceRef getProtocolConformance() const { + assert(storageKind == StorageKind::ProtocolConformance); + return ProtocolConformanceRef::getFromOpaqueValue(storage.conformance); } /// Retrieve the associated type declaration for this requirement, if there @@ -1577,18 +1607,6 @@ class GenericSignatureBuilder::PotentialArchetype { PotentialArchetype *getNestedType(TypeDecl *concreteDecl, GenericSignatureBuilder &builder); - /// Describes the kind of update that is performed. - enum class NestedTypeUpdate { - /// Resolve an existing potential archetype, but don't create a new - /// one if not present. - ResolveExisting, - /// If this potential archetype is missing, create it. - AddIfMissing, - /// If this potential archetype is missing and would be a better anchor, - /// create it. - AddIfBetterAnchor, - }; - /// \brief Retrieve (or create) a nested type that is the current best /// nested archetype anchor (locally) with the given name. /// @@ -1597,7 +1615,7 @@ class GenericSignatureBuilder::PotentialArchetype { PotentialArchetype *getNestedArchetypeAnchor( Identifier name, GenericSignatureBuilder &builder, - NestedTypeUpdate kind = NestedTypeUpdate::AddIfMissing); + ArchetypeResolutionKind kind); /// Update the named nested type when we know this type conforms to the given /// protocol. @@ -1607,7 +1625,7 @@ class GenericSignatureBuilder::PotentialArchetype { /// a potential archetype should not be created if it's missing. PotentialArchetype *updateNestedTypeForConformance( PointerUnion type, - NestedTypeUpdate kind); + ArchetypeResolutionKind kind); /// Update the named nested type when we know this type conforms to the given /// protocol. 
@@ -1618,7 +1636,7 @@ class GenericSignatureBuilder::PotentialArchetype { PotentialArchetype *updateNestedTypeForConformance( Identifier name, ProtocolDecl *protocol, - NestedTypeUpdate kind); + ArchetypeResolutionKind kind); /// \brief Retrieve (or build) the type corresponding to the potential /// archetype within the given generic environment. diff --git a/lib/AST/GenericSignature.cpp b/lib/AST/GenericSignature.cpp index 880c4d8ca3eb9..5c5a50f2d21ce 100644 --- a/lib/AST/GenericSignature.cpp +++ b/lib/AST/GenericSignature.cpp @@ -872,7 +872,7 @@ ConformanceAccessPath GenericSignature::getConformanceAccessPath( auto pa = reqSigBuilder.resolveArchetype( storedType, - ArchetypeResolutionKind::CompleteWellFormed); + ArchetypeResolutionKind::AlwaysPartial); auto equivClass = pa->getOrCreateEquivalenceClass(); // Find the conformance of this potential archetype to the protocol in diff --git a/lib/AST/GenericSignatureBuilder.cpp b/lib/AST/GenericSignatureBuilder.cpp index 3b653df07e976..11a4f96579cf9 100644 --- a/lib/AST/GenericSignatureBuilder.cpp +++ b/lib/AST/GenericSignatureBuilder.cpp @@ -118,6 +118,7 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::StoredType: case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: + case StorageKind::None: return false; } @@ -129,6 +130,7 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::RootArchetype: case StorageKind::StoredType: case StorageKind::ProtocolConformance: + case StorageKind::None: return false; } @@ -141,6 +143,7 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::RootArchetype: case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: + case StorageKind::None: return false; } @@ -152,6 +155,19 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, case StorageKind::RootArchetype: case StorageKind::StoredType: + case StorageKind::AssociatedTypeDecl: + case StorageKind::None: + 
return false; + } + + case Derived: + switch (storageKind) { + case StorageKind::None: + return true; + + case StorageKind::RootArchetype: + case StorageKind::StoredType: + case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: return false; } @@ -163,6 +179,9 @@ bool RequirementSource::isAcceptableStorageKind(Kind kind, const void *RequirementSource::getOpaqueStorage1() const { switch (storageKind) { + case StorageKind::None: + return nullptr; + case StorageKind::RootArchetype: return storage.rootArchetype; @@ -213,6 +232,7 @@ bool RequirementSource::isInferredRequirement(bool includeQuietInferred) const { case ProtocolRequirement: case RequirementSignatureSelf: case Superclass: + case Derived: break; } } @@ -238,6 +258,7 @@ bool RequirementSource::isDerivedRequirement() const { case Superclass: case Concrete: case RequirementSignatureSelf: + case Derived: return true; case ProtocolRequirement: @@ -286,6 +307,7 @@ bool RequirementSource::isSelfDerivedSource(PotentialArchetype *pa, case RequirementSource::NestedTypeNameMatch: case RequirementSource::Concrete: case RequirementSource::Superclass: + case RequirementSource::Derived: return false; } }) == nullptr; @@ -395,6 +417,7 @@ bool RequirementSource::isSelfDerivedConformance( case Concrete: case Superclass: case Parent: + case Derived: return false; case Explicit: case Inferred: @@ -521,20 +544,21 @@ const RequirementSource *RequirementSource::viaProtocolRequirement( } const RequirementSource *RequirementSource::viaSuperclass( - GenericSignatureBuilder &builder, - ProtocolConformance *conformance) const { + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const { REQUIREMENT_SOURCE_FACTORY_BODY( - (nodeID, Superclass, this, conformance, + (nodeID, Superclass, this, conformance.getOpaqueValue(), nullptr, nullptr), (Superclass, this, conformance), 0, WrittenRequirementLoc()); } const RequirementSource *RequirementSource::viaConcrete( - GenericSignatureBuilder &builder, - 
ProtocolConformance *conformance) const { + GenericSignatureBuilder &builder, + ProtocolConformanceRef conformance) const { REQUIREMENT_SOURCE_FACTORY_BODY( - (nodeID, Concrete, this, conformance, nullptr, nullptr), + (nodeID, Concrete, this, conformance.getOpaqueValue(), + nullptr, nullptr), (Concrete, this, conformance), 0, WrittenRequirementLoc()); } @@ -548,6 +572,14 @@ const RequirementSource *RequirementSource::viaParent( 0, WrittenRequirementLoc()); } +const RequirementSource *RequirementSource::viaDerived( + GenericSignatureBuilder &builder) const { + REQUIREMENT_SOURCE_FACTORY_BODY( + (nodeID, Derived, this, nullptr, nullptr, nullptr), + (Derived, this), + 0, WrittenRequirementLoc()); +} + #undef REQUIREMENT_SOURCE_FACTORY_BODY const RequirementSource *RequirementSource::getRoot() const { @@ -602,6 +634,7 @@ RequirementSource::visitPotentialArchetypesAlongPath( case RequirementSource::Concrete: case RequirementSource::Superclass: + case RequirementSource::Derived: return parent->visitPotentialArchetypesAlongPath(visitor); case RequirementSource::ProtocolRequirement: @@ -618,6 +651,7 @@ RequirementSource::visitPotentialArchetypesAlongPath( Type RequirementSource::getStoredType() const { switch (storageKind) { + case StorageKind::None: case StorageKind::RootArchetype: case StorageKind::ProtocolConformance: case StorageKind::AssociatedTypeDecl: @@ -632,6 +666,9 @@ Type RequirementSource::getStoredType() const { ProtocolDecl *RequirementSource::getProtocolDecl() const { switch (storageKind) { + case StorageKind::None: + return nullptr; + case StorageKind::RootArchetype: if (kind == RequirementSignatureSelf) return getTrailingObjects()[0]; @@ -643,10 +680,7 @@ ProtocolDecl *RequirementSource::getProtocolDecl() const { return nullptr; case StorageKind::ProtocolConformance: - if (storage.conformance) - return storage.conformance->getProtocol(); - - return nullptr; + return getProtocolConformance().getRequirement(); case StorageKind::AssociatedTypeDecl: return 
storage.assocType->getProtocol(); @@ -808,6 +842,10 @@ void RequirementSource::print(llvm::raw_ostream &out, case Superclass: out << "Superclass"; break; + + case Derived: + out << "Derived"; + break; } // Local function to dump a source location, if we can. @@ -822,6 +860,7 @@ void RequirementSource::print(llvm::raw_ostream &out, }; switch (storageKind) { + case StorageKind::None: case StorageKind::RootArchetype: break; @@ -832,12 +871,16 @@ void RequirementSource::print(llvm::raw_ostream &out, } break; - case StorageKind::ProtocolConformance: - if (storage.conformance) { - out << " (" << storage.conformance->getType()->getString() << ": " - << storage.conformance->getProtocol()->getName() << ")"; + case StorageKind::ProtocolConformance: { + auto conformance = getProtocolConformance(); + if (conformance.isConcrete()) { + out << " (" << conformance.getConcrete()->getType()->getString() << ": " + << conformance.getConcrete()->getProtocol()->getName() << ")"; + } else { + out << " (abstract " << conformance.getRequirement()->getName() << ")"; } break; + } case StorageKind::AssociatedTypeDecl: out << " (" << storage.assocType->getProtocol()->getName() @@ -961,6 +1004,7 @@ bool FloatingRequirementSource::isExplicit() const { case RequirementSource::ProtocolRequirement: case RequirementSource::InferredProtocolRequirement: case RequirementSource::Superclass: + case RequirementSource::Derived: return false; } @@ -981,6 +1025,7 @@ bool FloatingRequirementSource::isExplicit() const { case RequirementSource::NestedTypeNameMatch: case RequirementSource::Parent: case RequirementSource::Superclass: + case RequirementSource::Derived: return false; } } @@ -1037,6 +1082,19 @@ bool FloatingRequirementSource::isRecursive( pa = parent; } + + // Also check the root type. 
+ grossCount = 0; + for (Type type = rootType; + auto depTy = type->getAs(); + type = depTy->getBase()) { + if (depTy->getName() == nestedName) { + if (++grossCount > 4) { + ++NumRecursive; + return true; + } + } + } } return false; @@ -1262,9 +1320,49 @@ ConstraintResult GenericSignatureBuilder::handleUnresolvedRequirement( } } +const RequirementSource * +GenericSignatureBuilder::resolveConcreteConformance(PotentialArchetype *pa, + ProtocolDecl *proto) { + auto concrete = pa->getConcreteType(); + if (!concrete) return nullptr; + + // Conformance to this protocol is redundant; update the requirement source + // appropriately. + auto paEquivClass = pa->getOrCreateEquivalenceClass(); + const RequirementSource *concreteSource; + if (auto writtenSource = + paEquivClass->findAnyConcreteConstraintAsWritten(pa)) + concreteSource = writtenSource->source; + else + concreteSource = paEquivClass->concreteTypeConstraints.front().source; + + // Lookup the conformance of the concrete type to this protocol. + auto conformance = + getLookupConformanceFn()(pa->getDependentType({ }, /*allowUnresolved=*/true) + ->getCanonicalType(), + concrete, + proto->getDeclaredInterfaceType() + ->castTo()); + if (!conformance) { + if (!concrete->hasError() && concreteSource->getLoc().isValid()) { + Diags.diagnose(concreteSource->getLoc(), + diag::requires_generic_param_same_type_does_not_conform, + concrete, proto->getName()); + } + + paEquivClass->invalidConcreteType = true; + return nullptr; + } + + concreteSource = concreteSource->viaConcrete(*this, *conformance); + paEquivClass->conformsTo[proto].push_back({pa, proto, concreteSource}); + ++NumConformanceConstraints; + return concreteSource; +} + const RequirementSource *GenericSignatureBuilder::resolveSuperConformance( - GenericSignatureBuilder::PotentialArchetype *pa, - ProtocolDecl *proto) { + PotentialArchetype *pa, + ProtocolDecl *proto) { // Get the superclass constraint. 
Type superclass = pa->getSuperclass(); if (!superclass) return nullptr; @@ -1289,7 +1387,7 @@ const RequirementSource *GenericSignatureBuilder::resolveSuperConformance( superclassSource = paEquivClass->superclassConstraints.front().source; superclassSource = - superclassSource->viaSuperclass(*this, conformance->getConcrete()); + superclassSource->viaSuperclass(*this, *conformance); paEquivClass->conformsTo[proto].push_back({pa, proto, superclassSource}); ++NumConformanceConstraints; return superclassSource; @@ -1333,7 +1431,7 @@ static void maybeAddSameTypeRequirementForNestedType( if (!assocType) return; // Dig out the type witness. - auto superConformance = superSource->getProtocolConformance(); + auto superConformance = superSource->getProtocolConformance().getConcrete(); auto concreteType = superConformance->getTypeWitness(assocType, builder.getLazyResolver()); if (!concreteType) return; @@ -1378,14 +1476,18 @@ bool PotentialArchetype::addConformance(ProtocolDecl *proto, ++NumConformanceConstraints; ++NumConformances; - // Determine whether there is a superclass constraint where the - // superclass conforms to this protocol. - (void)getBuilder()->resolveSuperConformance(this, proto); + // If there is a concrete type that resolves this conformance requirement, + // record the conformance. + if (!builder.resolveConcreteConformance(this, proto)) { + // Otherwise, determine whether there is a superclass constraint where the + // superclass conforms to this protocol. + (void)builder.resolveSuperConformance(this, proto); + } // Resolve any existing nested types that need it. for (auto &nested : NestedTypes) { (void)updateNestedTypeForConformance(nested.first, proto, - NestedTypeUpdate::ResolveExisting); + ArchetypeResolutionKind::AlreadyKnown); } return true; @@ -1453,26 +1555,35 @@ static int compareAssociatedTypes(AssociatedTypeDecl *assocType1, return 0; } +/// Whether there are any concrete type declarations in the potential archetype. 
+static bool hasConcreteDecls(const PotentialArchetype *pa) { + auto parent = pa->getParent(); + if (!parent) return false; + + if (pa->getConcreteTypeDecl()) + return true; + + return hasConcreteDecls(parent); +} + /// Canonical ordering for dependent types in generic signatures. static int compareDependentTypes(PotentialArchetype * const* pa, - PotentialArchetype * const* pb) { + PotentialArchetype * const* pb, + bool outermost) { auto a = *pa, b = *pb; // Fast-path check for equality. if (a == b) return 0; - // Concrete types must be ordered *after* everything else, to ensure they - // don't become representatives in the case where a concrete type is equated - // with an associated type. - if (a->getParent() && b->getParent() && - !!a->getConcreteTypeDecl() != !!b->getConcreteTypeDecl()) - return a->getConcreteTypeDecl() ? +1 : -1; - - // Types that are equivalent to concrete types follow types that are still - // type parameters. - if (a->isConcreteType() != b->isConcreteType()) - return a->isConcreteType() ? +1 : -1; + // If one has concrete declarations somewhere but the other does not, + // prefer the one without concrete declarations. + if (outermost) { + bool aHasConcreteDecls = hasConcreteDecls(a); + bool bHasConcreteDecls = hasConcreteDecls(b); + if (aHasConcreteDecls != bHasConcreteDecls) + return aHasConcreteDecls ? +1 : -1; + } // Ordering is as follows: // - Generic params @@ -1488,9 +1599,21 @@ static int compareDependentTypes(PotentialArchetype * const* pa, auto ppb = b->getParent(); // - by base, so t_0_n.`P.T` < t_1_m.`P.T` - if (int compareBases = compareDependentTypes(&ppa, &ppb)) + if (int compareBases = compareDependentTypes(&ppa, &ppb, /*outermost=*/false)) return compareBases; + // Types that are equivalent to concrete types follow types that are still + // type parameters. + if (a->isConcreteType() != b->isConcreteType()) + return a->isConcreteType() ? 
+1 : -1; + + // Concrete types must be ordered *after* everything else, to ensure they + // don't become representatives in the case where a concrete type is equated + // with an associated type. + if (a->getParent() && b->getParent() && + !!a->getConcreteTypeDecl() != !!b->getConcreteTypeDecl()) + return a->getConcreteTypeDecl() ? +1 : -1; + // - by name, so t_n_m.`P.T` < t_n_m.`P.U` if (int compareNames = a->getNestedName().str().compare( b->getNestedName().str())) @@ -1534,6 +1657,11 @@ static int compareDependentTypes(PotentialArchetype * const* pa, llvm_unreachable("potential archetype total order failure"); } +static int compareDependentTypes(PotentialArchetype * const* pa, + PotentialArchetype * const* pb) { + return compareDependentTypes(pa, pb, /*outermost=*/true); +} + PotentialArchetype *PotentialArchetype::getArchetypeAnchor( GenericSignatureBuilder &builder) { // Find the best archetype within this equivalence class. @@ -1542,9 +1670,10 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( if (auto parent = getParent()) { // For a nested type, retrieve the parent archetype anchor first. auto parentAnchor = parent->getArchetypeAnchor(builder); + assert(parentAnchor->getNestingDepth() <= parent->getNestingDepth()); anchor = parentAnchor->getNestedArchetypeAnchor( - getNestedName(), builder, - NestedTypeUpdate::ResolveExisting); + getNestedName(), builder, + ArchetypeResolutionKind::AlwaysPartial); // FIXME: Hack for cases where we couldn't resolve the nested type. 
if (!anchor) @@ -1561,7 +1690,8 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( equivClass->archetypeAnchorCache.numMembers == equivClass->members.size()) { ++NumArchetypeAnchorCacheHits; - + assert(equivClass->archetypeAnchorCache.anchor->getNestingDepth() + <= rep->getNestingDepth()); return equivClass->archetypeAnchorCache.anchor; } @@ -1580,6 +1710,8 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( } #endif + assert(anchor->getNestingDepth() <= rep->getNestingDepth()); + // Record the cache miss and update the cache. ++NumArchetypeAnchorCacheMisses; equivClass->archetypeAnchorCache.anchor = anchor; @@ -1589,31 +1721,10 @@ PotentialArchetype *PotentialArchetype::getArchetypeAnchor( } namespace { - /// Function object to diagnose a conflict in same-type constraints for a - /// given potential archetype. - struct DiagnoseSameTypeConflict { - DiagnosticEngine &diags; - const RequirementSource *source; - PotentialArchetype *pa; - - void operator()(Type type1, Type type2) const { - if (pa->getParent() && pa->getConcreteTypeDecl() && - source->getLoc().isInvalid()) { - diags.diagnose(pa->getConcreteTypeDecl()->getLoc(), - diag::protocol_typealias_conflict, - pa->getConcreteTypeDecl()->getName(), - type1, type2); - return; - } - - if (source->getLoc().isValid()) { - diags.diagnose(source->getLoc(), - diag::requires_same_type_conflict, - pa->isGenericParam(), - pa->getDependentType(/*FIXME: */{ }, true), - type1, type2); - } - } + /// Function object used to suppress conflict diagnoses when we know we'll + /// see them again later. + struct SameTypeConflictCheckedLater { + void operator()(Type type1, Type type2) const { } }; } // end anonymous namespace @@ -1621,12 +1732,11 @@ namespace { // parent PA that has a concrete type. 
static void concretizeNestedTypeFromConcreteParent( GenericSignatureBuilder::PotentialArchetype *parent, - const RequirementSource *parentConcreteSource, GenericSignatureBuilder::PotentialArchetype *nestedPA, - GenericSignatureBuilder &builder, - llvm::function_ref - lookupConformance) { - auto concreteParent = parent->getConcreteType(); + GenericSignatureBuilder &builder) { + auto parentEquiv = parent->getEquivalenceClassIfPresent(); + assert(parentEquiv && "can't have a concrete type without an equiv class"); + auto concreteParent = parentEquiv->concreteType; assert(concreteParent && "attempting to resolve concrete nested type of non-concrete PA"); @@ -1635,11 +1745,22 @@ static void concretizeNestedTypeFromConcreteParent( auto assocType = nestedPA->getResolvedAssociatedType(); if (!assocType) return; - auto source = parentConcreteSource->viaConcrete(builder, /*FIXME: */nullptr) - ->viaParent(builder, assocType); + auto proto = assocType->getProtocol(); + assert(parentEquiv->conformsTo.count(proto) > 0 && + "No conformance requirement"); + const RequirementSource *parentConcreteSource = nullptr; + for (const auto &constraint : parentEquiv->conformsTo.find(proto)->second) { + if (constraint.source->kind == RequirementSource::Concrete) { + parentConcreteSource = constraint.source; + } + } + + // Error condition: parent did not conform to this protocol, so there is no + // way to resolve the nested type via concrete conformance. + if (!parentConcreteSource) return; - // FIXME: Get the conformance from the parent. 
- auto conformance = lookupConformance(assocType->getProtocol()); + auto source = parentConcreteSource->viaParent(builder, assocType); + auto conformance = parentConcreteSource->getProtocolConformance(); Type witnessType; if (conformance.isConcrete()) { @@ -1653,10 +1774,7 @@ static void concretizeNestedTypeFromConcreteParent( builder.addSameTypeRequirement( nestedPA, witnessType, source, GenericSignatureBuilder::UnresolvedHandlingKind::GenerateConstraints, - DiagnoseSameTypeConflict{ - builder.getASTContext().Diags, - source, nestedPA - }); + SameTypeConflictCheckedLater()); } PotentialArchetype *PotentialArchetype::getNestedType( @@ -1669,27 +1787,28 @@ PotentialArchetype *PotentialArchetype::getNestedType( // Retrieve the nested archetype anchor, which is the best choice (so far) // for this nested type. - return getNestedArchetypeAnchor(nestedName, builder); + return getNestedArchetypeAnchor(nestedName, builder, + ArchetypeResolutionKind::AlwaysPartial); } PotentialArchetype *PotentialArchetype::getNestedType( AssociatedTypeDecl *assocType, GenericSignatureBuilder &builder) { return updateNestedTypeForConformance(assocType, - NestedTypeUpdate::AddIfMissing); + ArchetypeResolutionKind::WellFormed); } PotentialArchetype *PotentialArchetype::getNestedType( TypeDecl *getConcreteTypeDecl, GenericSignatureBuilder &builder) { return updateNestedTypeForConformance(getConcreteTypeDecl, - NestedTypeUpdate::AddIfMissing); + ArchetypeResolutionKind::WellFormed); } PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( Identifier name, GenericSignatureBuilder &builder, - NestedTypeUpdate kind) { + ArchetypeResolutionKind kind) { // Look for the best associated type or concrete type within the protocols // we know about. AssociatedTypeDecl *bestAssocType = nullptr; @@ -1729,8 +1848,7 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // If we found an associated type, use it. 
PotentialArchetype *resultPA = nullptr; if (bestAssocType) { - resultPA = updateNestedTypeForConformance(bestAssocType, - NestedTypeUpdate::AddIfMissing); + resultPA = updateNestedTypeForConformance(bestAssocType, kind); } // If we have an associated type, drop any concrete decls that aren't in @@ -1770,8 +1888,9 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // Update for all of the concrete decls with this name, which will introduce // various same-type constraints. for (auto concreteDecl : concreteDecls) { - auto concreteDeclPA = updateNestedTypeForConformance(concreteDecl, - NestedTypeUpdate::AddIfMissing); + auto concreteDeclPA = updateNestedTypeForConformance( + concreteDecl, + ArchetypeResolutionKind::WellFormed); if (!resultPA && concreteDecl == bestConcreteDecl) resultPA = concreteDeclPA; } @@ -1781,12 +1900,12 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( // Check whether we can add a missing nested type for this case. switch (kind) { - case NestedTypeUpdate::AddIfBetterAnchor: - case NestedTypeUpdate::AddIfMissing: + case ArchetypeResolutionKind::AlwaysPartial: break; - case NestedTypeUpdate::ResolveExisting: - // Don't add a new type; + case ArchetypeResolutionKind::WellFormed: + case ArchetypeResolutionKind::CompleteWellFormed: + case ArchetypeResolutionKind::AlreadyKnown: return nullptr; } @@ -1812,9 +1931,9 @@ PotentialArchetype *PotentialArchetype::getNestedArchetypeAnchor( PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( - Identifier name, - ProtocolDecl *proto, - NestedTypeUpdate kind) { + Identifier name, + ProtocolDecl *proto, + ArchetypeResolutionKind kind) { /// Determine whether there is an associated type or concrete type with this /// name in this protocol. If not, there's nothing to do. 
AssociatedTypeDecl *assocType = nullptr; @@ -1843,7 +1962,7 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( PointerUnion type, - NestedTypeUpdate kind) { + ArchetypeResolutionKind kind) { auto *assocType = type.dyn_cast(); auto *concreteDecl = type.dyn_cast(); if (!assocType && !concreteDecl) @@ -1895,13 +2014,9 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( // If we don't have a result potential archetype yet, we may need to add one. if (!resultPA) { switch (kind) { - case NestedTypeUpdate::AddIfBetterAnchor: - // FIXME: The loop above should have kept track of whether this type - // would make a better anchor, so we can bail out here if the answer is - // "no". - LLVM_FALLTHROUGH; - - case NestedTypeUpdate::AddIfMissing: { + case ArchetypeResolutionKind::AlwaysPartial: + case ArchetypeResolutionKind::CompleteWellFormed: + case ArchetypeResolutionKind::WellFormed: { if (assocType) resultPA = new PotentialArchetype(this, assocType); else @@ -1937,7 +2052,7 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( break; } - case NestedTypeUpdate::ResolveExisting: + case ArchetypeResolutionKind::AlreadyKnown: break; } } @@ -2003,21 +2118,7 @@ PotentialArchetype *PotentialArchetype::updateNestedTypeForConformance( // FIXME: This feels like massive overkill. Why do we have to loop? 
if (isConcreteType()) { for (auto equivT : getRepresentative()->getEquivalenceClassMembers()) { - concretizeNestedTypeFromConcreteParent( - equivT, RequirementSource::forNestedTypeNameMatch(this), - resultPA, builder, - [&](ProtocolDecl *proto) -> ProtocolConformanceRef { - auto depTy = resultPA->getDependentType({}, - /*allowUnresolved=*/true) - ->getCanonicalType(); - auto protocolTy = - proto->getDeclaredInterfaceType()->castTo(); - auto conformance = builder.getLookupConformanceFn()( - depTy, getConcreteType(), protocolTy); - assert(conformance && - "failed to find PA's conformance to known protocol"); - return *conformance; - }); + concretizeNestedTypeFromConcreteParent(equivT, resultPA, builder); } } } @@ -2341,7 +2442,8 @@ void GenericSignatureBuilder::PotentialArchetype::dump(llvm::raw_ostream &Out, #pragma mark Equivalence classes EquivalenceClass::EquivalenceClass(PotentialArchetype *representative) - : recursiveConcreteType(false), recursiveSuperclassType(false) + : recursiveConcreteType(false), invalidConcreteType(false), + recursiveSuperclassType(false) { members.push_back(representative); } @@ -2398,25 +2500,12 @@ PotentialArchetype *GenericSignatureBuilder::resolveArchetype( if (!base) return nullptr; - // Figure out what kind of nested type update we want. - typedef PotentialArchetype::NestedTypeUpdate NestedTypeUpdate; - NestedTypeUpdate updateKind; - switch (resolutionKind) { - case ArchetypeResolutionKind::AlreadyKnown: - updateKind = NestedTypeUpdate::ResolveExisting; - break; - - case ArchetypeResolutionKind::AlwaysPartial: - case ArchetypeResolutionKind::CompleteWellFormed: - updateKind = NestedTypeUpdate::AddIfMissing; - break; - } - // If we know the associated type already, get that specific type. if (auto assocType = dependentMember->getAssocType()) - return base->updateNestedTypeForConformance(assocType, updateKind); + return base->updateNestedTypeForConformance(assocType, resolutionKind); // Resolve based on name alone. 
+ // FIXME: Pass through the resolution kind? auto name = dependentMember->getName(); switch (resolutionKind) { case ArchetypeResolutionKind::AlreadyKnown: { @@ -2429,7 +2518,8 @@ PotentialArchetype *GenericSignatureBuilder::resolveArchetype( case ArchetypeResolutionKind::AlwaysPartial: case ArchetypeResolutionKind::CompleteWellFormed: - return base->getNestedArchetypeAnchor(name, *this, updateKind); + case ArchetypeResolutionKind::WellFormed: + return base->getNestedArchetypeAnchor(name, *this, resolutionKind); } } @@ -2743,7 +2833,20 @@ ConstraintResult GenericSignatureBuilder::addConformanceRequirement( continue; } - // FIXME: this is a weird situation. + // We inherited a type; this associated type will be identical + // to that typealias. + if (Source->kind == RequirementSource::RequirementSignatureSelf) { + auto inheritedOwningDecl = + inheritedType->getDeclContext() + ->getAsNominalTypeOrNominalTypeExtensionContext(); + Diags.diagnose(assocTypeDecl, + diag::associated_type_override_typealias, + assocTypeDecl->getFullName(), + inheritedOwningDecl->getDescriptiveKind(), + inheritedOwningDecl->getDeclaredInterfaceType()); + } + + addInferredSameTypeReq(assocTypeDecl, inheritedType); } inheritedTypeDecls.erase(knownInherited); @@ -2786,7 +2889,8 @@ ConstraintResult GenericSignatureBuilder::addConformanceRequirement( continue; } - // FIXME: More typealiases + // Two typealiases that should be the same. 
+ addInferredSameTypeReq(inheritedType, typealias); } inheritedTypeDecls.erase(knownInherited); @@ -2874,7 +2978,7 @@ ConstraintResult GenericSignatureBuilder::resolveUnresolvedType( parentPA->getNestedArchetypeAnchor( pa->getNestedName(), *this, - PotentialArchetype::NestedTypeUpdate::ResolveExisting); + ArchetypeResolutionKind::WellFormed); if (resolvedPA) { assert(!pa->isUnresolved() && "This type must have been resolved"); return ConstraintResult::Resolved; @@ -2997,8 +3101,9 @@ void GenericSignatureBuilder::updateSuperclass( for (auto &nested : T->getNestedTypes()) { if (nested.second.empty()) continue; if (nested.second.front()->isUnresolved()) { - (void)T->getNestedArchetypeAnchor(nested.first, *this, - PotentialArchetype::NestedTypeUpdate::ResolveExisting); + (void)T->getNestedArchetypeAnchor( + nested.first, *this, + ArchetypeResolutionKind::AlreadyKnown); } } }; @@ -3012,7 +3117,7 @@ void GenericSignatureBuilder::updateSuperclass( // Presence of a superclass constraint implies a _Class layout // constraint. - auto layoutReqSource = source->viaSuperclass(*this, nullptr); + auto layoutReqSource = source->viaDerived(*this); addLayoutRequirementDirect(T, LayoutConstraint::getLayoutConstraint( superclass->getClassOrBoundGenericClass()->isObjC() @@ -3218,16 +3323,22 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( if (T1 == T2) return ConstraintResult::Resolved; + unsigned nestingDepth1 = T1->getNestingDepth(); + unsigned nestingDepth2 = T2->getNestingDepth(); + // Decide which potential archetype is to be considered the representative. - // It doesn't specifically matter which we use, but it's a minor optimization - // to prefer the canonical type. - if (compareDependentTypes(&T2, &T1) < 0) { + // We prefer potential archetypes with lower nesting depths (because it + // prevents us from unnecessarily building deeply nested potential archetypes) + // and prefer anchors because it's a minor optimization. 
+ if (nestingDepth2 < nestingDepth1 || + compareDependentTypes(&T2, &T1) < 0) { std::swap(T1, T2); std::swap(OrigT1, OrigT2); } // Merge the equivalence classes. auto equivClass = T1->getOrCreateEquivalenceClass(); + auto equivClass1Members = equivClass->members; auto equivClass2Members = T2->getEquivalenceClassMembers(); for (auto equiv : equivClass2Members) equivClass->members.push_back(equiv); @@ -3249,14 +3360,17 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( } // Same-type-to-concrete requirements. - if (equivClass2 && equivClass2->concreteType) { - if (equivClass->concreteType) { + bool t1IsConcrete = !equivClass->concreteType.isNull(); + bool t2IsConcrete = equivClass2 && !equivClass2->concreteType.isNull(); + if (t2IsConcrete) { + if (t1IsConcrete) { (void)addSameTypeRequirement(equivClass->concreteType, equivClass2->concreteType, Source, UnresolvedHandlingKind::GenerateConstraints, - DiagnoseSameTypeConflict{Diags, Source, T1}); + SameTypeConflictCheckedLater()); } else { equivClass->concreteType = equivClass2->concreteType; + equivClass->invalidConcreteType = equivClass2->invalidConcreteType; } equivClass->concreteTypeConstraints.insert( @@ -3277,12 +3391,13 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( else source2 = equivClass2->superclassConstraints.front().source; - (void)updateSuperclass(T1, equivClass2->superclass, source2); - + // Add the superclass constraints from the second equivalence class. equivClass->superclassConstraints.insert( equivClass->superclassConstraints.end(), equivClass2->superclassConstraints.begin(), equivClass2->superclassConstraints.end()); + + (void)updateSuperclass(T1, equivClass2->superclass, source2); } // Add all of the protocol conformance requirements of T2 to T1. 
@@ -3301,6 +3416,14 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( auto dependentT1 = T1->getDependentType({ }, /*allowUnresolved=*/true); for (auto equivT2 : equivClass2Members) { for (auto T2Nested : equivT2->NestedTypes) { + // If T1 is concrete but T2 is not, concretize the nested types of T2. + if (t1IsConcrete && !t2IsConcrete) { + concretizeNestedTypeFromConcreteParent(T1, T2Nested.second.front(), + *this); + continue; + } + + // Otherwise, make the nested types equivalent. Type nestedT1 = DependentMemberType::get(dependentT1, T2Nested.first); if (isErrorResult( addSameTypeRequirement( @@ -3312,6 +3435,16 @@ GenericSignatureBuilder::addSameTypeRequirementBetweenArchetypes( } } + // If T2 is concrete but T1 was not, concretize the nested types of T1. + if (t2IsConcrete && !t1IsConcrete) { + for (auto equivT1 : equivClass1Members) { + for (auto T1Nested : equivT1->NestedTypes) { + concretizeNestedTypeFromConcreteParent(T2, T1Nested.second.front(), + *this); + } + } + } + return ConstraintResult::Resolved; } @@ -3331,56 +3464,26 @@ ConstraintResult GenericSignatureBuilder::addSameTypeRequirementToConcrete( if (equivClass->concreteType) { return addSameTypeRequirement(equivClass->concreteType, Concrete, Source, UnresolvedHandlingKind::GenerateConstraints, - DiagnoseSameTypeConflict{ Diags, Source, T}); + SameTypeConflictCheckedLater()); } // Record the requirement. equivClass->concreteType = Concrete; - // Make sure the concrete type fulfills the requirements on the archetype. - // FIXME: Move later... - DenseMap conformances; - CanType depTy = rep->getDependentType({ }, /*allowUnresolved=*/true) - ->getCanonicalType(); + // Make sure the concrete type fulfills the conformance requirements of + // this equivalence class. 
for (auto protocol : rep->getConformsTo()) { - auto conformance = - getLookupConformanceFn()(depTy, Concrete, - protocol->getDeclaredInterfaceType() - ->castTo()); - if (!conformance) { - if (!Concrete->hasError()) { - Diags.diagnose(Source->getLoc(), - diag::requires_generic_param_same_type_does_not_conform, - Concrete, protocol->getName()); - } + if (!resolveConcreteConformance(rep, protocol)) return ConstraintResult::Conflicting; - } - - conformances.insert({protocol, *conformance}); - - // Abstract conformances are acceptable for existential types. - assert(conformance->isConcrete() || Concrete->isExistentialType()); - - // Update the requirement source now that we know it's concrete. - // FIXME: Bad concrete source info. - auto concreteSource = Source->viaConcrete(*this, - conformance->isConcrete() - ? conformance->getConcrete() - : nullptr); - equivClass->conformsTo[protocol].push_back({T, protocol, concreteSource}); - ++NumConformanceConstraints; } // Eagerly resolve any existing nested types to their concrete forms (others // will be "concretized" as they are constructed, in getNestedType). 
for (auto equivT : rep->getEquivalenceClassMembers()) { for (auto nested : equivT->getNestedTypes()) { - concretizeNestedTypeFromConcreteParent( - equivT, Source, nested.second.front(), *this, - [&](ProtocolDecl *proto) -> ProtocolConformanceRef { - return conformances.find(proto)->second; - }); + concretizeNestedTypeFromConcreteParent(equivT, nested.second.front(), + *this); } } @@ -3464,17 +3567,6 @@ ConstraintResult GenericSignatureBuilder::addSameTypeRequirement( diagnoseMismatch); } -ConstraintResult GenericSignatureBuilder::addSameTypeRequirementDirect( - ResolvedType paOrT1, - ResolvedType paOrT2, - FloatingRequirementSource source) { - return addSameTypeRequirementDirect(paOrT1, paOrT2, source, - [&](Type type1, Type type2) { - Diags.diagnose(source.getLoc(), diag::requires_same_concrete_type, - type1, type2); - }); -} - ConstraintResult GenericSignatureBuilder::addSameTypeRequirementDirect( ResolvedType paOrT1, ResolvedType paOrT2, FloatingRequirementSource source, llvm::function_ref diagnoseMismatch) { @@ -3612,7 +3704,7 @@ ConstraintResult GenericSignatureBuilder::addRequirement( ModuleDecl *inferForModule) { auto subst = [&](Type t) { if (subMap) - return t.subst(*subMap); + return t.subst(*subMap, SubstFlags::UseErrorType); return t; }; @@ -4536,7 +4628,9 @@ static PotentialArchetype *getLocalAnchor(PotentialArchetype *pa, if (!parent) return pa; auto parentAnchor = getLocalAnchor(parent, builder); - return parentAnchor->getNestedArchetypeAnchor(pa->getNestedName(), builder); + return parentAnchor->getNestedArchetypeAnchor( + pa->getNestedName(), builder, + ArchetypeResolutionKind::AlwaysPartial); } /// Computes the ordered set of archetype anchors required to form a minimum @@ -4907,8 +5001,8 @@ void GenericSignatureBuilder::checkConcreteTypeConstraints( checkConstraintList( genericParams, equivClass->concreteTypeConstraints, - [](const ConcreteConstraint &constraint) { - return true; + [&](const ConcreteConstraint &constraint) { + return 
constraint.value->isEqual(equivClass->concreteType); }, [&](Type concreteType) { // If the concrete type is equivalent, the constraint is redundant. @@ -4917,10 +5011,14 @@ void GenericSignatureBuilder::checkConcreteTypeConstraints( if (concreteType->isEqual(equivClass->concreteType)) return ConstraintRelation::Redundant; - // Call this unrelated. - return ConstraintRelation::Unrelated; + // If either has a type parameter, call them unrelated. + if (concreteType->hasTypeParameter() || + equivClass->concreteType->hasTypeParameter()) + return ConstraintRelation::Unrelated; + + return ConstraintRelation::Conflicting; }, - None, + diag::same_type_conflict, diag::redundant_same_type_to_concrete, diag::same_type_redundancy_here); @@ -5139,8 +5237,11 @@ void GenericSignatureBuilder::enumerateRequirements(llvm::function_ref< auto equivClass = rep->getOrCreateEquivalenceClass(); // If we didn't compute the derived same-type components yet, do so now. - if (equivClass->derivedSameTypeComponents.empty()) + if (equivClass->derivedSameTypeComponents.empty()) { checkSameTypeConstraints(Impl->GenericParams, rep); + rep = archetype->getRepresentative(); + equivClass = rep->getOrCreateEquivalenceClass(); + } assert(!equivClass->derivedSameTypeComponents.empty() && "Didn't compute derived same-type components?"); @@ -5167,8 +5268,9 @@ void GenericSignatureBuilder::enumerateRequirements(llvm::function_ref< ? knownAnchor->concreteTypeSource : RequirementSource::forAbstract(archetype); - // Drop recursive concrete-type constraints. - if (equivClass->recursiveConcreteType) + // Drop recursive and invalid concrete-type constraints. 
+ if (equivClass->recursiveConcreteType || + equivClass->invalidConcreteType) continue; f(RequirementKind::SameType, archetype, concreteType, source); diff --git a/test/Constraints/same_types.swift b/test/Constraints/same_types.swift index ff56973fa2758..b22ae3bfb69bd 100644 --- a/test/Constraints/same_types.swift +++ b/test/Constraints/same_types.swift @@ -58,14 +58,16 @@ func test3(_ t: T, u: U) -> (X, X) func fail1< T: Fooable, U: Fooable >(_ t: T, u: U) -> (X, Y) - where T.Foo == X, U.Foo == Y, T.Foo == U.Foo { // expected-error{{associated type 'T.Foo' cannot be equal to both 'X' and 'Y'}} + where T.Foo == X, U.Foo == Y, T.Foo == U.Foo { // expected-error{{'U.Foo' cannot be equal to both 'Y' and 'X'}} + // expected-note@-1{{same-type constraint 'T.Foo' == 'X' written here}} return (t.foo, u.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Y'}} } func fail2< T: Fooable, U: Fooable >(_ t: T, u: U) -> (X, Y) - where T.Foo == U.Foo, T.Foo == X, U.Foo == Y { // expected-error{{associated type 'U.Foo' cannot be equal to both 'X' and 'Y'}} + where T.Foo == U.Foo, T.Foo == X, U.Foo == Y { // expected-error{{'U.Foo' cannot be equal to both 'Y' and 'X'}} + // expected-note@-1{{same-type constraint 'T.Foo' == 'X' written here}} return (t.foo, u.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Y'}} } @@ -75,7 +77,7 @@ func test4(_ t: T) -> Y where T.Bar == Y { func fail3(_ t: T) -> X where T.Bar == X { // expected-error {{'X' does not conform to required protocol 'Fooable'}} - return t.bar + return t.bar // expected-error{{cannot convert return expression of type 'T.Bar' }} } func test5(_ t: T) -> X where T.Bar.Foo == X { @@ -88,25 +90,28 @@ func test6(_ t: T) -> (Y, X) where T.Bar == Y { func test7(_ t: T) -> (Y, X) where T.Bar == Y, T.Bar.Foo == X { // expected-warning@-1{{redundant same-type constraint 'T.Bar.Foo' == 'X'}} + // expected-note@-2{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') 
implied here}} return (t.bar, t.bar.foo) } func fail4(_ t: T) -> (Y, Z) where - T.Bar == Y, - T.Bar.Foo == Z { // expected-error{{associated type 'T.Bar.Foo' cannot be equal to both 'Y.Foo' (aka 'X') and 'Z'}} + T.Bar == Y, // expected-note{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + T.Bar.Foo == Z { // expected-error{{'T.Bar.Foo' cannot be equal to both 'Z' and 'Y.Foo' (aka 'X')}} return (t.bar, t.bar.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Z'}} } func fail5(_ t: T) -> (Y, Z) where - T.Bar.Foo == Z, // expected-warning{{redundant same-type constraint 'T.Bar.Foo' == 'Z'}} - T.Bar == Y { // expected-error{{associated type 'T.Bar.Foo' cannot be equal to both 'Z' and 'X'}} - // expected-note@-1{{same-type constraint 'T.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + T.Bar.Foo == Z, // expected-note{{same-type constraint 'T.Bar.Foo' == 'Z' written here}} + T.Bar == Y { // expected-error{{'T.Bar.Foo' cannot be equal to both 'Y.Foo' (aka 'X') and 'Z'}} return (t.bar, t.bar.foo) // expected-error{{cannot convert return expression of type 'X' to return type 'Z'}} } -func test8(_ t: T) where T.Foo == X, T.Foo == Y {} // expected-error{{associated type 'T.Foo' cannot be equal to both 'X' and 'Y'}} +func test8(_ t: T) + where T.Foo == X, // expected-note{{same-type constraint 'T.Foo' == 'X' written here}} + T.Foo == Y {} // expected-error{{'T.Foo' cannot be equal to both 'Y' and 'X'}} + func testAssocTypeEquivalence(_ fooable: T) -> X.Type where T.Foo == X { @@ -118,16 +123,24 @@ func fail6(_ t: T) -> Int where T == Int { // expected-error{{same-type requi } func test8(_ t: T, u: U) -> (Y, Y, X, X) - where T.Bar == Y, U.Bar.Foo == X, T.Bar == U.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} + where T.Bar == Y, // expected-note{{same-type constraint 'U.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + U.Bar.Foo == X, T.Bar == U.Bar { // expected-warning{{redundant same-type 
constraint 'U.Bar.Foo' == 'X'}} return (t.bar, u.bar, t.bar.foo, u.bar.foo) } func test8a(_ t: T, u: U) -> (Y, Y, X, X) where - T.Bar == Y, U.Bar.Foo == X, U.Bar == T.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} + T.Bar == Y, // expected-note{{same-type constraint 'U.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + U.Bar.Foo == X, U.Bar == T.Bar { // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} return (t.bar, u.bar, t.bar.foo, u.bar.foo) } +func test8b(_ t: T, u: U) + where U.Bar.Foo == X, // expected-warning{{redundant same-type constraint 'U.Bar.Foo' == 'X'}} + T.Bar == Y, // expected-note{{same-type constraint 'U.Bar.Foo' == 'Y.Foo' (aka 'X') implied here}} + T.Bar == U.Bar { +} + // rdar://problem/19137463 func rdar19137463(_ t: T) where T.a == T {} // expected-error{{'a' is not a member type of 'T'}} rdar19137463(1) @@ -237,6 +250,34 @@ func structuralSameTypeRecursive1(_: T, _: U) where T.Assoc1 == Tuple2 // expected-error{{same-type constraint 'T.Assoc1' == '(T.Assoc1, U)' is recursive}} { } +protocol P3 { +} + +protocol P4 { + associatedtype A +} + +func test9(_: T) where T.A == X, T: P4, T.A: P3 { } // expected-error{{same-type constraint type 'X' does not conform to required protocol 'P3'}} + +// Same-type constraint conflict through protocol where clauses. +protocol P5 where Foo1 == Foo2 { + associatedtype Foo1 + associatedtype Foo2 +} + +protocol P6 { + associatedtype Bar: P5 +} + +struct X5a {} + +struct X5b { } + +func test9(_ t: T, u: U) + where T.Bar.Foo1 == X5a, // expected-note{{same-type constraint 'T.Bar.Foo1' == 'X5a' written here}} + U.Bar.Foo2 == X5b, // expected-error{{'U.Bar.Foo2' cannot be equal to both 'X5b' and 'X5a'}} + T.Bar == U.Bar { +} // FIXME: Remove -verify-ignore-unknown. 
// :0: error: unexpected error produced: generic parameter τ_0_0.Bar.Foo cannot be equal to both 'Y.Foo' (aka 'X') and 'Z' diff --git a/test/Generics/protocol_type_aliases.swift b/test/Generics/protocol_type_aliases.swift index cf0cfbd168c27..4626349314569 100644 --- a/test/Generics/protocol_type_aliases.swift +++ b/test/Generics/protocol_type_aliases.swift @@ -53,21 +53,17 @@ func concreteRequirementOnConcreteNestedTypeAlias(_: T) where T: Q2, S = // Incompatible concrete typealias types are flagged as such protocol P3 { - typealias T = Int // expected-error{{type alias 'T' requires types 'Q3.T' (aka 'Float') and 'Int' to be the same}} + typealias T = Int } -protocol Q3: P3 { +protocol Q3: P3 { // expected-error{{generic signature requires types 'Int'}} typealias T = Float } protocol P3_1 { - typealias T = Float // expected-error{{type alias 'T' requires types 'P3.T' (aka 'Int') and 'Float' to be the same}} + typealias T = Float } protocol Q3_1: P3, P3_1 {} // expected-error{{generic signature requires types 'Float'}} -// FIXME: these shouldn't be necessary to trigger the errors above, but are, due to -// the 'recursive decl validation' FIXME in GenericSignatureBuilder.cpp. -func useTypealias(_: T, _: T.T) {} -func useTypealias1(_: T, _: T.T) {} // Subprotocols can force associated types in their parents to be concrete, and // this should be understood for types constrained by the subprotocols. 
@@ -114,3 +110,11 @@ func checkQ6(x: T.Type) { sameType(getP6_1_A(x), getP6_2_B(x)) } +protocol P7 { + typealias A = Int +} + +protocol P7a : P7 { + associatedtype A // expected-warning{{associated type 'A' is redundant with type 'A' declared in inherited protocol 'P7'}} +} + diff --git a/test/Generics/requirement_inference.swift b/test/Generics/requirement_inference.swift index 45904ce1df0ba..df14ea8716d32 100644 --- a/test/Generics/requirement_inference.swift +++ b/test/Generics/requirement_inference.swift @@ -224,8 +224,8 @@ struct X8 : P12 { struct X9 where T.B == U.B { // CHECK-LABEL: X9.upperSameTypeConstraint - // CHECK: Generic signature: - // CHECK: Canonical generic signature: <τ_0_0, τ_0_1, τ_1_0 where τ_0_1 : P12, τ_0_0 == X8, τ_0_1.B == X7> + // CHECK: Generic signature: + // CHECK: Canonical generic signature: <τ_0_0, τ_0_1, τ_1_0 where τ_0_0 == X8, τ_0_1 : P12, τ_0_1.B == X7> func upperSameTypeConstraint(_: V) where T == X8 { } } diff --git a/test/Generics/superclass_constraint.swift b/test/Generics/superclass_constraint.swift index 94a37e081eda6..16ed23b689a6f 100644 --- a/test/Generics/superclass_constraint.swift +++ b/test/Generics/superclass_constraint.swift @@ -76,7 +76,7 @@ extension P2 where Self.T : C { // CHECK: superclassConformance1 // CHECK: Requirements: // CHECK-NEXT: τ_0_0 : C [τ_0_0: Explicit @ {{.*}}:11] -// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:11 -> Superclass] +// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:11 -> Derived] // CHECK-NEXT: τ_0_0 : P3 [τ_0_0: Explicit @ {{.*}}:11 -> Superclass (C: P3)] // CHECK: Canonical generic signature: <τ_0_0 where τ_0_0 : C> func superclassConformance1(t: T) @@ -88,7 +88,7 @@ func superclassConformance1(t: T) // CHECK: superclassConformance2 // CHECK: Requirements: // CHECK-NEXT: τ_0_0 : C [τ_0_0: Explicit @ {{.*}}:11] -// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:11 -> Superclass] +// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ 
{{.*}}:11 -> Derived] // CHECK-NEXT: τ_0_0 : P3 [τ_0_0: Explicit @ {{.*}}:11 -> Superclass (C: P3)] // CHECK: Canonical generic signature: <τ_0_0 where τ_0_0 : C> func superclassConformance2(t: T) @@ -102,7 +102,7 @@ class C2 : C, P4 { } // CHECK: superclassConformance3 // CHECK: Requirements: // CHECK-NEXT: τ_0_0 : C2 [τ_0_0: Explicit @ {{.*}}:61] -// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:46 -> Superclass] +// CHECK-NEXT: τ_0_0 : _NativeClass [τ_0_0: Explicit @ {{.*}}:46 -> Derived] // CHECK-NEXT: τ_0_0 : P4 [τ_0_0: Explicit @ {{.*}}:61 -> Superclass (C2: P4)] // CHECK: Canonical generic signature: <τ_0_0 where τ_0_0 : C2> func superclassConformance3(t: T) where T : C, T : P4, T : C2 {} diff --git a/test/attr/attr_specialize.swift b/test/attr/attr_specialize.swift index 5fc979d15e97b..a3cea524d0a88 100644 --- a/test/attr/attr_specialize.swift +++ b/test/attr/attr_specialize.swift @@ -87,7 +87,7 @@ struct FloatElement : HasElt { typealias Element = Float } @_specialize(where T == FloatElement) -@_specialize(where T == IntElement) // expected-error{{associated type 'T.Element' cannot be equal to both 'Float' and 'Int'}} +@_specialize(where T == IntElement) // expected-error{{'T.Element' cannot be equal to both 'IntElement.Element' (aka 'Int') and 'Float'}} func sameTypeRequirement(_ t: T) where T.Element == Float {} @_specialize(where T == Sub) diff --git a/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift b/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift index 065d92159e7c7..790285361c1e9 100644 --- a/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift +++ b/validation-test/compiler_crashers_2_fixed/0042-rdar21775089.swift @@ -7,7 +7,7 @@ protocol MySequenceType {} protocol MyIndexableType {} protocol MyCollectionType : MySequenceType, MyIndexableType { - typealias SubSequence = MySlice + associatedtype SubSequence = MySlice func makeSubSequence() -> SubSequence } extension MyCollectionType { @@ 
-18,7 +18,7 @@ extension MyCollectionType { } protocol MyMutableCollectionType : MyCollectionType { - typealias SubSequence = MyMutableSlice + associatedtype SubSequence = MyMutableSlice } extension MyMutableCollectionType { func makeSubSequence() -> MyMutableSlice { diff --git a/validation-test/compiler_crashers_2/0100-sr4295.swift b/validation-test/compiler_crashers_2_fixed/0100-sr4295.swift similarity index 87% rename from validation-test/compiler_crashers_2/0100-sr4295.swift rename to validation-test/compiler_crashers_2_fixed/0100-sr4295.swift index c3952ee27b187..699a1ccea49ec 100644 --- a/validation-test/compiler_crashers_2/0100-sr4295.swift +++ b/validation-test/compiler_crashers_2_fixed/0100-sr4295.swift @@ -1,4 +1,4 @@ -// RUN: not --crash %target-swift-frontend -emit-ir -primary-file %s +// RUN: not %target-swift-frontend -emit-ir -primary-file %s // REQUIRES: asserts diff --git a/validation-test/compiler_crashers_2/0101-sr5014.swift b/validation-test/compiler_crashers_2_fixed/0101-sr5014.swift similarity index 61% rename from validation-test/compiler_crashers_2/0101-sr5014.swift rename to validation-test/compiler_crashers_2_fixed/0101-sr5014.swift index cf6b121ed538d..ced11e6174d82 100644 --- a/validation-test/compiler_crashers_2/0101-sr5014.swift +++ b/validation-test/compiler_crashers_2_fixed/0101-sr5014.swift @@ -1,6 +1,4 @@ -// RUN: not --crash %target-swift-frontend -emit-ir -primary-file %s - -// REQUIRES: asserts +// RUN: not %target-swift-frontend -emit-ir -primary-file %s struct Version { } diff --git a/validation-test/compiler_crashers_2_fixed/0109-sr4737.swift b/validation-test/compiler_crashers_2_fixed/0109-sr4737.swift new file mode 100644 index 0000000000000..ad7b1be72f2f7 --- /dev/null +++ b/validation-test/compiler_crashers_2_fixed/0109-sr4737.swift @@ -0,0 +1,2952 @@ +// RUN: not %target-swift-frontend %s -typecheck + +// REQUIRES: long_test + +//===----------------------------------------------------------------------===// +extension 
UnicodeScalar { + // Hack providing an efficient API that is available to the standard library + @_versioned + @inline(__always) + init(_unchecked x: UInt32) { self = unsafeBitCast(x, to: UnicodeScalar.self) } + + static var replacementCharacter: UnicodeScalar { + return UnicodeScalar(_unchecked: 0xfffd) + } +} +//===----------------------------------------------------------------------===// +@_fixed_layout +public struct _UIntBuffer< + Storage: UnsignedInteger & FixedWidthInteger, + Element: UnsignedInteger & FixedWidthInteger +> { + @_versioned + var _storage: Storage + @_versioned + var _bitCount: UInt8 + + @inline(__always) + @_versioned + internal init(_storage: Storage, _bitCount: UInt8) { + self._storage = _storage + self._bitCount = _bitCount + } + + @inline(__always) + public init(containing e: Element) { + _storage = Storage(extendingOrTruncating: e) + _bitCount = UInt8(extendingOrTruncating: Element.bitWidth) + } +} + +extension _UIntBuffer : Sequence { + @_fixed_layout + public struct Iterator : IteratorProtocol, Sequence { + @inline(__always) + public init(_ x: _UIntBuffer) { _impl = x } + + @inline(__always) + public mutating func next() -> Element? 
{ + if _impl._bitCount == 0 { return nil } + defer { + _impl._storage = _impl._storage &>> Element.bitWidth + _impl._bitCount = _impl._bitCount &- _impl._elementWidth + } + return Element(extendingOrTruncating: _impl._storage) + } + @_versioned + var _impl: _UIntBuffer + } + + @inline(__always) + public func makeIterator() -> Iterator { + return Iterator(self) + } + + @inline(__always) + public func reversed() -> _UIntBuffer { + if Element.bitWidth == 8 { + return _UIntBuffer( + _storage: + storage.byteSwapped &>> (Storage.bitWidth &- numericCast(_bitCount)), + _bitCount: _bitCount) + } + else { + var s: Storage = 0 + for x in self { + s <<= Element.bitWidth + s |= Storage(extendingOrTruncating: x) + } + return Self(_storage: s, _bitCount: _bitCount) + } + } +} + +extension _UIntBuffer : Collection { + public typealias _Element = Element + + public struct Index : Comparable { + @_versioned + var bitOffset: UInt8 + + @_versioned + init(bitOffset: UInt8) { self.bitOffset = bitOffset } + + public static func == (lhs: Index, rhs: Index) -> Bool { + return lhs.bitOffset == rhs.bitOffset + } + public static func < (lhs: Index, rhs: Index) -> Bool { + return lhs.bitOffset < rhs.bitOffset + } + } + + public var startIndex : Index { + @inline(__always) + get { return Index(bitOffset: 0) } + } + + public var endIndex : Index { + @inline(__always) + get { return Index(bitOffset: _bitCount) } + } + + @inline(__always) + public func index(after i: Index) -> Index { + return Index(bitOffset: i.bitOffset &+ _elementWidth) + } + + @_versioned + internal var _elementWidth : UInt8 { + return UInt8(extendingOrTruncating: Element.bitWidth) + } + + public subscript(i: Index) -> Element { + @inline(__always) + get { + return Element(extendingOrTruncating: _storage &>> i.bitOffset) + } + } +} + +extension _UIntBuffer : BidirectionalCollection { + @inline(__always) + public func index(before i: Index) -> Index { + return Index(bitOffset: i.bitOffset &- _elementWidth) + } +} + +extension 
_UIntBuffer : RandomAccessCollection { + public typealias Indices = DefaultRandomAccessIndices<_UIntBuffer> + public typealias IndexDistance = Int + + @inline(__always) + public func index(_ i: Index, offsetBy n: IndexDistance) -> Index { + let x = IndexDistance(i.bitOffset) &+ n &* Element.bitWidth + return Index(bitOffset: UInt8(extendingOrTruncating: x)) + } + + @inline(__always) + public func distance(from i: Index, to j: Index) -> IndexDistance { + return (Int(j.bitOffset) &- Int(i.bitOffset)) / Element.bitWidth + } +} + +extension FixedWidthInteger { + @inline(__always) + @_versioned + func _fullShiftLeft(_ n: N) -> Self { + return (self &<< ((n &+ 1) &>> 1)) &<< (n &>> 1) + } + @inline(__always) + @_versioned + func _fullShiftRight(_ n: N) -> Self { + return (self &>> ((n &+ 1) &>> 1)) &>> (n &>> 1) + } + @inline(__always) + @_versioned + static func _lowBits(_ n: N) -> Self { + return ~((~0 as Self)._fullShiftLeft(n)) + } +} + +extension Range { + @inline(__always) + @_versioned + func _contains_(_ other: Range) -> Bool { + return other.clamped(to: self) == other + } +} + +extension _UIntBuffer : RangeReplaceableCollection { + @inline(__always) + public init() { + _storage = 0 + _bitCount = 0 + } + + public var capacity: Int { + return Storage.bitWidth / Element.bitWidth + } + + @inline(__always) + public mutating func append(_ newElement: Element) { + _debugPrecondition(count < capacity) + _storage |= Storage(newElement) &<< _bitCount + _bitCount = _bitCount &+ _elementWidth + } + + @inline(__always) + public mutating func replaceSubrange( + _ target: Range, with replacement: C + ) where C._Element == Element { + _debugPrecondition( + (0..<_bitCount)._contains_( + target.lowerBound.bitOffset.. 
= (T, consumedCodeUnits: UInt8, isValid: Bool) +} + +public protocol UnicodeDecoder { + associatedtype CodeUnit : UnsignedInteger, FixedWidthInteger + associatedtype EncodedScalar : BidirectionalCollection + where EncodedScalar.Iterator.Element == CodeUnit + + init() + + mutating func parseOne( + _ input: inout I + ) -> Unicode.ParseResult where I.Element == CodeUnit +} + +extension UnicodeDecoder { + @inline(__always) + @discardableResult + public static func decode( + _ input: inout I, + repairingIllFormedSequences makeRepairs: Bool, + into output: (UnicodeScalar)->Void + ) -> Int + where I.Element == CodeUnit + { + var errors = 0 + var d = Self() + while true { + switch d.parseOne(&input) { + case let .valid(scalarContent): + output(decodeOne(scalarContent)) + case .invalid: + if !makeRepairs { return 1 } + errors += 1 + output(UnicodeScalar(_unchecked: 0xFFFD)) + case .emptyInput: + return errors + } + } + } +} + + +extension Unicode { + struct ParsingIterator< + CodeUnitIterator : IteratorProtocol, + Encoding: UnicodeEncoding, + Decoder: UnicodeDecoder + > where Decoder.CodeUnit == CodeUnitIterator.Element, + Encoding.EncodedScalar == Decoder.EncodedScalar { + var codeUnits: CodeUnitIterator + var decoder: Decoder + } +} +extension Unicode.ParsingIterator : IteratorProtocol, Sequence { + mutating func next() -> Decoder.EncodedScalar? 
{ + switch decoder.parseOne(&codeUnits) { + case let .valid(scalarContent): return scalarContent + case .invalid: return Encoding.encodedReplacementScalar + case .emptyInput: return nil + } + } +} + +extension Unicode { + struct DefaultScalarView< + CodeUnits: BidirectionalCollection, + Encoding: UnicodeEncoding + > where CodeUnits.Iterator.Element == Encoding.CodeUnit { + var codeUnits: CodeUnits + init( + _ codeUnits: CodeUnits, + fromEncoding _: Encoding.Type = Encoding.self) { + self.codeUnits = codeUnits + } + } +} + +extension Unicode.DefaultScalarView : Sequence { + struct Iterator { + var parsing: Unicode.ParsingIterator< + CodeUnits.Iterator, Encoding, Encoding.ForwardDecoder + > + } + + func makeIterator() -> Iterator { + return Iterator( + parsing: Unicode.ParsingIterator( + codeUnits: codeUnits.makeIterator(), + decoder: Encoding.ForwardDecoder() + )) + } +} + +extension Unicode.DefaultScalarView.Iterator : IteratorProtocol, Sequence { + mutating func next() -> UnicodeScalar? 
{ + return parsing.next().map { + Encoding.ForwardDecoder.decodeOne($0) + } + } +} + +extension Unicode.DefaultScalarView { + struct Index { + var codeUnitIndex: CodeUnits.Index + var scalar: UnicodeScalar + var stride: UInt8 + } +} + +extension Unicode.DefaultScalarView.Index : Comparable { + @inline(__always) + public static func < ( + lhs: Unicode.DefaultScalarView.Index, + rhs: Unicode.DefaultScalarView.Index + ) -> Bool { + return lhs.codeUnitIndex < rhs.codeUnitIndex + } + + @inline(__always) + public static func == ( + lhs: Unicode.DefaultScalarView.Index, + rhs: Unicode.DefaultScalarView.Index + ) -> Bool { + return lhs.codeUnitIndex == rhs.codeUnitIndex + } +} + +extension Unicode.DefaultScalarView : Collection { + public var startIndex: Index { + @inline(__always) + get { + return index( + after: Index( + codeUnitIndex: codeUnits.startIndex, + scalar: UnicodeScalar(_unchecked: 0), + stride: 0) + ) + } + } + + public var endIndex: Index { + @inline(__always) + get { + return Index( + codeUnitIndex: codeUnits.endIndex, + scalar: UnicodeScalar(_unchecked: 0), + stride: 0) + } + } + + public subscript(i: Index) -> UnicodeScalar { + @inline(__always) get { return i.scalar } + } + + @inline(__always) + public func index(after i: Index) -> Index { + let nextPosition = codeUnits.index( + i.codeUnitIndex, offsetBy: numericCast(i.stride)) + var i = IndexingIterator( + _elements: codeUnits, _position: nextPosition + ) + var d = Encoding.ForwardDecoder() + switch d.parseOne(&i) { + case .valid(let scalarContent): + return Index( + codeUnitIndex: nextPosition, + scalar: Encoding.ForwardDecoder.decodeOne(scalarContent), + stride: numericCast(scalarContent.count)) + case .invalid(let stride): + return Index( + codeUnitIndex: nextPosition, + scalar: UnicodeScalar(_unchecked: 0xfffd), + stride: numericCast(stride)) + case .emptyInput: + return endIndex + } + } +} + +// This should go in the standard library; see +// https://github.com/apple/swift/pull/9074 and +// 
https://bugs.swift.org/browse/SR-4721 +@_fixed_layout +public struct ReverseIndexingIterator< + Elements : BidirectionalCollection +> : IteratorProtocol, Sequence { + + @_inlineable + @inline(__always) + /// Creates an iterator over the given collection. + public /// @testable + init(_elements: Elements, _position: Elements.Index) { + self._elements = _elements + self._position = _position + } + + @_inlineable + @inline(__always) + public mutating func next() -> Elements._Element? { + guard _fastPath(_position != _elements.startIndex) else { return nil } + _position = _elements.index(before: _position) + return _elements[_position] + } + + @_versioned + internal let _elements: Elements + @_versioned + internal var _position: Elements.Index +} + +extension Unicode.DefaultScalarView : BidirectionalCollection { + @inline(__always) + public func index(before i: Index) -> Index { + var d = Encoding.ReverseDecoder() + + var more = ReverseIndexingIterator( + _elements: codeUnits, _position: i.codeUnitIndex) + + switch d.parseOne(&more) { + case .valid(let scalarContent): + let d: CodeUnits.IndexDistance = -numericCast(scalarContent.count) + return Index( + codeUnitIndex: codeUnits.index(i.codeUnitIndex, offsetBy: d), + scalar: Encoding.ReverseDecoder.decodeOne(scalarContent), + stride: numericCast(scalarContent.count)) + case .invalid(let stride): + let d: CodeUnits.IndexDistance = -numericCast(stride) + return Index( + codeUnitIndex: codeUnits.index(i.codeUnitIndex, offsetBy: d) , + scalar: UnicodeScalar(_unchecked: 0xfffd), + stride: numericCast(stride)) + case .emptyInput: fatalError("index out of bounds.") + } + } +} + +public protocol UnicodeEncoding { + associatedtype CodeUnit + + associatedtype EncodedScalar + where CodeUnit == EncodedScalar.Iterator.Element + + static var encodedReplacementScalar : EncodedScalar { get } + static func decode(_ content: EncodedScalar) -> UnicodeScalar + + associatedtype ForwardDecoder : UnicodeDecoder + where EncodedScalar == 
ForwardDecoder.EncodedScalar + + associatedtype ReverseDecoder : UnicodeDecoder + where EncodedScalar == ReverseDecoder.EncodedScalar +} + +internal protocol _UTFEncoding : UnicodeEncoding { + static func _isScalar(_: CodeUnit) -> Bool +} + +public protocol _UTFDecoderBase : UnicodeDecoder { + + associatedtype Buffer : RangeReplaceableCollection = EncodedScalar + var buffer: Buffer { get set } + + associatedtype BufferStorage : UnsignedInteger, FixedWidthInteger = UInt32 +} + +public protocol _UTFDecoder : _UTFDecoderBase +where Buffer == _UIntBuffer, Buffer == EncodedScalar { + static func _isScalar(_: CodeUnit) -> Bool + func _parseMultipleCodeUnits() -> Unicode.ParseResult +} + +extension _UTFEncoding { + public mutating func parseScalar( + from input: inout I, with decoder: inout Decoder + ) -> Unicode.ParseResult + where I.Element == CodeUnit { + + // Bufferless single-scalar fastpath. + if _fastPath(buffer.isEmpty) { + guard let codeUnit = input.next() else { return .emptyInput } + // ASCII, return immediately. + if Self._isScalar(codeUnit) { + return ( + EncodedScalar(containing: codeUnit), + consumedCodeUnits: 1, isValid: true) + } + // Non-ASCII, proceed to buffering mode. + buffer.append(codeUnit) + } else if Self._isScalar(CodeUnit(extendingOrTruncating: buffer._storage)) { + // ASCII in buffer. We don't refill the buffer so we can return + // to bufferless mode once we've exhausted it. + let codeUnit = CodeUnit(extendingOrTruncating: buffer._storage) + buffer.remove(at: buffer.startIndex) + return ( + EncodedScalar(containing: codeUnit), + consumedCodeUnits: 1, isValid: true) + } + // Buffering mode. + // Fill buffer back to 4 bytes (or as many as are left in the iterator). + _sanityCheck(buffer._bitCount < BufferStorage.bitWidth) + repeat { + if let codeUnit = input.next() { + buffer.append(codeUnit) + } else { + if buffer.isEmpty { return .emptyInput } + break // We still have some bytes left in our buffer. 
+ } + } while buffer._bitCount < BufferStorage.bitWidth + + // Find one unicode scalar. + return _parseMultipleCodeUnits() + } +} + +//===----------------------------------------------------------------------===// +//===--- UTF8 Decoders ----------------------------------------------------===// +//===----------------------------------------------------------------------===// + +public protocol _UTF8Decoder : _UTFDecoder {} + +extension _UTF8Decoder { + public static func _isScalar(_ x: CodeUnit) -> Bool { return x & 0x80 == 0 } +} + +extension Unicode.UTF8 : UnicodeEncoding { + public typealias EncodedScalar = _UIntBuffer + public static var encodedReplacementScalar : EncodedScalar { + return EncodedScalar(_storage: 0xbdbfef, _bitCount: 24) + } + + public struct ForwardDecoder { + public typealias Buffer = _UIntBuffer + public typealias EncodedScalar = _UIntBuffer + public init() { } + public var buffer = Buffer() + } + + public struct ReverseDecoder { + public typealias Buffer = _UIntBuffer + public typealias EncodedScalar = _UIntBuffer + public init() { } + public var buffer = Buffer() + } + + public static func decode(_ source: EncodedScalar) -> UnicodeScalar { + let bits = source._storage + switch source._bitCount { + case 8: + return UnicodeScalar(_unchecked: bits) + case 16: + var value = (bits & 0b0_______________________11_1111__0000_0000) &>> 8 + value |= (bits & 0b0________________________________0001_1111) &<< 6 + return UnicodeScalar(_unchecked: value) + case 24: + var value = (bits & 0b0____________11_1111__0000_0000__0000_0000) &>> 16 + value |= (bits & 0b0_______________________11_1111__0000_0000) &>> 2 + value |= (bits & 0b0________________________________0000_1111) &<< 12 + return UnicodeScalar(_unchecked: value) + default: + _sanityCheck(source.count == 4) + var value = (bits & 0b0_11_1111__0000_0000__0000_0000__0000_0000) &>> 24 + value |= (bits & 0b0____________11_1111__0000_0000__0000_0000) &>> 10 + value |= (bits & 
0b0_______________________11_1111__0000_0000) &<< 4 + value |= (bits & 0b0________________________________0000_0111) &<< 18 + return UnicodeScalar(_unchecked: value) + } + } +} + +extension Unicode.UTF8.ReverseDecoder : _UTF8Decoder { + public typealias CodeUnit = UInt8 + + @inline(__always) + @_versioned + internal mutating func _consumeCodeUnits(_ n: UInt8) -> EncodedScalar { + let s = buffer._storage + let bitCount = n &* UInt8(CodeUnit.bitWidth) + buffer._storage >>= bitCount + buffer._bitCount -= bitCount + return EncodedScalar( + _storage: s.byteSwapped >> (type(of: s).bitWidth - bitCount), + _bitCount: bitCount) + } + + @inline(__always) + @_versioned + internal mutating func _consumeValidCodeUnits( + _ n: UInt8 + ) -> Unicode.ParseResult { + return ParseResult( + _consumeCodeUnits(n), consumedCodeUnits: n, isValid: true) + } + + @inline(__always) + @_versioned + internal mutating func _consumeInvalidCodeUnits( + _ n: UInt8 + ) -> Unicode.ParseResult { + _ = _consumeCodeUnits(n) + return ParseResult( + UTF8.encodedReplacementScalar, consumedCodeUnits: n, isValid: false) + } + + public // @testable + func _parseMultipleCodeUnits() -> Unicode.ParseResult { + _sanityCheck(buffer._storage & 0x80 != 0) // this case handled elsewhere + + if buffer._storage & 0b0__1110_0000__1100_0000 + == 0b0__1100_0000__1000_0000 { + // 2-byte sequence. Top 4 bits of decoded result must be nonzero + let top4Bits = buffer._storage & 0b0__0001_1110__0000_0000 + if _fastPath(top4Bits != 0) { + return _consumeValidCodeUnits(2) + } + } + else if buffer._storage & 0b0__1111_0000__1100_0000__1100_0000 + == 0b0__1110_0000__1000_0000__1000_0000 { + // 3-byte sequence. 
The top 5 bits of the decoded result must be nonzero + // and not a surrogate + let top5Bits = buffer._storage & 0b0__1111__0010_0000__0000_0000 + if _fastPath( + top5Bits != 0 && top5Bits != 0b0__1101__0010_0000__0000_0000) { + return _consumeValidCodeUnits(3) + } + } + else if buffer._storage & 0b0__1111_1000__1100_0000__1100_0000__1100_0000 + == 0b0__1111_0000__1000_0000__1000_0000__1000_0000 { + // Make sure the top 5 bits of the decoded result would be in range + let top5bits = buffer._storage + & 0b0__0111__0011_0000__0000_0000__0000_0000 + if _fastPath( + top5bits != 0 + && top5bits <= 0b0__0100__0000_0000__0000_0000__0000_0000 + ) { + return _consumeValidCodeUnits(4) + } + } + return _parseInvalid() + } + + @inline(never) + mutating func _parseInvalid() -> Unicode.ParseResult { + if buffer._storage & 0b0__1111_0000__1100_0000 + == 0b0__1110_0000__1000_0000 { + // 2-byte prefix of 3-byte sequence. The top 5 bits of the decoded result + // must be nonzero and not a surrogate + let top5Bits = buffer._storage & 0b0__1111__0010_0000 + if top5Bits != 0 && top5Bits != 0b0__1101__0010_0000 { + return invalid(codeUnitCount: 2) + } + } + else if buffer._storage & 0b0__1111_1000__1100_0000 + == 0b0__1111_0000__1000_0000 + { + // 2-byte prefix of 4-byte sequence + // Make sure the top 5 bits of the decoded result would be in range + let top5bits = buffer._storage & 0b0__0111__0011_0000 + if top5bits != 0 && top5bits <= 0b0__0100__0000_0000 { + return invalid(codeUnitCount: 2) + } + } + else if buffer._storage & 0b0__1111_1000__1100_0000__1100_0000 + == 0b0__1111_0000__1000_0000__1000_0000 { + // 3-byte prefix of 4-byte sequence + // Make sure the top 5 bits of the decoded result would be in range + let top5bits = buffer._storage & 0b0__0111__0011_0000__0000_0000 + if top5bits != 0 && top5bits <= 0b0__0100__0000_0000__0000_0000 { + return invalid(codeUnitCount: 3) + } + } + return invalid(codeUnitCount: 1) + } +} + +extension Unicode.UTF8.ForwardDecoder : _UTF8Decoder { 
+ public typealias CodeUnit = UInt8 + + @inline(__always) + @_versioned + internal mutating func _consumeCodeUnits(_ n: UInt8) -> EncodedScalar { + let s = buffer._storage + let bitCount = n &* UInt8(CodeUnit.bitWidth) + buffer._storage >>= bitCount + buffer._bitCount -= bitCount + return EncodedScalar(_storage: s, _bitCount: bitCount) + } + + @inline(__always) + @_versioned + internal mutating func _consumeValidCodeUnits( + _ n: UInt8 + ) -> Unicode.ParseResult { + return ParseResult( + _consumeCodeUnits(codeUnitCount, consumedCodeUnits: n, isValid: true)) + } + + @inline(__always) + @_versioned + internal func _consumeInvalidCodeUnits( + codeUnitCount n: UInt8 + ) -> Unicode.ParseResult { + _ = _consumeCodeUnits(n) + return ParseResult( + UTF8.encodedReplacementScalar, consumedCodeUnits: n, isValid: false) + } + + public // @testable + func _parseMultipleCodeUnits() -> Unicode.ParseResult { + _sanityCheck(buffer._storage & 0x80 != 0) // this case handled elsewhere + + if buffer._storage & 0b0__1100_0000__1110_0000 + == 0b0__1000_0000__1100_0000 { + // 2-byte sequence. At least one of the top 4 bits of the decoded result + // must be nonzero. + if _fastPath(buffer._storage & 0b0_0001_1110 != 0) { + return _consumeValidCodeUnits(2) + } + } + else if buffer._storage & 0b0__1100_0000__1100_0000__1111_0000 + == 0b0__1000_0000__1000_0000__1110_0000 { + // 3-byte sequence. The top 5 bits of the decoded result must be nonzero + // and not a surrogate + let top5Bits = buffer._storage & 0b0___0010_0000__0000_1111 + if _fastPath(top5Bits != 0 && top5Bits != 0b0___0010_0000__0000_1101) { + return _consumeValidCodeUnits(3) + } + } + else if buffer._storage & 0b0__1100_0000__1100_0000__1100_0000__1111_1000 + == 0b0__1000_0000__1000_0000__1000_0000__1111_0000 { + // 4-byte sequence. 
The top 5 bits of the decoded result must be nonzero + // and no greater than 0b0__0100_0000 + let top5bits = UInt16(buffer._storage & 0b0__0011_0000__0000_0111) + if _fastPath( + top5bits != 0 && top5bits.byteSwapped <= 0b0__0000_0100__0000_0000 + ) { + return _consumeValidCodeUnits(4) + } + } + return _parseInvalid() + } + + @inline(never) + mutating func _parseInvalid() -> Unicode.ParseResult { + + if buffer._storage & 0b0__1100_0000__1111_0000 + == 0b0__1000_0000__1110_0000 { + // 2-byte prefix of 3-byte sequence. The top 5 bits of the decoded result + // must be nonzero and not a surrogate + let top5Bits = buffer._storage & 0b0__0010_0000__0000_1111 + if top5Bits != 0 && top5Bits != 0b0__0010_0000__0000_1101 { + return _consumeInvalidCodeUnits(2) + } + } + else if buffer._storage & 0b0__1100_0000__1111_1000 + == 0b0__1000_0000__1111_0000 + { + // Prefix of 4-byte sequence. The top 5 bits of the decoded result + // must be nonzero and no greater than 0b0__0100_0000 + let top5bits = UInt16(buffer._storage & 0b0__0011_0000__0000_0111) + if top5bits != 0 && top5bits.byteSwapped <= 0b0__0000_0100__0000_0000 { + return _consumeInvalidCodeUnits( + buffer._storage & 0b0__1100_0000__0000_0000__0000_0000 + == 0b0__1000_0000__0000_0000__0000_0000 ? 
3 : 2) + } + } + return 1 + } +} + +//===----------------------------------------------------------------------===// +//===--- UTF-16 Decoders --------------------------------------------------===// +//===----------------------------------------------------------------------===// + +public protocol _UTF16Decoder : _UTFDecoder where CodeUnit == UTF16.CodeUnit { + var buffer: Buffer { get set } + static var _surrogatePattern : UInt32 { get } +} + +extension _UTF16Decoder { + public static func _isScalar(_ x: CodeUnit) -> Bool { + return x & 0xf800 != 0xd800 + } + + internal mutating func _consume(bitCount: UInt8) -> EncodedScalar { + _sanityCheck(bitCount == 16) + let s = buffer._storage + buffer._storage = 0 + buffer._bitCount = 0 + return EncodedScalar(_storage: s, _bitCount: bitCount) + } + + public // @testable + func _parseMultipleCodeUnits() -> (isValid: Bool, bitCount: UInt8) { + _sanityCheck( // this case handled elsewhere + !Self._isScalar(UInt16(extendingOrTruncating: buffer._storage))) + + if _fastPath(buffer._storage & 0xFC00_FC00 == Self._surrogatePattern) { + return (true, 2*16) + } + return (false, 1*16) + } +} + +extension Unicode.UTF16 : UnicodeEncoding { + public typealias EncodedScalar = _UIntBuffer + public static var encodedReplacementScalar : EncodedScalar { + return EncodedScalar(_storage: 0xFFFD, _bitCount: 16) + } + + public struct ForwardDecoder { + public typealias Buffer = _UIntBuffer + public init() { buffer = Buffer() } + public var buffer: Buffer + } + + public struct ReverseDecoder { + public typealias Buffer = _UIntBuffer + public init() { buffer = Buffer() } + public var buffer: Buffer + } + + public static func decode(_ source: EncodedScalar) -> UnicodeScalar { + let bits = source._storage + if _fastPath(source._bitCount == 16) { + return UnicodeScalar(_unchecked: bits & 0xffff) + } + _sanityCheck(source._bitCount == 32) + let value = 0x10000 + (bits >> 16 & 0x03ff | (bits & 0x03ff) << 10) + return UnicodeScalar(_unchecked: value) + 
}
+}
+
+// Both UTF-16 decoders conform to `_UTF16Decoder`; the visible difference
+// between them is `_surrogatePattern`, whose two 16-bit halves (0xD800 and
+// 0xDC00) appear in opposite order for the reverse and forward decoders.
+extension UTF16.ReverseDecoder : _UTF16Decoder {
+  public typealias CodeUnit = UInt16
+  // NOTE(review): `Buffer` looks like a generic type whose argument list was
+  // lost in extraction -- confirm against the original file.
+  public typealias EncodedScalar = Buffer
+
+  public static var _surrogatePattern : UInt32 { return 0xD800_DC00 }
+}
+
+extension Unicode.UTF16.ForwardDecoder : _UTF16Decoder {
+  public typealias CodeUnit = UInt16
+  public typealias EncodedScalar = Buffer
+
+  public static var _surrogatePattern : UInt32 { return 0xDC00_D800 }
+}
+
+#if !BENCHMARK
+//===--- testing ----------------------------------------------------------===//
+import StdlibUnittest
+import SwiftPrivate
+
+/// Decodes `utfStr` with the legacy `transcode` function, the forward
+/// decoder, the reverse decoder and `Unicode.DefaultScalarView`, and compares
+/// the scalars each one produces with the expected values.
+///
+/// - Parameters:
+///   - codec: The codec type handed to the legacy `transcode` function.
+///   - expectedHead: Scalar values expected from the non-repairing forward
+///     passes.
+///   - expectedRepairedTail: Scalar values expected after `expectedHead` once
+///     repairing is enabled; empty when no decoding error is expected.
+///   - utfStr: The code units to decode.
+/// - Returns: `assertionSuccess()` when every `check` comparison matched,
+///   otherwise a failure whose description lists each mismatching pass.
+///   Error counts and the scalar-view comparisons are reported directly
+///   through `expect*` calls, not through the returned value.
+// NOTE(review): `Codec` is not declared by the signature as shown; the generic
+// parameter clause appears to have been stripped in extraction -- confirm.
+func checkDecodeUTF(
+    _ codec: Codec.Type, _ expectedHead: [UInt32],
+    _ expectedRepairedTail: [UInt32], _ utfStr: [Codec.CodeUnit]
+) -> AssertionResult {
+  var decoded = [UInt32]()
+  var expected = expectedHead
+  func output(_ scalar: UInt32) { decoded.append(scalar) }
+  func output1(_ scalar: UnicodeScalar) { decoded.append(scalar.value) }
+
+  var result = assertionSuccess()
+
+  // Compares `decoded` with `expected`, records a described failure in
+  // `result` on mismatch, then clears `decoded` for the next pass.
+  // NOTE(review): `C` is likewise undeclared here (stripped generic clause).
+  func check(_ expected: C, _ description: String)
+  where C.Iterator.Element == UInt32
+  {
+    if !expected.elementsEqual(decoded) {
+      if result.description == "" { result = assertionFailure() }
+      // Print the sequence that was actually compared. The previous message
+      // printed `expectedHead`, which omits the repaired tail and is never
+      // reversed, so it misreported the expectation for those passes.
+      result = result.withDescription(" [\(description)]\n")
+        .withDescription("expected: \(asHex(Array(expected)))\n")
+        .withDescription("actual: \(asHex(decoded))")
+    }
+    decoded.removeAll(keepingCapacity: true)
+  }
+
+  //===--- Tests without repairs ------------------------------------------===//
+  do {
+    let iterator = utfStr.makeIterator()
+    _ = transcode(
+      iterator, from: codec, to: UTF32.self,
+      stoppingOnError: true, into: output)
+  }
+  check(expected, "legacy, repairing: false")
+
+  do {
+    var iterator = utfStr.makeIterator()
+    let errorCount = Codec.ForwardDecoder.decode(
+      &iterator, repairingIllFormedSequences: false, into: output1)
+    expectEqual(expectedRepairedTail.isEmpty ? 0 : 1, errorCount)
+  }
+  check(expected, "forward, repairing: false")
+
+  do {
+    var iterator = utfStr.reversed().makeIterator()
+    let errorCount = Codec.ReverseDecoder.decode(
+      &iterator, repairingIllFormedSequences: false, into: output1)
+    if expectedRepairedTail.isEmpty {
+      expectEqual(0, errorCount)
+      check(expected.reversed(), "reverse, repairing: false")
+    }
+    else {
+      expectEqual(1, errorCount)
+      // A non-repairing reverse pass is only required to produce a prefix of
+      // the fully repaired, reversed output.
+      let x = (expected + expectedRepairedTail).reversed()
+      expectTrue(
+        x.starts(with: decoded),
+        "reverse, repairing: false\n\t\(Array(x)) does not start with \(decoded)")
+      decoded.removeAll(keepingCapacity: true)
+    }
+  }
+
+  //===--- Tests with repairs ------------------------------------------===//
+  expected += expectedRepairedTail
+  do {
+    let iterator = utfStr.makeIterator()
+    _ = transcode(iterator, from: codec, to: UTF32.self,
+      stoppingOnError: false, into: output)
+  }
+  check(expected, "legacy, repairing: true")
+  do {
+    var iterator = utfStr.makeIterator()
+    let errorCount = Codec.ForwardDecoder.decode(
+      &iterator, repairingIllFormedSequences: true, into: output1)
+
+    if expectedRepairedTail.isEmpty { expectEqual(0, errorCount) }
+    else { expectNotEqual(0, errorCount) }
+  }
+  check(expected, "forward, repairing: true")
+  do {
+    var iterator = utfStr.reversed().makeIterator()
+    let errorCount = Codec.ReverseDecoder.decode(
+      &iterator, repairingIllFormedSequences: true, into: output1)
+    if expectedRepairedTail.isEmpty { expectEqual(0, errorCount) }
+    else { expectNotEqual(0, errorCount) }
+  }
+  check(expected.reversed(), "reverse, repairing: true")
+
+  // The scalar view must yield the repaired scalars in both directions.
+  let scalars = Unicode.DefaultScalarView(utfStr, fromEncoding: Codec.self)
+  expectEqualSequence(expected, scalars.map { $0.value })
+  expectEqualSequence(
+    expected.reversed(),
+    scalars.reversed().map { $0.value })
+
+  // Walking the view by index must visit the same elements as its iterator.
+  do {
+    var x = scalars.makeIterator()
+    var j = scalars.startIndex
+    while (j != scalars.endIndex) {
+      expectEqual(x.next()!, scalars[j])
+      j = scalars.index(after: j)
+    }
+    expectNil(x.next())
+  }
+  return result
+}
+
+/// Runs `checkDecodeUTF` with `UTF8` as the codec.
+func checkDecodeUTF8(
+    _ expectedHead: [UInt32],
+    _ expectedRepairedTail: [UInt32], _ utf8Str: [UInt8]
+) -> AssertionResult {
+  return checkDecodeUTF(UTF8.self, expectedHead, expectedRepairedTail, utf8Str)
+}
+
+/// Runs `checkDecodeUTF` with `UTF16` as the codec.
+func checkDecodeUTF16(
+    _ expectedHead: [UInt32],
+    _ expectedRepairedTail: [UInt32], _ utf16Str: [UInt16]
+) -> AssertionResult {
+  return checkDecodeUTF(UTF16.self, expectedHead, expectedRepairedTail,
+      utf16Str)
+}
+
+/*
+func checkDecodeUTF32(
+    _ expectedHead: [UInt32],
+    _ expectedRepairedTail: [UInt32], _ utf32Str: [UInt32]
+) -> AssertionResult {
+  return checkDecodeUTF(UTF32.self, expectedHead, expectedRepairedTail,
+      utf32Str)
+}
+*/
+
+/// Transcodes `scalars` from UTF-32 to UTF-8 with `stoppingOnError: true` and
+/// compares the bytes produced with `expected`.
+///
+/// - Returns: `assertionSuccess()` on a match, otherwise a failure describing
+///   the expected and actual bytes. An encoding error is reported separately
+///   through `expectFalse`.
+func checkEncodeUTF8(_ expected: [UInt8],
+                     _ scalars: [UInt32]) -> AssertionResult {
+  var encoded = [UInt8]()
+  let output: (UInt8) -> Void = { encoded.append($0) }
+  let iterator = scalars.makeIterator()
+  let hadError = transcode(
+    iterator,
+    from: UTF32.self,
+    to: UTF8.self,
+    stoppingOnError: true,
+    into: output)
+  expectFalse(hadError)
+  if expected != encoded {
+    return assertionFailure()
+      .withDescription("\n")
+      .withDescription("expected: \(asHex(expected))\n")
+      .withDescription("actual: \(asHex(encoded))")
+  }
+
+  return assertionSuccess()
+}
+
+var UTF8Decoder = TestSuite("UTF8Decoder")
+
+//===----------------------------------------------------------------------===//
+/// One table entry: a string together with its UTF-8, UTF-16 and scalar
+/// representations, plus the source location where the entry was written.
+public struct UTFTest {
+  public struct Flags : OptionSet {
+    public let rawValue: Int
+
+    public init(rawValue: Int) {
+      self.rawValue = rawValue
+    }
+
+    public static let utf8IsInvalid = Flags(rawValue: 1 << 0)
+    public static let utf16IsInvalid = Flags(rawValue: 1 << 1)
+  }
+
+  public let string: String
+  public let utf8: [UInt8]
+  public let utf16: [UInt16]
+  public let unicodeScalars: [UnicodeScalar]
+  public let unicodeScalarsRepairedTail: [UnicodeScalar]
+  public let flags: Flags
+  public let loc: SourceLoc
+
+  /// `unicodeScalars` as raw `UInt32` values.
+  public var utf32: [UInt32] {
+    return unicodeScalars.map(UInt32.init)
+  }
+
+  /// `unicodeScalarsRepairedTail` as raw `UInt32` values.
+  public var utf32RepairedTail: [UInt32] {
+    return unicodeScalarsRepairedTail.map(UInt32.init)
+  }
+
+  /// Creates a table entry. `file` and `line` default to the call site, so
+  /// `loc` records where the entry appears in this file.
+  ///
+  /// Every value in `scalars` and `scalarsRepairedTail` is force-unwrapped
+  /// through `UnicodeScalar(_:)`, so a value that is not a valid scalar traps.
+  public init(
+    string: String,
+    utf8: [UInt8],
+    utf16: [UInt16],
+    scalars: [UInt32],
+    scalarsRepairedTail: [UInt32] = [],
+    flags: Flags = [],
+    file: String = #file, line: UInt = #line
+  ) {
+    self.string = string
+    self.utf8 = utf8
+    self.utf16 = utf16
+    self.unicodeScalars = scalars.map { UnicodeScalar($0)! }
+    self.unicodeScalarsRepairedTail =
+      scalarsRepairedTail.map { UnicodeScalar($0)! }
+    self.flags = flags
+    self.loc = SourceLoc(file, line, comment: "test data")
+  }
+}
+
+public var utfTests: [UTFTest] = []
+  //
+  // Empty sequence.
+  //
+
+utfTests.append(
+  UTFTest(
+    string: "",
+    utf8: [],
+    utf16: [],
+    scalars: []))
+
+  //
+  // 1-byte sequences.
+  //
+
+  // U+0000 NULL
+utfTests.append(
+  UTFTest(
+    string: "\u{0000}",
+    utf8: [ 0x00 ],
+    utf16: [ 0x00 ],
+    scalars: [ 0x00 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+utfTests.append(
+  UTFTest(
+    string: "A",
+    utf8: [ 0x41 ],
+    utf16: [ 0x41 ],
+    scalars: [ 0x41 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+utfTests.append(
+  UTFTest(
+    string: "AB",
+    utf8: [ 0x41, 0x42 ],
+    utf16: [ 0x41, 0x42 ],
+    scalars: [ 0x41, 0x42 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0043 LATIN CAPITAL LETTER C
+utfTests.append(
+  UTFTest(
+    string: "ABC",
+    utf8: [ 0x41, 0x42, 0x43 ],
+    utf16: [ 0x41, 0x42, 0x43 ],
+    scalars: [ 0x41, 0x42, 0x43 ]))
+
+  // U+0000 NULL
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0000 NULL
+utfTests.append(
+  UTFTest(
+    string: "\u{0000}AB\u{0000}",
+    utf8: [ 0x00, 0x41, 0x42, 0x00 ],
+    utf16: [ 0x00, 0x41, 0x42, 0x00 ],
+    scalars: [ 0x00, 0x41, 0x42, 0x00 ]))
+
+  // U+007F DELETE
+utfTests.append(
+  UTFTest(
+    string: "\u{007F}",
+    utf8: [ 0x7F ],
+    utf16: [ 0x7F ],
+    scalars: [ 0x7F ]))
+
+  //
+  // 2-byte sequences.
+  //
+
+  // U+0283 LATIN SMALL LETTER ESH
+utfTests.append(
+  UTFTest(
+    string: "\u{0283}",
+    utf8: [ 0xCA, 0x83 ],
+    utf16: [ 0x0283 ],
+    scalars: [ 0x0283 ]))
+
+  // U+03BA GREEK SMALL LETTER KAPPA
+  // U+1F79 GREEK SMALL LETTER OMICRON WITH OXIA
+  // U+03C3 GREEK SMALL LETTER SIGMA
+  // U+03BC GREEK SMALL LETTER MU
+  // U+03B5 GREEK SMALL LETTER EPSILON
+  // Note: U+1F79 is encoded below as three bytes (E1 BD B9); the other four
+  // scalars take two bytes each, so this entry mixes sequence lengths.
+utfTests.append(
+  UTFTest(
+    string: "\u{03BA}\u{1F79}\u{03C3}\u{03BC}\u{03B5}",
+    utf8: [ 0xCE, 0xBA, 0xE1, 0xBD, 0xB9, 0xCF, 0x83, 0xCE, 0xBC, 0xCE, 0xB5 ],
+    utf16: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ],
+    scalars: [ 0x03BA, 0x1F79, 0x03C3, 0x03BC, 0x03B5 ]))
+
+  // U+0430 CYRILLIC SMALL LETTER A
+  // U+0431 CYRILLIC SMALL LETTER BE
+  // U+0432 CYRILLIC SMALL LETTER VE
+utfTests.append(
+  UTFTest(
+    string: "\u{0430}\u{0431}\u{0432}",
+    utf8: [ 0xD0, 0xB0, 0xD0, 0xB1, 0xD0, 0xB2 ],
+    utf16: [ 0x0430, 0x0431, 0x0432 ],
+    scalars: [ 0x0430, 0x0431, 0x0432 ]))
+
+  //
+  // 3-byte sequences.
+  //
+
+  // U+4F8B CJK UNIFIED IDEOGRAPH-4F8B
+  // U+6587 CJK UNIFIED IDEOGRAPH-6587
+utfTests.append(
+  UTFTest(
+    string: "\u{4F8b}\u{6587}",
+    utf8: [ 0xE4, 0xBE, 0x8B, 0xE6, 0x96, 0x87 ],
+    utf16: [ 0x4F8B, 0x6587 ],
+    scalars: [ 0x4F8B, 0x6587 ]))
+
+  // U+D55C HANGUL SYLLABLE HAN
+  // U+AE00 HANGUL SYLLABLE GEUL
+utfTests.append(
+  UTFTest(
+    string: "\u{d55c}\u{ae00}",
+    utf8: [ 0xED, 0x95, 0x9C, 0xEA, 0xB8, 0x80 ],
+    utf16: [ 0xD55C, 0xAE00 ],
+    scalars: [ 0xD55C, 0xAE00 ]))
+
+  // The same two syllables written as conjoining jamo:
+  // U+1112 HANGUL CHOSEONG HIEUH
+  // U+1161 HANGUL JUNGSEONG A
+  // U+11AB HANGUL JONGSEONG NIEUN
+  // U+1100 HANGUL CHOSEONG KIYEOK
+  // U+1173 HANGUL JUNGSEONG EU
+  // U+11AF HANGUL JONGSEONG RIEUL
+utfTests.append(
+  UTFTest(
+    string: "\u{1112}\u{1161}\u{11ab}\u{1100}\u{1173}\u{11af}",
+    utf8:
+      [ 0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
+        0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF ],
+    utf16: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ],
+    scalars: [ 0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF ]))
+
+
// U+3042 HIRAGANA LETTER A
+  // U+3044 HIRAGANA LETTER I
+  // U+3046 HIRAGANA LETTER U
+  // U+3048 HIRAGANA LETTER E
+  // U+304A HIRAGANA LETTER O
+utfTests.append(
+  UTFTest(
+    string: "\u{3042}\u{3044}\u{3046}\u{3048}\u{304a}",
+    utf8:
+      [ 0xE3, 0x81, 0x82, 0xE3, 0x81, 0x84, 0xE3, 0x81, 0x86,
+        0xE3, 0x81, 0x88, 0xE3, 0x81, 0x8A ],
+    utf16: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ],
+    scalars: [ 0x3042, 0x3044, 0x3046, 0x3048, 0x304A ]))
+
+  // U+D7FF (unassigned)
+  // Last code point before the surrogate range U+D800...U+DFFF.
+utfTests.append(
+  UTFTest(
+    string: "\u{D7FF}",
+    utf8: [ 0xED, 0x9F, 0xBF ],
+    utf16: [ 0xD7FF ],
+    scalars: [ 0xD7FF ]))
+
+  // U+E000 (private use)
+  // First code point after the surrogate range U+D800...U+DFFF.
+utfTests.append(
+  UTFTest(
+    string: "\u{E000}",
+    utf8: [ 0xEE, 0x80, 0x80 ],
+    utf16: [ 0xE000 ],
+    scalars: [ 0xE000 ]))
+
+  // U+FFFD REPLACEMENT CHARACTER
+  // Well-formed input here; the decoder tests also expect this scalar as the
+  // substitute for ill-formed input.
+utfTests.append(
+  UTFTest(
+    string: "\u{FFFD}",
+    utf8: [ 0xEF, 0xBF, 0xBD ],
+    utf16: [ 0xFFFD ],
+    scalars: [ 0xFFFD ]))
+
+  // U+FFFF (noncharacter)
+  // Largest scalar that fits in three UTF-8 bytes and one UTF-16 code unit.
+utfTests.append(
+  UTFTest(
+    string: "\u{FFFF}",
+    utf8: [ 0xEF, 0xBF, 0xBF ],
+    utf16: [ 0xFFFF ],
+    scalars: [ 0xFFFF ]))
+
+  //
+  // 4-byte sequences.
+  //
+
+  // The entries below put the same 4-byte scalar (a surrogate pair in UTF-16)
+  // after ASCII prefixes of increasing length.
+  // NOTE(review): presumably this exercises every alignment of the sequence
+  // inside the decoder's internal buffer -- confirm against the decoder.
+
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "\u{1F425}",
+    utf8: [ 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0xD83D, 0xDC25 ],
+    scalars: [ 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "A\u{1F425}",
+    utf8: [ 0x41, 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0x41, 0xD83D, 0xDC25 ],
+    scalars: [ 0x41, 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "AB\u{1F425}",
+    utf8: [ 0x41, 0x42, 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0x41, 0x42, 0xD83D, 0xDC25 ],
+    scalars: [ 0x41, 0x42, 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0043 LATIN CAPITAL LETTER C
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "ABC\u{1F425}",
+    utf8: [ 0x41, 0x42, 0x43, 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0x41, 0x42, 0x43, 0xD83D, 0xDC25 ],
+    scalars: [ 0x41, 0x42, 0x43, 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0043 LATIN CAPITAL LETTER C
+  // U+0044 LATIN CAPITAL LETTER D
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "ABCD\u{1F425}",
+    utf8: [ 0x41, 0x42, 0x43, 0x44, 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0x41, 0x42, 0x43, 0x44, 0xD83D, 0xDC25 ],
+    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0043 LATIN CAPITAL LETTER C
+  // U+0044 LATIN CAPITAL LETTER D
+  // U+0045 LATIN CAPITAL LETTER E
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "ABCDE\u{1F425}",
+    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0xD83D, 0xDC25 ],
+    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x0001_F425 ]))
+
+  // Six-letter ASCII prefix followed by the same 4-byte scalar:
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+
// U+0043 LATIN CAPITAL LETTER C
+  // U+0044 LATIN CAPITAL LETTER D
+  // U+0045 LATIN CAPITAL LETTER E
+  // U+0046 LATIN CAPITAL LETTER F
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "ABCDEF\u{1F425}",
+    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0xD83D, 0xDC25 ],
+    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0043 LATIN CAPITAL LETTER C
+  // U+0044 LATIN CAPITAL LETTER D
+  // U+0045 LATIN CAPITAL LETTER E
+  // U+0046 LATIN CAPITAL LETTER F
+  // U+0047 LATIN CAPITAL LETTER G
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "ABCDEFG\u{1F425}",
+    utf8: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0xD83D, 0xDC25 ],
+    scalars: [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0043 LATIN CAPITAL LETTER C
+  // U+0044 LATIN CAPITAL LETTER D
+  // U+0045 LATIN CAPITAL LETTER E
+  // U+0046 LATIN CAPITAL LETTER F
+  // U+0047 LATIN CAPITAL LETTER G
+  // U+0048 LATIN CAPITAL LETTER H
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "ABCDEFGH\u{1F425}",
+    utf8:
+      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+        0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16:
+      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+        0xD83D, 0xDC25 ],
+    scalars:
+      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x0001_F425 ]))
+
+  // U+0041 LATIN CAPITAL LETTER A
+  // U+0042 LATIN CAPITAL LETTER B
+  // U+0043 LATIN CAPITAL LETTER C
+  // U+0044 LATIN CAPITAL LETTER D
+  // U+0045 LATIN CAPITAL LETTER E
+  // U+0046 LATIN CAPITAL LETTER F
+  // U+0047 LATIN CAPITAL LETTER G
+  // U+0048 LATIN CAPITAL LETTER H
+  // U+0049 LATIN CAPITAL LETTER I
+  // U+1F425 FRONT-FACING BABY CHICK
+utfTests.append(
+  UTFTest(
+    string: "ABCDEFGHI\u{1F425}",
+    utf8:
+      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+        0xF0, 0x9F, 0x90, 0xA5 ],
+    utf16:
+      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+        0xD83D, 0xDC25 ],
+    scalars:
+      [ 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x0001_F425 ]))
+
+  // The remaining entries are single supplementary-plane scalars. In the
+  // UTF-16 data they pair the lead surrogates 0xD800, 0xDB40 and 0xDBFF with
+  // the trail surrogates 0xDC00, 0xDD00 and 0xDFFF.
+
+  // U+10000 LINEAR B SYLLABLE B008 A
+utfTests.append(
+  UTFTest(
+    string: "\u{10000}",
+    utf8: [ 0xF0, 0x90, 0x80, 0x80 ],
+    utf16: [ 0xD800, 0xDC00 ],
+    scalars: [ 0x0001_0000 ]))
+
+  // U+10100 AEGEAN WORD SEPARATOR LINE
+utfTests.append(
+  UTFTest(
+    string: "\u{10100}",
+    utf8: [ 0xF0, 0x90, 0x84, 0x80 ],
+    utf16: [ 0xD800, 0xDD00 ],
+    scalars: [ 0x0001_0100 ]))
+
+  // U+103FF (unassigned)
+utfTests.append(
+  UTFTest(
+    string: "\u{103FF}",
+    utf8: [ 0xF0, 0x90, 0x8F, 0xBF ],
+    utf16: [ 0xD800, 0xDFFF ],
+    scalars: [ 0x0001_03FF ]))
+
+  // U+E0000 (unassigned)
+utfTests.append(
+  UTFTest(
+    string: "\u{E0000}",
+    utf8: [ 0xF3, 0xA0, 0x80, 0x80 ],
+    utf16: [ 0xDB40, 0xDC00 ],
+    scalars: [ 0x000E_0000 ]))
+
+  // U+E0100 VARIATION SELECTOR-17
+utfTests.append(
+  UTFTest(
+    string: "\u{E0100}",
+    utf8: [ 0xF3, 0xA0, 0x84, 0x80 ],
+    utf16: [ 0xDB40, 0xDD00 ],
+    scalars: [ 0x000E_0100 ]))
+
+  // U+E03FF (unassigned)
+utfTests.append(
+  UTFTest(
+    string: "\u{E03FF}",
+    utf8: [ 0xF3, 0xA0, 0x8F, 0xBF ],
+    utf16: [ 0xDB40, 0xDFFF ],
+    scalars: [ 0x000E_03FF ]))
+
+  // U+10FC00 (private use)
+utfTests.append(
+  UTFTest(
+    string: "\u{10FC00}",
+    utf8: [ 0xF4, 0x8F, 0xB0, 0x80 ],
+    utf16: [ 0xDBFF, 0xDC00 ],
+    scalars: [ 0x0010_FC00 ]))
+
+  // U+10FD00 (private use)
+utfTests.append(
+  UTFTest(
+    string: "\u{10FD00}",
+    utf8: [ 0xF4, 0x8F, 0xB4, 0x80 ],
+    utf16: [ 0xDBFF, 0xDD00 ],
+    scalars: [ 0x0010_FD00 ]))
+
+  // U+10FFFF (private use, noncharacter)
+utfTests.append(
+  UTFTest(
+    string: "\u{10FFFF}",
+    utf8: [ 0xF4, 0x8F, 0xBF, 0xBF ],
+    utf16: [ 0xDBFF, 0xDFFF ],
+    scalars: [ 0x0010_FFFF ]))
+//===----------------------------------------------------------------------===//
+
+// Every `utfTests` entry must decode from its UTF-8 bytes to its scalars with
+// an empty repaired tail; failures are attributed to the entry's own `loc`.
+UTF8Decoder.test("SmokeTest").forEach(in: utfTests) {
+  test in
+
+  expectTrue(
+    checkDecodeUTF8(test.utf32, [], test.utf8),
+    stackTrace: test.loc.withCurrentLoc())
+  return ()
+}
+
+// In the tests below each call is checkDecodeUTF8(expectedHead,
+// expectedRepairedTail, bytes): a well-formed input lists its scalars in the
+// first array, an ill-formed one lists U+FFFD substitutes in the second.
+UTF8Decoder.test("FirstPossibleSequence") {
+  //
+  // First possible sequence of a certain length
+  //
+
+  // U+0000 NULL
+  expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ]))
+
+  // U+0080 PADDING CHARACTER
+  expectTrue(checkDecodeUTF8([ 0x0080 ], [], [ 0xc2, 0x80 ]))
+
+  // U+0800 SAMARITAN LETTER ALAF
+  expectTrue(checkDecodeUTF8(
+    [ 0x0800 ], [],
+    [ 0xe0, 0xa0, 0x80 ]))
+
+  // U+10000 LINEAR B SYLLABLE B008 A
+  expectTrue(checkDecodeUTF8(
+    [ 0x10000 ], [],
+    [ 0xf0, 0x90, 0x80, 0x80 ]))
+
+  // U+200000 (invalid)
+  // 5-byte form: one U+FFFD is expected per input byte.
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0x88, 0x80, 0x80, 0x80 ]))
+
+  // U+4000000 (invalid)
+  // 6-byte form: one U+FFFD is expected per input byte.
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0x84, 0x80, 0x80, 0x80, 0x80 ]))
+}
+
+UTF8Decoder.test("LastPossibleSequence") {
+  //
+  // Last possible sequence of a certain length
+  //
+
+  // U+007F DELETE
+  expectTrue(checkDecodeUTF8([ 0x007f ], [], [ 0x7f ]))
+
+  // U+07FF (unassigned)
+  expectTrue(checkDecodeUTF8([ 0x07ff ], [], [ 0xdf, 0xbf ]))
+
+  // U+FFFF (noncharacter)
+  expectTrue(checkDecodeUTF8(
+    [ 0xffff ], [],
+    [ 0xef, 0xbf, 0xbf ]))
+
+  // U+1FFFFF (invalid)
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf7, 0xbf, 0xbf, 0xbf ]))
+
+  // U+3FFFFFF (invalid)
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfb, 0xbf, 0xbf, 0xbf, 0xbf ]))
+
+  // U+7FFFFFFF (invalid)
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf ]))
+}
+
+UTF8Decoder.test("CodeSpaceBoundaryConditions") {
+  //
+  // Other boundary conditions
+  //
+
+  // U+D7FF (unassigned)
+  expectTrue(checkDecodeUTF8([ 0xd7ff ], [], [ 0xed, 0x9f, 0xbf ]))
+
+  // U+E000 (private use)
+  expectTrue(checkDecodeUTF8([ 0xe000 ], [], [ 0xee, 0x80, 0x80 ]))
+
+  // U+FFFD REPLACEMENT CHARACTER
+  expectTrue(checkDecodeUTF8([ 0xfffd ], [], [ 0xef, 0xbf, 0xbd ]))
+
+  // U+10FFFF (noncharacter)
+  expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ]))
+
+  // U+110000 (invalid)
+  // One past the last scalar value: one U+FFFD is expected per input byte.
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf4, 0x90, 0x80, 0x80 ]))
+}
+
+UTF8Decoder.test("UnexpectedContinuationBytes") {
+  //
+  // Unexpected continuation bytes
+  //
+
+  // A sequence of unexpected continuation bytes that don't follow a first
+  // byte, every byte is a maximal subpart.
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0x80, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xbf, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0x80, 0xbf, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0x80, 0xbf, 0x80, 0xbf ]))
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0x80, 0xbf, 0x82, 0xbf, 0xaa ]))
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0 ]))
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xaa, 0xb0, 0xbb, 0xbf, 0xaa, 0xa0, 0x8f ]))
+
+  // All continuation bytes (0x80--0xbf).
+  // 64 input bytes, 64 expected U+FFFD substitutes (8 rows of 8 each).
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+      0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+      0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+      0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+      0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+      0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+      0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+      0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf ]))
+}
+
+// Lead bytes that are not followed by any continuation byte: either another
+// lead byte comes next, or an ASCII space (0x20) does.
+UTF8Decoder.test("LonelyStartBytes") {
+  //
+  // Lonely start bytes
+  //
+
+  // Start bytes of 2-byte sequences (0xc0--0xdf).
+  // 32 consecutive lead bytes: one U+FFFD is expected for each.
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+      0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+      0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+      0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf ]))
+
+  // The same lead bytes, each followed by a space: the lead byte is expected
+  // to become U+FFFD and the space to decode unchanged as U+0020.
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
+    [ 0xc0, 0x20, 0xc1, 0x20, 0xc2, 0x20, 0xc3, 0x20,
+      0xc4, 0x20, 0xc5, 0x20, 0xc6, 0x20, 0xc7, 0x20,
+      0xc8, 0x20, 0xc9, 0x20, 0xca, 0x20, 0xcb, 0x20,
+      0xcc, 0x20, 0xcd, 0x20, 0xce, 0x20, 0xcf, 0x20,
+      0xd0, 0x20, 0xd1, 0x20, 0xd2, 0x20, 0xd3, 0x20,
+      0xd4, 0x20, 0xd5, 0x20, 0xd6, 0x20, 0xd7, 0x20,
+      0xd8, 0x20, 0xd9, 0x20, 0xda, 0x20, 0xdb, 0x20,
+      0xdc, 0x20, 0xdd, 0x20, 0xde, 0x20, 0xdf, 0x20 ]))
+
+  // Start bytes of 3-byte sequences (0xe0--0xef).
+  // Each group below repeats the 2-byte pattern: first the lead bytes back to
+  // back, then each lead byte followed by a space (0x20).
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
+      0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+      0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef ]))
+
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
+    [ 0xe0, 0x20, 0xe1, 0x20, 0xe2, 0x20, 0xe3, 0x20,
+      0xe4, 0x20, 0xe5, 0x20, 0xe6, 0x20, 0xe7, 0x20,
+      0xe8, 0x20, 0xe9, 0x20, 0xea, 0x20, 0xeb, 0x20,
+      0xec, 0x20, 0xed, 0x20, 0xee, 0x20, 0xef, 0x20 ]))
+
+  // Start bytes of 4-byte sequences (0xf0--0xf7).
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7 ]))
+
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020,
+      0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
+    [ 0xf0, 0x20, 0xf1, 0x20, 0xf2, 0x20, 0xf3, 0x20,
+      0xf4, 0x20, 0xf5, 0x20, 0xf6, 0x20, 0xf7, 0x20 ]))
+
+  // Start bytes of 5-byte sequences (0xf8--0xfb).
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0xf9, 0xfa, 0xfb ]))
+
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
+    [ 0xf8, 0x20, 0xf9, 0x20, 0xfa, 0x20, 0xfb, 0x20 ]))
+
+  // Start bytes of 6-byte sequences (0xfc--0xfd).
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xfd ]))
+
+  expectTrue(checkDecodeUTF8(
+    [], [ 0xfffd, 0x0020, 0xfffd, 0x0020 ],
+    [ 0xfc, 0x20, 0xfd, 0x20 ]))
+}
+
+UTF8Decoder.test("InvalidStartBytes") {
+  //
+  // Other bytes (0xc0--0xc1, 0xfe--0xff).
+  //
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfe ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xff ]))
+
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xc0, 0xc1, 0xfe, 0xff ]))
+
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfe, 0xfe, 0xff, 0xff ]))
+
+  // 0xfe / 0xff followed by five continuation bytes: one U+FFFD is expected
+  // for each of the six bytes.
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfe, 0x80, 0x80, 0x80, 0x80, 0x80 ]))
+
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xff, 0x80, 0x80, 0x80, 0x80, 0x80 ]))
+
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020, 0xfffd, 0x0020 ],
+    [ 0xc0, 0x20, 0xc1, 0x20, 0xfe, 0x20, 0xff, 0x20 ]))
+}
+
+UTF8Decoder.test("MissingContinuationBytes") {
+  //
+  // Sequences with one continuation byte missing
+  //
+
+  // In this first group the truncated prefix is expected to collapse into a
+  // single U+FFFD; a following 0x41 decodes normally as U+0041.
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc2 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xdf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xc2, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xdf, 0x41 ]))
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xa0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xa0, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe0, 0xbf, 0x41 ]))
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe1, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xec, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xe1, 0x80, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xec, 0xbf, 0x41 ]))
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed, 0x9f ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x80, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xed, 0x9f, 0x41 ]))
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xee, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xef, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xee, 0x80, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xef, 0xbf, 0x41 ]))
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0x90, 0x80, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf0, 0xbf, 0xbf, 0x41 ]))
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf1, 0x80, 0x80, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf3, 0xbf, 0xbf, 0x41 ]))
+
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x80, 0x80, 0x41 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0x0041 ], [ 0xf4, 0x8f, 0xbf, 0x41 ]))
+
+  // Overlong sequences with one trailing byte missing.
+  // From here on every input byte is expected to yield its own U+FFFD.
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xc1 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xe0, 0x9f ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf0, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf0, 0x8f, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0x80, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0x80, 0x80, 0x80, 0x80 ]))
+
+  // Sequences that represent surrogates with one trailing byte missing.
+  // High-surrogates
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xa0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xac ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xaf ]))
+  // Low-surrogates
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xb4 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xed, 0xbf ]))
+
+  // Ill-formed 4-byte sequences.
+  // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+1100xx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf4, 0x90, 0x80 ]))
+  // U+13FBxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf4, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf5, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf6, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf7, 0x80, 0x80 ]))
+  // U+1FFBxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf7, 0xbf, 0xbf ]))
+
+  // Ill-formed 5-byte sequences.
+  // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+2000xx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0x88, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0xbf, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf9, 0x80, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfa, 0x80, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfb, 0x80, 0x80, 0x80 ]))
+  // U+3FFFFxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfb, 0xbf, 0xbf, 0xbf ]))
+
+  // Ill-formed 6-byte sequences.
+  // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+40000xx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0x84, 0x80, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0xbf, 0xbf, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfd, 0x80, 0x80, 0x80, 0x80 ]))
+  // U+7FFFFFxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]))
+
+  //
+  // Sequences with two continuation bytes missing
+  //
+
+  // Truncated prefixes of 4-byte sequences: each is expected to collapse
+  // into a single U+FFFD.
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0x90 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4, 0x8f ]))
+
+  // Overlong sequences with two trailing bytes missing.
+  // Apart from the lone lead bytes 0xe0 and 0xed, every input in this stretch
+  // is expected to yield one U+FFFD per byte.
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xe0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf0, 0x8f ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0x80, 0x80, 0x80 ]))
+
+  // Sequences that represent surrogates with two trailing bytes missing.
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xed ]))
+
+  // Ill-formed 4-byte sequences.
+  // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+110yxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0x90 ]))
+  // U+13Fyxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf4, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf5, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf6, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0x80 ]))
+  // U+1FFyxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf7, 0xbf ]))
+
+  // Ill-formed 5-byte sequences.
+  // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+200yxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0x88, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf8, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xf9, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfa, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfb, 0x80, 0x80 ]))
+  // U+3FFFyxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfb, 0xbf, 0xbf ]))
+
+  // Ill-formed 6-byte sequences.
+  // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+4000yxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0x84, 0x80, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0xbf, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfd, 0x80, 0x80, 0x80 ]))
+  // U+7FFFFyxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfd, 0xbf, 0xbf, 0xbf ]))
+
+  //
+  // Sequences with three continuation bytes missing
+  //
+
+  // Lone lead bytes of 4-byte sequences: one U+FFFD each.
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf1 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf2 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf3 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf4 ]))
+
+  // Broken overlong sequences.
+  // (The [ 0xf0 ] case repeats an assertion from the group just above.)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf0 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0x80, 0x80 ]))
+
+  // Ill-formed 4-byte sequences.
+  // 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+14yyxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf5 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf6 ]))
+  // U+1Cyyxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf7 ]))
+
+  // Ill-formed 5-byte sequences.
+  // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+20yyxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0x88 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf8, 0xbf ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xf9, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfa, 0x80 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0x80 ]))
+  // U+3FCyyxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfb, 0xbf ]))
+
+  // Ill-formed 6-byte sequences.
+  // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+400yyxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0x84, 0x80 ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfc, 0xbf, 0xbf ]))
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfd, 0x80, 0x80 ]))
+  // U+7FFCyyxx (invalid)
+  expectTrue(checkDecodeUTF8(
+    [],
+    [ 0xfffd, 0xfffd, 0xfffd ],
+    [ 0xfd, 0xbf, 0xbf ]))
+
+  //
+  // Sequences with four continuation bytes missing
+  //
+
+  // Ill-formed 5-byte sequences.
+  // 111110uu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx
+  // U+uzyyxx (invalid)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf9 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfa ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ]))
+  // U+3zyyxx (invalid)
+  // (Repeats the [ 0xfb ] assertion on the line above.)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfb ]))
+
+  // Broken overlong sequences.
+  // (The [ 0xf8 ] case repeats an assertion from the group just above.)
+  expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xf8 ]))
+  expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x80 ]))
+
+  // Ill-formed 6-byte sequences.
+ // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+uzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0x84 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfc, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0x80 ])) + // U+7Fzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xfd, 0xbf ])) + + // + // Sequences with five continuation bytes missing + // + + // Ill-formed 6-byte sequences. + // 1111110u 10uuuuuu 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx + // U+uzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfc ])) + // U+uuzzyyxx (invalid) + expectTrue(checkDecodeUTF8([], [ 0xfffd ], [ 0xfd ])) + + // + // Consecutive sequences with trailing bytes missing + // + + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, /**/ 0xfffd, 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, /**/ 0xfffd, /**/ 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, + 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xc0, /**/ 0xe0, 0x80, /**/ 0xf0, 0x80, 0x80, + 0xf8, 0x80, 0x80, 0x80, + 0xfc, 0x80, 0x80, 0x80, 0x80, + 0xdf, /**/ 0xef, 0xbf, /**/ 0xf7, 0xbf, 0xbf, + 0xfb, 0xbf, 0xbf, 0xbf, + 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("OverlongSequences") { + // + // Overlong UTF-8 sequences + // + + // U+002F SOLIDUS + expectTrue(checkDecodeUTF8([ 0x002f ], [], [ 0x2f ])) + + // Overlong sequences of the above. 
+ expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xe0, 0x80, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x80, 0x80, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x80, 0x80, 0x80, 0xaf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf ])) + + // U+0000 NULL + expectTrue(checkDecodeUTF8([ 0x0000 ], [], [ 0x00 ])) + + // Overlong sequences of the above. + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xe0, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x80, 0x80, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x80, 0x80, 0x80, 0x80, 0x80 ])) + + // Other overlong and ill-formed sequences. 
+ expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc0, 0xbf ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0x80 ])) + expectTrue(checkDecodeUTF8([], [ 0xfffd, 0xfffd ], [ 0xc1, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xe0, 0x9f, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x8f, 0x80, 0x80 ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf0, 0x8f, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xf8, 0x87, 0xbf, 0xbf, 0xbf ])) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("IsolatedSurrogates") { + // Unicode 6.3.0: + // + // D71. High-surrogate code point: A Unicode code point in the range + // U+D800 to U+DBFF. + // + // D73. Low-surrogate code point: A Unicode code point in the range + // U+DC00 to U+DFFF. + + // Note: U+E0100 is <DB40 DD00> in UTF-16. 
+ + // High-surrogates + + // U+D800 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80 ])) + expectTrue(checkDecodeUTF8( + [ 0x0041 ], + [ 0xfffd, 0xfffd, 0xfffd, 0x0041 ], + [ 0x41, 0xed, 0xa0, 0x80, 0x41 ])) + + // U+DB40 (high half of U+E0100; its 3-byte form is ED AD 80, whereas ED AC A0 would be U+DB20) + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xad, 0x80 ])) + + // U+DBFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf ])) + + // Low-surrogates + + // U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xb0, 0x80 ])) + + // U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xb4, 0x80 ])) + + // U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("SurrogatePairs") { + // Surrogate pairs + + // U+D800 U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80 ])) + + // U+D800 U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80, 0xed, 0xb4, 0x80 ])) + + // U+D800 U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf ])) + + // U+DB40 U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xad, 0x80, 0xed, 0xb0, 0x80 ])) + + // U+DB40 U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xad, 0x80, 0xed, 0xb4, 0x80 ])) + + // U+DB40 U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xad, 0x80, 0xed, 0xbf, 0xbf ])) + + // U+DBFF U+DC00 + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80 ])) + + // U+DBFF U+DD00 + expectTrue(checkDecodeUTF8( + [], + [ 
0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf, 0xed, 0xb4, 0x80 ])) + + // U+DBFF U+DFFF + expectTrue(checkDecodeUTF8( + [], + [ 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd ], + [ 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf ])) +} + +UTF8Decoder.test("Noncharacters") { + // + // Noncharacters + // + + // Unicode 6.3.0: + // + // D14. Noncharacter: A code point that is permanently reserved for + // internal use and that should never be interchanged. Noncharacters + // consist of the values U+nFFFE and U+nFFFF (where n is from 0 to 0x10) + // and the values U+FDD0..U+FDEF. + + // U+FFFE + expectTrue(checkDecodeUTF8([ 0xfffe ], [], [ 0xef, 0xbf, 0xbe ])) + + // U+FFFF + expectTrue(checkDecodeUTF8([ 0xffff ], [], [ 0xef, 0xbf, 0xbf ])) + + // U+1FFFE + expectTrue(checkDecodeUTF8([ 0x1fffe ], [], [ 0xf0, 0x9f, 0xbf, 0xbe ])) + + // U+1FFFF + expectTrue(checkDecodeUTF8([ 0x1ffff ], [], [ 0xf0, 0x9f, 0xbf, 0xbf ])) + + // U+2FFFE + expectTrue(checkDecodeUTF8([ 0x2fffe ], [], [ 0xf0, 0xaf, 0xbf, 0xbe ])) + + // U+2FFFF + expectTrue(checkDecodeUTF8([ 0x2ffff ], [], [ 0xf0, 0xaf, 0xbf, 0xbf ])) + + // U+3FFFE + expectTrue(checkDecodeUTF8([ 0x3fffe ], [], [ 0xf0, 0xbf, 0xbf, 0xbe ])) + + // U+3FFFF + expectTrue(checkDecodeUTF8([ 0x3ffff ], [], [ 0xf0, 0xbf, 0xbf, 0xbf ])) + + // U+4FFFE + expectTrue(checkDecodeUTF8([ 0x4fffe ], [], [ 0xf1, 0x8f, 0xbf, 0xbe ])) + + // U+4FFFF + expectTrue(checkDecodeUTF8([ 0x4ffff ], [], [ 0xf1, 0x8f, 0xbf, 0xbf ])) + + // U+5FFFE + expectTrue(checkDecodeUTF8([ 0x5fffe ], [], [ 0xf1, 0x9f, 0xbf, 0xbe ])) + + // U+5FFFF + expectTrue(checkDecodeUTF8([ 0x5ffff ], [], [ 0xf1, 0x9f, 0xbf, 0xbf ])) + + // U+6FFFE + expectTrue(checkDecodeUTF8([ 0x6fffe ], [], [ 0xf1, 0xaf, 0xbf, 0xbe ])) + + // U+6FFFF + expectTrue(checkDecodeUTF8([ 0x6ffff ], [], [ 0xf1, 0xaf, 0xbf, 0xbf ])) + + // U+7FFFE + expectTrue(checkDecodeUTF8([ 0x7fffe ], [], [ 0xf1, 0xbf, 0xbf, 0xbe ])) + + // U+7FFFF + expectTrue(checkDecodeUTF8([ 0x7ffff ], [], [ 0xf1, 
0xbf, 0xbf, 0xbf ])) + + // U+8FFFE + expectTrue(checkDecodeUTF8([ 0x8fffe ], [], [ 0xf2, 0x8f, 0xbf, 0xbe ])) + + // U+8FFFF + expectTrue(checkDecodeUTF8([ 0x8ffff ], [], [ 0xf2, 0x8f, 0xbf, 0xbf ])) + + // U+9FFFE + expectTrue(checkDecodeUTF8([ 0x9fffe ], [], [ 0xf2, 0x9f, 0xbf, 0xbe ])) + + // U+9FFFF + expectTrue(checkDecodeUTF8([ 0x9ffff ], [], [ 0xf2, 0x9f, 0xbf, 0xbf ])) + + // U+AFFFE + expectTrue(checkDecodeUTF8([ 0xafffe ], [], [ 0xf2, 0xaf, 0xbf, 0xbe ])) + + // U+AFFFF + expectTrue(checkDecodeUTF8([ 0xaffff ], [], [ 0xf2, 0xaf, 0xbf, 0xbf ])) + + // U+BFFFE + expectTrue(checkDecodeUTF8([ 0xbfffe ], [], [ 0xf2, 0xbf, 0xbf, 0xbe ])) + + // U+BFFFF + expectTrue(checkDecodeUTF8([ 0xbffff ], [], [ 0xf2, 0xbf, 0xbf, 0xbf ])) + + // U+CFFFE + expectTrue(checkDecodeUTF8([ 0xcfffe ], [], [ 0xf3, 0x8f, 0xbf, 0xbe ])) + + // U+CFFFF + expectTrue(checkDecodeUTF8([ 0xcfffF ], [], [ 0xf3, 0x8f, 0xbf, 0xbf ])) + + // U+DFFFE + expectTrue(checkDecodeUTF8([ 0xdfffe ], [], [ 0xf3, 0x9f, 0xbf, 0xbe ])) + + // U+DFFFF + expectTrue(checkDecodeUTF8([ 0xdffff ], [], [ 0xf3, 0x9f, 0xbf, 0xbf ])) + + // U+EFFFE + expectTrue(checkDecodeUTF8([ 0xefffe ], [], [ 0xf3, 0xaf, 0xbf, 0xbe ])) + + // U+EFFFF + expectTrue(checkDecodeUTF8([ 0xeffff ], [], [ 0xf3, 0xaf, 0xbf, 0xbf ])) + + // U+FFFFE + expectTrue(checkDecodeUTF8([ 0xffffe ], [], [ 0xf3, 0xbf, 0xbf, 0xbe ])) + + // U+FFFFF + expectTrue(checkDecodeUTF8([ 0xfffff ], [], [ 0xf3, 0xbf, 0xbf, 0xbf ])) + + // U+10FFFE + expectTrue(checkDecodeUTF8([ 0x10fffe ], [], [ 0xf4, 0x8f, 0xbf, 0xbe ])) + + // U+10FFFF + expectTrue(checkDecodeUTF8([ 0x10ffff ], [], [ 0xf4, 0x8f, 0xbf, 0xbf ])) + + // U+FDD0 + expectTrue(checkDecodeUTF8([ 0xfdd0 ], [], [ 0xef, 0xb7, 0x90 ])) + + // U+FDD1 + expectTrue(checkDecodeUTF8([ 0xfdd1 ], [], [ 0xef, 0xb7, 0x91 ])) + + // U+FDD2 + expectTrue(checkDecodeUTF8([ 0xfdd2 ], [], [ 0xef, 0xb7, 0x92 ])) + + // U+FDD3 + expectTrue(checkDecodeUTF8([ 0xfdd3 ], [], [ 0xef, 0xb7, 0x93 ])) + + // U+FDD4 + 
expectTrue(checkDecodeUTF8([ 0xfdd4 ], [], [ 0xef, 0xb7, 0x94 ])) + + // U+FDD5 + expectTrue(checkDecodeUTF8([ 0xfdd5 ], [], [ 0xef, 0xb7, 0x95 ])) + + // U+FDD6 + expectTrue(checkDecodeUTF8([ 0xfdd6 ], [], [ 0xef, 0xb7, 0x96 ])) + + // U+FDD7 + expectTrue(checkDecodeUTF8([ 0xfdd7 ], [], [ 0xef, 0xb7, 0x97 ])) + + // U+FDD8 + expectTrue(checkDecodeUTF8([ 0xfdd8 ], [], [ 0xef, 0xb7, 0x98 ])) + + // U+FDD9 + expectTrue(checkDecodeUTF8([ 0xfdd9 ], [], [ 0xef, 0xb7, 0x99 ])) + + // U+FDDA + expectTrue(checkDecodeUTF8([ 0xfdda ], [], [ 0xef, 0xb7, 0x9a ])) + + // U+FDDB + expectTrue(checkDecodeUTF8([ 0xfddb ], [], [ 0xef, 0xb7, 0x9b ])) + + // U+FDDC + expectTrue(checkDecodeUTF8([ 0xfddc ], [], [ 0xef, 0xb7, 0x9c ])) + + // U+FDDD + expectTrue(checkDecodeUTF8([ 0xfddd ], [], [ 0xef, 0xb7, 0x9d ])) + + // U+FDDE + expectTrue(checkDecodeUTF8([ 0xfdde ], [], [ 0xef, 0xb7, 0x9e ])) + + // U+FDDF + expectTrue(checkDecodeUTF8([ 0xfddf ], [], [ 0xef, 0xb7, 0x9f ])) + + // U+FDE0 + expectTrue(checkDecodeUTF8([ 0xfde0 ], [], [ 0xef, 0xb7, 0xa0 ])) + + // U+FDE1 + expectTrue(checkDecodeUTF8([ 0xfde1 ], [], [ 0xef, 0xb7, 0xa1 ])) + + // U+FDE2 + expectTrue(checkDecodeUTF8([ 0xfde2 ], [], [ 0xef, 0xb7, 0xa2 ])) + + // U+FDE3 + expectTrue(checkDecodeUTF8([ 0xfde3 ], [], [ 0xef, 0xb7, 0xa3 ])) + + // U+FDE4 + expectTrue(checkDecodeUTF8([ 0xfde4 ], [], [ 0xef, 0xb7, 0xa4 ])) + + // U+FDE5 + expectTrue(checkDecodeUTF8([ 0xfde5 ], [], [ 0xef, 0xb7, 0xa5 ])) + + // U+FDE6 + expectTrue(checkDecodeUTF8([ 0xfde6 ], [], [ 0xef, 0xb7, 0xa6 ])) + + // U+FDE7 + expectTrue(checkDecodeUTF8([ 0xfde7 ], [], [ 0xef, 0xb7, 0xa7 ])) + + // U+FDE8 + expectTrue(checkDecodeUTF8([ 0xfde8 ], [], [ 0xef, 0xb7, 0xa8 ])) + + // U+FDE9 + expectTrue(checkDecodeUTF8([ 0xfde9 ], [], [ 0xef, 0xb7, 0xa9 ])) + + // U+FDEA + expectTrue(checkDecodeUTF8([ 0xfdea ], [], [ 0xef, 0xb7, 0xaa ])) + + // U+FDEB + expectTrue(checkDecodeUTF8([ 0xfdeb ], [], [ 0xef, 0xb7, 0xab ])) + + // U+FDEC + expectTrue(checkDecodeUTF8([ 
0xfdec ], [], [ 0xef, 0xb7, 0xac ])) + + // U+FDED + expectTrue(checkDecodeUTF8([ 0xfded ], [], [ 0xef, 0xb7, 0xad ])) + + // U+FDEE + expectTrue(checkDecodeUTF8([ 0xfdee ], [], [ 0xef, 0xb7, 0xae ])) + + // U+FDEF + expectTrue(checkDecodeUTF8([ 0xfdef ], [], [ 0xef, 0xb7, 0xaf ])) + + // U+FDF0 (U+FDF0..U+FDFF lie just past the U+FDD0..U+FDEF noncharacter range) + expectTrue(checkDecodeUTF8([ 0xfdf0 ], [], [ 0xef, 0xb7, 0xb0 ])) + + // U+FDF1 + expectTrue(checkDecodeUTF8([ 0xfdf1 ], [], [ 0xef, 0xb7, 0xb1 ])) + + // U+FDF2 + expectTrue(checkDecodeUTF8([ 0xfdf2 ], [], [ 0xef, 0xb7, 0xb2 ])) + + // U+FDF3 + expectTrue(checkDecodeUTF8([ 0xfdf3 ], [], [ 0xef, 0xb7, 0xb3 ])) + + // U+FDF4 + expectTrue(checkDecodeUTF8([ 0xfdf4 ], [], [ 0xef, 0xb7, 0xb4 ])) + + // U+FDF5 + expectTrue(checkDecodeUTF8([ 0xfdf5 ], [], [ 0xef, 0xb7, 0xb5 ])) + + // U+FDF6 + expectTrue(checkDecodeUTF8([ 0xfdf6 ], [], [ 0xef, 0xb7, 0xb6 ])) + + // U+FDF7 + expectTrue(checkDecodeUTF8([ 0xfdf7 ], [], [ 0xef, 0xb7, 0xb7 ])) + + // U+FDF8 + expectTrue(checkDecodeUTF8([ 0xfdf8 ], [], [ 0xef, 0xb7, 0xb8 ])) + + // U+FDF9 + expectTrue(checkDecodeUTF8([ 0xfdf9 ], [], [ 0xef, 0xb7, 0xb9 ])) + + // U+FDFA + expectTrue(checkDecodeUTF8([ 0xfdfa ], [], [ 0xef, 0xb7, 0xba ])) + + // U+FDFB + expectTrue(checkDecodeUTF8([ 0xfdfb ], [], [ 0xef, 0xb7, 0xbb ])) + + // U+FDFC + expectTrue(checkDecodeUTF8([ 0xfdfc ], [], [ 0xef, 0xb7, 0xbc ])) + + // U+FDFD + expectTrue(checkDecodeUTF8([ 0xfdfd ], [], [ 0xef, 0xb7, 0xbd ])) + + // U+FDFE + expectTrue(checkDecodeUTF8([ 0xfdfe ], [], [ 0xef, 0xb7, 0xbe ])) + + // U+FDFF + expectTrue(checkDecodeUTF8([ 0xfdff ], [], [ 0xef, 0xb7, 0xbf ])) +} + +var UTF16Decoder = TestSuite("UTF16Decoder") + +UTF16Decoder.test("UTF16.transcodedLength") { + do { + let u8: [UTF8.CodeUnit] = [ 0, 1, 2, 3, 4, 5 ] + let (count, isASCII) = UTF16.transcodedLength( + of: u8.makeIterator(), + decodedAs: UTF8.self, + repairingIllFormedSequences: false)! + expectEqual(6, count) + expectTrue(isASCII) + } + + do { + // U+24B62, a scalar outside the BMP (UTF-8: F0 A4 AD A2; two UTF-16 code units). 
+ let u8: [UTF8.CodeUnit] = [ 0xF0, 0xA4, 0xAD, 0xA2 ] + let (count, isASCII) = UTF16.transcodedLength( + of: u8.makeIterator(), + decodedAs: UTF8.self, + repairingIllFormedSequences: false)! + expectEqual(2, count) + expectFalse(isASCII) + } + + do { + let u16: [UTF16.CodeUnit] = [ 6, 7, 8, 9, 10, 11 ] + let (count, isASCII) = UTF16.transcodedLength( + of: u16.makeIterator(), + decodedAs: UTF16.self, + repairingIllFormedSequences: false)! + expectEqual(6, count) + expectTrue(isASCII) + } +} + +UTF16Decoder.test("Decoding1").forEach(in: utfTests) { + test in + + expectTrue( + checkDecodeUTF16( + test.utf32, test.utf32RepairedTail, test.utf16), + stackTrace: test.loc.withCurrentLoc()) + return () +} + +UTF16Decoder.test("Decoding2") { + for (name, batch) in utf16Tests { + print("Batch: \(name)") + for test in batch { + expectTrue(checkDecodeUTF16(test.scalarsHead, test.scalarsRepairedTail, + test.encoded), stackTrace: test.loc.withCurrentLoc()) + } + } +} + +public struct UTF16Test { + public let scalarsHead: [UInt32] + public let scalarsRepairedTail: [UInt32] + public let encoded: [UInt16] + public let loc: SourceLoc + + public init( + _ scalarsHead: [UInt32], _ scalarsRepairedTail: [UInt32], + _ encoded: [UInt16], + file: String = #file, line: UInt = #line + ) { + self.scalarsHead = scalarsHead + self.scalarsRepairedTail = scalarsRepairedTail + self.encoded = encoded + self.loc = SourceLoc(file, line, comment: "test data") + } +} + +public let utf16Tests = [ + "Incomplete": [ + // + // Incomplete sequences that end right before EOF. 
+ // + + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD ], [ 0xD800 ]), + + // U+D800 (high-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xD800, 0xD800 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xD800 ]), + + // U+10000 LINEAR B SYLLABLE B008 A + // U+D800 (high-surrogate) + UTF16Test( + [ 0x0001_0000 ], [ 0xFFFD ], + [ 0xD800, 0xDC00, 0xD800 ]), + + // + // Incomplete sequences with more code units following them. + // + + // U+D800 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xD800, 0x0041 ]), + + // U+D800 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [], [ 0xFFFD, 0x0001_0000 ], + [ 0xD800, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0041 ], + [ 0x0041, 0xD800, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xD800, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xD800, 0xDB40, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xD800, 0xDB40, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 (high-surrogate) + // U+DBFF (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+D800 (high-surrogate) + // U+DB40 
(high-surrogate) + // U+DBFF (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xD800, 0xDB40, 0xDBFF, 0xD800, 0xDC00 ]), + ], + + "IllFormed": [ + // + // Low-surrogate right before EOF. + // + + // U+DC00 (low-surrogate) + UTF16Test([], [ 0xFFFD ], [ 0xDC00 ]), + + // U+DC00 (low-surrogate) + // U+DC00 (low-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + UTF16Test([ 0x0041 ], [ 0xFFFD ], [ 0x0041, 0xDC00 ]), + + // U+10000 LINEAR B SYLLABLE B008 A + // U+DC00 (low-surrogate) + UTF16Test( + [ 0x0001_0000 ], [ 0xFFFD ], + [ 0xD800, 0xDC00, 0xDC00 ]), + + // + // Low-surrogate with more code units following it. + // + + // U+DC00 (low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test([], [ 0xFFFD, 0x0041 ], [ 0xDC00, 0x0041 ]), + + // U+DC00 (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [], [ 0xFFFD, 0x0001_0000 ], + [ 0xDC00, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0041 ], + [ 0x0041, 0xDC00, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xDC00, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xDC00, 0xDD00, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xDC00, 0xDD00, 0xD800, 0xDC00 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+DFFF 
(low-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0041 ], + [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0x0041 ]), + + // U+0041 LATIN CAPITAL LETTER A + // U+DC00 (low-surrogate) + // U+DD00 (low-surrogate) + // U+DFFF (low-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [ 0x0041 ], [ 0xFFFD, 0xFFFD, 0xFFFD, 0x0001_0000 ], + [ 0x0041, 0xDC00, 0xDD00, 0xDFFF, 0xD800, 0xDC00 ]), + + // + // Low-surrogate followed by high-surrogate. + // + + // U+DC00 (low-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xD800 ]), + + // U+DC00 (low-surrogate) + // U+DB40 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDB40 ]), + + // U+DC00 (low-surrogate) + // U+DBFF (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDC00, 0xDBFF ]), + + + // U+DD00 (low-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xD800 ]), + + // U+DD00 (low-surrogate) + // U+DB40 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDB40 ]), + + // U+DD00 (low-surrogate) + // U+DBFF (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDD00, 0xDBFF ]), + + + // U+DFFF (low-surrogate) + // U+D800 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xD800 ]), + + // U+DFFF (low-surrogate) + // U+DB40 (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDB40 ]), + + // U+DFFF (low-surrogate) + // U+DBFF (high-surrogate) + UTF16Test([], [ 0xFFFD, 0xFFFD ], [ 0xDFFF, 0xDBFF ]), + + + // U+DC00 (low-surrogate) + // U+D800 (high-surrogate) + // U+0041 LATIN CAPITAL LETTER A + UTF16Test( + [], [ 0xFFFD, 0xFFFD, 0x0041 ], + [ 0xDC00, 0xD800, 0x0041 ]), + + // U+DC00 (low-surrogate) + // U+D800 (high-surrogate) + // U+10000 LINEAR B SYLLABLE B008 A + UTF16Test( + [], [ 0xFFFD, 0xFFFD, 0x10000 ], + [ 0xDC00, 0xD800, 0xD800, 0xDC00 ]), + ], +] + +runAllTests() + +#else +//===--- benchmarking 
-----------------------------------------------------===// + +@inline(never) +public func run_UTF8Decode(_ N: Int) { + // 1-byte sequences + // This test case is the longest as it's the most performance sensitive. + let ascii = "Swift is a multi-paradigm, compiled programming language created for iOS, OS X, watchOS, tvOS and Linux development by Apple Inc. Swift is designed to work with Apple's Cocoa and Cocoa Touch frameworks and the large body of existing Objective-C code written for Apple products. Swift is intended to be more resilient to erroneous code (\"safer\") than Objective-C and also more concise. It is built with the LLVM compiler framework included in Xcode 6 and later and uses the Objective-C runtime, which allows C, Objective-C, C++ and Swift code to run within a single program." + // 2-byte sequences + let russian = "Ру́сский язы́к один из восточнославянских языков, национальный язык русского народа." + // 3-byte sequences + let japanese = "日本語(にほんご、にっぽんご)は、主に日本国内や日本人同士の間で使われている言語である。" + // 4-byte sequences + // Most commonly emoji, which are usually mixed with other text. + let emoji = "Panda 🐼, Dog 🐶, Cat 🐱, Mouse 🐭." 
+ + let strings = [ascii, russian, japanese, emoji].map { Array($0.utf8) } + + func isEmpty(_ result: UnicodeDecodingResult) -> Bool { + switch result { + case .emptyInput: + return true + default: + return false + } + } + + var total: UInt32 = 0 + + for _ in 1...200*N { + for string in strings { +#if BASELINE + _ = transcode( + string.makeIterator(), from: UTF8.self, to: UTF32.self, + stoppingOnError: false + ) { + total = total &+ $0 + } +#else + #if FORWARD + var it = string.makeIterator() + typealias D = UTF8.ForwardDecoder + D.decode(&it, repairingIllFormedSequences: true) { total = total &+ $0.value } + #elseif REVERSE + var it = string.reversed().makeIterator() + typealias D = UTF8.ReverseDecoder + D.decode(&it, repairingIllFormedSequences: true) { total = total &+ $0.value } + #elseif SEQUENCE + for s in Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) { + total = total &+ s.value + } + #elseif COLLECTION + let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) + var i = scalars.startIndex + while i != scalars.endIndex { + total = total &+ scalars[i].value + i = scalars.index(after: i) + } +#elseif REVERSE_COLLECTION + let scalars = Unicode.DefaultScalarView(string, fromEncoding: UTF8.self) + var i = scalars.endIndex + while i != scalars.startIndex { + i = scalars.index(before: i) + total = total &+ scalars[i].value + } + #else + Error_Unknown_Benchmark() + #endif +#endif + } + } + if CommandLine.arguments.count > 1000 { print(total) } +} + +run_UTF8Decode(10000) +#endif + diff --git a/validation-test/compiler_crashers_2_fixed/0110-sr4786.swift b/validation-test/compiler_crashers_2_fixed/0110-sr4786.swift new file mode 100644 index 0000000000000..7712938bfb220 --- /dev/null +++ b/validation-test/compiler_crashers_2_fixed/0110-sr4786.swift @@ -0,0 +1,13 @@ +// RUN: not %target-swift-frontend %s -typecheck + +public protocol _UTFEncoding { + associatedtype EncodedScalar where EncodedScalar == Int +} + +public protocol 
UnicodeEncoding { + associatedtype EncodedScalar: BidirectionalCollection +} + +public protocol _UTFParser { + associatedtype Encoding: UnicodeEncoding, _UTFEncoding +} diff --git a/validation-test/compiler_crashers/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift b/validation-test/compiler_crashers_fixed/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift similarity index 89% rename from validation-test/compiler_crashers/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift rename to validation-test/compiler_crashers_fixed/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift index 8df4f0e359b98..8be1db13a9f99 100644 --- a/validation-test/compiler_crashers/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift +++ b/validation-test/compiler_crashers_fixed/28706-conformance-failed-to-find-pas-conformance-to-known-protocol.swift @@ -6,5 +6,5 @@ // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors // REQUIRES: asserts -// RUN: not --crash %target-swift-frontend %s -emit-ir +// RUN: not %target-swift-frontend %s -emit-ir protocol P{let c{}typealias e:RangeReplaceableCollection}extension P{typealias e:a diff --git a/validation-test/compiler_crashers/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift b/validation-test/compiler_crashers_fixed/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift similarity index 88% rename from validation-test/compiler_crashers/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift rename to validation-test/compiler_crashers_fixed/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift index 456a3b45861ad..68538284edaf0 100644 --- a/validation-test/compiler_crashers/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift 
+++ b/validation-test/compiler_crashers_fixed/28758-swift-genericsignaturebuilder-resolvesuperconformance-swift-genericsignaturebuil.swift @@ -5,5 +5,5 @@ // See https://swift.org/LICENSE.txt for license information // See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors -// RUN: not --crash %target-swift-frontend %s -emit-ir +// RUN: not %target-swift-frontend %s -emit-ir protocol P{{}typealias e:a{}}class a:P=extension P{typealias e:Self diff --git a/validation-test/compiler_crashers_fixed/28764-swift-protocolconformanceref-llvm-function-ref-swift-protocolconformanceref-swif.swift b/validation-test/compiler_crashers_fixed/28764-swift-protocolconformanceref-llvm-function-ref-swift-protocolconformanceref-swif.swift new file mode 100644 index 0000000000000..f254454afec43 --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28764-swift-protocolconformanceref-llvm-function-ref-swift-protocolconformanceref-swif.swift @@ -0,0 +1,9 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// RUN: not %target-swift-frontend %s -emit-ir +protocol P{typealias a}{protocol A:P{{}class a{{}}typealias a:RangeReplaceableCollection diff --git a/validation-test/compiler_crashers_fixed/28788-conformance-isconcrete-concrete-isexistentialtype.swift b/validation-test/compiler_crashers_fixed/28788-conformance-isconcrete-concrete-isexistentialtype.swift new file mode 100644 index 0000000000000..f0e4bf627a347 --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28788-conformance-isconcrete-concrete-isexistentialtype.swift @@ -0,0 +1,13 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// REQUIRES: asserts +// RUN: not %target-swift-frontend %s -emit-ir +protocol P{ +typealias e:RangeReplaceableCollection +}{}extension P{{}func e +typealias e:FlattenCollection diff --git a/validation-test/compiler_crashers_fixed/28793-nestedpabyname-didnt-find-the-associated-type-we-wanted.swift b/validation-test/compiler_crashers_fixed/28793-nestedpabyname-didnt-find-the-associated-type-we-wanted.swift new file mode 100644 index 0000000000000..efc3af830ae0b --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28793-nestedpabyname-didnt-find-the-associated-type-we-wanted.swift @@ -0,0 +1,13 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// REQUIRES: asserts +// RUN: not %target-swift-frontend %s -emit-ir +protocol A:RangeReplaceableCollection +protocol P{ +protocol A +class a:A{}typealias a:A{}typealias a:RangeReplaceableCollection diff --git a/validation-test/compiler_crashers_fixed/28802-constrainttype-missing-constraint-type.swift b/validation-test/compiler_crashers_fixed/28802-constrainttype-missing-constraint-type.swift new file mode 100644 index 0000000000000..df54499ba8b37 --- /dev/null +++ b/validation-test/compiler_crashers_fixed/28802-constrainttype-missing-constraint-type.swift @@ -0,0 +1,10 @@ +// This source file is part of the Swift.org open source project +// Copyright (c) 2014 - 2017 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors + +// REQUIRES: asserts +// RUN: not %target-swift-frontend %s -emit-ir +class a