Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add email and idn-email format support #103

Merged
merged 7 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 4 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,23 +332,10 @@ val valid = schema.validate(elementToValidate, errors::add)

## Format assertion

The library supports `format` assertion. Not all formats are supported yet. The supported formats are:
* date
* time
* date-time
* duration
* json-pointer
* relative-json-pointer
* ipv4
* ipv6
* uuid
* hostname
* idn-hostname
* uri
* uri-reference
* uri-template
* iri
* iri-reference
The library supports `format` assertion.
Almost all formats from [JSON schema draft 2020-12](https://json-schema.org/draft/2020-12/draft-bhutton-json-schema-validation-01#section-7.3) are supported.
Unsupported formats:
* regex

However, there is an API for implementing user-defined format validation.
The [FormatValidator](src/commonMain/kotlin/io/github/optimumcode/json/schema/ValidationError.kt) interface can be used for that.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import io.github.optimumcode.json.schema.internal.factories.AbstractAssertionFac
import io.github.optimumcode.json.schema.internal.formats.DateFormatValidator
import io.github.optimumcode.json.schema.internal.formats.DateTimeFormatValidator
import io.github.optimumcode.json.schema.internal.formats.DurationFormatValidator
import io.github.optimumcode.json.schema.internal.formats.EmailFormatValidator
import io.github.optimumcode.json.schema.internal.formats.HostnameFormatValidator
import io.github.optimumcode.json.schema.internal.formats.IdnEmailFormatValidator
import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValidator
import io.github.optimumcode.json.schema.internal.formats.IpV4FormatValidator
import io.github.optimumcode.json.schema.internal.formats.IpV6FormatValidator
Expand Down Expand Up @@ -82,6 +84,8 @@ internal sealed class FormatAssertionFactory(
"iri" to IriFormatValidator,
"iri-reference" to IriReferenceFormatValidator,
"uri-template" to UriTemplateFormatValidator,
"email" to EmailFormatValidator,
"idn-email" to IdnEmailFormatValidator,
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package io.github.optimumcode.json.schema.internal.formats

import de.cketti.codepoints.CodePoints
import de.cketti.codepoints.codePointAt
import io.github.optimumcode.json.schema.FormatValidationResult
import io.github.optimumcode.json.schema.FormatValidator
import io.github.optimumcode.json.schema.internal.util.allCodepoints

// Separator between the local part and the domain part; the last occurrence in the value is used.
private const val AT_CHAR = '@'
// Opening and closing brackets that mark the domain part as an IP address literal.
private const val IP_PART_START = '['
private const val IP_PART_END = ']'
// Delimiter of a quoted-string local part.
private const val QUOTE = '"'
// Code point of the backslash that starts a quoted pair inside a quoted string.
private const val BACK_SLASH = '\\'.code
// Tag that selects IPv6 validation for a bracketed address literal; without it IPv4 validation is used.
private const val IPV6_PREFIX = "IPv6:"
// Largest ASCII code point; anything above it is treated as non-ASCII by the email validators.
internal const val MAX_ASCII_CODEPOINT = 0x7F

/**
 * Base validator shared by the `email` and `idn-email` formats.
 *
 * The value is split on the last `@` into a local part and a domain part:
 * * the local part must be either a dot-separated sequence of non-empty atoms
 *   or a non-empty quoted string;
 * * the domain part must be either a bracketed IP address literal
 *   (`[1.2.3.4]` or `[IPv6:...]`) or a name accepted by [hostnameValidator].
 *
 * Subclasses can widen the accepted character set by overriding [isAText] and [isValidQText].
 *
 * @param hostnameValidator validator applied to a domain part that is not a bracketed IP literal
 */
internal abstract class AbstractEmailFormatValidator(
    private val hostnameValidator: AbstractStringFormatValidator,
) : AbstractStringFormatValidator() {
    /**
     * Validates [value] as an email address.
     *
     * @return [FormatValidator.Valid] when both the local part and the domain part are valid,
     * [FormatValidator.Invalid] otherwise
     */
    override fun validate(value: String): FormatValidationResult {
        if (value.isEmpty()) {
            return FormatValidator.Invalid()
        }
        // the last '@' is used because a quoted local part may itself contain '@'
        val delimiterIndex = value.lastIndexOf(AT_CHAR)
        if (delimiterIndex <= 0 || delimiterIndex == value.lastIndex) {
            // either local-part or domain is empty (or there is no '@' at all)
            return FormatValidator.Invalid()
        }
        val localPart = value.substring(0, delimiterIndex)
        val domainPart = value.substring(delimiterIndex + 1)
        return if (isValidLocalPart(localPart) && isValidDomainPart(domainPart)) {
            FormatValidator.Valid()
        } else {
            FormatValidator.Invalid()
        }
    }

    /** Checks the domain part: a bracketed value is an IP literal, anything else goes to [hostnameValidator]. */
    private fun isValidDomainPart(domainPart: String): Boolean {
        return if (domainPart.run { startsWith(IP_PART_START) && endsWith(IP_PART_END) }) {
            val ipPart = domainPart.substring(1, domainPart.lastIndex)
            isValidIpPart(ipPart)
        } else {
            hostnameValidator.validate(domainPart).isValid()
        }
    }

    /**
     * Checks the content of a bracketed address literal.
     *
     * A literal starting with the `IPv6:` tag is validated as an IPv6 address, everything else as IPv4.
     * The tag is matched ignoring case because ABNF string literals are case-insensitive.
     */
    private fun isValidIpPart(ipPart: String): Boolean {
        val result =
            if (ipPart.startsWith(IPV6_PREFIX, ignoreCase = true)) {
                // drop the tag by length: removePrefix is case-sensitive and would keep e.g. "ipv6:"
                IpV6FormatValidator.validate(ipPart.substring(IPV6_PREFIX.length))
            } else {
                IpV4FormatValidator.validate(ipPart)
            }
        return result.isValid()
    }

    /** Checks the local part: a value that starts or ends with a quote must be a quoted string. */
    private fun isValidLocalPart(localPart: String): Boolean {
        return if (localPart.run { startsWith(QUOTE) || endsWith(QUOTE) }) {
            isValidQuotedString(localPart)
        } else {
            isValidDotString(localPart)
        }
    }

    /** Checks that every dot-separated atom is non-empty and consists only of [isAText] code points. */
    private fun isValidDotString(localPart: String): Boolean {
        return Validation.eachSeparatedPart(localPart, separator = '.') {
            it.isNotEmpty() && it.allCodepoints(::isAText)
        }
    }

    /**
     * Returns `true` when [codepoint] is allowed in an unquoted atom of the local part.
     *
     * The base implementation accepts only ASCII letters, digits and the special characters
     * listed in [isSpecialCharacter].
     */
    protected open fun isAText(codepoint: Int): Boolean {
        if (codepoint > MAX_ASCII_CODEPOINT) {
            return false
        }
        val asChar = codepoint.toChar()
        return Validation.isAlpha(asChar) || Validation.isDigit(asChar) || isSpecialCharacter(asChar)
    }

    /** Returns `true` for the punctuation characters allowed in an unquoted atom. */
    private fun isSpecialCharacter(char: Char): Boolean =
        when (char) {
            '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '}', '~', '|' -> true
            else -> false
        }

    /** Checks a quoted local part: it must be wrapped in quotes and have non-empty, valid content. */
    private fun isValidQuotedString(localPart: String): Boolean {
        if (localPart.length <= 2) {
            // a lone quote or an empty quoted string
            return false
        }
        if (localPart.run { !startsWith(QUOTE) || !endsWith(QUOTE) }) {
            return false
        }
        val quotedContent = localPart.substring(1, localPart.lastIndex)
        return isValidQuotedContent(quotedContent)
    }

    /**
     * Checks the text between the quotes.
     *
     * Every code point must either satisfy [isValidQText] or be a backslash followed by
     * a character in the range from space to `~` (a quoted pair).
     */
    private fun isValidQuotedContent(quotedContent: String): Boolean {
        // cannot be empty at this point
        var index = 0
        val length = quotedContent.length
        while (index < length) {
            val codePoint = quotedContent.codePointAt(index)
            index += CodePoints.charCount(codePoint)
            if (codePoint != BACK_SLASH) {
                if (isValidQText(codePoint)) {
                    continue
                }
                return false
            }
            if (index >= length) {
                // last backslash is not allowed
                // E.g.: "\"
                return false
            }
            val nextChar = quotedContent.codePointAt(index)
            if (nextChar !in ' '.code..'~'.code) {
                // invalid quote pair
                return false
            }
            // always one because of condition above
            index += 1
        }
        return true
    }

    /**
     * Returns `true` when [codepoint] may appear unescaped inside a quoted local part.
     *
     * The base implementation accepts space, `!` and the range `#`..`~`; the quote itself is excluded.
     */
    protected open fun isValidQText(codepoint: Int): Boolean =
        // \ is checked explicitly
        codepoint == ' '.code || codepoint == '!'.code || codepoint in '#'.code..'~'.code
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package io.github.optimumcode.json.schema.internal.formats

/**
 * Validator registered for the `email` format: [AbstractEmailFormatValidator] with
 * [HostnameFormatValidator] supplied as the validator for the domain part.
 */
internal object EmailFormatValidator : AbstractEmailFormatValidator(HostnameFormatValidator)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package io.github.optimumcode.json.schema.internal.formats

/**
 * Validator registered for the `idn-email` format.
 *
 * It uses [IdnHostnameFormatValidator] for the domain part and additionally accepts
 * every non-ASCII code point in both unquoted and quoted local parts.
 */
internal object IdnEmailFormatValidator : AbstractEmailFormatValidator(IdnHostnameFormatValidator) {
    /** Accepts everything the base class accepts plus any non-ASCII code point. */
    override fun isAText(codepoint: Int): Boolean {
        if (super.isAText(codepoint)) {
            return true
        }
        return isUtf8NonAscii(codepoint)
    }

    /** Accepts everything the base class accepts plus any non-ASCII code point. */
    override fun isValidQText(codepoint: Int): Boolean {
        if (super.isValidQText(codepoint)) {
            return true
        }
        return isUtf8NonAscii(codepoint)
    }

    /**
     * Treats every code point greater than [MAX_ASCII_CODEPOINT] (0x7F) as allowed;
     * no finer-grained filtering of non-ASCII code points is performed here.
     */
    private fun isUtf8NonAscii(codepoint: Int): Boolean {
        return codepoint > MAX_ASCII_CODEPOINT
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package io.github.optimumcode.json.schema.internal.formats

import io.github.optimumcode.json.schema.internal.formats.Validation.isAlpha
import io.github.optimumcode.json.schema.internal.formats.Validation.isDigit

internal object UriSpec {
const val SCHEMA_DELIMITER = ':'
const val QUERY_DELIMITER = '?'
Expand Down Expand Up @@ -268,10 +271,6 @@ internal object UriSpec {
return str[index] == '%' && isHexDigit(str[index + 1]) && isHexDigit(str[index + 2])
}

fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z'

fun isDigit(c: Char): Boolean = c in '0'..'9'

private fun isPChar(c: Char): Boolean = isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@'

private fun isUnreserved(c: Char): Boolean = isAlpha(c) || isDigit(c) || c == '_' || c == '-' || c == '.' || c == '~'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import de.cketti.codepoints.CodePoints
import de.cketti.codepoints.codePointAt
import io.github.optimumcode.json.schema.FormatValidationResult
import io.github.optimumcode.json.schema.FormatValidator
import io.github.optimumcode.json.schema.internal.formats.Validation.eachSeparatedPart

internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
private const val EXPRESSION_START = '{'.code
Expand Down Expand Up @@ -121,28 +122,6 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
return eachSeparatedPart(varList, separator = ',', ::isValidVarSpec)
}

/**
 * Splits [value] on [separator] and checks every part with [isValid].
 *
 * Parts are visited left to right and the check stops at the first rejected part.
 * A leading, trailing or doubled separator produces an empty part that is passed to [isValid] as well.
 *
 * @return `true` when [isValid] accepted every part, `false` otherwise
 */
private inline fun eachSeparatedPart(
    value: String,
    separator: Char,
    isValid: (String) -> Boolean,
): Boolean {
    var lastSeparator = -1
    do {
        val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1)
        val part =
            if (separatorIndex < 0) {
                value.substring(lastSeparator + 1)
            } else {
                value.substring(lastSeparator + 1, separatorIndex)
            }
        if (!isValid(part)) {
            return false
        }
        lastSeparator = separatorIndex
        // '>= 0' (not '> 0'): a separator at index 0 must not end the loop before the remaining parts are checked
    } while (separatorIndex >= 0)
    return true
}

private fun isValidVarSpec(varSpec: String): Boolean {
if (varSpec.isEmpty()) {
return false
Expand Down Expand Up @@ -172,7 +151,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
return eachSeparatedPart(varName, separator = '.') { part ->
part.isNotEmpty() &&
UriSpec.hasValidCharsOrPctEncoded(part) {
UriSpec.isAlpha(it) || UriSpec.isDigit(it) || it == '_'
Validation.isAlpha(it) || Validation.isDigit(it) || it == '_'
}
}
}
Expand All @@ -186,7 +165,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
// to long value
return false
}
return maxLength.all(UriSpec::isDigit)
return maxLength.all(Validation::isDigit)
}

private fun isOperator(char: Char): Boolean =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package io.github.optimumcode.json.schema.internal.formats

/**
 * Character-class and string-splitting helpers shared by the format validators.
 */
internal object Validation {
    /** Returns `true` when [c] is an ASCII letter (`a`..`z` or `A`..`Z`). */
    fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z'

    /** Returns `true` when [c] is an ASCII decimal digit (`0`..`9`). */
    fun isDigit(c: Char): Boolean = c in '0'..'9'

    /**
     * Splits [value] on [separator] and checks every part with [isValid].
     *
     * Parts are visited left to right and the check stops at the first rejected part.
     * A leading, trailing or doubled separator produces an empty part that is passed to [isValid] as well.
     *
     * @return `true` when [isValid] accepted every part, `false` otherwise
     */
    inline fun eachSeparatedPart(
        value: String,
        separator: Char,
        isValid: (String) -> Boolean,
    ): Boolean {
        var lastSeparator = -1
        do {
            val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1)
            val part =
                if (separatorIndex < 0) {
                    value.substring(lastSeparator + 1)
                } else {
                    value.substring(lastSeparator + 1, separatorIndex)
                }
            if (!isValid(part)) {
                return false
            }
            lastSeparator = separatorIndex
            // '>= 0' (not '> 0'): a separator at index 0 must not end the loop before the remaining parts are checked
        } while (separatorIndex >= 0)
        return true
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,13 @@ internal inline fun CharSequence.forEachCodePointIndexed(
}
block(startIndex, firstChar.code)
}
}

/**
 * Returns `true` when [condition] holds for every code point of this sequence.
 *
 * Code points are visited through [forEachCodePointIndexed]; the first code point
 * rejected by [condition] ends the traversal with `false`.
 */
internal fun CharSequence.allCodepoints(condition: (Int) -> Boolean): Boolean {
    forEachCodePointIndexed { _, current ->
        val accepted = condition(current)
        if (!accepted) {
            // non-local return: leaves allCodepoints itself, not just the lambda
            return false
        }
    }
    return true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package io.github.optimumcode.json.schema.assertions.general.format

import io.kotest.core.spec.style.FunSpec

/**
 * Valid and invalid samples for the `email` format, run through the shared `formatValidationTestSuite`.
 */
class JsonSchemaEmailFormatValidationTest : FunSpec() {
    init {
        formatValidationTestSuite(
            format = "email",
            validTestCases =
                listOf(
                    // every special character allowed in an unquoted local part
                    "a2!#$%&'*+-/=?^_`{}~|@domain.com",
                    // quoted local part made of quoted pairs only
                    "\"\\\"\\ \\@\\!\\#\\[\\]\\~\"@example.com",
                    "\" !#[]~a2\"@example.com",
                    "test@[127.0.0.1]",
                    "test@[IPv6:FF01::101]",
                ),
            invalidTestCases =
                listOf(
                    TestCase("", "empty email"),
                    TestCase("@example.com", "empty local part"),
                    TestCase("test@", "empty domain part"),
                    TestCase("\"\"@example.com", "empty quoted string"),
                    // local part opens a quote but never closes it
                    TestCase("\"test@example.com", "only start quote"),
                    TestCase("test\"@example.com", "only end quote"),
                    TestCase("\"test\\\"@example.com", "quoted last quote"),
                    TestCase("\"te\\\nst\"@example.com", "invalid quoted character < space"),
                    TestCase("\"te\\\u007fst\"@example.com", "invalid quoted character > ~"),
                    TestCase("\"te\"st\"@example.com", "invalid character in quoted string"),
                    TestCase("test@[127.0.0.300]", "invalid IPv4 in domain part"),
                    TestCase("test@[IPv6:1:2:3:4:5:6:7:8:9]", "invalid IPv6 in domain part"),
                    TestCase("test@[FF01::101]", "valid IPv6 in domain part without prefix"),
                    // trailing dot leaves an empty last label
                    TestCase("test@hostname.", "invalid hostname in domain part"),
                    TestCase("te\nst@hostname", "invalid character < space"),
                    TestCase("te\u007fst@hostname", "invalid character > ~"),
                    TestCase("\"te\nst\"@hostname", "invalid character in quoted local part < space"),
                    TestCase("\"te\u007fst\"@hostname", "invalid character in quoted local part > ~"),
                ),
        )
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package io.github.optimumcode.json.schema.assertions.general.format

import io.kotest.core.spec.style.FunSpec

/**
 * Valid and invalid samples for the `idn-email` format, run through the shared `formatValidationTestSuite`.
 */
class JsonSchemaIdnEmailFormatValidationTest : FunSpec() {
    init {
        // non-ASCII local and domain parts, unquoted and quoted
        val accepted =
            listOf(
                "실례@실례.테스트",
                "\"실a\\~례\"@실례.테스트",
            )
        // U+007F (DEL) is rejected in both unquoted and quoted local parts
        val rejected =
            listOf(
                TestCase("실\u007F례@실례.테스트", "invalid codepoint in local part"),
                TestCase("\"실\u007F례\"@실례.테스트", "invalid codepoint in quoted local part"),
            )
        formatValidationTestSuite(
            format = "idn-email",
            validTestCases = accepted,
            invalidTestCases = rejected,
        )
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ class JsonSchemaUriTemplateFormatValidationTest : FunSpec() {
"https://simple.uri",
"https://test%20uri.com",
"https://testname/{first%20name}",
"https://testname/{first.name}",
"https://testname/{name_1.name_2}",
"https://\u00a0\ud7ff\uf900\ufdcf\ufdf0\uffef\uf8ff",
),
invalidTestCases =
listOf(
TestCase("https://example.com/{}", "empty expression"),
TestCase("https://example.com/{,}", "empty expression with var delimiter"),
TestCase("https://example.com/{test.}", "empty expression with name delimiter"),
TestCase("https://example.com/{te~st}", "invalid character in var name"),
TestCase("https://example.com/}", "end expression without start"),
TestCase("https://example.com/{t{e}st}", "expression inside expression"),
TestCase("https://example.com/{test:0}", "leading zero"),
Expand Down
Loading
Loading