Skip to content

Commit

Permalink
Add email and idn-email format support (#103)
Browse files Browse the repository at this point in the history
Related to #54
  • Loading branch information
OptimumCode authored Apr 29, 2024
1 parent ddaf1bd commit cdfe7dc
Show file tree
Hide file tree
Showing 13 changed files with 258 additions and 48 deletions.
21 changes: 4 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,23 +332,10 @@ val valid = schema.validate(elementToValidate, errors::add)

## Format assertion

The library supports `format` assertion. Not all formats are supported yet. The supported formats are:
* date
* time
* date-time
* duration
* json-pointer
* relative-json-pointer
* ipv4
* ipv6
* uuid
* hostname
* idn-hostname
* uri
* uri-reference
* uri-template
* iri
* iri-reference
The library supports `format` assertion.
Almost all formats from [JSON schema draft 2020-12](https://json-schema.org/draft/2020-12/draft-bhutton-json-schema-validation-01#section-7.3) are supported.
Unsupported formats:
* regex

However, there is an API for implementing user-defined format validation.
The [FormatValidator](src/commonMain/kotlin/io/github/optimumcode/json/schema/ValidationError.kt) interface can be used for that.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ import io.github.optimumcode.json.schema.internal.factories.AbstractAssertionFac
import io.github.optimumcode.json.schema.internal.formats.DateFormatValidator
import io.github.optimumcode.json.schema.internal.formats.DateTimeFormatValidator
import io.github.optimumcode.json.schema.internal.formats.DurationFormatValidator
import io.github.optimumcode.json.schema.internal.formats.EmailFormatValidator
import io.github.optimumcode.json.schema.internal.formats.HostnameFormatValidator
import io.github.optimumcode.json.schema.internal.formats.IdnEmailFormatValidator
import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValidator
import io.github.optimumcode.json.schema.internal.formats.IpV4FormatValidator
import io.github.optimumcode.json.schema.internal.formats.IpV6FormatValidator
Expand Down Expand Up @@ -82,6 +84,8 @@ internal sealed class FormatAssertionFactory(
"iri" to IriFormatValidator,
"iri-reference" to IriReferenceFormatValidator,
"uri-template" to UriTemplateFormatValidator,
"email" to EmailFormatValidator,
"idn-email" to IdnEmailFormatValidator,
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package io.github.optimumcode.json.schema.internal.formats

import de.cketti.codepoints.CodePoints
import de.cketti.codepoints.codePointAt
import io.github.optimumcode.json.schema.FormatValidationResult
import io.github.optimumcode.json.schema.FormatValidator
import io.github.optimumcode.json.schema.internal.util.allCodepoints

// Separator between the local part and the domain part of an address.
private const val AT_CHAR = '@'

// Brackets that wrap an IP address used in place of a hostname in the domain part.
private const val IP_PART_START = '['
private const val IP_PART_END = ']'

// Delimiter of a quoted local part.
private const val QUOTE = '"'

// Stored as a code point (Int) because it is compared against code points, not chars.
private const val BACK_SLASH = '\\'.code

// Tag that must precede an IPv6 address inside the brackets.
private const val IPV6_PREFIX = "IPv6:"

// Highest ASCII code point; anything above it is treated as non-ASCII by the email validators.
internal const val MAX_ASCII_CODEPOINT = 0x7F

/**
 * Shared implementation of the email-like format validators.
 *
 * The value is split at the last `@` into a local part and a domain part.
 * The local part is checked either as a dot-separated sequence of atoms or as a quoted string.
 * The domain part is checked either as a bracketed IP address or with [hostnameValidator].
 *
 * Subclasses can widen the set of accepted code points by overriding [isAText] and [isValidQText].
 *
 * @param hostnameValidator validator applied to a domain part that is not a bracketed IP address
 */
internal abstract class AbstractEmailFormatValidator(
    private val hostnameValidator: AbstractStringFormatValidator,
) : AbstractStringFormatValidator() {
    /**
     * Validates [value] as an email address.
     *
     * @return a valid result only when both the local part and the domain part are accepted
     */
    override fun validate(value: String): FormatValidationResult {
        val atIndex = value.lastIndexOf(AT_CHAR)
        // Rejects three cases at once:
        //  * no '@' at all (this also covers the empty string)
        //  * '@' is the first character => the local part is empty
        //  * '@' is the last character => the domain part is empty
        if (atIndex !in 1 until value.lastIndex) {
            return FormatValidator.Invalid()
        }
        val local = value.substring(0, atIndex)
        if (!isValidLocalPart(local)) {
            return FormatValidator.Invalid()
        }
        val domain = value.substring(atIndex + 1)
        if (!isValidDomainPart(domain)) {
            return FormatValidator.Invalid()
        }
        return FormatValidator.Valid()
    }

    /**
     * A domain wrapped in square brackets is validated as an IP address,
     * anything else is delegated to [hostnameValidator].
     */
    private fun isValidDomainPart(domainPart: String): Boolean {
        val isBracketed = domainPart.startsWith(IP_PART_START) && domainPart.endsWith(IP_PART_END)
        if (!isBracketed) {
            return hostnameValidator.validate(domainPart).isValid()
        }
        // strip the surrounding brackets
        val address = domainPart.substring(1, domainPart.lastIndex)
        return isValidIpPart(address)
    }

    /**
     * An address carrying the `IPv6:` tag is validated as IPv6 (without the tag),
     * every other address is validated as IPv4.
     */
    private fun isValidIpPart(ipPart: String): Boolean {
        val result =
            if (ipPart.startsWith(IPV6_PREFIX)) {
                IpV6FormatValidator.validate(ipPart.removePrefix(IPV6_PREFIX))
            } else {
                IpV4FormatValidator.validate(ipPart)
            }
        return result.isValid()
    }

    /**
     * A quote at either end routes the local part to the quoted-string check
     * (which then requires quotes at both ends); otherwise it is checked as a dot-string.
     */
    private fun isValidLocalPart(localPart: String): Boolean {
        val looksQuoted = localPart.startsWith(QUOTE) || localPart.endsWith(QUOTE)
        return if (looksQuoted) isValidQuotedString(localPart) else isValidDotString(localPart)
    }

    /**
     * Every dot-separated atom must be non-empty and consist only of code points accepted by [isAText].
     */
    private fun isValidDotString(localPart: String): Boolean =
        Validation.eachSeparatedPart(localPart, separator = '.') { atom ->
            atom.isNotEmpty() && atom.allCodepoints(::isAText)
        }

    /**
     * Tells whether [codepoint] may appear in an unquoted local part.
     * The base implementation accepts ASCII letters, ASCII digits and a fixed set of ASCII punctuation.
     */
    protected open fun isAText(codepoint: Int): Boolean {
        if (codepoint > MAX_ASCII_CODEPOINT) {
            return false
        }
        val char = codepoint.toChar()
        return when {
            Validation.isAlpha(char) -> true
            Validation.isDigit(char) -> true
            else -> isSpecialCharacter(char)
        }
    }

    /**
     * Punctuation accepted in an unquoted local part in addition to letters and digits.
     */
    private fun isSpecialCharacter(codepoint: Char): Boolean =
        when (codepoint) {
            '!', '#', '$', '%',
            '&', '\'', '*', '+',
            '-', '/', '=', '?',
            '^', '_', '`', '{',
            '}', '~', '|',
            -> true
            else -> false
        }

    /**
     * The local part must be longer than two characters (so the quoted content is not empty)
     * and must both start and end with a quote.
     */
    private fun isValidQuotedString(localPart: String): Boolean {
        val hasBothQuotes = localPart.startsWith(QUOTE) && localPart.endsWith(QUOTE)
        if (localPart.length <= 2 || !hasBothQuotes) {
            return false
        }
        // drop the surrounding quotes
        return isValidQuotedContent(localPart.substring(1, localPart.lastIndex))
    }

    /**
     * Walks the content between the quotes code point by code point.
     * A backslash must be followed by a character from the range space..tilde;
     * any other code point must be accepted by [isValidQText].
     */
    private fun isValidQuotedContent(quotedContent: String): Boolean {
        // the caller guarantees the content is not empty
        val end = quotedContent.length
        var position = 0
        while (position < end) {
            val current = quotedContent.codePointAt(position)
            position += CodePoints.charCount(current)
            when {
                current != BACK_SLASH -> {
                    if (!isValidQText(current)) {
                        return false
                    }
                }
                // a trailing backslash has nothing to escape, e.g.: "\"
                position >= end -> return false
                // the escaped character is outside of the allowed range
                quotedContent.codePointAt(position) !in ' '.code..'~'.code -> return false
                // the escaped character is in the space..tilde range, so it takes exactly one char
                else -> position += 1
            }
        }
        return true
    }

    /**
     * Tells whether [codepoint] may appear unescaped inside a quoted local part.
     * The base implementation accepts the range space..tilde except the quote character.
     * The backslash falls into that range but the caller handles it before calling this method.
     */
    protected open fun isValidQText(codepoint: Int): Boolean =
        codepoint in ' '.code..'~'.code && codepoint != QUOTE.code
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
package io.github.optimumcode.json.schema.internal.formats

/**
 * Validator registered for the `email` format.
 * Uses the checks of [AbstractEmailFormatValidator] unchanged and validates
 * a non-IP domain part with [HostnameFormatValidator].
 */
internal object EmailFormatValidator : AbstractEmailFormatValidator(HostnameFormatValidator)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package io.github.optimumcode.json.schema.internal.formats

/**
 * Validator registered for the `idn-email` format.
 *
 * Validates a non-IP domain part with [IdnHostnameFormatValidator] and, on top of what
 * [AbstractEmailFormatValidator] accepts, allows non-ASCII code points in the local part
 * (both in the unquoted and in the quoted form).
 */
internal object IdnEmailFormatValidator : AbstractEmailFormatValidator(IdnHostnameFormatValidator) {
    override fun isAText(codepoint: Int): Boolean {
        if (super.isAText(codepoint)) {
            return true
        }
        return isUtf8NonAscii(codepoint)
    }

    override fun isValidQText(codepoint: Int): Boolean {
        if (super.isValidQText(codepoint)) {
            return true
        }
        return isUtf8NonAscii(codepoint)
    }

    /**
     * Accepts every code point above [MAX_ASCII_CODEPOINT] (0x7F) without any further classification.
     */
    private fun isUtf8NonAscii(codepoint: Int): Boolean = MAX_ASCII_CODEPOINT < codepoint
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package io.github.optimumcode.json.schema.internal.formats

import io.github.optimumcode.json.schema.internal.formats.Validation.isAlpha
import io.github.optimumcode.json.schema.internal.formats.Validation.isDigit

internal object UriSpec {
const val SCHEMA_DELIMITER = ':'
const val QUERY_DELIMITER = '?'
Expand Down Expand Up @@ -268,10 +271,6 @@ internal object UriSpec {
return str[index] == '%' && isHexDigit(str[index + 1]) && isHexDigit(str[index + 2])
}

fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z'

fun isDigit(c: Char): Boolean = c in '0'..'9'

private fun isPChar(c: Char): Boolean = isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@'

private fun isUnreserved(c: Char): Boolean = isAlpha(c) || isDigit(c) || c == '_' || c == '-' || c == '.' || c == '~'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import de.cketti.codepoints.CodePoints
import de.cketti.codepoints.codePointAt
import io.github.optimumcode.json.schema.FormatValidationResult
import io.github.optimumcode.json.schema.FormatValidator
import io.github.optimumcode.json.schema.internal.formats.Validation.eachSeparatedPart

internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
private const val EXPRESSION_START = '{'.code
Expand Down Expand Up @@ -121,28 +122,6 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
return eachSeparatedPart(varList, separator = ',', ::isValidVarSpec)
}

private inline fun eachSeparatedPart(
value: String,
separator: Char,
isValid: (String) -> Boolean,
): Boolean {
var lastSeparator = -1
do {
val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1)
val part =
if (separatorIndex < 0) {
value.substring(lastSeparator + 1)
} else {
value.substring(lastSeparator + 1, separatorIndex)
}
if (!isValid(part)) {
return false
}
lastSeparator = separatorIndex
} while (separatorIndex > 0)
return true
}

private fun isValidVarSpec(varSpec: String): Boolean {
if (varSpec.isEmpty()) {
return false
Expand Down Expand Up @@ -172,7 +151,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
return eachSeparatedPart(varName, separator = '.') { part ->
part.isNotEmpty() &&
UriSpec.hasValidCharsOrPctEncoded(part) {
UriSpec.isAlpha(it) || UriSpec.isDigit(it) || it == '_'
Validation.isAlpha(it) || Validation.isDigit(it) || it == '_'
}
}
}
Expand All @@ -186,7 +165,7 @@ internal object UriTemplateFormatValidator : AbstractStringFormatValidator() {
// to long value
return false
}
return maxLength.all(UriSpec::isDigit)
return maxLength.all(Validation::isDigit)
}

private fun isOperator(char: Char): Boolean =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package io.github.optimumcode.json.schema.internal.formats

/**
 * Small character and string checks shared by the format validators.
 */
internal object Validation {
    /** Returns `true` when [c] is an ASCII letter (`a`..`z` or `A`..`Z`). */
    fun isAlpha(c: Char): Boolean = c in 'a'..'z' || c in 'A'..'Z'

    /** Returns `true` when [c] is an ASCII digit (`0`..`9`). */
    fun isDigit(c: Char): Boolean = c in '0'..'9'

    /**
     * Splits [value] by [separator] and checks every part with [isValid].
     *
     * Parts are produced lazily, left to right, and the check stops at the first rejected part.
     * Empty parts (leading, trailing or between two adjacent separators) are passed to [isValid] as well,
     * so a [value] without any separator results in a single call with the whole [value].
     *
     * @param value the string to split
     * @param separator the character that separates the parts
     * @param isValid the predicate every part must satisfy
     * @return `true` when [isValid] accepted every part, `false` otherwise
     */
    inline fun eachSeparatedPart(
        value: String,
        separator: Char,
        isValid: (String) -> Boolean,
    ): Boolean {
        var lastSeparator = -1
        do {
            val separatorIndex = value.indexOf(separator, startIndex = lastSeparator + 1)
            val part =
                if (separatorIndex < 0) {
                    // no more separators: the rest of the value is the last part
                    value.substring(lastSeparator + 1)
                } else {
                    value.substring(lastSeparator + 1, separatorIndex)
                }
            if (!isValid(part)) {
                return false
            }
            lastSeparator = separatorIndex
            // index 0 is a valid separator position: the loop must go on
            // until indexOf reports that nothing was found (a negative value),
            // otherwise everything after a leading separator is never checked
        } while (separatorIndex >= 0)
        return true
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,13 @@ internal inline fun CharSequence.forEachCodePointIndexed(
}
block(startIndex, firstChar.code)
}
}

/**
 * Checks the code points of the receiver against [condition].
 *
 * The iteration is delegated to [forEachCodePointIndexed] and is aborted
 * as soon as [condition] rejects a code point.
 *
 * @param condition the predicate applied to each visited code point
 * @return `false` when some code point was rejected, `true` when the iteration finished without a rejection
 */
internal fun CharSequence.allCodepoints(condition: (Int) -> Boolean): Boolean {
    forEachCodePointIndexed { _, cp ->
        val accepted = condition(cp)
        if (!accepted) {
            // non-local return: leaves allCodepoints itself, not only the lambda
            return false
        }
    }
    return true
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package io.github.optimumcode.json.schema.assertions.general.format

import io.kotest.core.spec.style.FunSpec

/**
 * Test suite for the `email` format: each valid value must pass the format assertion
 * and each invalid value must be rejected.
 */
class JsonSchemaEmailFormatValidationTest : FunSpec() {
    init {
        formatValidationTestSuite(
            format = "email",
            validTestCases =
                listOf(
                    // every character allowed in an unquoted local part
                    "a2!#$%&'*+-/=?^_`{}~|@domain.com",
                    // quoted local part that consists of escaped characters only
                    "\"\\\"\\ \\@\\!\\#\\[\\]\\~\"@example.com",
                    // quoted local part with unescaped characters
                    "\" !#[]~a2\"@example.com",
                    // IP addresses instead of a hostname
                    "test@[127.0.0.1]",
                    "test@[IPv6:FF01::101]",
                ),
            invalidTestCases =
                listOf(
                    TestCase("", "empty email"),
                    TestCase("@example.com", "empty local part"),
                    TestCase("test@", "empty domain part"),
                    TestCase("\"\"@example.com", "empty quoted string"),
                    // the value must contain '@', otherwise it is rejected
                    // before the quotes of the local part are ever looked at
                    TestCase("\"test@example.com", "only start quote"),
                    TestCase("test\"@example.com", "only end quote"),
                    TestCase("\"test\\\"@example.com", "quoted last quote"),
                    TestCase("\"te\\\nst\"@example.com", "invalid quoted character < space"),
                    TestCase("\"te\\\u007fst\"@example.com", "invalid quoted character > ~"),
                    TestCase("\"te\"st\"@example.com", "invalid character in quoted string"),
                    TestCase("test@[127.0.0.300]", "invalid IPv4 in domain part"),
                    TestCase("test@[IPv6:1:2:3:4:5:6:7:8:9]", "invalid IPv6 in domain part"),
                    TestCase("test@[FF01::101]", "valid IPv6 in domain part without prefix"),
                    TestCase("test@hostname.", "valid hostname in domain part"),
                    TestCase("te\nst@hostname", "invalid character < space"),
                    TestCase("te\u007fst@hostname", "invalid character > ~"),
                    TestCase("\"te\nst\"@hostname", "invalid character in quoted local part < space"),
                    TestCase("\"te\u007fst\"@hostname", "invalid character in quoted local part > ~"),
                ),
        )
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package io.github.optimumcode.json.schema.assertions.general.format

import io.kotest.core.spec.style.FunSpec

/**
 * Test suite for the `idn-email` format: non-ASCII code points are expected to be accepted
 * in the local part while the DEL character (U+007F) is expected to be rejected.
 */
class JsonSchemaIdnEmailFormatValidationTest : FunSpec() {
    init {
        // unquoted and quoted local parts with non-ASCII characters
        val accepted =
            listOf(
                "실례@실례.테스트",
                "\"실a\\~례\"@실례.테스트",
            )
        // U+007F is neither an allowed ASCII character nor a non-ASCII code point
        val rejected =
            listOf(
                TestCase("\u007F례@실례.테스트", "invalid codepoint in local part"),
                TestCase("\"\u007F\"@실례.테스트", "invalid codepoint in quoted local part"),
            )
        formatValidationTestSuite(
            format = "idn-email",
            validTestCases = accepted,
            invalidTestCases = rejected,
        )
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ class JsonSchemaUriTemplateFormatValidationTest : FunSpec() {
"https://simple.uri",
"https://test%20uri.com",
"https://testname/{first%20name}",
"https://testname/{first.name}",
"https://testname/{name_1.name_2}",
"https://\u00a0\ud7ff\uf900\ufdcf\ufdf0\uffef\uf8ff",
),
invalidTestCases =
listOf(
TestCase("https://example.com/{}", "empty expression"),
TestCase("https://example.com/{,}", "empty expression with var delimiter"),
TestCase("https://example.com/{test.}", "empty expression with name delimiter"),
TestCase("https://example.com/{te~st}", "invalid character in var name"),
TestCase("https://example.com/}", "end expression without start"),
TestCase("https://example.com/{t{e}st}", "expression inside expression"),
TestCase("https://example.com/{test:0}", "leading zero"),
Expand Down
Loading

0 comments on commit cdfe7dc

Please sign in to comment.