From ce5ca4416a0457bb9c230207482fe86a0fae29cc Mon Sep 17 00:00:00 2001
From: Mrinal Wadhwa <mrinal@ockam.io>
Date: Wed, 7 Nov 2018 21:48:04 -0800
Subject: [PATCH] feat: add parser

---
 benchmark_test.go |  62 +++++++
 did.go            | 457 +++++++++++++++++++++++++++++++++++++++++++++
 did_test.go       | 462 ++++++++++++++++++++++++++++++++++++++++++++++
 example_test.go   |  33 ++++
 4 files changed, 1014 insertions(+)
 create mode 100644 benchmark_test.go
 create mode 100644 did.go
 create mode 100644 did_test.go
 create mode 100644 example_test.go

diff --git a/benchmark_test.go b/benchmark_test.go
new file mode 100644
index 0000000..2b322dc
--- /dev/null
+++ b/benchmark_test.go
@@ -0,0 +1,62 @@
+package did_test
+
+import (
+	"net/url"
+	"testing"
+
+	"github.com/ockam-network/did"
+)
+
+var parsed *did.DID // sink for benchmark results, so the calls to Parse are not dead code
+
+func BenchmarkParse(b *testing.B) {
+	var last *did.DID // only the final result is published to the sink, after the loop
+	for i := b.N; i > 0; i-- {
+		last, _ = did.Parse("did:ockam:amzbjdl8etgpgwoe841sfi6fc4q9yh82m6pkmkw5pteabvtzm7p6qe106ysiawmo") // error not needed for timing
+	}
+	parsed = last
+}
+
+func BenchmarkParseWithPath(b *testing.B) {
+	var last *did.DID // only the final result is published to the sink, after the loop
+	for i := b.N; i > 0; i-- {
+		last, _ = did.Parse("did:ockam:amzbjdl8etgpgwoe841sfi6fc4q9yh82/6pkmkw5pteabvtzm7p6qe106ysiawmo") // error not needed for timing
+	}
+	parsed = last
+}
+
+func BenchmarkParseWithFragment(b *testing.B) {
+	var last *did.DID // only the final result is published to the sink, after the loop
+	for i := b.N; i > 0; i-- {
+		last, _ = did.Parse("did:ockam:amzbjdl8etgpgwoe841sfi6fc4q9yh82#6pkmkw5pteabvtzm7p6qe106ysiawmo") // error not needed for timing
+	}
+	parsed = last
+}
+
+// Sanity check against Go's URL parsing to make sure did.Parse is in the same order of magnitude.
+
+var parsedURL *url.URL // sink for the url.Parse results
+
+func BenchmarkUrlParse(b *testing.B) {
+	var last *url.URL // only the final result is published to the sink, after the loop
+	for i := b.N; i > 0; i-- {
+		last, _ = url.Parse("http://amzbjdl8etgpgwoe841sfi6fc4q9yh82m6pkmkw5pteabvtzm7p6qe106ysiawm.com") // error not needed for timing
+	}
+	parsedURL = last
+}
+
+func BenchmarkUrlParseWithPath(b *testing.B) {
+	var last *url.URL // only the final result is published to the sink, after the loop
+	for i := b.N; i > 0; i-- {
+		last, _ = url.Parse("http://amzbjdl8etgpgwoe841sfi6fc4q9yh82.com/6pkmkw5pteabvtzm7p6qe106ysiawm") // error not needed for timing
+	}
+	parsedURL = last
+}
+
+func BenchmarkUrlParseWithFragment(b *testing.B) {
+	var last *url.URL // only the final result is published to the sink, after the loop
+	for i := b.N; i > 0; i-- {
+		last, _ = url.Parse("http://amzbjdl8etgpgwoe841sfi6fc4q9yh82.com#6pkmkw5pteabvtzm7p6qe106ysiawm") // error not needed for timing
+	}
+	parsedURL = last
+}
diff --git a/did.go b/did.go
new file mode 100644
index 0000000..aa087f7
--- /dev/null
+++ b/did.go
@@ -0,0 +1,457 @@
+// Package did is a set of tools to work with Decentralized Identifiers (DIDs) as described
+// in the DID spec https://w3c-ccg.github.io/did-spec
+package did
+
+import (
+	"fmt"
+	"strings"
+)
+
+// A DID represents a parsed DID or a DID Reference.
+type DID struct {
+	// Method is the DID Method name, the text between the "did:" prefix and the next ":".
+	// https://w3c-ccg.github.io/did-spec#dfn-did-method
+	Method string
+
+	// ID is the specific-idstring component of a DID, Parse builds it by joining IDStrings with ":".
+	ID string
+
+	// IDStrings holds the individual `:` separated idstrings that make up the specific-idstring
+	// did = "did:" method ":" specific-idstring
+	// specific-idstring = idstring *( ":" idstring )
+	IDStrings []string
+
+	// Path is the DID Path, the portion of a DID reference that follows the first forward slash
+	// character, Parse builds it by joining PathSegments with "/".
+	// https://w3c-ccg.github.io/did-spec/#dfn-did-path
+	Path string
+
+	// PathSegments holds the individual `/` separated segments of Path, it is nil without a path
+	// did-path = segment-nz *( "/" segment )
+	PathSegments []string
+
+	// Fragment is the DID Fragment, the portion of a DID reference that follows the first hash
+	// sign character ("#"). https://w3c-ccg.github.io/did-spec/#dfn-did-fragment
+	Fragment string
+}
+
+// parser holds the internal state of one Parse call, the parser steps read and update it
+type parser struct {
+	input        string // input to the parser
+	currentIndex int    // index in the input which the parser is currently processing
+	out          *DID   // the output DID that the parser will assemble as it steps through its state machine
+	err          error  // an error in the parser state machine, set by errorf, nil if no step failed
+}
+
+// parserStep is a step in the parser state machine, it returns the next step or nil to halt
+type parserStep func() parserStep
+
+// Parse parses the input string into a DID structure.
+// It returns the parsed DID, or a nil DID together with the error that stopped the parser.
+func Parse(input string) (*DID, error) {
+	// the parser starts at the beginning of input with an empty DID to fill in
+	state := &parser{input: input, out: &DID{}}
+
+	// The parser is a state machine: every step consumes part of the input, advancing
+	// state.currentIndex, and then hands back the step that should run next.
+	// A step returns nil to halt the machine, either at the end of input or after recording an error.
+	//
+	// This design is based on this talk from Rob Pike, although the talk focuses on lexical scanning,
+	// the DID grammar is simple enough for us to combine lexing and parsing into one lexerless parse
+	// http://www.youtube.com/watch?v=HxaD_trXwRE
+	for step := parserStep(state.checkLength); step != nil; {
+		step = step()
+	}
+
+	// a step that failed left its error in the parser state, the partially built DID is dropped
+	if state.err != nil {
+		return nil, state.err
+	}
+
+	result := state.out
+
+	// the idstrings joined with : make up ID
+	result.ID = strings.Join(result.IDStrings, ":")
+	// the path segments joined with / make up Path
+	result.Path = strings.Join(result.PathSegments, "/")
+
+	return result, nil
+}
+
+// checkLength is the first parserStep, it rejects input that is too short to be a DID.
+// The shortest input that the grammar allows is 7 bytes long:
+//   `did:` prefix (4 bytes)
+//   + at least one methodchar (1 byte)
+//   + `:` (1 byte)
+//   + at least one idchar (1 byte)
+// No upper limit is checked, the current specification does not take a position on maximum length.
+// https://w3c-ccg.github.io/did-spec/#upper-limits-on-did-character-length
+func (p *parser) checkLength() parserStep {
+	const minLength = 7
+
+	// on failure the input length is what gets recorded as the parser's current index
+	if n := len(p.input); n < minLength {
+		return p.errorf(n, "input length is less than 7")
+	}
+
+	return p.parseScheme
+}
+
+// parseScheme is a parserStep that validates that the input begins with the `did:` scheme,
+// on success it leaves the parser on the `:` that ends the scheme
+func (p *parser) parseScheme() parserStep {
+	const scheme = "did:"
+	schemeEnd := len(scheme) - 1 // 4 bytes in 'did:', so its ':' is at index 3
+
+	// the grammar requires the `did:` prefix, checkLength made sure the input is long enough to slice
+	if p.input[:schemeEnd+1] != scheme {
+		return p.errorf(schemeEnd, "input does not begin with 'did:' prefix")
+	}
+	p.currentIndex = schemeEnd
+	return p.parseMethod
+}
+
+// parseMethod is a parserStep that extracts the DID Method, the text between the `:` of the
+// `did:` prefix and the next `:`, and stores it in p.out.Method
+// It fails if the method is empty, is not closed by a `:` or has a byte other than a-z / 0-9.
+// from the grammar:
+//   did        = "did:" method ":" specific-idstring
+//   method     = 1*methodchar
+//   methodchar = %x61-7A / DIGIT ; 61-7A is a-z in US-ASCII
+func (p *parser) parseMethod() parserStep {
+	text := p.input
+
+	// the parser is on the ':' of 'did:', the method name starts right after it
+	begin := p.currentIndex + 1
+	end := begin
+
+	// scan forward one byte at a time
+	// the loop leaves end on the second ':' of the input, the one that closes the method name
+	for ; ; end++ {
+		if end == len(text) {
+			// ran out of input before a second ':' showed up
+			return p.errorf(end, "input does not have a second `:` marking end of method name")
+		}
+
+		b := text[end]
+
+		if b == ':' {
+			// the method name is complete
+			break
+		}
+
+		// as per the grammar a method is made only of digits 0-9 and small letters a-z
+		if isNotDigit(b) && isNotSmallLetter(b) {
+			return p.errorf(end, "character is not a-z OR 0-9")
+		}
+	}
+
+	if end == begin {
+		// there is nothing between the two ':'s, ex- did::1234
+		// the grammar requires at least one methodchar
+		return p.errorf(end, "method is empty")
+	}
+
+	// record the method and leave the parser on the ':' that follows it
+	p.currentIndex = end
+	p.out.Method = text[begin:end]
+
+	// method is followed by specific-idstring, parse that next
+	return p.parseID
+}
+
+// parseID is a parserStep that extracts one idstring of the specific-idstring, the text up to
+// the next `:`, `/`, `#` or the end of input, and appends it to p.out.IDStrings
+// It fails if the idstring is empty or has a byte that is not an idchar.
+// from the grammar:
+//   specific-idstring = idstring *( ":" idstring )
+//   idstring          = 1*idchar
+//   idchar            = ALPHA / DIGIT / "." / "-"
+// The delimiter that ended the idstring decides which parserStep runs next.
+// p.out.IDStrings is later joined with `:` by the Parse function before it returns.
+func (p *parser) parseID() parserStep {
+	text := p.input
+
+	// the parser is on the `:` that precedes this idstring,
+	// the idstring starts right after it
+	begin := p.currentIndex + 1
+	end := begin
+
+	// next stays nil when the idstring runs to the end of input,
+	// returning nil is what halts the state machine
+	var next parserStep
+
+scan:
+	for ; end < len(text); end++ {
+		// the byte under the cursor is either a delimiter that ends the idstring
+		// or it has to be an idchar
+		switch b := text[end]; b {
+		case ':':
+			// encountered : input may have another idstring, parse ID again
+			next = p.parseID
+			break scan
+
+		case '/':
+			// encountered / input may have a path following specific-idstring, parse that next
+			next = p.parsePath
+			break scan
+
+		case '#':
+			// encountered # input may have a fragment following specific-idstring, parse that next
+			next = p.parseFragment
+			break scan
+
+		default:
+			// make sure the byte is a valid idchar
+			// idchar = ALPHA / DIGIT / "." / "-"
+			if isNotValidIDChar(b) {
+				return p.errorf(end, "byte is not ALPHA OR DIGIT OR '.' OR '-'")
+			}
+		}
+	}
+
+	if end == begin {
+		// idstring length is zero
+		// from the grammar:
+		//   idstring = 1*idchar
+		// return error because idstring is empty, ex- did:a::123:456
+		return p.errorf(end, "idstring must be atleast one char long")
+	}
+
+	// store the idstring and leave the parser on the delimiter that ended it,
+	// or on len(text) if there was none
+	p.currentIndex = end
+	p.out.IDStrings = append(p.out.IDStrings, text[begin:end])
+
+	// hand over to the step picked by the delimiter,
+	// a nil next ends the parse
+	return next
+}
+
+// parsePath is a parserStep that extracts one `/` separated segment of a DID Path from a
+// DID Reference and appends it to p.out.PathSegments, Parse later joins the segments with `/`.
+// A segment ends at the next `/`, which starts another segment, at a `#`, which starts the
+// fragment, or at the end of input.
+// from the grammar:
+//   did-reference = did [ "/" did-path ] [ "#" did-fragment ]
+//   did-path      = segment-nz *( "/" segment )
+//   segment       = *pchar
+//   segment-nz    = 1*pchar
+//   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+//   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+//   pct-encoded   = "%" HEXDIG HEXDIG
+//   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+func (p *parser) parsePath() parserStep {
+	input := p.input
+	inputLength := len(input)
+	currentIndex := p.currentIndex + 1 // the parser is on the `/` that precedes this segment
+	startIndex := currentIndex
+
+	// next stays nil, which halts the state machine, when the segment runs to the end of input
+	var next parserStep
+
+loop:
+	for currentIndex < inputLength {
+		switch char := input[currentIndex]; {
+		case char == '/':
+			// encountered / another path segment follows, parse path again
+			next = p.parsePath
+			break loop
+
+		case char == '#':
+			// encountered # the path is over and a fragment follows, parse that next
+			// "#" is not a pchar so it can not be part of a segment
+			next = p.parseFragment
+			break loop
+
+		case char == '%':
+			// a % must be followed by 2 hex digits
+			if (currentIndex+2 >= inputLength) ||
+				isNotHexDigit(input[currentIndex+1]) ||
+				isNotHexDigit(input[currentIndex+2]) {
+				return p.errorf(currentIndex, "%% is not followed by 2 hex digits")
+			}
+			// if we got here, we're dealing with percent encoded char, jump three chars
+			currentIndex = currentIndex + 3
+
+		case isNotValidPathChar(char):
+			// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+			return p.errorf(currentIndex, "character is not allowed in path")
+
+		default:
+			// a plain pchar, move to the next char
+			currentIndex = currentIndex + 1
+		}
+	}
+
+	if currentIndex == startIndex && len(p.out.PathSegments) == 0 {
+		// path segment length is zero
+		// first path segment must have at least one character
+		// from the grammar
+		//   did-path = segment-nz *( "/" segment )
+		return p.errorf(currentIndex, "first path segment must have atleast one character")
+	}
+
+	// update parser state
+	// the parser is left on the delimiter that ended the segment, or on the end of input
+	p.currentIndex = currentIndex
+	p.out.PathSegments = append(p.out.PathSegments, input[startIndex:currentIndex])
+
+	return next
+}
+
+// parseFragment is the last parserStep, it stores the input that follows `#` in p.out.Fragment
+// from the grammar:
+//   did-fragment  = *( pchar / "/" / "?" )
+//   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+//   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+//   pct-encoded   = "%" HEXDIG HEXDIG
+//   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+func (p *parser) parseFragment() parserStep {
+	input := p.input
+	inputLength := len(input)
+	currentIndex := p.currentIndex + 1 // the parser is on the `#`, the fragment starts after it
+	startIndex := currentIndex
+
+	var indexIncrement int
+	var percentEncoded bool
+
+	for {
+		if currentIndex == inputLength {
+			// we've reached the end of input
+			// it's ok for the fragment to be empty, so we don't need a check for that
+			// did-fragment = *( pchar / "/" / "?" )
+			break
+		}
+
+		char := input[currentIndex]
+
+		if char == '%' {
+			// a % must be followed by 2 hex digits (the message says %% as errorf takes a format)
+			if (currentIndex+2 >= inputLength) ||
+				isNotHexDigit(input[currentIndex+1]) ||
+				isNotHexDigit(input[currentIndex+2]) {
+				return p.errorf(currentIndex, "%% is not followed by 2 hex digits")
+			}
+			// if we got here, we're dealing with percent encoded char, jump three chars
+			percentEncoded = true
+			indexIncrement = 3
+		} else {
+			// not percent encoded
+			percentEncoded = false
+			indexIncrement = 1
+		}
+
+		// did-fragment = *( pchar / "/" / "?" )
+		// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+		// isNotValidFragmentChar checks for all other valid chars except pct-encoded
+		if !percentEncoded && isNotValidFragmentChar(char) {
+			return p.errorf(currentIndex, "character is not allowed in fragment")
+		}
+
+		// move to the next char
+		currentIndex = currentIndex + indexIncrement
+	}
+
+	// update parser state
+	p.currentIndex = currentIndex
+	p.out.Fragment = input[startIndex:currentIndex]
+
+	// no more parsing needed after a fragment,
+	// cause the state machine to exit by returning nil
+	return nil
+}
+
+// errorf stores index and the error built from format and args in the parser state, then returns
+// nil so that the state machine exits; other parser steps return its result to stop with an error
+// args is expanded with args... so that every argument is matched against a verb in format
+func (p *parser) errorf(index int, format string, args ...interface{}) parserStep {
+	p.currentIndex = index
+	p.err = fmt.Errorf(format, args...)
+	return nil
+}
+
+// INLINABLE
+// Calls to all functions below this point should be inlined by the go compiler
+// See output of `go build -gcflags -m` to confirm
+
+// isNotValidIDChar returns true if a byte is not allowed in an ID
+// from the grammar:
+//   idchar = ALPHA / DIGIT / "." / "-"
+func isNotValidIDChar(char byte) bool {
+	return char != '.' && char != '-' && isNotDigit(char) && isNotAlpha(char)
+}
+
+// isNotValidFragmentChar returns true if a byte is not allowed in a Fragment, that is if it is
+// neither "/" nor "?" nor a valid path char, pct-encoded is not checked in this function
+// from the grammar:
+//   did-fragment = *( pchar / "/" / "?" )
+//   pchar        = unreserved / pct-encoded / sub-delims / ":" / "@"
+func isNotValidFragmentChar(char byte) bool {
+	return char != '/' && char != '?' && isNotValidPathChar(char)
+}
+
+// isNotValidPathChar returns true if a byte is not allowed in a Path segment, pct-encoded is
+// not checked in this function, from the grammar:
+//   did-path    = segment-nz *( "/" segment )
+//   segment     = *pchar
+//   segment-nz  = 1*pchar
+//   pchar       = unreserved / pct-encoded / sub-delims / ":" / "@"
+func isNotValidPathChar(char byte) bool {
+	return char != ':' && char != '@' && isNotUnreservedOrSubdelim(char)
+}
+
+// isNotUnreservedOrSubdelim returns true if a byte is neither unreserved nor one of the sub-delims
+// from the grammar:
+//   unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+//   sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+// https://tools.ietf.org/html/rfc3986#appendix-A
+func isNotUnreservedOrSubdelim(char byte) bool {
+	switch char {
+	case '-', '.', '_', '~': // the unreserved bytes other than ALPHA / DIGIT
+		return false
+	case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // sub-delims
+		return false
+	}
+
+	// what is left of unreserved is ALPHA / DIGIT
+	return isNotAlpha(char) && isNotDigit(char)
+}
+
+// isNotHexDigit returns true if a byte is not a hex digit, i.e. not one of 0-9, A-F or a-f
+// in US-ASCII http://www.columbia.edu/kermit/ascii.html
+// https://tools.ietf.org/html/rfc5234#appendix-B.1
+func isNotHexDigit(char byte) bool {
+	notDecimal := char < '0' || char > '9'
+	notUpperHex := char < 'A' || char > 'F'
+	return notDecimal && notUpperHex && (char < 'a' || char > 'f')
+}
+
+// isNotDigit returns true if a byte is not a decimal digit, i.e. not one of 0-9
+// in US-ASCII http://www.columbia.edu/kermit/ascii.html
+// https://tools.ietf.org/html/rfc5234#appendix-B.1
+func isNotDigit(char byte) bool {
+	// the digits are the contiguous range '0' (\x30) to '9' (\x39)
+	return !('0' <= char && char <= '9')
+}
+
+// isNotAlpha returns true if a byte is neither a small letter a-z nor a big letter A-Z
+// https://tools.ietf.org/html/rfc5234#appendix-B.1
+func isNotAlpha(char byte) bool {
+	return (char < 'a' || char > 'z') && (char < 'A' || char > 'Z')
+}
+
+// isNotBigLetter returns true if a byte is not one of the big letters A-Z
+// in US-ASCII http://www.columbia.edu/kermit/ascii.html
+// https://tools.ietf.org/html/rfc5234#appendix-B.1
+func isNotBigLetter(char byte) bool {
+	// the big letters are the contiguous range 'A' (\x41) to 'Z' (\x5A)
+	return !('A' <= char && char <= 'Z')
+}
+
+// isNotSmallLetter returns true if a byte is not one of the small letters a-z
+// in US-ASCII http://www.columbia.edu/kermit/ascii.html
+// https://tools.ietf.org/html/rfc5234#appendix-B.1
+func isNotSmallLetter(char byte) bool {
+	// the small letters are the contiguous range 'a' (\x61) to 'z' (\x7A)
+	return !('a' <= char && char <= 'z')
+}
diff --git a/did_test.go b/did_test.go
new file mode 100644
index 0000000..a8752b2
--- /dev/null
+++ b/did_test.go
@@ -0,0 +1,462 @@
+package did
+
+import (
+	"testing"
+)
+
+// nolint
+func TestParse(t *testing.T) {
+
+	t.Run("returns error if input is empty", func(t *testing.T) {
+		_, err := Parse("")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("returns error if input length is less than length 7", func(t *testing.T) {
+		_, err := Parse("did:")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a:")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("returns error if input does not have a second : to mark end of method", func(t *testing.T) {
+		_, err := Parse("did:aaaaaaaaaaa")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("returns error if method is empty", func(t *testing.T) {
+		_, err := Parse("did::aaaaaaaaaaa")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("returns error if idstring is empty", func(t *testing.T) {
+		_, err := Parse("did:a::123:456")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a:123::456")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a:123:456:")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a:123:/abc")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a:123:#abc")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("returns error if input does not begin with did: scheme", func(t *testing.T) {
+		_, err := Parse("a:12345")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("returned value is nil if input does not begin with did: scheme", func(t *testing.T) {
+		d, _ := Parse("a:12345")
+		if d != nil {
+			t.Errorf("output value is not nil - %+v", d)
+		}
+	})
+
+	t.Run("succeeds if it has did prefix and length is greater than 7", func(t *testing.T) {
+		d, err := Parse("did:a:1")
+		if err != nil {
+			t.Errorf("error is not nil - %+v", err)
+		}
+		if d == nil {
+			t.Errorf("return value is nil")
+		}
+	})
+
+	t.Run("succeeds to extract method", func(t *testing.T) {
+		d, err := Parse("did:a:1")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		method := d.Method
+		if method != "a" {
+			t.Errorf("method is %s, expected: a", method)
+		}
+
+		d, err = Parse("did:abcdef:11111")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+		method = d.Method
+		if method != "abcdef" {
+			t.Errorf("method is %s, expected: abcdef", method)
+		}
+	})
+
+	t.Run("returns error if method has any other char than 0-9 or a-z", func(t *testing.T) {
+		_, err := Parse("did:aA:1")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:aa-aa:1")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("succeeds to extract id", func(t *testing.T) {
+		d, err := Parse("did:a:1")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		id := d.ID
+		if id != "1" {
+			t.Errorf("id is %s, expected: 1", id)
+		}
+	})
+
+	t.Run("succeeds to extract id parts", func(t *testing.T) {
+		d, err := Parse("did:a:123:456")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		parts := d.IDStrings
+		if len(parts) != 2 || parts[0] != "123" || parts[1] != "456" { // length first, indexing could panic
+			t.Errorf("parts is %s, expected: [123 456]", parts)
+		}
+	})
+
+	t.Run("fails of ID has an invalid char", func(t *testing.T) {
+		_, err := Parse("did:a:1&&111")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("succeeds to extract path", func(t *testing.T) {
+		d, err := Parse("did:a:123:456/someService")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		path := d.Path
+		if path != "someService" {
+			t.Errorf("path is %s, expected: someService", path)
+		}
+	})
+
+	t.Run("succeeds to extract path segements", func(t *testing.T) {
+		d, err := Parse("did:a:123:456/a/b")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		segments := d.PathSegments
+		if len(segments) != 2 || segments[0] != "a" || segments[1] != "b" { // length first, indexing could panic
+			t.Errorf("segments is %s, expected: [a b]", segments)
+		}
+	})
+
+	t.Run("succeeds with percent encoded chars in path", func(t *testing.T) {
+		d, err := Parse("did:a:123:456/a/%20a")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		path := d.Path
+		if path != "a/%20a" {
+			t.Errorf("path is %s, expected: a/%%20a", path)
+		}
+	})
+
+	t.Run("fails if % in path is not followed by 2 hex chars", func(t *testing.T) {
+		_, err := Parse("did:a:123:456/%")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a:123:456/%a")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+	})
+
+	t.Run("fails if path is empty but there is a slash", func(t *testing.T) {
+		_, err := Parse("did:a:123:456/")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("fails if first path segment is empty", func(t *testing.T) {
+		_, err := Parse("did:a:123:456//abc")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("does not fail if second path segment is empty", func(t *testing.T) {
+		_, err := Parse("did:a:123:456/abc//pqr")
+		if err != nil {
+			t.Errorf("error is not nil")
+		}
+	})
+
+	t.Run("fails if path has invalid char", func(t *testing.T) {
+		_, err := Parse("did:a:123:456/ssss^sss")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("does not fail if path has atleast on segment and a trailing slash", func(t *testing.T) {
+		_, err := Parse("did:a:123:456/a/b/")
+		if err != nil {
+			t.Errorf("error is not nil - %+v", err)
+		}
+	})
+
+	t.Run("succeeds to extract fragment", func(t *testing.T) {
+		d, err := Parse("did:a:123:456#keys-1")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		f := d.Fragment
+		if f != "keys-1" {
+			t.Errorf("fragment is %s, expected: keys-1", f)
+		}
+	})
+
+	t.Run("succeeds with percent encoded chars in fragement", func(t *testing.T) {
+		d, err := Parse("did:a:123:456#aaaaaa%20a")
+		if err != nil {
+			t.Fatalf("error is not nil - %+v", err) // Fatalf: d is nil on error and is dereferenced below
+		}
+
+		path := d.Fragment
+		if path != "aaaaaa%20a" {
+			t.Errorf("path is %s, expected: aaaaaa%%20a", path)
+		}
+	})
+
+	t.Run("fails if % in fragment is not followed by 2 hex chars", func(t *testing.T) {
+		_, err := Parse("did:a:123:456#%")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+
+		_, err = Parse("did:a:123:456#%a")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+
+	t.Run("fails if fragment has invalid char", func(t *testing.T) {
+		_, err := Parse("did:a:123:456#ssss^sss")
+		if err == nil {
+			t.Errorf("error is nil")
+		}
+	})
+}
+
+func Test_isNotValidIDChar(t *testing.T) {
+	a := []byte{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+		'.', '-'}
+	for _, c := range a { // every idchar must be accepted
+		if isNotValidIDChar(c) {
+			t.Errorf("should be false but returned true: %v", c)
+		}
+	}
+
+	a = []byte{'%', '^', '#', ' ', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '@', '/', '?'}
+	for _, c := range a { // bytes that are not an idchar must be rejected
+		if !isNotValidIDChar(c) {
+			t.Errorf("should be true but returned false: %v", c)
+		}
+	}
+}
+
+func Test_isNotValidFragmentChar(t *testing.T) {
+	a := []byte{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+		'-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=',
+		':', '@',
+		'/', '?'}
+	for _, c := range a { // every byte a fragment may contain, other than %, must be accepted
+		if isNotValidFragmentChar(c) {
+			t.Errorf("should be false but returned true: %v", c)
+		}
+	}
+
+	a = []byte{'%', '^', '#', ' '}
+	for _, c := range a { // % is handled by the parser itself, the rest is never valid
+		if !isNotValidFragmentChar(c) {
+			t.Errorf("should be true but returned false: %v", c)
+		}
+	}
+}
+
+func Test_isNotValidPathChar(t *testing.T) {
+	a := []byte{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+		'-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=',
+		':', '@'}
+	for _, c := range a { // every pchar byte, other than %, must be accepted
+		if isNotValidPathChar(c) {
+			t.Errorf("should be false but returned true: %v", c)
+		}
+	}
+
+	a = []byte{'%', '/', '?'}
+	for _, c := range a { // % and / are handled by the parser itself, ? is not a pchar
+		if !isNotValidPathChar(c) {
+			t.Errorf("should be true but returned false: %v", c)
+		}
+	}
+}
+
+func Test_isNotUnreservedOrSubdelim(t *testing.T) {
+	a := []byte{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+		'-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='}
+	for _, c := range a { // every unreserved and sub-delims byte must be accepted
+		if isNotUnreservedOrSubdelim(c) {
+			t.Errorf("should be false but returned true: %v", c)
+		}
+	}
+
+	a = []byte{'%', ':', '@', '/', '?'}
+	for _, c := range a { // bytes outside unreserved and sub-delims must be rejected
+		if !isNotUnreservedOrSubdelim(c) {
+			t.Errorf("should be true but returned false: %v", c)
+		}
+	}
+}
+
+func Test_isNotHexDigit(t *testing.T) {
+	// all 22 hex digits must be accepted
+	for _, digit := range []byte("0123456789ABCDEFabcdef") {
+		if isNotHexDigit(digit) {
+			t.Errorf("should be false but returned true: %v", digit)
+		}
+	}
+
+	// the bytes right next to the A-F and a-f ranges must be rejected,
+	// and so must a byte that is unrelated to hex digits
+	for _, other := range []byte{'G', 'g', '%', '\x40', '\x47', '\x60', '\x67'} {
+		if !isNotHexDigit(other) {
+			t.Errorf("should be true but returned false: %v", other)
+		}
+	}
+}
+
+func Test_isNotDigit(t *testing.T) {
+	for _, digit := range []byte("0123456789") { // every digit must be accepted
+		if isNotDigit(digit) {
+			t.Errorf("should be false but returned true: %v", digit)
+		}
+	}
+
+	// bytes that are not digits must be rejected
+	// '/' sits right below '0' in US-ASCII
+	for _, other := range []byte{'A', 'a', '\x29', '\x40', '/'} {
+		if !isNotDigit(other) {
+			t.Errorf("should be true but returned false: %v", other)
+		}
+	}
+}
+
+func Test_isNotAlpha(t *testing.T) {
+	a := []byte{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}
+	for _, c := range a { // every big and small letter must be accepted
+		if isNotAlpha(c) {
+			t.Errorf("should be false but returned true: %v", c)
+		}
+	}
+
+	a = []byte{'\x40', '\x5B', '\x60', '\x7B', '0', '9', '-', '%'}
+	for _, c := range a { // the bytes right next to A-Z and a-z, digits and punctuation must be rejected
+		if !isNotAlpha(c) {
+			t.Errorf("should be true but returned false: %v", c)
+		}
+	}
+}
+
+func Test_isNotBigLetter(t *testing.T) {
+	a := []byte{'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}
+	for _, c := range a { // every big letter must be accepted
+		if isNotBigLetter(c) {
+			t.Errorf("should be false but returned true: %v", c)
+		}
+	}
+
+	a = []byte{'\x40', '\x5B', 'a', 'z', '1', '9', '-', '%'}
+	for _, c := range a { // the bytes right next to A-Z, small letters, digits and punctuation must be rejected
+		if !isNotBigLetter(c) {
+			t.Errorf("should be true but returned false: %v", c)
+		}
+	}
+}
+
+func Test_isNotSmallLetter(t *testing.T) {
+	a := []byte{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}
+	for _, c := range a { // every small letter must be accepted
+		if isNotSmallLetter(c) {
+			t.Errorf("should be false but returned true: %v", c)
+		}
+	}
+
+	a = []byte{'\x60', '\x7B', 'A', 'Z', '1', '9', '-', '%'}
+	for _, c := range a { // the bytes right next to a-z, big letters, digits and punctuation must be rejected
+		if !isNotSmallLetter(c) {
+			t.Errorf("should be true but returned false: %v", c)
+		}
+	}
+}
diff --git a/example_test.go b/example_test.go
new file mode 100644
index 0000000..b707f78
--- /dev/null
+++ b/example_test.go
@@ -0,0 +1,33 @@
+package did_test
+
+import (
+	"fmt"
+	"github.com/ockam-network/did"
+)
+
+func ExampleParse() {
+	parsedDID, parseErr := did.Parse("did:example:q7ckgxeq1lxmra0r") // a DID with no path or fragment
+	if parseErr != nil {
+		fmt.Printf("%#v", parseErr)
+	}
+	fmt.Printf("%#v", parsedDID)
+	// Output: &did.DID{Method:"example", ID:"q7ckgxeq1lxmra0r", IDStrings:[]string{"q7ckgxeq1lxmra0r"}, Path:"", PathSegments:[]string(nil), Fragment:""}
+}
+
+func ExampleParse_withPath() {
+	parsedDID, parseErr := did.Parse("did:example:q7ckgxeq1lxmra0r/a/b") // a DID reference with a two segment path
+	if parseErr != nil {
+		fmt.Printf("%#v", parseErr)
+	}
+	fmt.Printf("%#v", parsedDID)
+	// Output: &did.DID{Method:"example", ID:"q7ckgxeq1lxmra0r", IDStrings:[]string{"q7ckgxeq1lxmra0r"}, Path:"a/b", PathSegments:[]string{"a", "b"}, Fragment:""}
+}
+
+func ExampleParse_withFragment() {
+	parsedDID, parseErr := did.Parse("did:example:q7ckgxeq1lxmra0r#keys-1") // a DID reference with a fragment
+	if parseErr != nil {
+		fmt.Printf("%#v", parseErr)
+	}
+	fmt.Printf("%#v", parsedDID)
+	// Output: &did.DID{Method:"example", ID:"q7ckgxeq1lxmra0r", IDStrings:[]string{"q7ckgxeq1lxmra0r"}, Path:"", PathSegments:[]string(nil), Fragment:"keys-1"}
+}