Commit
Showing 5 changed files with 191 additions and 50 deletions.
The first file shown is the gomark heading parser (package parser).

@@ -1,41 +1,52 @@

The string-based Parse method is removed, along with the "strings" and ast imports that only it used:

func (*HeadingTokenizer) Parse(parent *ast.Node, block string) *ast.Node {
	line := block
	level := 0
	for _, c := range line {
		if c == '#' {
			level++
		} else if c == ' ' {
			break
		}
	}
	text := strings.TrimSpace(line[level+1:])
	node := ast.NewNode("h1", text)
	if parent != nil {
		parent.AddChild(node)
	}
	return node
}

In its place, a token-based Match method is added. The file after the commit:

package parser

import (
	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

type HeadingTokenizer struct {
	Level         int
	ContentTokens []*tokenizer.Token
}

func NewHeadingTokenizer() *HeadingTokenizer {
	return &HeadingTokenizer{}
}

func (*HeadingTokenizer) Trigger() []byte {
	return []byte{'#'}
}

func (*HeadingTokenizer) Match(tokens []*tokenizer.Token) *HeadingTokenizer {
	// Count the leading '#' tokens; the count becomes the heading level.
	cursor := 0
	for _, token := range tokens {
		if token.Type == tokenizer.Hash {
			cursor++
		} else {
			break
		}
	}
	if len(tokens) <= cursor+1 {
		return nil
	}
	// The hashes must be followed by a space, and only levels 1-6 are valid.
	if tokens[cursor].Type != tokenizer.Space {
		return nil
	}
	level := cursor
	if level == 0 || level > 6 {
		return nil
	}

	// Everything after the space, up to the first newline, is the heading content.
	cursor++
	contentTokens := []*tokenizer.Token{}
	for _, token := range tokens[cursor:] {
		if token.Type == tokenizer.Newline {
			break
		}
		contentTokens = append(contentTokens, token)
	}
	if len(contentTokens) == 0 {
		return nil
	}

	return &HeadingTokenizer{
		Level:         level,
		ContentTokens: contentTokens,
	}
}
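For context, a minimal usage sketch of the new API. This is hypothetical (a standalone main package that assumes the parser and tokenizer packages are importable from the paths in the diff); tokenizer.Tokenize is the entry point the tests below use:

package main

import (
	"fmt"

	"github.com/usememos/memos/plugin/gomark/parser"
	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

func main() {
	// Tokenize a line, then try to match it as a heading.
	tokens := tokenizer.Tokenize("## Hello World!")
	heading := parser.NewHeadingTokenizer().Match(tokens)
	if heading == nil {
		fmt.Println("not a heading")
		return
	}
	fmt.Println("level:", heading.Level) // level: 2
	for _, token := range heading.ContentTokens {
		fmt.Print(token.Value) // prints: Hello World!
	}
	fmt.Println()
}

Unlike the old Parse, Match builds no AST node and mutates no parent; it either returns a populated HeadingTokenizer or nil, which keeps matching separate from tree construction.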
The second file is the accompanying test, rewritten for the new Match behavior:

@@ -1 +1,95 @@

package parser

import (
	"testing"

	"github.com/stretchr/testify/require"
	"github.com/usememos/memos/plugin/gomark/parser/tokenizer"
)

func TestHeadingParser(t *testing.T) {
	tests := []struct {
		text    string
		heading *HeadingTokenizer
	}{
		{
			text:    "*Hello world!",
			heading: nil,
		},
		{
			text: "## Hello World!",
			heading: &HeadingTokenizer{
				Level: 2,
				ContentTokens: []*tokenizer.Token{
					{
						Type:  tokenizer.Text,
						Value: "Hello",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
					{
						Type:  tokenizer.Text,
						Value: "World!",
					},
				},
			},
		},
		{
			text: "# # Hello World",
			heading: &HeadingTokenizer{
				Level: 1,
				ContentTokens: []*tokenizer.Token{
					{
						Type:  tokenizer.Hash,
						Value: "#",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
					{
						Type:  tokenizer.Text,
						Value: "Hello",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
					{
						Type:  tokenizer.Text,
						Value: "World",
					},
				},
			},
		},
		{
			text:    " # 123123 Hello World!",
			heading: nil,
		},
		{
			text: `# 123 
Hello World!`,
			heading: &HeadingTokenizer{
				Level: 1,
				ContentTokens: []*tokenizer.Token{
					{
						Type:  tokenizer.Text,
						Value: "123",
					},
					{
						Type:  tokenizer.Space,
						Value: " ",
					},
				},
			},
		},
	}

	for _, test := range tests {
		tokens := tokenizer.Tokenize(test.text)
		headingTokenizer := NewHeadingTokenizer()
		require.Equal(t, test.heading, headingTokenizer.Match(tokens))
	}
}
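The tokenizer itself is not part of this diff. From the token types the tests rely on (Hash, Space, Newline, Text), a minimal sketch of a compatible Tokenize could look like the following; this is an illustration inferred from the tests, not the repository's actual implementation:

package tokenizer

// Sketch of token types as implied by the tests above (assumed, not from the diff).
type TokenType string

const (
	Hash    TokenType = "#"
	Space   TokenType = " "
	Newline TokenType = "\n"
	Text    TokenType = ""
)

type Token struct {
	Type  TokenType
	Value string
}

// Tokenize splits text into hash, space, newline, and text tokens.
func Tokenize(text string) []*Token {
	tokens := []*Token{}
	buf := ""
	flush := func() {
		// Emit any pending text run as a single Text token.
		if buf != "" {
			tokens = append(tokens, &Token{Type: Text, Value: buf})
			buf = ""
		}
	}
	for _, c := range text {
		switch c {
		case '#':
			flush()
			tokens = append(tokens, &Token{Type: Hash, Value: "#"})
		case ' ':
			flush()
			tokens = append(tokens, &Token{Type: Space, Value: " "})
		case '\n':
			flush()
			tokens = append(tokens, &Token{Type: Newline, Value: "\n"})
		default:
			buf += string(c)
		}
	}
	flush()
	return tokens
}

With a layout like this, the parser tests above should run with go test ./plugin/gomark/parser/... from the repository root.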
A third file in the commit was deleted; its name and contents are not shown here.