Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Marking typo support #18

Merged
merged 12 commits into from
Feb 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ version: 2
jobs:
build-and-test:
docker:
- image: cimg/go:1.17
- image: cimg/go:1.18

environment:
BINARY_NAME: "TySug-linux-amd64"
TEST_RESULTS: "/tmp/test-results"
#GOFLAGS: "-buildvcs=false" # for 1.18
GOFLAGS: "-buildvcs=false -trimpath"


steps:
Expand All @@ -23,7 +23,7 @@ jobs:
name: Build
command: |
TAG=${CIRCLE_TAG:-dev}
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o "${TEST_RESULTS}/${BINARY_NAME}" -a -ldflags="-w -s -X main.Version=${TAG}" ./cmd/web
GOFLAGS="-buildvcs=false -trimpath" CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o "${TEST_RESULTS}/${BINARY_NAME}" -a -ldflags="-w -s -X main.Version=${TAG}" ./cmd/web

- run:
# Check if we have updates to minor/patch level packages we're explicitly referencing
Expand All @@ -34,7 +34,7 @@ jobs:
- run:
name: Lint
command: |
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.44.2
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b $(go env GOPATH)/bin v1.47.0
golangci-lint run

- run:
Expand Down
6 changes: 2 additions & 4 deletions .golangci.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
min-complexity = 20

[linters-settings.gofumpt]
lang-version = "1.17"
lang-version = "1.19"
extra-rules = true

[linters-settings.goconst]
Expand All @@ -27,9 +27,7 @@
"govet",
"errcheck",
"unused",
"structcheck",
"varcheck",
"deadcode",
"exhaustive",

"stylecheck",
"gosec",
Expand Down
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,6 @@ Dealing with typos is complicated and heavily context dependent.
- Using the Web for Language Independent Spellchecking and Autocorrection - [http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/36180.pdf](http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/36180.pdf)
- Spellchecking by computer - [https://www.dcs.bbk.ac.uk/~roger/spellchecking.html](https://www.dcs.bbk.ac.uk/~roger/spellchecking.html)

# Wishlist
- Support for [Marking Typos](https://en.wikipedia.org/wiki/Typographical_error#Marking_typos). -- Probably not particularly useful, but seems fun to implement.

# Contributing

First of all: Awesome!
Expand Down
8 changes: 4 additions & 4 deletions buildDocker.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/sh
LATEST_TAG="$(git describe --tags $(git rev-list --tags --max-count=1 4b825dc642cb6eb9a060e54bf8d69288fbee4904))"
TAG_REF="$(git show-ref --hash --tags ${LATEST_TAG})"
LATEST_TAG="$(git describe --tags "$(git rev-list --tags --max-count=1 4b825dc642cb6eb9a060e54bf8d69288fbee4904)")"
TAG_REF="$(git show-ref --hash --tags "${LATEST_TAG}")"

docker build -t dynom/tysug:${LATEST_TAG} \
docker build -t "dynom/tysug:${LATEST_TAG}" \
--build-arg VERSION="${LATEST_TAG}" \
--build-arg GIT_REF="${TAG_REF}" \
. &&
docker tag dynom/tysug:${LATEST_TAG} dynom/tysug:latest
docker tag "dynom/tysug:${LATEST_TAG}" dynom/tysug:latest
3 changes: 1 addition & 2 deletions cmd/web/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"fmt"
"io/ioutil"
"net/http"
"os"

Expand Down Expand Up @@ -87,7 +86,7 @@ func main() {
func buildConfig(fileName string) (Config, error) {
c := Config{}

b, err := ioutil.ReadFile(fileName)
b, err := os.ReadFile(fileName)
if err != nil {
return c, fmt.Errorf("unable to open %q, reason: %s", fileName, err)
}
Expand Down
52 changes: 31 additions & 21 deletions createRelease.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,29 @@ set -o pipefail -o nounset -o errexit -o errtrace
ROOT_DIR="$(pwd)"
while [ ! -d "${ROOT_DIR}/.git" ]; do

ROOT_DIR="$(dirname ${ROOT_DIR})"
if [ "x${ROOT_DIR}" == "x/" ]; then
ROOT_DIR="$(dirname "${ROOT_DIR}")"
if [[ "x${ROOT_DIR}" == "x/" ]]; then
echo "Cannot find .git directory, I use that as reference for the commands."
exit 1
fi
done

# Determine our projectname
NAME="$(basename $(pwd))"
# Determine our project name
NAME="$(basename "$(pwd)")"

# Checking if we have any tags to start with; the hash is the object ID of Git's well-known empty tree
TAGS=$(git rev-list --tags --count 4b825dc642cb6eb9a060e54bf8d69288fbee4904)
if [ ${TAGS} -eq 0 ];
if [[ "${TAGS}" -eq 0 ]];
then
echo "No tags detected for ${ROOT_DIR}, please create a tag first!"
exit 1;
fi

# Figuring out which tags we're on
LATEST_TAG=$(git describe --tags $(git rev-list --tags --max-count=1 4b825dc642cb6eb9a060e54bf8d69288fbee4904))
LATEST_TAG=$(git describe --tags "$(git rev-list --tags --max-count=1 4b825dc642cb6eb9a060e54bf8d69288fbee4904)")
PREV_TAG=$(git tag --sort version:refname | tail -2 | head -1 || true)

if [ "x${LATEST_TAG}" == "x" -a "x${PREV_TAG}" == "x" ];
if [[ "x${LATEST_TAG}" == "x" && "x${PREV_TAG}" == "x" ]];
then
echo "No tag has been found?"
exit 1
Expand All @@ -37,7 +37,7 @@ echo "Previous tag is: ${PREV_TAG}"
echo "Building a release for tag: ${LATEST_TAG}"

# Falling back to the first commit, if we only have one tag
if [ "x${PREV_TAG}" == "x${LATEST_TAG}" ];
if [[ "x${PREV_TAG}" == "x${LATEST_TAG}" ]];
then
PREV_TAG=$(git rev-list --max-parents=0 HEAD)
fi
Expand All @@ -59,22 +59,32 @@ gox -ldflags "-s -w -X main.Version=${LATEST_TAG}" \
./cmd/web

# Archive
HERE=$(pwd)
BUILDDIR=${HERE}/build
for DIR in $(ls build/);
HERE="$(pwd)"
BUILD_DIR="${HERE}/build"
for DIR in "${BUILD_DIR}"/*;
do
OUTDIR="${HERE}/dist"
OUTFILENAME="${DIR}.tar.gz"
OUTFILE="${OUTDIR}/${OUTFILENAME}"
cd ${BUILDDIR}/${DIR} && \
tar -czf ${OUTFILE} * && \
cd ${OUTDIR} && \
shasum -a 512 ${OUTFILENAME} > ${OUTFILE}.sha512
BASE="$(basename "${DIR}")"
OUT_DIR="${HERE}/dist"
OUT_FILE_NAME="${BASE}.tar.gz"
OUT_FILE="${OUT_DIR}/${OUT_FILE_NAME}"
cd "${DIR}" && \
tar -czf "${OUT_FILE}" ./* && \
cd "${OUT_DIR}" && \
shasum -a 512 "${OUT_FILE_NAME}" > "${OUT_FILE}".sha512
done
cd ${HERE}
cd "${HERE}"

# Building the changelog
DIFF_REF="${PREV_TAG}..${LATEST_TAG}"
CHANGELOG="$(printf '# %s\n%s' 'Changelog' "$(git log ${DIFF_REF} --oneline --no-merges --reverse)")"
CHANGELOG="$(printf '# %s\n%s' 'Changelog' "$(git log "${DIFF_REF}" --oneline --no-merges --reverse)")"

echo "Building the changelog based on these two ref's: '${DIFF_REF}'"
github-release Dynom/${NAME} ${LATEST_TAG} "$(git rev-parse --abbrev-ref HEAD)" "${CHANGELOG}" 'dist/*';
# Publish the release with ghr. The owner and repository can be overridden
# through GITHUB_USERNAME and GITHUB_REPOSITORY; when those are unset or empty
# they fall back to "Dynom" and the project name determined above.
# Note the ":-" in both expansions: a bare ":" is substring expansion, which
# aborts under `set -o nounset` when the variable is unset.
ghr -owner "${GITHUB_USERNAME:-Dynom}" \
    -repository "${GITHUB_REPOSITORY:-${NAME}}" \
    -commitish "$(git rev-parse HEAD)" \
    -delete \
    -body "${CHANGELOG}" \
    "${LATEST_TAG}" \
    ./dist/


1 change: 1 addition & 0 deletions finder/algorithm.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func NewWagnerFischer(insert, delete, substitution int) Algorithm {
// - Reduced allocations
// - Added rounding on the unaligned matches as per: http://www.alias-i.com/lingpipe/docs/api/com/aliasi/spell/JaroWinklerDistance.html
// - Added support for 1-character inputs, by making sure the match distance is never negative
//
//nolint:gocognit
func NewJaro() Algorithm {
return func(a, b string) float64 {
Expand Down
28 changes: 17 additions & 11 deletions finder/find.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ import (

// Finder is the type to find the nearest reference
type Finder struct {
referenceMap referenceMapType
reference []string
referenceBucket referenceBucketType
Alg Algorithm
LengthTolerance float64 // A number between 0.0-1.0 (percentage) to allow for length miss-match, anything outside this is considered not similar. Set to 0 to disable.
lock *rwc.RWCMutex
bucketChars uint // @todo figure out what (type of) bucket approach to take. Prefix or perhaps using an ngram/trie approach
referenceMap referenceMapType
reference []string
referenceBucket referenceBucketType
algorithm Algorithm
inputPreProcessors []Processor
lengthTolerance float64 // A number between 0.0-1.0 (percentage) to allow for length miss-match, anything outside this is considered not similar. Set to 0 to disable.
lock *rwc.RWCMutex
bucketChars uint // @todo figure out what (type of) bucket approach to take. Prefix or perhaps using an ngram/trie approach
}

// Errors
var (
ErrNoAlgorithmDefined = errors.New("no algorithm defined")
ErrPrefixExceedsInputLen = errors.New("prefix length exceeds input length")
Expand Down Expand Up @@ -51,7 +51,7 @@ func New(list []string, options ...Option) (*Finder, error) {

i.Refresh(list)

if i.Alg == nil {
if i.algorithm == nil {
return i, ErrNoAlgorithmDefined
}

Expand Down Expand Up @@ -173,6 +173,12 @@ func (t *Finder) findTopRankingCtx(ctx context.Context, input string, prefixLeng
return []string{input}, WorstScoreValue, false, ErrPrefixExceedsInputLen
}

if len(t.inputPreProcessors) > 0 {
for _, p := range t.inputPreProcessors {
input = p(input)
}
}

t.lock.RLock()
defer t.lock.RUnlock()

Expand All @@ -198,11 +204,11 @@ func (t *Finder) findTopRankingCtx(ctx context.Context, input string, prefixLeng
}

// Test if the input length differs too much from the reference, making it an unlikely typo.
if !meetsLengthTolerance(t.LengthTolerance, input, ref) {
if !meetsLengthTolerance(t.lengthTolerance, input, ref) {
continue
}

score := t.Alg(input, ref)
score := t.algorithm(input, ref)
if score > hs {
hs = score
sameScore = sameScore[0:1]
Expand Down
12 changes: 6 additions & 6 deletions finder/find_benchmarks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func BenchmarkFindWithBucket(b *testing.B) {
f, _ := New(refs,
WithAlgorithm(alg),
WithLengthTolerance(0),
WithPrefixBuckets(false),
WithPrefixBuckets(true),
)

b.ResetTimer()
Expand All @@ -120,7 +120,7 @@ func BenchmarkFindWithBucket(b *testing.B) {
f, _ := New(refs,
WithAlgorithm(alg),
WithLengthTolerance(0),
WithPrefixBuckets(true),
WithPrefixBuckets(false),
)

b.ResetTimer()
Expand Down Expand Up @@ -166,11 +166,11 @@ func BenchmarkCopyOrAppend(b *testing.B) {
}
})

// "dst smaller copy" can't work, since the result won't contain all items or requires logic which'll make the
// implementation slower than an append
// "dst smaller copy" can't work, since the result won't contain all items or requires logic which makes the
// implementation slower than append

b.Run("dst smaller append", func(b *testing.B) {
refsAppendDst = make([]string, int(numToAllocate/2))
refsAppendDst = make([]string, numToAllocate/2)
b.ResetTimer()
for i := 0; i < b.N; i++ {
refsAppendDst = append(refsAppendSrc[:0:0], refsAppendSrc...)
Expand Down Expand Up @@ -198,7 +198,7 @@ func BenchmarkCopyOrAppend(b *testing.B) {
})

b.Run("dst larger append", func(b *testing.B) {
refsAppendDst = make([]string, int(numToAllocate*2))
refsAppendDst = make([]string, numToAllocate*2)
b.ResetTimer()
for i := 0; i < b.N; i++ {
refsAppendDst = append(refsAppendSrc[:0:0], refsAppendSrc...)
Expand Down
8 changes: 2 additions & 6 deletions finder/find_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ func TestNoInput(t *testing.T) {

func TestContextCancel(t *testing.T) {
sug, err := New([]string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m"}, func(sug *Finder) {
sug.Alg = func(a, b string) float64 {
sug.algorithm = func(a, b string) float64 {
time.Sleep(10 * time.Millisecond)
return 1
}
Expand Down Expand Up @@ -453,7 +453,7 @@ func TestFinder_FindTopRankingPrefixCtx(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t1 *testing.T) {
finder, _ := New(refs, func(sug *Finder) {
sug.Alg = func(a, b string) float64 {
sug.algorithm = func(a, b string) float64 {
return 1
}
})
Expand Down Expand Up @@ -750,15 +750,13 @@ func Test_meetsPrefixLengthMatch(t *testing.T) {
}

func BenchmarkFindTopRankingCTXRace(b *testing.B) {

sort.Strings(inspirationalRefList)
f, err := New(
inspirationalRefList[0:5],
WithAlgorithm(exampleAlgorithm),
WithLengthTolerance(0),
WithPrefixBuckets(false),
)

if err != nil {
b.Fatal("Setting up test failed")
}
Expand All @@ -781,15 +779,13 @@ func BenchmarkFindTopRankingCTXRace(b *testing.B) {
}

func BenchmarkFindTopRankingCTX(b *testing.B) {

sort.Strings(inspirationalRefList)
f, err := New(
inspirationalRefList[0:5],
WithAlgorithm(exampleAlgorithm),
WithLengthTolerance(0),
WithPrefixBuckets(false),
)

if err != nil {
b.Fatal("Setting up test failed")
}
Expand Down
10 changes: 8 additions & 2 deletions finder/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ type Option func(sug *Finder)
// WithAlgorithm allows you to set any algorithm
func WithAlgorithm(alg Algorithm) Option {
return func(s *Finder) {
s.Alg = alg
s.algorithm = alg
}
}

Expand All @@ -15,7 +15,7 @@ func WithAlgorithm(alg Algorithm) Option {
// size, with a minimum of 1 character. A value of 0 (the default) disables this feature.
func WithLengthTolerance(t float64) Option {
return func(s *Finder) {
s.LengthTolerance = t
s.lengthTolerance = t
}
}

Expand All @@ -28,3 +28,9 @@ func WithPrefixBuckets(enable bool) Option {
}
}
}

// WithPreProcessor configures the processors that are applied to the input,
// in the order given, before the input is scored against the references.
// Using this option again replaces any previously configured processors.
//
// The processors are copied when the option is created, so later changes to
// the caller's slice do not affect the Finder.
func WithPreProcessor(p ...Processor) Option {
	// Detach from the caller's backing array; a variadic call with `s...`
	// would otherwise share it with the Finder.
	processors := append([]Processor(nil), p...)

	return func(sug *Finder) {
		sug.inputPreProcessors = processors
	}
}
2 changes: 1 addition & 1 deletion finder/option_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ func TestSetAlgorithm(t *testing.T) {

sug, err := New([]string{}, WithAlgorithm(veryPositiveAlg))

if sug.Alg == nil || err == ErrNoAlgorithmDefined {
if sug.algorithm == nil || err == ErrNoAlgorithmDefined {
t.Errorf("Expected the algorithm to be set")
}
}
Expand Down
Loading