Skip to content

Commit

Permalink
feat(helm): update helm search
Browse files Browse the repository at this point in the history
Switch 'helm search' from file crawling to using the indices. Also
add scorable indexing, forward porting the search code I originally
wrote for Helm Classic.

Closes helm#1226
Partially addresses helm#1199
  • Loading branch information
technosophos authored and Ville Aikas committed Oct 17, 2016
1 parent 7aad6ba commit f0f4331
Show file tree
Hide file tree
Showing 8 changed files with 593 additions and 124 deletions.
1 change: 1 addition & 0 deletions cmd/helm/helm.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ func newRootCmd(out io.Writer) *cobra.Command {
newVersionCmd(nil, out),
newRepoCmd(out),
newDependencyCmd(out),
newSearchCmd(out),
)
return cmd
}
Expand Down
125 changes: 67 additions & 58 deletions cmd/helm/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,90 +17,99 @@ limitations under the License.
package main

import (
"errors"
"fmt"
"os"
"path/filepath"
"io"
"strings"

"github.com/spf13/cobra"

"k8s.io/helm/cmd/helm/helmpath"
"k8s.io/helm/cmd/helm/search"
"k8s.io/helm/pkg/repo"
)

func init() {
RootCommand.AddCommand(searchCmd)
const searchDesc = `
Search reads through all of the repositories configured on the system, and
looks for matches.
Repositories are managed with 'helm repo' commands.
`

// searchMaxScore suggests that any score higher than this is not considered a match.
const searchMaxScore = 25

type searchCmd struct {
out io.Writer
helmhome helmpath.Home

regexp bool
}

var searchCmd = &cobra.Command{
Use: "search [keyword]",
Short: "search for a keyword in charts",
Long: "Searches the known repositories cache files for the specified search string, looks at name and keywords",
RunE: search,
PreRunE: requireInit,
func newSearchCmd(out io.Writer) *cobra.Command {
sc := &searchCmd{out: out, helmhome: helmpath.Home(homePath())}

cmd := &cobra.Command{
Use: "search [keyword]",
Short: "search for a keyword in charts",
Long: searchDesc,
RunE: func(cmd *cobra.Command, args []string) error {
return sc.run(args)
},
PreRunE: requireInit,
}

cmd.Flags().BoolVarP(&sc.regexp, "regexp", "r", false, "use regular expressions for searching")

return cmd
}

func search(cmd *cobra.Command, args []string) error {
func (s *searchCmd) run(args []string) error {
index, err := s.buildIndex()
if err != nil {
return err
}

if len(args) == 0 {
return errors.New("This command needs at least one argument (search string)")
s.showAllCharts(index)
}

// TODO: This needs to be refactored to use loadChartRepositories
results, err := searchCacheForPattern(cacheDirectory(), args[0])
q := strings.Join(args, " ")
res, err := index.Search(q, searchMaxScore, s.regexp)
if err != nil {
return err
return nil
}
if len(results) > 0 {
for _, result := range results {
fmt.Println(result)
}
search.SortScore(res)

for _, r := range res {
fmt.Fprintln(s.out, r.Name)
}

return nil
}

func searchChartRefsForPattern(search string, chartRefs map[string]*repo.ChartRef) []string {
matches := []string{}
for k, c := range chartRefs {
if strings.Contains(c.Name, search) && !c.Removed {
matches = append(matches, k)
continue
}
if c.Chartfile == nil {
continue
}
for _, keyword := range c.Chartfile.Keywords {
if strings.Contains(keyword, search) {
matches = append(matches, k)
}
}
func (s *searchCmd) showAllCharts(i *search.Index) {
for name := range i.Entries() {
fmt.Fprintln(s.out, name)
}
return matches
}

func searchCacheForPattern(dir string, search string) ([]string, error) {
fileList := []string{}
filepath.Walk(dir, func(path string, f os.FileInfo, err error) error {
if !f.IsDir() {
fileList = append(fileList, path)
}
return nil
})
matches := []string{}
for _, f := range fileList {
index, err := repo.LoadIndexFile(f)
func (s *searchCmd) buildIndex() (*search.Index, error) {
// Load the repositories.yaml
rf, err := repo.LoadRepositoriesFile(s.helmhome.RepositoryFile())
if err != nil {
return nil, err
}

i := search.NewIndex()
for n := range rf.Repositories {
f := s.helmhome.CacheIndex(n)
ind, err := repo.LoadIndexFile(f)
if err != nil {
return matches, fmt.Errorf("index %s corrupted: %s", f, err)
fmt.Fprintf(s.out, "WARNING: Repo %q is corrupt. Try 'helm update': %s", f, err)
continue
}

m := searchChartRefsForPattern(search, index.Entries)
repoName := strings.TrimSuffix(filepath.Base(f), "-index.yaml")
for _, c := range m {
// TODO: Is it possible for this file to be missing? Or to have
// an extension other than .tgz? Should the actual filename be in
// the YAML?
fname := filepath.Join(repoName, c+".tgz")
matches = append(matches, fname)
}
i.AddRepo(n, ind)
}
return matches, nil
return i, nil
}
183 changes: 183 additions & 0 deletions cmd/helm/search/search.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*Package search provides client-side repository searching.
This supports building an in-memory search index based on the contents of
multiple repositories, and then using string matching or regular expressions
to find matches.
*/
package search

import (
"errors"
"path/filepath"
"regexp"
"sort"
"strings"

"k8s.io/helm/pkg/repo"
)

// Result is a search result.
//
// Score indicates how close the match is. The higher the score, the longer
// the distance.
type Result struct {
// Name is the index key of the chart that matched.
Name string
// Score is the value computed by calcScore for the match; lower scores are
// more relevant.
Score int
}

// Index is a searchable index of chart information.
type Index struct {
// lines maps each chart key to the lowercased, sep-delimited search text
// produced by indstr.
lines map[string]string
// charts maps the same chart keys to the ChartRef they were built from.
charts map[string]*repo.ChartRef
}

// sep delimits the fields of an index line built by indstr. calcScore relies
// on it being a single character.
const sep = "\v"

// NewIndex creates a new, empty Index whose internal maps are ready for use.
func NewIndex() *Index {
	idx := new(Index)
	idx.lines = make(map[string]string)
	idx.charts = make(map[string]*repo.ChartRef)
	return idx
}

// AddRepo adds a repository index to the search index.
//
// Every entry of ind is registered under the key "<rname>/<entry name>". The
// key is always slash-separated, regardless of the host operating system, so
// that it agrees with the "repo/chart" text that indstr writes into the
// searchable line. An entry whose key already exists is overwritten.
func (i *Index) AddRepo(rname string, ind *repo.IndexFile) {
	for name, ref := range ind.Entries {
		// Keys are chart identifiers, not filesystem paths. filepath.Join
		// uses the OS separator (a backslash on Windows), so normalize the
		// result back to forward slashes.
		fname := filepath.ToSlash(filepath.Join(rname, name))
		i.lines[fname] = indstr(rname, ref)
		i.charts[fname] = ref
	}
}

// Entries returns every chart known to the index, keyed by chart key.
//
// The returned map is the index's own map, not a copy.
func (i *Index) Entries() map[string]*repo.ChartRef {
	charts := i.charts
	return charts
}

// Search looks up term in the index and returns the matching results.
//
// threshold is the exclusive upper bound for a result's score: only matches
// scoring below it are returned. (Low score means higher relevance. Golf,
// not bowling.)
//
// When regexp is true, term is compiled and matched as a regular expression
// and a compile failure is returned as the error. Otherwise term is matched
// as a literal string and the error is always nil.
func (i *Index) Search(term string, threshold int, regexp bool) ([]*Result, error) {
	if !regexp {
		return i.SearchLiteral(term, threshold), nil
	}
	return i.SearchRegexp(term, threshold)
}

// calcScore calculates a score for a match.
//
// index is the byte offset of the match inside matchline. The score is the
// number of separators that occur before that offset, i.e. the position of
// the field in which the match starts.
func (i *Index) calcScore(index int, matchline string) int {
	// This is currently tied to the fact that sep is a single char.
	delim := rune(sep[0])

	score := 0
	for pos, ch := range matchline {
		// Only separators strictly before the match count.
		if ch == delim && pos < index {
			score++
		}
	}
	return score
}

// SearchLiteral does a literal (non-regexp) substring search.
//
// term is lowercased before matching. Every indexed line that contains it
// and whose score is below threshold yields one Result. The returned slice
// is never nil, and its order follows map iteration and is therefore
// unspecified.
func (i *Index) SearchLiteral(term string, threshold int) []*Result {
	needle := strings.ToLower(term)
	matches := []*Result{}
	for key, line := range i.lines {
		at := strings.Index(line, needle)
		if at == -1 {
			continue
		}
		score := i.calcScore(at, line)
		if score >= threshold {
			continue
		}
		matches = append(matches, &Result{Name: key, Score: score})
	}
	return matches
}

// SearchRegexp searches the index using a regular expression.
//
// re is compiled exactly as given; if it does not compile, an empty slice
// and the compile error are returned. Each indexed line is scored at the
// start of its leftmost match, and only scores below threshold are kept.
// The order of the returned slice is unspecified.
func (i *Index) SearchRegexp(re string, threshold int) ([]*Result, error) {
	matcher, err := regexp.Compile(re)
	if err != nil {
		return []*Result{}, err
	}

	found := []*Result{}
	for key, line := range i.lines {
		loc := matcher.FindStringIndex(line)
		if len(loc) == 0 {
			// No match in this line.
			continue
		}
		start := loc[0]
		if start < 0 {
			continue
		}
		if score := i.calcScore(start, line); score < threshold {
			found = append(found, &Result{Name: key, Score: score})
		}
	}
	return found, nil
}

// Chart looks up the ChartRef stored under name.
//
// It returns an error when the index holds no chart under that key.
func (i *Index) Chart(name string) (*repo.ChartRef, error) {
	if ref, found := i.charts[name]; found {
		return ref, nil
	}
	return nil, errors.New("no such chart")
}

// SortScore sorts the results in place.
//
// Results are ordered by ascending score; results with equal scores are
// ordered alphabetically by name.
func SortScore(r []*Result) {
	byScore := scoreSorter(r)
	sort.Sort(byScore)
}

// scoreSorter orders results by ascending score, breaking ties by Name.
type scoreSorter []*Result

// Len reports how many results are being sorted.
func (s scoreSorter) Len() int {
	return len(s)
}

// Swap exchanges the results at positions i and j.
func (s scoreSorter) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}

// Less reports whether the result at a sorts before the result at b: a lower
// score wins, and equal scores fall back to comparing names.
func (s scoreSorter) Less(a, b int) bool {
	left, right := s[a], s[b]
	if left.Score != right.Score {
		return left.Score < right.Score
	}
	return left.Name < right.Name
}

// indstr builds the lowercased search line for a chart.
//
// The line consists of sep-delimited fields in this order: the chart name,
// "<repo name>/<chart name>", and, when the chart has a Chartfile, its
// description followed by its keywords.
func indstr(name string, ref *repo.ChartRef) string {
	pieces := []string{ref.Name, sep, name, "/", ref.Name, sep}
	if cf := ref.Chartfile; cf != nil {
		pieces = append(pieces, cf.Description, sep, strings.Join(cf.Keywords, sep))
	}
	return strings.ToLower(strings.Join(pieces, ""))
}
Loading

0 comments on commit f0f4331

Please sign in to comment.