Skip to content

Commit

Permalink
Evaluate URLs within angle brackets
Browse files Browse the repository at this point in the history
Match URLs enclosed within angle brackets as URL patterns,
removing the angle brackets before parsing and further evaluation as potential
Safe Links URLs.

Remove double-cleaning of inputURL left over from earlier
refactor work on cmd/usl to "pull back" functionality from
the safelinks package.

refs GH-246
  • Loading branch information
atc0005 committed Apr 10, 2024
1 parent 4bfceb2 commit 57cc8a1
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 20 deletions.
6 changes: 3 additions & 3 deletions cmd/usl/urls.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ func ReadURLsFromInput(inputURL string) ([]string, error) {
return nil, safelinks.ErrInvalidURL
}

inputURLs = append(inputURLs, safelinks.CleanURL(flag.Args()[0]))
inputURLs = append(inputURLs, flag.Args()[0])

// We received a URL via flag.
case inputURL != "":
// fmt.Fprintln(os.Stderr, "Received URL via flag")

inputURLs = append(inputURLs, safelinks.CleanURL(inputURL))
inputURLs = append(inputURLs, inputURL)

// Input URL not given via positional argument, not given via flag either.
// We prompt the user for a single input value.
Expand All @@ -77,7 +77,7 @@ func ReadURLsFromInput(inputURL string) ([]string, error) {
return nil, safelinks.ErrInvalidURL
}

inputURLs = append(inputURLs, safelinks.CleanURL(input))
inputURLs = append(inputURLs, input)
}

return inputURLs, nil
Expand Down
60 changes: 43 additions & 17 deletions internal/safelinks/safelinks.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ func CleanURL(s string) string {
s = strings.ReplaceAll(s, `'`, "")
s = strings.ReplaceAll(s, `"`, "")

// Strip off potential enclosing angle brackets.
s = strings.Trim(s, `<>`)

// Replace escaped ampersands with literal ampersands.
// inputURL = strings.ReplaceAll(flag.Args()[1], "&amp;", "&")

Expand Down Expand Up @@ -140,28 +143,29 @@ func assertValidURLParameter(u *url.URL) error {
// NOTE: Validation is not performed to ensure that matched patterns are valid
// URLs.
//
// Internal logic uses a regular expression to match URL patterns beginning
// with 'https://' and ending with a whitespace character.
// Internal logic uses a regular expression to match URL patterns optionally
// beginning with a left angle bracket, then 'https://' and ending with a
// whitespace character or a right angle bracket. Any angle brackets present
// are trimmed from returned matches.
func GetURLPatternsUsingRegex(input string) ([]FoundURLPattern, error) {
// urls := make([]url.URL, 0, 5)

urlPatterns := make([]FoundURLPattern, 0, 5)

if !strings.Contains(input, SafeLinksURLRequiredPrefix) {
return nil, ErrNoURLsFound
}

// This works but would match regular http:// prefixes:
//
// https://www.honeybadger.io/blog/a-definitive-guide-to-regular-expressions-in-go/
// urlRegex := `https?://\S+|www\.\S+`

urlRegex := SafeLinksURLRequiredPrefix + `\S+|www\.\S+`
urlRegex := `<?` + SafeLinksURLRequiredPrefix + `\S+>?`

r := regexp.MustCompile(urlRegex)

matches := r.FindAllString(input, -1)
log.Println("Matches:", matches)
log.Printf("Matches (%d): %q\n", len(matches), matches)

log.Println("Cleaning URLs of enclosing angle brackets")
for i := range matches {
matches[i] = strings.Trim(matches[i], "<>")
}
log.Printf("Matches (%d) trimmed: %q\n", len(matches), matches)

for _, match := range matches {
urlPatterns = append(
Expand All @@ -186,7 +190,9 @@ func GetURLPatternsUsingRegex(input string) ([]FoundURLPattern, error) {
// valid URLs.
//
// Internal logic uses slice indexing/iteration to match URL patterns
// beginning with 'https://' and ending with a whitespace character.
// beginning with 'https://' and ending with a whitespace character or a right
// angle bracket. Any angle brackets present are trimmed from returned
// matches.
func GetURLPatternsUsingIndex(input string) ([]FoundURLPattern, error) {
// urls := make([]url.URL, 0, 5)
urlPatterns := make([]FoundURLPattern, 0, 5)
Expand All @@ -211,7 +217,17 @@ func GetURLPatternsUsingIndex(input string) ([]FoundURLPattern, error) {
break
}

urlEnd := getURLIndexEndPosition(remaining, next)
// Assume we found ending point until proven otherwise.
// urlEnd := next

// for _, char := range remaining[next:] {
// if unicode.IsSpace(char) {
// break // we found end of URL pattern
// }
// urlEnd++
// }

urlEnd := getURLIndexEndPosition(remaining[next:], next)

urlPatterns = append(
urlPatterns,
Expand Down Expand Up @@ -261,13 +277,22 @@ func GetURLPatternsUsingPrefixMatchingOnFields(input string) ([]FoundURLPattern,

fields := strings.Fields(input)
for _, field := range fields {
if strings.HasPrefix(field, SafeLinksURLRequiredPrefix) {
switch {
case strings.HasPrefix(field, SafeLinksURLRequiredPrefix):
urlPatterns = append(
urlPatterns,
FoundURLPattern{
URLPattern: field,
},
)

case strings.HasPrefix(field, "<"+SafeLinksURLRequiredPrefix):
urlPatterns = append(
urlPatterns,
FoundURLPattern{
URLPattern: strings.Trim(field, "<>"),
},
)
}
}

Expand Down Expand Up @@ -360,13 +385,14 @@ func FromURLs(urls []*url.URL) ([]SafeLinkURL, error) {
}

// getURLIndexEndPosition accepts an input string and a starting position and
// iterates until it finds the first space character. This is assumed to be
// the separator used to indicate the end of a URL pattern.
// iterates until it finds the first space character or the first right angle
// bracket. Either is assumed to be the separator used to indicate the end of
// a URL pattern.
func getURLIndexEndPosition(input string, startPos int) int {
endPos := startPos

for _, char := range input[startPos:] {
if unicode.IsSpace(char) {
if unicode.IsSpace(char) || char == '>' {
break // we found end of URL pattern
}
endPos++
Expand Down

0 comments on commit 57cc8a1

Please sign in to comment.