Skip to content

Commit

Permalink
fix: parse time function for nedlib
Browse files Browse the repository at this point in the history
# Motivation

`nedlib` data may contain dates written in several different
time formats. This commit adds a function that tries each of the
time formats seen in `nedlib` data (RFC 1123, RFC 850, ANSI C and Unix date) in turn.

# Future work

Add parsing of custom time formats.
  • Loading branch information
maeb authored and trym-b committed May 16, 2024
1 parent 64cc03e commit 9125ea1
Show file tree
Hide file tree
Showing 3 changed files with 117 additions and 1 deletion.
2 changes: 1 addition & 1 deletion nedlibreader/nedlibreader.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ func (nedlibReader *NedlibReader) Next() (gowarc.WarcRecord, int64, *gowarc.Vali
header := response.Header
dateString := header.Get("Date")
if dateString != "" {
t, err := time.Parse(time.RFC1123, dateString)
t, _, err := parseTime(dateString)
if err != nil {
return nil, 0, validation, err
}
Expand Down
38 changes: 38 additions & 0 deletions nedlibreader/time.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package nedlibreader

import (
"fmt"
"time"
)

// Names reported by parseTime to identify which layout from the standard
// "time" package matched the input.
const (
	RFC1123  = "RFC1123"
	RFC850   = "RFC850"
	ANSIC    = "ANSIC"
	UnixDate = "UnixDate"
)

// parseTime converts dateString into a time.Time by trying a fixed list of
// layouts in order: time.RFC1123, time.RFC850, time.ANSIC and time.UnixDate.
//
// On success it returns the parsed time, the name of the first layout that
// matched (one of the constants RFC1123, RFC850, ANSIC, UnixDate) and a nil
// error. If no layout matches it returns the zero time, an empty name and an
// error that wraps the failure reported for the last layout tried.
func parseTime(dateString string) (time.Time, string, error) {
	// Order matters: the first layout that parses the input wins.
	candidates := [...]struct {
		name   string
		layout string
	}{
		{RFC1123, time.RFC1123},
		{RFC850, time.RFC850},
		{ANSIC, time.ANSIC},
		{UnixDate, time.UnixDate},
	}

	var lastErr error
	for _, candidate := range candidates {
		parsed, err := time.Parse(candidate.layout, dateString)
		if err == nil {
			return parsed, candidate.name, nil
		}
		lastErr = err
	}

	return time.Time{}, "", fmt.Errorf("failed to parse string as time.Time: '%s': '%w'", dateString, lastErr)
}
78 changes: 78 additions & 0 deletions nedlibreader/time_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package nedlibreader

import (
"testing"
"time"
)

// testTime is a single input/expectation pair for TestParseTime.
type testTime struct {
// timeAsString is the raw input handed to parseTime.
timeAsString string
// time is the value the parsed result is compared against; it is only
// checked when parseTime returns no error.
time time.Time
// shouldFail is true when parseTime is expected to return an error.
shouldFail bool
}

// testTable groups test cases that share the same expected layout name.
type testTable struct {
// expectedFormat is the layout name parseTime should report for every
// successfully parsed case in tests; it is also used as the subtest name prefix.
expectedFormat string
// tests are the individual cases belonging to this group.
tests []testTime
}

// TestParseTime runs parseTime over one accepted and one rejected input per
// supported layout. For accepted inputs it checks the reported layout name
// and the parsed instant; for rejected inputs it only checks that an error
// is returned.
func TestParseTime(t *testing.T) {
	// Every case describes the same instant, so the expectation is shared.
	want := time.Date(2024, time.April, 5, 15, 30, 0, 0, time.UTC)

	groups := []testTable{
		{
			expectedFormat: RFC1123,
			tests: []testTime{
				{timeAsString: "Tue, 05 Apr 2024 15:30:00 GMT", time: want, shouldFail: false},
				{timeAsString: "Tue, 05 Apr 202 15:30:00 GMT", time: want, shouldFail: true},
			},
		},
		{
			expectedFormat: RFC850,
			tests: []testTime{
				{timeAsString: "Tuesday, 05-Apr-24 15:30:00 GMT", time: want, shouldFail: false},
				{timeAsString: "Tue, 05 Ap 2024 15:30:00 GMT", time: want, shouldFail: true},
			},
		},
		{
			expectedFormat: ANSIC,
			tests: []testTime{
				{timeAsString: "Tue Apr 5 15:30:00 2024", time: want, shouldFail: false},
				{timeAsString: "Tue, 05 Apr 204 15:30:00 GMT", time: want, shouldFail: true},
			},
		},
		{
			expectedFormat: UnixDate,
			tests: []testTime{
				{timeAsString: "Tue Apr 5 15:30:00 UTC 2024", time: want, shouldFail: false},
				{timeAsString: "Tue, 5 Apr 2024 15:30:00 GMT", time: want, shouldFail: true},
			},
		},
	}

	for _, group := range groups {
		wantFormat := group.expectedFormat
		for _, tc := range group.tests {
			tc := tc // per-iteration copy for the closure below
			t.Run(wantFormat+"-"+tc.timeAsString, func(t *testing.T) {
				t.Log("Time string: ", tc.timeAsString)
				t.Log("Expected time: ", tc.time)
				t.Log("Should fail: ", tc.shouldFail)
				t.Log("Expected format: ", wantFormat)

				gotTime, gotFormat, err := parseTime(tc.timeAsString)
				if err != nil {
					// A parse failure is only a test failure when it was unexpected.
					if !tc.shouldFail {
						t.Errorf("expected no error, got: %v", err)
					}
					return
				}

				if tc.shouldFail {
					t.Errorf("expected error, got none")
				}
				if gotFormat != wantFormat {
					t.Errorf("expected format: '%v', got: '%v'", wantFormat, gotFormat)
				}
				if !gotTime.Equal(tc.time) {
					t.Errorf("expected time: '%v', got: '%v'", tc.time, gotTime)
				}
			})
		}
	}
}

0 comments on commit 9125ea1

Please sign in to comment.