From d3c9b6a7bd6b758d745383ca8da4dc33fecdfa45 Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Thu, 10 Jun 2021 16:53:13 +0800 Subject: [PATCH 01/60] go mod --- .gitignore | 2 + dataframe/dataframe.go | 22 +++---- dataframe/dataframe_test.go | 36 +++++------ dataframe/examples_test.go | 126 ++++++++++++++++++------------------ go.mod | 5 ++ 5 files changed, 99 insertions(+), 92 deletions(-) create mode 100644 .gitignore create mode 100644 go.mod diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bcf81ef --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.vscode +go.sum \ No newline at end of file diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index cf1ae41..fc5bfcd 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -37,7 +37,7 @@ type DataFrame struct { // New is the generic DataFrame constructor func New(se ...series.Series) DataFrame { - if se == nil || len(se) == 0 { + if len(se) == 0 { return DataFrame{Err: fmt.Errorf("empty DataFrame")} } @@ -233,7 +233,7 @@ func (df DataFrame) print( } } if i < len(notShowing) { - notShownArr = append(notShownArr, notShowing[i:len(notShowing)]) + notShownArr = append(notShownArr, notShowing[i:]) } for k, ns := range notShownArr { notShown += strings.Join(ns, ", ") @@ -529,7 +529,7 @@ func (df DataFrame) Arrange(order ...Order) DataFrame { if df.Err != nil { return df } - if order == nil || len(order) == 0 { + if len(order) == 0 { return DataFrame{Err: fmt.Errorf("rename: no arguments")} } @@ -1392,7 +1392,7 @@ func (df DataFrame) LeftJoin(b DataFrame, keys ...string) DataFrame { newCols[ii].Append(elem) ii++ } - for _ = range iNotKeysB { + for range iNotKeysB { newCols[ii].Append(nil) ii++ } @@ -1496,7 +1496,7 @@ func (df DataFrame) RightJoin(b DataFrame, keys ...string) DataFrame { newCols[ii].Append(elem) ii++ } - for _ = range iNotKeysA { + for range iNotKeysA { newCols[ii].Append(nil) ii++ } @@ -1598,7 +1598,7 @@ func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame { newCols[ii].Append(elem) ii++ } - for _ = range iNotKeysB { + for range iNotKeysB { newCols[ii].Append(nil) ii++ } @@ -1624,7 +1624,7 @@ func (df DataFrame) OuterJoin(b DataFrame, keys ...string) DataFrame { newCols[ii].Append(elem) ii++ } - for _ = range iNotKeysA { + for range iNotKeysA { newCols[ii].Append(nil) ii++ } @@ -1786,13 +1786,13 @@ func findInStringSlice(str string, s []string) int { func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, error) { var idx []int - switch indexes.(type) { + switch idt := indexes.(type) { case []int: - idx = indexes.([]int) + idx = idt case int: - idx = []int{indexes.(int)} + idx = []int{idt} case []bool: - bools := indexes.([]bool) + bools := idt if len(bools) != l { return nil, fmt.Errorf("indexing error: index dimensions mismatch") } diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 114c0e4..7de91af 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -1008,13 +1008,13 @@ func TestLoadMaps(t *testing.T) { { // Test: 0 LoadMaps( []map[string]interface{}{ - map[string]interface{}{ + { "A": "a", "B": 1, "C": true, "D": 0, }, - map[string]interface{}{ + { "A": "b", "B": 2, "C": true, @@ -1032,13 +1032,13 @@ func TestLoadMaps(t *testing.T) { { // Test: 1 LoadMaps( []map[string]interface{}{ - map[string]interface{}{ + { "A": "a", "B": 1, "C": true, "D": 0, }, - map[string]interface{}{ + { "A": "b", "B": 2, "C": true, @@ -1059,13 +1059,13 @@ func TestLoadMaps(t *testing.T) { { // Test: 2 LoadMaps( []map[string]interface{}{ - map[string]interface{}{ + { "A": "a", "B": 1, "C": true, "D": 0, }, - map[string]interface{}{ + { "A": "b", "B": 2, "C": true, @@ -1086,13 +1086,13 @@ func TestLoadMaps(t *testing.T) { { // Test: 3 LoadMaps( []map[string]interface{}{ - map[string]interface{}{ + { "A": "a", "B": 1, "C": true, "D": 0, }, - map[string]interface{}{ + { "A": "b", "B": 2, "C": true, @@ -1117,13 +1117,13 @@ func TestLoadMaps(t *testing.T) { { // Test: 4 LoadMaps( []map[string]interface{}{ - map[string]interface{}{ + { "A": "a", "B": 1, "C": true, "D": 0, }, - map[string]interface{}{ + { "A": "b", "B": 2, "C": true, @@ -1641,17 +1641,17 @@ func TestDataFrame_Maps(t *testing.T) { ) m := a.Maps() expected := []map[string]interface{}{ - map[string]interface{}{ + { "COL.1": "a", "COL.2": nil, "COL.3": nil, }, - map[string]interface{}{ + { "COL.1": "b", "COL.2": 2, "COL.3": nil, }, - map[string]interface{}{ + { "COL.1": "c", "COL.2": 3, "COL.3": 3, @@ -2491,11 +2491,11 @@ func TestDescribe(t *testing.T) { { LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"b", "4", "6.0", "true"}, - []string{"c", "3", "6.0", "false"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"b", "4", "6.0", "true"}, + {"c", "3", "6.0", "false"}, + {"a", "2", "7.1", "false"}, }), New( diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go index 6687ea7..8cdb36c 100644 --- a/dataframe/examples_test.go +++ b/dataframe/examples_test.go @@ -24,9 +24,9 @@ func ExampleLoadStructs() { Accuracy float64 } users := []User{ - User{"Aram", 17, 0.2}, - User{"Juan", 18, 0.8}, - User{"Ana", 22, 0.5}, + {"Aram", 17, 0.2}, + {"Juan", 18, 0.8}, + {"Ana", 22, 0.5}, } df := dataframe.LoadStructs(users) fmt.Println(df) @@ -35,11 +35,11 @@ func ExampleLoadStructs() { func ExampleLoadRecords() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, ) fmt.Println(df) @@ -48,11 +48,11 @@ func ExampleLoadRecords() { func ExampleLoadRecords_options() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, dataframe.DetectTypes(false), dataframe.DefaultType(series.Float), @@ -67,13 +67,13 @@ func ExampleLoadRecords_options() { func ExampleLoadMaps() { df := dataframe.LoadMaps( []map[string]interface{}{ - map[string]interface{}{ + { "A": "a", "B": 1, "C": true, "D": 0, }, - map[string]interface{}{ + { "A": "b", "B": 2, "C": true, @@ -109,11 +109,11 @@ func ExampleReadJSON() { func ExampleDataFrame_Subset() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, ) sub := df.Subset([]int{0, 2}) @@ -123,11 +123,11 @@ func ExampleDataFrame_Subset() { func ExampleDataFrame_Select() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, ) sel1 := df.Select([]int{0, 2}) @@ -139,11 +139,11 @@ func ExampleDataFrame_Select() { func ExampleDataFrame_Filter() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, ) fil := df.Filter( @@ -172,11 +172,11 @@ func ExampleDataFrame_Filter() { func ExampleDataFrame_Mutate() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, ) // Change column C with a new one @@ -194,20 +194,20 @@ func ExampleDataFrame_Mutate() { func ExampleDataFrame_InnerJoin() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, ) df2 := dataframe.LoadRecords( [][]string{ - []string{"A", "F", "D"}, - []string{"1", "1", "true"}, - []string{"4", "2", "false"}, - []string{"2", "8", "false"}, - []string{"5", "9", "false"}, + {"A", "F", "D"}, + {"1", "1", "true"}, + {"4", "2", "false"}, + {"2", "8", "false"}, + {"5", "9", "false"}, }, ) join := df.InnerJoin(df2, "D") @@ -217,20 +217,20 @@ func ExampleDataFrame_InnerJoin() { func ExampleDataFrame_Set() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"k", "5", "7.0", "true"}, - []string{"k", "4", "6.0", "true"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"k", "5", "7.0", "true"}, + {"k", "4", "6.0", "true"}, + {"a", "2", "7.1", "false"}, }, ) df2 := df.Set( series.Ints([]int{0, 2}), dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"b", "4", "6.0", "true"}, - []string{"c", "3", "6.0", "false"}, + {"A", "B", "C", "D"}, + {"b", "4", "6.0", "true"}, + {"c", "3", "6.0", "false"}, }, ), ) @@ -240,11 +240,11 @@ func ExampleDataFrame_Set() { func ExampleDataFrame_Arrange() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"b", "4", "6.0", "true"}, - []string{"c", "3", "6.0", "false"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"b", "4", "6.0", "true"}, + {"c", "3", "6.0", "false"}, + {"a", "2", "7.1", "false"}, }, ) sorted := df.Arrange( @@ -257,11 +257,11 @@ func ExampleDataFrame_Arrange() { func ExampleDataFrame_Describe() { df := dataframe.LoadRecords( [][]string{ - []string{"A", "B", "C", "D"}, - []string{"a", "4", "5.1", "true"}, - []string{"b", "4", "6.0", "true"}, - []string{"c", "3", "6.0", "false"}, - []string{"a", "2", "7.1", "false"}, + {"A", "B", "C", "D"}, + {"a", "4", "5.1", "true"}, + {"b", "4", "6.0", "true"}, + {"c", "3", "6.0", "false"}, + {"a", "2", "7.1", "false"}, }, ) fmt.Println(df.Describe()) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..47f7068 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/go-gota/gota + +go 1.14 + +require gonum.org/v1/gonum v0.9.1 From 37caee1ec8941bafca6c7f0b2645c532a7ee6d40 Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Fri, 11 Jun 2021 16:20:59 +0800 Subject: [PATCH 02/60] =?UTF-8?q?1=E3=80=81add=20some=20functions=20in=20S?= =?UTF-8?q?eries=20:=20Shift=E3=80=81CumProd=E3=80=81Prod=E3=80=81AddConst?= =?UTF-8?q?=E3=80=81MulConst=E3=80=81FillNaN=E3=80=81FillNaNForward?= =?UTF-8?q?=E3=80=81FillNaNBackward?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series/rolling.go | 10 ++ series/series.go | 125 +++++++++++++++-- series/series_test.go | 319 ++++++++++++++++++++++++++++++++++++++++-- series/type-bool.go | 28 ++-- series/type-float.go | 20 +-- series/type-int.go | 25 ++-- series/type-string.go | 23 ++- 7 files changed, 481 insertions(+), 69 deletions(-) create mode 100644 series/rolling.go diff --git a/series/rolling.go b/series/rolling.go new file mode 100644 index 0000000..15c3e1e --- /dev/null +++ b/series/rolling.go @@ -0,0 +1,10 @@ +package series + +type Rolling interface{ + Max() ElementValue + Min() ElementValue + Mean() float64 + Quantile(p float64) float64 + Median() float64 + StdDev() float64 +} \ No newline at end of file diff --git a/series/series.go b/series/series.go index 79fe0a0..7422bf6 100644 --- a/series/series.go +++ b/series/series.go @@ -8,6 +8,7 @@ import ( "math" + "gonum.org/v1/gonum/floats" "gonum.org/v1/gonum/stat" ) @@ -55,6 +56,8 @@ type Element interface { // Information methods IsNA() bool Type() Type + + NA() Element } // intElements is the concrete implementation of Elements for Int elements. @@ -114,6 +117,8 @@ const ( Bool Type = "bool" ) +const NaN = "NaN" + // Indexes represent the elements that can be used for selecting a subset of // elements within a Series. Currently supported are: // @@ -153,37 +158,37 @@ func New(values interface{}, t Type, name string) Series { return ret } - switch values.(type) { + switch vt := values.(type) { case []string: - v := values.([]string) + v := vt l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case []float64: - v := values.([]float64) + v := vt l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case []int: - v := values.([]int) + v := vt l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case []bool: - v := values.([]bool) + v := vt l := len(v) preAlloc(l) for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } case Series: - v := values.(Series) + v := vt l := v.Len() preAlloc(l) for i := 0; i < l; i++ { @@ -576,13 +581,13 @@ func (s Series) Elem(i int) Element { // out of bounds checks is performed. func parseIndexes(l int, indexes Indexes) ([]int, error) { var idx []int - switch indexes.(type) { + switch vt := indexes.(type) { case []int: - idx = indexes.([]int) + idx = vt case int: - idx = []int{indexes.(int)} + idx = []int{vt} case []bool: - bools := indexes.([]bool) + bools := vt if len(bools) != l { return nil, fmt.Errorf("indexing error: index dimensions mismatch") } @@ -592,7 +597,7 @@ func parseIndexes(l int, indexes Indexes) ([]int, error) { } } case Series: - s := indexes.(Series) + s := vt if err := s.Err; err != nil { return nil, fmt.Errorf("indexing error: new values has errors: %v", err) } @@ -785,3 +790,101 @@ func (s Series) Map(f MapFunction) Series { } return New(mappedValues, s.Type(), s.Name) } + +//Shift series by desired number of periods and returning a new Series object. +func (s Series) Shift(periods int, newName string) Series { + if s.Len() == 0 { + return s.Empty() + } + if periods == 0 { + return s.Copy() + } + shiftElements := make([]Element, s.Len()) + if periods < 0 { + for i := 0; i - periods < s.Len(); i++ { + shiftElements[i] = s.Elem(i - periods).Copy() + } + for i := s.Len() + periods; i < s.Len(); i++ { + shiftElements[i] = s.Elem(0).NA() + } + } else if periods > 0 { + for i := 0; i < periods; i++ { + shiftElements[i] = s.Elem(0).NA() + } + for i := 0 ; i + periods < s.Len(); i++ { + shiftElements[i + periods] = s.Elem(i).Copy() + } + } + return New(shiftElements, s.Type(), newName) +} +// CumProd finds the cumulative product of the first i elements in s and returning a new Series object. +func(s Series) CumProd() Series { + dst := make([]float64, s.Len()) + floats.CumProd(dst, s.Float()) + return New(dst,s.Type(), "") +} + +// Prod returns the product of the elements of the Series. Returns 1 if len(s) = 0. +func(s Series) Prod() float64 { + return floats.Prod(s.Float()) +} + +// AddConst adds the scalar c to all of the values in Series and returning a new Series object. +func(s Series) AddConst(c float64) Series { + dst := s.Float() + floats.AddConst(c, dst) + return New(dst,s.Type(), "") +} + +// AddConst multiply the scalar c to all of the values in Series and returning a new Series object. +func(s Series) MulConst(c float64) Series { + sm := s.Map(func(e Element) Element { + result := e.Copy() + f := result.Float() + result.Set(f * c) + return Element(result) + }) + return sm +} + +// FillNaN Fill NaN values using the specified value. +func(s Series) FillNaN(value ElementValue) { + for i := 0; i < s.Len(); i++ { + ele := s.Elem(i) + if ele.IsNA() { + ele.Set(value) + } + } +} + +// FillNaNForward Fill NaN values using the last non-NaN value +func(s Series) FillNaNForward() { + var lastNotNaNValue ElementValue = nil + for i := 0; i < s.Len(); i++ { + ele := s.Elem(i) + if !ele.IsNA() { + lastNotNaNValue = ele.Val() + } else { + if lastNotNaNValue != nil { + ele.Set(lastNotNaNValue) + } + } + } +} + +// FillNaNBackward Fill NaN values using the next non-NaN value +func(s Series) FillNaNBackward() { + var lastNotNaNValue ElementValue = nil + for i := s.Len() - 1 ; i >= 0; i-- { + ele := s.Elem(i) + if !ele.IsNA() { + lastNotNaNValue = ele.Val() + } else { + if lastNotNaNValue != nil { + ele.Set(lastNotNaNValue) + } + } + } +} + + diff --git a/series/series_test.go b/series/series_test.go index c7d0516..b8d6780 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -4,8 +4,9 @@ import ( "fmt" "math" "reflect" - "testing" + "strconv" "strings" + "testing" ) // Check that there are no shared memory addreses between the elements of two Series @@ -1562,16 +1563,14 @@ func TestSeries_Map(t *testing.T) { } doubleFloat64 := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() result.Set(result.Float() * 2) return Element(result) } // and two booleans and := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() b, err := result.Bool() if err != nil { t.Errorf("%v", err) @@ -1583,8 +1582,7 @@ func TestSeries_Map(t *testing.T) { // add constant (+5) to value (v) add5Int := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() i, err := result.Int() if err != nil { return Element(&intElement{ @@ -1598,8 +1596,7 @@ func TestSeries_Map(t *testing.T) { // trim (XyZ) prefix from string trimXyZPrefix := func(e Element) Element { - var result Element - result = e.Copy() + result := e.Copy() result.Set(strings.TrimPrefix(result.String(), "XyZ")) return Element(result) } @@ -1661,4 +1658,306 @@ func TestSeries_Map(t *testing.T) { default: } } -} \ No newline at end of file +} +func TestSeries_Shift(t *testing.T) { + tests := []struct { + series Series + shift int + expected Series + }{ + { + Bools([]string{"false", "true", "false", "false", "true"}), + 2, + Bools([]string{"NaN", "NaN", "false", "true", "false"}), + }, + { + Bools([]string{"false", "true", "false", "false", "true"}), + -2, + Bools([]string{"false", "false", "true", "NaN", "NaN"}), + }, + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + -1, + Floats([]string{"-3.23", "-0.337397", "-0.380079", "1.60979", "34.", "NaN"}), + }, + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 1, + Floats([]string{ "NaN", "1.5", "-3.23", "-0.337397", "-0.380079", "1.60979"}), + }, + + { + Strings([]string{"XyZApple", "XyZBanana", "XyZCitrus", "XyZDragonfruit"}), + 2, + Strings([]string{"NaN", "NaN", "XyZApple", "XyZBanana"}), + }, + { + Strings([]string{"San Francisco", "XyZTokyo", "MoscowXyZ", "XyzSydney"}), + -2, + Strings([]string{"MoscowXyZ", "XyzSydney", "NaN", "NaN"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 2, + Ints([]string{"NaN", "NaN", "23", "13", "101"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + -2, + Ints([]string{"101", "-64", "-3", "NaN", "NaN"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 0, + Ints([]string{"23", "13", "101", "-64", "-3"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + b := test.series.Shift(test.shift, "") + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + if err := checkTypes(b); err != nil { + t.Errorf( + "Test:%v\nError:%v", + testnum, err, + ) + } + } +} + +func TestSeries_CumProd(t *testing.T) { + tests := []struct { + series Series + expected Series + }{ + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979"}), + Floats([]string{"1.5", "-4.845", "1.634688465", "-0.62131075708873", "-1.00017984365386"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + Ints([]string{"23", "299", "30199", "-1932736", "5798208"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + b := test.series.CumProd() + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} +func TestSeries_Prod(t *testing.T) { + tests := []struct { + series Series + expected float64 + }{ + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979"}), + -1.000180, + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 5798208, + }, + } + + for testnum, test := range tests { + expected, _ := strconv.ParseFloat(fmt.Sprintf("%.6f", test.expected), 64) + received, _ := strconv.ParseFloat(fmt.Sprintf("%.6f", test.series.Prod()), 64) + if expected != received { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_AddConst(t *testing.T) { + tests := []struct { + series Series + c float64 + expected Series + }{ + { + Floats([]string{"1.5", "-3.23", "0.337397", "0.380079", "1.60979"}), + 2, + Floats([]string{"3.5", "-1.23", "2.337397", "2.380079", "3.60979"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + -2, + Ints([]string{"21", "11", "99", "-66", "-5"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + b := test.series.AddConst(test.c) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_MulConst(t *testing.T) { + tests := []struct { + series Series + c float64 + expected Series + }{ + { + Floats([]string{"1.5", "-3.23", "0.337397", "0.380079", "1.60979"}), + 2, + Floats([]string{"3", "-6.46", "0.674794", "0.760158", "3.21958"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + -2, + Ints([]string{"-46", "-26", "-202", "128", "6"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + b := test.series.MulConst(test.c) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_FillNA(t *testing.T) { + tests := []struct { + series Series + nanValue ElementValue + expected Series + }{ + { + Floats([]string{"1.5", NaN, "0.337397", NaN, "1.60979"}), + 0.0, + Floats([]string{"1.5", "0.0", "0.337397", "0.0", "1.60979"}), + }, + { + Ints([]string{"23", "13", NaN, "-64", NaN}), + 0, + Ints([]string{"23", "13", "0", "-64", "0"}), + }, + { + Bools([]string{"false", NaN, "false", NaN, "true"}), + false, + Bools([]string{"false", "false", "false", "false", "true"}), + }, + { + Strings([]string{"XyZApple", NaN, NaN, "XyZDragonfruit"}), + "null", + Strings([]string{"XyZApple", "null", "null", "XyZDragonfruit"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + test.series.FillNaN(test.nanValue) + received := test.series.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_FillNaNForward(t *testing.T) { + tests := []struct { + series Series + expected Series + }{ + { + Floats([]string{"1.5", NaN, "0.337397", NaN, "1.60979"}), + Floats([]string{"1.5", "1.5", "0.337397", "0.337397", "1.60979"}), + }, + { + Ints([]string{NaN, "13", NaN, "-64", NaN}), + Ints([]string{NaN, "13", "13", "-64", "-64"}), + }, + { + Bools([]string{"false", NaN, "false", NaN, "true"}), + Bools([]string{"false", "false", "false", "false", "true"}), + }, + { + Strings([]string{"XyZApple", NaN, NaN, "XyZDragonfruit"}), + Strings([]string{"XyZApple", "XyZApple", "XyZApple", "XyZDragonfruit"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + test.series.FillNaNForward() + received := test.series.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_FillNaNBackward(t *testing.T) { + tests := []struct { + series Series + expected Series + }{ + { + Floats([]string{"1.5", NaN, "0.337397", NaN, "1.60979"}), + Floats([]string{"1.5", "0.337397", "0.337397", "1.60979", "1.60979"}), + }, + { + Ints([]string{"23", "13", NaN, "-64", NaN}), + Ints([]string{"23", "13", "-64", "-64", NaN}), + }, + { + Bools([]string{"false", NaN, "false", NaN, "true"}), + Bools([]string{"false", "false", "false", "true", "true"}), + }, + { + Strings([]string{"XyZApple", NaN, NaN, "XyZDragonfruit"}), + Strings([]string{"XyZApple", "XyZDragonfruit", "XyZDragonfruit", "XyZDragonfruit"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + test.series.FillNaNBackward() + received := test.series.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + diff --git a/series/type-bool.go b/series/type-bool.go index 034bb3f..8448ce5 100644 --- a/series/type-bool.go +++ b/series/type-bool.go @@ -13,13 +13,13 @@ type boolElement struct { func (e *boolElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch vt := value.(type) { case string: - if value.(string) == "NaN" { + if vt == "NaN" { e.nan = true return } - switch strings.ToLower(value.(string)) { + switch strings.ToLower(vt) { case "true", "t", "1": e.e = true case "false", "f", "0": @@ -29,7 +29,7 @@ func (e *boolElement) Set(value interface{}) { return } case int: - switch value.(int) { + switch vt { case 1: e.e = true case 0: @@ -39,7 +39,7 @@ func (e *boolElement) Set(value interface{}) { return } case float64: - switch value.(float64) { + switch vt { case 1: e.e = true case 0: @@ -49,9 +49,9 @@ func (e *boolElement) Set(value interface{}) { return } case bool: - e.e = value.(bool) + e.e = vt case Element: - b, err := value.(Element).Bool() + b, err := vt.Bool() if err != nil { e.nan = true return @@ -59,9 +59,7 @@ func (e *boolElement) Set(value interface{}) { e.e = b default: e.nan = true - return } - return } func (e boolElement) Copy() Element { @@ -71,11 +69,12 @@ func (e boolElement) Copy() Element { return &boolElement{e.e, false} } +func (e boolElement) NA() Element { + return &boolElement{false, true} +} + func (e boolElement) IsNA() bool { - if e.nan { - return true - } - return false + return e.nan } func (e boolElement) Type() Type { @@ -103,7 +102,7 @@ func (e boolElement) Int() (int, error) { if e.IsNA() { return 0, fmt.Errorf("can't convert NaN to int") } - if e.e == true { + if e.e { return 1, nil } return 0, nil @@ -173,3 +172,4 @@ func (e boolElement) GreaterEq(elem Element) bool { } return e.e || !b } + diff --git a/series/type-float.go b/series/type-float.go index a722cbd..9c59a60 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -13,36 +13,34 @@ type floatElement struct { func (e *floatElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch vt := value.(type) { case string: - if value.(string) == "NaN" { + if vt == "NaN" { e.nan = true return } - f, err := strconv.ParseFloat(value.(string), 64) + f, err := strconv.ParseFloat(vt, 64) if err != nil { e.nan = true return } e.e = f case int: - e.e = float64(value.(int)) + e.e = float64(vt) case float64: - e.e = float64(value.(float64)) + e.e = float64(vt) case bool: - b := value.(bool) + b := vt if b { e.e = 1 } else { e.e = 0 } case Element: - e.e = value.(Element).Float() + e.e = vt.Float() default: e.nan = true - return } - return } func (e floatElement) Copy() Element { @@ -52,6 +50,10 @@ func (e floatElement) Copy() Element { return &floatElement{e.e, false} } +func (e floatElement) NA() Element { + return &floatElement{0.0, true} +} + func (e floatElement) IsNA() bool { if e.nan || math.IsNaN(e.e) { return true diff --git a/series/type-int.go b/series/type-int.go index 94082a1..ac1f9ee 100644 --- a/series/type-int.go +++ b/series/type-int.go @@ -13,22 +13,22 @@ type intElement struct { func (e *intElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch vt := value.(type) { case string: - if value.(string) == "NaN" { + if vt == "NaN" { e.nan = true return } - i, err := strconv.Atoi(value.(string)) + i, err := strconv.Atoi(vt) if err != nil { e.nan = true return } e.e = i case int: - e.e = int(value.(int)) + e.e = int(vt) case float64: - f := value.(float64) + f := vt if math.IsNaN(f) || math.IsInf(f, 0) || math.IsInf(f, 1) { @@ -37,14 +37,14 @@ func (e *intElement) Set(value interface{}) { } e.e = int(f) case bool: - b := value.(bool) + b := vt if b { e.e = 1 } else { e.e = 0 } case Element: - v, err := value.(Element).Int() + v, err := vt.Int() if err != nil { e.nan = true return @@ -52,9 +52,7 @@ func (e *intElement) Set(value interface{}) { e.e = v default: e.nan = true - return } - return } func (e intElement) Copy() Element { @@ -64,11 +62,12 @@ func (e intElement) Copy() Element { return &intElement{e.e, false} } +func (e intElement) NA() Element { + return &intElement{0, true} +} + func (e intElement) IsNA() bool { - if e.nan { - return true - } - return false + return e.nan } func (e intElement) Type() Type { diff --git a/series/type-string.go b/series/type-string.go index f50e3db..4bf1262 100644 --- a/series/type-string.go +++ b/series/type-string.go @@ -14,31 +14,29 @@ type stringElement struct { func (e *stringElement) Set(value interface{}) { e.nan = false - switch value.(type) { + switch vt := value.(type) { case string: - e.e = string(value.(string)) + e.e = string(vt) if e.e == "NaN" { e.nan = true return } case int: - e.e = strconv.Itoa(value.(int)) + e.e = strconv.Itoa(vt) case float64: - e.e = strconv.FormatFloat(value.(float64), 'f', 6, 64) + e.e = strconv.FormatFloat(vt, 'f', 6, 64) case bool: - b := value.(bool) + b := vt if b { e.e = "true" } else { e.e = "false" } case Element: - e.e = value.(Element).String() + e.e = vt.String() default: e.nan = true - return } - return } func (e stringElement) Copy() Element { @@ -48,11 +46,12 @@ func (e stringElement) Copy() Element { return &stringElement{e.e, false} } +func (e stringElement) NA() Element { + return &stringElement{"", true} +} + func (e stringElement) IsNA() bool { - if e.nan { - return true - } - return false + return e.nan } func (e stringElement) Type() Type { From e357493b788012e45c0c0c028bcf135e46bddc63 Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Fri, 11 Jun 2021 18:14:26 +0800 Subject: [PATCH 03/60] rolling not test --- series/rolling.go | 104 ++++++++++++++++++++++++++++++++++++++--- series/rolling_test.go | 67 ++++++++++++++++++++++++++ series/series.go | 4 +- 3 files changed, 167 insertions(+), 8 deletions(-) create mode 100644 series/rolling_test.go diff --git a/series/rolling.go b/series/rolling.go index 15c3e1e..a48d10a 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -1,10 +1,100 @@ package series -type Rolling interface{ - Max() ElementValue - Min() ElementValue - Mean() float64 - Quantile(p float64) float64 - Median() float64 - StdDev() float64 +type Rolling interface { + Max() Series + Min() Series + Mean() Series + Quantile(p float64) Series + Median() Series + StdDev() Series +} + +type rollingSeries struct { + Series + window int + minPeriods int +} + +func NewRollingSeries(window int, minPeriods int, s Series) Rolling { + if window < 1 { + panic("window must >= 1") + } + if minPeriods < 1 || minPeriods > window { + panic("minPeriods must >= 1 && minPeriods must <= window") + } + return &rollingSeries{ + Series: s, + window: window, + minPeriods: minPeriods, + } +} + +func (s rollingSeries) Max() Series { + if s.Len() == 0 { + return s.Empty() + } + eles := make([]Element, s.Len()) + for i := 0; i < s.minPeriods-1; i++ { + eles[i] = s.Elem(0).NA() + } + for i := s.minPeriods-1; i < s.Len(); i++ { + eles[i] = findMax(i + 1 - s.minPeriods, s.window, s.Series).Copy() + } + newS := New(eles, s.Type(), "") + return newS +} + + +func (s rollingSeries) Min() Series { + if s.Len() == 0 { + return s.Empty() + } + eles := make([]Element, s.Len()) + for i := 0; i < s.minPeriods-1; i++ { + eles[i] = s.Elem(0).NA() + } + for i := s.minPeriods-1; i < s.Len(); i++ { + eles[i] = findMin(i + 1 - s.minPeriods, s.window, s.Series).Copy() + } + newS := New(eles, s.Type(), "") + return newS +} + +// todo +func (s rollingSeries) Mean() Series { + return s.Series +} +// todo +func (s rollingSeries) Quantile(p float64) Series { + return s.Series +} +// todo +func (s rollingSeries) Median() Series { + return s.Series +} +// todo +func (s rollingSeries) StdDev() Series { + return s.Series +} + +func findMax(startIndex, window int, s Series) Element { + max := s.Elem(startIndex) + for i := startIndex + 1; i < startIndex + window && i < s.Len(); i++ { + elem := s.Elem(i) + if elem.Greater(max) { + max = elem + } + } + return max +} + +func findMin(startIndex, window int, s Series) Element { + min := s.Elem(startIndex) + for i := startIndex + 1; i < startIndex + window && i < s.Len(); i++ { + elem := s.Elem(i) + if elem.Less(min) { + min = elem + } + } + return min } \ No newline at end of file diff --git a/series/rolling_test.go b/series/rolling_test.go new file mode 100644 index 0000000..618c22c --- /dev/null +++ b/series/rolling_test.go @@ -0,0 +1,67 @@ +package series + +import ( + "reflect" + "testing") + + +func TestSeries_Rolling(t *testing.T) { + tests := []struct { + series Series + window int + minPeriod int + maxExpected Series + minExpected Series + }{ + { + Bools([]string{"false", "true", "false", "false", "true"}), + 2, + 1, + Bools([]string{"false", "true", "true", "false", "true"}), + Bools([]string{"false", "false", "false", "false", "false"}), + }, + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + Floats([]string{NaN, "1.5", "1.5", "-0.337397", "1.60979", "34."}), + Floats([]string{NaN, "-3.23", "-3.23", "-3.23", "-0.380079", "-0.380079"}), + }, + { + Strings([]string{"20210618", "20200909", "20200910", "20200912","20200911"}), + 3, + 2, + Strings([]string{NaN, "20210618", "20210618", "20200912", "20200912"}), + Strings([]string{NaN, "20200909", "20200909", "20200912", "20200910"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + Ints([]string{"23", "23", "101", "101", "101"}), + Ints([]string{"23", "13", "13", "-64", "-64"}), + }, + } + + for testnum, test := range tests { + expected := test.maxExpected.Records() + b := test.series.Rolling(test.window, test.minPeriod).Max() + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-0:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.minExpected.Records() + b = test.series.Rolling(test.window, test.minPeriod).Min() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-1:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} \ No newline at end of file diff --git a/series/series.go b/series/series.go index 7422bf6..eec76df 100644 --- a/series/series.go +++ b/series/series.go @@ -887,4 +887,6 @@ func(s Series) FillNaNBackward() { } } - +func(s Series) Rolling(window int, minPeriods int) Rolling { + return NewRollingSeries(window, minPeriods, s) +} From 8e217e532ab1c7c3fae7577a366d3752d4385093 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sat, 12 Jun 2021 00:01:42 +0800 Subject: [PATCH 04/60] fix rolling.max and rolling.min --- series/rolling.go | 27 ++++++++++++++++++--------- series/rolling_test.go | 6 +++--- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/series/rolling.go b/series/rolling.go index a48d10a..5841abd 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -38,7 +38,12 @@ func (s rollingSeries) Max() Series { eles[i] = s.Elem(0).NA() } for i := s.minPeriods-1; i < s.Len(); i++ { - eles[i] = findMax(i + 1 - s.minPeriods, s.window, s.Series).Copy() + start := i - s.window + 1 + if start < 0 { + start = 0 + } + end := i + eles[i] = findMax(start, end, s.Series).Copy() } newS := New(eles, s.Type(), "") return newS @@ -54,7 +59,12 @@ func (s rollingSeries) Min() Series { eles[i] = s.Elem(0).NA() } for i := s.minPeriods-1; i < s.Len(); i++ { - eles[i] = findMin(i + 1 - s.minPeriods, s.window, s.Series).Copy() + start := i - s.window + 1 + if start < 0 { + start = 0 + } + end := i + eles[i] = findMin(start, end, s.Series).Copy() } newS := New(eles, s.Type(), "") return newS @@ -77,9 +87,9 @@ func (s rollingSeries) StdDev() Series { return s.Series } -func findMax(startIndex, window int, s Series) Element { - max := s.Elem(startIndex) - for i := startIndex + 1; i < startIndex + window && i < s.Len(); i++ { +func findMax(start, end int, s Series) Element { + max := s.Elem(start) + for i := start + 1; i <= end; i++ { elem := s.Elem(i) if elem.Greater(max) { max = elem @@ -87,10 +97,9 @@ func findMax(startIndex, window int, s Series) Element { } return max } - -func findMin(startIndex, window int, s Series) Element { - min := s.Elem(startIndex) - for i := startIndex + 1; i < startIndex + window && i < s.Len(); i++ { +func findMin(start, end int, s Series) Element { + min := s.Elem(start) + for i := start + 1; i <= end; i++ { elem := s.Elem(i) if elem.Less(min) { min = elem diff --git a/series/rolling_test.go b/series/rolling_test.go index 618c22c..065d190 100644 --- a/series/rolling_test.go +++ b/series/rolling_test.go @@ -32,7 +32,7 @@ func TestSeries_Rolling(t *testing.T) { 3, 2, Strings([]string{NaN, "20210618", "20210618", "20200912", "20200912"}), - Strings([]string{NaN, "20200909", "20200909", "20200912", "20200910"}), + Strings([]string{NaN, "20200909", "20200909", "20200909", "20200910"}), }, { Ints([]string{"23", "13", "101", "-64", "-3"}), @@ -49,7 +49,7 @@ func TestSeries_Rolling(t *testing.T) { received := b.Records() if !reflect.DeepEqual(expected, received) { t.Errorf( - "Test-0:%v\nExpected:\n%v\nReceived:\n%v", + "Test-Max:%v\nExpected:\n%v\nReceived:\n%v", testnum, expected, received, ) } @@ -59,7 +59,7 @@ func TestSeries_Rolling(t *testing.T) { received = b.Records() if !reflect.DeepEqual(expected, received) { t.Errorf( - "Test-1:%v\nExpected:\n%v\nReceived:\n%v", + "Test-Min:%v\nExpected:\n%v\nReceived:\n%v", testnum, expected, received, ) } From adce64048ba5ac7500129588f5f10d2826e05ff5 Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Tue, 15 Jun 2021 18:07:53 +0800 Subject: [PATCH 05/60] =?UTF-8?q?Rolling.Mean=E3=80=81Quantile=E3=80=81Med?= =?UTF-8?q?ian=E3=80=81StdDev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.mod | 5 +- series/rolling.go | 161 +++++++++++++++++++++++++++++------------ series/rolling_test.go | 84 ++++++++++++++++++--- series/window.go | 119 ++++++++++++++++++++++++++++++ util/util.go | 20 +++++ 5 files changed, 333 insertions(+), 56 deletions(-) create mode 100644 series/window.go create mode 100644 util/util.go diff --git a/go.mod b/go.mod index 47f7068..c188ad9 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,7 @@ module github.com/go-gota/gota go 1.14 -require gonum.org/v1/gonum v0.9.1 +require ( + github.com/shopspring/decimal v1.2.0 + gonum.org/v1/gonum v0.9.1 +) diff --git a/series/rolling.go b/series/rolling.go index 5841abd..d769099 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -1,5 +1,12 @@ package series +import ( + "math" + + "github.com/go-gota/gota/util" + "gonum.org/v1/gonum/floats" +) + type Rolling interface { Max() Series Min() Series @@ -34,16 +41,16 @@ func (s rollingSeries) Max() Series { return s.Empty() } eles := make([]Element, s.Len()) - for i := 0; i < s.minPeriods-1; i++ { - eles[i] = s.Elem(0).NA() + var index int + for index = 0; index < s.minPeriods-1; index++ { + eles[index] = s.Elem(0).NA() } - for i := s.minPeriods-1; i < s.Len(); i++ { - start := i - s.window + 1 - if start < 0 { - start = 0 - } - end := i - eles[i] = findMax(start, end, s.Series).Copy() + frw := NewRollingWindow(s.Series, s.window, s.minPeriods) + for frw.HasNext() { + ele := s.Elem(0).NA() + ele.Set(frw.Next().Max()) + eles[index] = ele + index++ } newS := New(eles, s.Type(), "") return newS @@ -55,55 +62,117 @@ func (s rollingSeries) Min() Series { return s.Empty() } eles := make([]Element, s.Len()) - for i := 0; i < s.minPeriods-1; i++ { - eles[i] = s.Elem(0).NA() + var index int + for index = 0; index < s.minPeriods-1; index++ { + eles[index] = s.Elem(0).NA() } - for i := s.minPeriods-1; i < s.Len(); i++ { - start := i - s.window + 1 - if start < 0 { - start = 0 - } - end := i - eles[i] = findMin(start, end, s.Series).Copy() + frw := NewRollingWindow(s.Series, s.window, s.minPeriods) + for frw.HasNext() { + ele := s.Elem(0).NA() + ele.Set(frw.Next().Min()) + eles[index] = ele + index++ } newS := New(eles, s.Type(), "") return newS } -// todo + func (s rollingSeries) Mean() Series { - return s.Series + if s.Len() == 0 { + return s.Empty() + } + sf := s.Float() + sum := make([]float64, s.Len()) + floats.CumSum(sum, sf) + + eles := make([]float64, s.Len()) + for i := 0; i < s.minPeriods-1; i++ { + eles[i] = math.NaN() + } + + // sum0 / sfIndex0 + sum0 := sum[s.minPeriods-1 : s.window - 1] + sfIndex0 := util.MakeFloatSliceRange(s.window - s.minPeriods, float64(s.minPeriods), 1) + floats.DivTo(eles[s.minPeriods-1 : s.window - 1], sum0, sfIndex0) + + sum1 := sum[0 : s.Len() - s.window + 1] + sum2 := sum[s.window - 1 :] + sf1 := sf[0 : s.Len() - s.window + 1] + + // (sum2 - sum1 + sf1) / window + windows := util.MakeFloatSlice(s.Len() - s.window + 1, float64(s.window)) + floats.SubTo(eles[s.window - 1 : ], sum2, sum1) + floats.Add(eles[s.window - 1 : ], sf1) + floats.Div(eles[s.window - 1 : ], windows) + newS := New(eles, Float, "") + return newS } -// todo + + func (s rollingSeries) Quantile(p float64) Series { - return s.Series + if s.Len() == 0 { + return s.Empty() + } + eles := make([]Element, s.Len()) + var index int + for index = 0; index < s.minPeriods-1; index++ { + eles[index] = s.Elem(0).NA() + } + frw := NewRollingWindow(s.Series, s.window, s.minPeriods) + for frw.HasNext() { + ele := s.Elem(0).NA() + ele.Set(frw.Next().Quantile(p)) + eles[index] = ele + index++ + } + newS := New(eles, s.Type(), "") + return newS } -// todo + func (s rollingSeries) Median() Series { - return s.Series -} -// todo -func (s rollingSeries) StdDev() Series { - return s.Series + + if s.Len() == 0 { + return s.Empty() + } + eles := make([]Element, s.Len()) + var index int + for index = 0; index < s.minPeriods-1; index++ { + eles[index] = s.Elem(0).NA() + } + frw := NewRollingWindow(s.Series, s.window, s.minPeriods) + for frw.HasNext() { + ele := s.Elem(0).NA() + ele.Set(frw.Next().Median()) + eles[index] = ele + index++ + } + newS := New(eles, s.Type(), "") + return newS } -func findMax(start, end int, s Series) Element { - max := s.Elem(start) - for i := start + 1; i <= end; i++ { - elem := s.Elem(i) - if elem.Greater(max) { - max = elem - } + +func (s rollingSeries) StdDev() Series { + if s.Len() == 0 { + return s.Empty() + } + eles := make([]Element, s.Len()) + var index int + for index = 0; index < s.minPeriods-1; index++ { + eles[index] = &floatElement{0.0, true} + } + frw := NewRollingWindow(s.Series, s.window, s.minPeriods) + for frw.HasNext() { + ele := &floatElement{0.0, false} + ele.Set(frw.Next().StdDev()) + eles[index] = ele + index++ } - return max + newS := New(eles, Float, "") + return newS } -func findMin(start, end int, s Series) Element { - min := s.Elem(start) - for i := start + 1; i <= end; i++ { - elem := s.Elem(i) - if elem.Less(min) { - min = elem - } - } - return min -} \ No newline at end of file + + + + + diff --git a/series/rolling_test.go b/series/rolling_test.go index 065d190..a0ea8cb 100644 --- a/series/rolling_test.go +++ b/series/rolling_test.go @@ -2,16 +2,21 @@ package series import ( "reflect" - "testing") - + "testing" +) func TestSeries_Rolling(t *testing.T) { tests := []struct { - series Series - window int - minPeriod int - maxExpected Series - minExpected Series + series Series + window int + minPeriod int + maxExpected Series + minExpected Series + meanExpected Series + quantile float64 + quantileExpected Series + medianExpected Series + stdDevExpected Series }{ { Bools([]string{"false", "true", "false", "false", "true"}), @@ -19,6 +24,11 @@ func TestSeries_Rolling(t *testing.T) { 1, Bools([]string{"false", "true", "true", "false", "true"}), Bools([]string{"false", "false", "false", "false", "false"}), + Floats([]string{"0.000000", "0.500000", "0.500000", "0.000000", "0.500000"}), + 0.8, + Bools([]string{"false", "true", "true", "false", "true"}), + Bools([]string{NaN, NaN, NaN, NaN, NaN}), + Floats([]string{NaN, "0.707106781", "0.707106781", "0.000000", "0.707106781"}), }, { Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), @@ -26,13 +36,23 @@ func TestSeries_Rolling(t *testing.T) { 2, Floats([]string{NaN, "1.5", "1.5", "-0.337397", "1.60979", "34."}), Floats([]string{NaN, "-3.23", "-3.23", "-3.23", "-0.380079", "-0.380079"}), + Floats([]string{NaN, "-0.865", "-0.689132333", "-1.315825333", "0.297438", "11.743237"}), + 0.7, + Floats([]string{NaN, "1.500000", "1.500000", "-0.337397", "1.609790", "34.000000"}), + Floats([]string{NaN, "-0.865", "-0.337397", "-0.380079", "-0.337397", "1.60979"}), + Floats([]string{NaN, "3.344615075", "2.384536288", "1.657861251", "1.136730517", "19.30058339"}), }, { - Strings([]string{"20210618", "20200909", "20200910", "20200912","20200911"}), + Strings([]string{"20210618", "20200909", "20200910", "20200912", "20200911"}), 3, 2, Strings([]string{NaN, "20210618", "20210618", "20200912", "20200912"}), Strings([]string{NaN, "20200909", "20200909", "20200909", "20200910"}), + Floats([]string{NaN, "20205763.500000", "20204145.666667", "20200910.333333", "20200911.000000"}), + 0.8, + Strings([]string{NaN, "20210618.000000", "20210618.000000", "20200912.000000", "20200912.000000"}), + Strings([]string{NaN, NaN, NaN, NaN, NaN}), + Strings([]string{NaN, "6865.299739", "5605.205111", "1.527525", "1.000000"}), }, { Ints([]string{"23", "13", "101", "-64", "-3"}), @@ -40,6 +60,11 @@ func TestSeries_Rolling(t *testing.T) { 1, Ints([]string{"23", "23", "101", "101", "101"}), Ints([]string{"23", "13", "13", "-64", "-64"}), + Floats([]string{"23.000000", "18.000000", "45.666667", "16.666667", "11.333333"}), + 0.8, + Ints([]string{"23", "23", "101", "101", "101"}), + Ints([]string{"23", "18", "23", "13", "-3"}), + Floats([]string{NaN, "7.071067812", "48.18021724", "82.56108849", "83.4286122"}), }, } @@ -63,5 +88,46 @@ func TestSeries_Rolling(t *testing.T) { testnum, expected, received, ) } + + expected = test.meanExpected.Records() + b = test.series.Rolling(test.window, test.minPeriod).Mean() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Mean:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.quantileExpected.Records() + b = test.series.Rolling(test.window, test.minPeriod).Quantile(test.quantile) + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Quantile:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.medianExpected.Records() + b = test.series.Rolling(test.window, test.minPeriod).Median() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Median:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.stdDevExpected.Records() + b = test.series.Rolling(test.window, test.minPeriod).StdDev() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-StdDev:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } } -} \ No newline at end of file +} + diff --git a/series/window.go b/series/window.go new file mode 100644 index 0000000..ae75258 --- /dev/null +++ b/series/window.go @@ -0,0 +1,119 @@ +package series + +import ( + "sort" + + "gonum.org/v1/gonum/stat" +) + +type RollingWindow interface { + HasNext() bool + Next() Window +} + +type Window interface { + Max() interface{} + Min() interface{} + Quantile(p float64) float64 + Median() float64 + StdDev() float64 +} + +type rollingWindow struct { + floats []float64 + eles []Element + startIndex int + endIndexExclude int + windowSize int + eleType Type +} + +func NewRollingWindow(s Series, windowSize int, minPeriods int) RollingWindow { + + eles := make([]Element, s.Len()) + for i := 0; i < s.Len(); i++ { + eles[i] = s.Elem(i) + } + + return &rollingWindow{ + floats: s.Float(), + eles: eles, + startIndex: 0, + endIndexExclude: minPeriods, + windowSize: windowSize, + eleType: s.t, + } +} + +func (rw *rollingWindow) HasNext() bool { + return rw.endIndexExclude <= len(rw.eles) +} + +func (rw *rollingWindow) Next() Window { + fw := elementsWindow { + rw.floats[rw.startIndex:rw.endIndexExclude], + rw.eles[rw.startIndex:rw.endIndexExclude], + } + rw.endIndexExclude++ + startIndex := rw.endIndexExclude - rw.windowSize + if startIndex > rw.startIndex { + rw.startIndex = startIndex + } + + return fw +} + +type elementsWindow struct { + floats []float64 + eles []Element +} + +func (ew elementsWindow) Max() interface{} { + return findMax(ew.eles).Val() +} + +func (ew elementsWindow) Min() interface{} { + return findMin(ew.eles).Val() +} + +func (ew elementsWindow) Quantile(p float64) float64 { + fs := make([]float64, len(ew.floats)) + copy(fs, ew.floats) + sort.Float64s(fs) + return stat.Quantile(p, stat.Empirical, fs, nil) +} + +func (ew elementsWindow) Median() float64 { + fs := make([]Element, len(ew.eles)) + for i := 0; i < len(ew.eles); i++ { + fs[i] = ew.eles[i].Copy() + } + ns := New(fs, ew.eles[0].Type(),"") + median := ns.Median() + return median +} + +func (ew elementsWindow) StdDev() float64 { + return stat.StdDev(ew.floats, nil) +} + + +func findMax(eles []Element) Element { + max := eles[0] + for i := 1; i < len(eles); i++ { + if eles[i].Greater(max) { + max = eles[i] + } + } + return max +} + +func findMin(eles []Element) Element { + min := eles[0] + for i := 1; i < len(eles); i++ { + if eles[i].Less(min) { + min = eles[i] + } + } + return min +} diff --git a/util/util.go b/util/util.go new file mode 100644 index 0000000..0fc32aa --- /dev/null +++ b/util/util.go @@ -0,0 +1,20 @@ +package util + +import "github.com/shopspring/decimal" + +func MakeFloatSlice(size int, defaultValue float64) []float64 { + fs := make([]float64, size) + for i := 0; i < size; i++ { + fs[i] = defaultValue + } + return fs +} + +func MakeFloatSliceRange(size int, start float64, step float64) []float64 { + fs := make([]float64, size) + for i := 0; i < size; i++ { + fs[i], _ = decimal.NewFromFloat(start).Add(decimal.NewFromFloat(step).Mul(decimal.NewFromInt32(int32(i)))).Float64() + + } + return fs +} \ No newline at end of file From 1768517cd195ae4a544efb3daeaadb6c57b018e3 Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Wed, 16 Jun 2021 12:49:57 +0800 Subject: [PATCH 06/60] =?UTF-8?q?series=20And=E3=80=81Or?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- go.mod | 5 +---- series/series.go | 11 +++++++++++ series/series_logic.go | 43 ++++++++++++++++++++++++++++++++++++++++++ series/window.go | 21 +++++++++++++++------ util/util.go | 5 +---- 5 files changed, 71 insertions(+), 14 deletions(-) create mode 100644 series/series_logic.go diff --git a/go.mod b/go.mod index c188ad9..47f7068 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,4 @@ module github.com/go-gota/gota go 1.14 -require ( - github.com/shopspring/decimal v1.2.0 - gonum.org/v1/gonum v0.9.1 -) +require gonum.org/v1/gonum v0.9.1 diff --git a/series/series.go b/series/series.go index eec76df..748f7d5 100644 --- a/series/series.go +++ b/series/series.go @@ -847,6 +847,17 @@ func(s Series) MulConst(c float64) Series { return sm } +func(s Series) DivConst(c float64) Series { + sm := s.Map(func(e Element) Element { + result := e.Copy() + f := result.Float() + result.Set(f / c) + return Element(result) + }) + return sm +} + + // FillNaN Fill NaN values using the specified value. func(s Series) FillNaN(value ElementValue) { for i := 0; i < s.Len(); i++ { diff --git a/series/series_logic.go b/series/series_logic.go new file mode 100644 index 0000000..984d657 --- /dev/null +++ b/series/series_logic.go @@ -0,0 +1,43 @@ +package series + +import "fmt" + +func (s Series) And(in interface{}) Series { + result := seriesLogic(s, in, func(e1, e2 Element) bool { + e1b,_ := e1.Bool() + e2b,_ := e2.Bool() + return e1b && e2b + }) + return result +} + +func (s Series) Or(in interface{}) Series { + result := seriesLogic(s, in, func(e1, e2 Element) bool { + e1b,_ := e1.Bool() + e2b,_ := e2.Bool() + return e1b || e2b + }) + return result +} + +func seriesLogic (s Series, in interface{}, elementLogic func(e1, e2 Element) bool) Series { + inSeries := New(in, s.t, "") + if inSeries.Len() != 1 && inSeries.Len() != s.Len() { + s := s.Empty() + s.Err = fmt.Errorf("length mismatch") + return s + } + bools := make([]bool, s.Len()) + + if inSeries.Len() == 1 { + for i := 0; i < s.Len(); i++ { + bools[i] = elementLogic(s.elements.Elem(i), inSeries.elements.Elem(0)) + } + return Bools(bools) + } else { + for i := 0; i < s.Len(); i++ { + bools[i] = elementLogic(s.elements.Elem(i), inSeries.elements.Elem(i)) + } + return Bools(bools) + } +} \ No newline at end of file diff --git a/series/window.go b/series/window.go index ae75258..6394ea6 100644 --- a/series/window.go +++ b/series/window.go @@ -1,6 +1,7 @@ package series import ( + "math" "sort" "gonum.org/v1/gonum/stat" @@ -84,13 +85,21 @@ func (ew elementsWindow) Quantile(p float64) float64 { } func (ew elementsWindow) Median() float64 { - fs := make([]Element, len(ew.eles)) - for i := 0; i < len(ew.eles); i++ { - fs[i] = ew.eles[i].Copy() + if len(ew.eles) == 0 || + ew.eles[0].Type() == String || + ew.eles[0].Type() == Bool { + return math.NaN() } - ns := New(fs, ew.eles[0].Type(),"") - median := ns.Median() - return median + + fs := make([]float64, len(ew.floats)) + copy(fs, ew.floats) + sort.Float64s(fs) + + if len(ew.floats) %2 != 0 { + return fs[len(ew.floats)/2] + } + return (ew.floats[(len(ew.floats)/2)-1] + + ew.floats[len(ew.floats)/2]) * 0.5 } func (ew elementsWindow) StdDev() float64 { diff --git a/util/util.go b/util/util.go index 0fc32aa..f0fb50f 100644 --- a/util/util.go +++ b/util/util.go @@ -1,7 +1,5 @@ package util -import "github.com/shopspring/decimal" - func MakeFloatSlice(size int, defaultValue float64) []float64 { fs := make([]float64, size) for i := 0; i < size; i++ { @@ -13,8 +11,7 @@ func MakeFloatSlice(size int, defaultValue float64) []float64 { func MakeFloatSliceRange(size int, start float64, step float64) []float64 { fs := make([]float64, size) for i := 0; i < size; i++ { - fs[i], _ = decimal.NewFromFloat(start).Add(decimal.NewFromFloat(step).Mul(decimal.NewFromInt32(int32(i)))).Float64() - + fs[i] = start + step * float64(i) } return fs } \ No newline at end of file From aef0878224da5a9e98c5f970ec102b852c444cd3 Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Wed, 16 Jun 2021 14:07:44 +0800 Subject: [PATCH 07/60] logic_test --- series/series_logic_test.go | 75 +++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 series/series_logic_test.go diff --git a/series/series_logic_test.go b/series/series_logic_test.go new file mode 100644 index 0000000..7944a70 --- /dev/null +++ b/series/series_logic_test.go @@ -0,0 +1,75 @@ +package series + +import ( + "reflect" + "testing" +) + +func TestSeries_Logic(t *testing.T) { + tests := []struct { + series Series + another interface{} + andExpected Series + orExpected Series + }{ + { + Bools([]string{"false", "true", "false", "false", "true"}), + "true", + Bools([]string{"false", "true", "false", "false", "true"}), + Bools([]string{"true", "true", "true", "true", "true"}), + }, + { + Bools([]string{"false", "true", "false", "false", "true"}), + []string {"true", "false", "true", "false", "false"}, + Bools([]string{"false", "false", "false", "false", "false"}), + Bools([]string{"true", "true", "true", "false", "true"}), + }, + { + Bools([]string{"false", "true", "false", "false", "true"}), + Bools([]string{"true", "false", "true", "false", "false"}), + Bools([]string{"false", "false", "false", "false", "false"}), + Bools([]string{"true", "true", "true", "false", "true"}), + }, + { + Bools([]string{"false", "true", "false", "false", "true"}), + []string {"1", "0", "1", "0", "0"}, + Bools([]string{"false", "false", "false", "false", "false"}), + Bools([]string{"true", "true", "true", "false", "true"}), + }, + { + Bools([]string{"false", "true", "false", "false", "true"}), + []float64 {1, 0, 1, 0, 0}, + Bools([]string{"false", "false", "false", "false", "false"}), + Bools([]string{"true", "true", "true", "false", "true"}), + }, + { + Bools([]string{"false", "true", "false", "false", "true"}), + []int {1, 0, 1, 0, 0}, + Bools([]string{"false", "false", "false", "false", "false"}), + Bools([]string{"true", "true", "true", "false", "true"}), + }, + } + + for testnum, test := range tests { + expected := test.andExpected.Records() + b := test.series.And(test.another) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-And:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.orExpected.Records() + b = test.series.Or(test.another) + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Or:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + From a40de6bc1b843f80e3bd2dac69769826bc72565a Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Wed, 16 Jun 2021 17:34:47 +0800 Subject: [PATCH 08/60] modify MapFunction,add index param:index --- series/series.go | 84 ++++++++++++++++++++++++++++++++++++++++--- series/series_test.go | 6 ++-- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/series/series.go b/series/series.go index 748f7d5..46d44f9 100644 --- a/series/series.go +++ b/series/series.go @@ -88,7 +88,7 @@ func (e boolElements) Elem(i int) Element { return &e[i] } // unmarshaling Elements. type ElementValue interface{} -type MapFunction func(Element) Element +type MapFunction func(Element, int) Element // Comparator is a convenience alias that can be used for a more type safe way of // reason and use comparators. @@ -215,6 +215,40 @@ func New(values interface{}, t Type, name string) Series { return ret } +func NewDefault(defaultValue interface{}, t Type, name string, len int) Series { + ret := Series{ + Name: name, + t: t, + } + + // Pre-allocate elements + preAlloc := func(n int) { + switch t { + case String: + ret.elements = make(stringElements, n) + case Int: + ret.elements = make(intElements, n) + case Float: + ret.elements = make(floatElements, n) + case Bool: + ret.elements = make(boolElements, n) + default: + panic(fmt.Sprintf("unknown type %v", t)) + } + } + + if defaultValue == nil { + preAlloc(1) + ret.elements.Elem(0).Set(nil) + return ret + } + preAlloc(len) + for i := 0; i < len; i++ { + ret.elements.Elem(i).Set(defaultValue) + } + return ret +} + // Strings is a constructor for a String Series func Strings(values interface{}) Series { return New(values, String, "") @@ -367,6 +401,15 @@ func (s Series) IsNaN() []bool { return ret } +// IsNaN returns an array that identifies which of the elements are not NaN. +func (s Series) IsNotNaN() []bool { + ret := make([]bool, s.Len()) + for i := 0; i < s.Len(); i++ { + ret[i] = !s.elements.Elem(i).IsNA() + } + return ret +} + // Compare compares the values of a Series with other elements. To do so, the // elements with are to be compared are first transformed to a Series of the same // type as the caller. @@ -785,7 +828,7 @@ func (s Series) Map(f MapFunction) Series { mappedValues := make([]Element, s.Len()) for i := 0; i < s.Len(); i++ { - value := f(s.elements.Elem(i)) + value := f(s.elements.Elem(i), i) mappedValues[i] = value } return New(mappedValues, s.Type(), s.Name) @@ -838,7 +881,7 @@ func(s Series) AddConst(c float64) Series { // AddConst multiply the scalar c to all of the values in Series and returning a new Series object. func(s Series) MulConst(c float64) Series { - sm := s.Map(func(e Element) Element { + sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(f * c) @@ -847,8 +890,9 @@ func(s Series) MulConst(c float64) Series { return sm } +// DivConst Div the scalar c to all of the values in Series and returning a new Series object. func(s Series) DivConst(c float64) Series { - sm := s.Map(func(e Element) Element { + sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(f / c) @@ -857,6 +901,38 @@ func(s Series) DivConst(c float64) Series { return sm } +func(s Series) Add(c Series) Series { + sf := s.Float() + cf := c.Float() + dst := make([]float64, s.Len()) + floats.AddTo(dst, sf, cf) + return New(dst, Float, "") +} + +func(s Series) Sub(c Series) Series { + sf := s.Float() + cf := c.Float() + dst := make([]float64, s.Len()) + floats.SubTo(dst, sf, cf) + return New(dst, Float, "") +} + +func(s Series) Mul(c Series) Series { + sf := s.Float() + cf := c.Float() + dst := make([]float64, s.Len()) + floats.MulTo(dst, sf, cf) + return New(dst, Float, "") +} + +func(s Series) Div(c Series) Series { + sf := s.Float() + cf := c.Float() + dst := make([]float64, s.Len()) + floats.DivTo(dst, sf, cf) + return New(dst, Float, "") +} + // FillNaN Fill NaN values using the specified value. func(s Series) FillNaN(value ElementValue) { diff --git a/series/series_test.go b/series/series_test.go index b8d6780..79a1d89 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1562,14 +1562,14 @@ func TestSeries_Map(t *testing.T) { }, } - doubleFloat64 := func(e Element) Element { + doubleFloat64 := func(e Element, index int) Element { result := e.Copy() result.Set(result.Float() * 2) return Element(result) } // and two booleans - and := func(e Element) Element { + and := func(e Element, index int) Element { result := e.Copy() b, err := result.Bool() if err != nil { @@ -1581,7 +1581,7 @@ func TestSeries_Map(t *testing.T) { } // add constant (+5) to value (v) - add5Int := func(e Element) Element { + add5Int := func(e Element, index int) Element { result := e.Copy() i, err := result.Int() if err != nil { From be995283b1cf1a781a9a897f1916e95a29a11e21 Mon Sep 17 00:00:00 2001 From: mengqingyan Date: Wed, 16 Jun 2021 17:40:18 +0800 Subject: [PATCH 09/60] fix test case --- series/series_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/series/series_test.go b/series/series_test.go index 79a1d89..c9c83c0 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1595,7 +1595,7 @@ func TestSeries_Map(t *testing.T) { } // trim (XyZ) prefix from string - trimXyZPrefix := func(e Element) Element { + trimXyZPrefix := func(e Element, index int) Element { result := e.Copy() result.Set(strings.TrimPrefix(result.String(), "XyZ")) return Element(result) From 957270babbaf8f642aaff27e764a44ee052085ef Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 17 Jun 2021 18:05:41 +0800 Subject: [PATCH 10/60] add rolling series name --- dataframe/dataframe.go | 2 +- series/rolling.go | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index fc5bfcd..6b6e9d3 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -98,7 +98,7 @@ func (df DataFrame) Copy() DataFrame { // String implements the Stringer interface for DataFrame func (df DataFrame) String() (str string) { - return df.print(true, true, true, true, 10, 70, "DataFrame") + return df.print(true, false, true, true, 10, 70, "DataFrame") } func (df DataFrame) print( diff --git a/series/rolling.go b/series/rolling.go index d769099..b696f09 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -1,9 +1,10 @@ package series import ( + "fmt" "math" - "github.com/go-gota/gota/util" + "github.com/go-gota/gota/util" "gonum.org/v1/gonum/floats" ) @@ -52,7 +53,7 @@ func (s rollingSeries) Max() Series { eles[index] = ele index++ } - newS := New(eles, s.Type(), "") + newS := New(eles, s.Type(), fmt.Sprintf("%s-Max(w: %d)", s.Name, s.window)) return newS } @@ -73,7 +74,7 @@ func (s rollingSeries) Min() Series { eles[index] = ele index++ } - newS := New(eles, s.Type(), "") + newS := New(eles, s.Type(), fmt.Sprintf("%s-Min(w: %d)", s.Name, s.window)) return newS } @@ -105,7 +106,8 @@ func (s rollingSeries) Mean() Series { floats.SubTo(eles[s.window - 1 : ], sum2, sum1) floats.Add(eles[s.window - 1 : ], sf1) floats.Div(eles[s.window - 1 : ], windows) - newS := New(eles, Float, "") + newS := New(eles, Float, + fmt.Sprintf("%s-Mean(w: %d, p:%d)", s.Name, s.window, s.minPeriods)) return newS } @@ -126,7 +128,8 @@ func (s rollingSeries) Quantile(p float64) Series { eles[index] = ele index++ } - newS := New(eles, s.Type(), "") + newS := New(eles, s.Type(), + fmt.Sprintf("%s-Quantile(w: %d, p:%f)", s.Name, s.window, p)) return newS } @@ -147,7 +150,8 @@ func (s rollingSeries) Median() Series { eles[index] = ele index++ } - newS := New(eles, s.Type(), "") + newS := New(eles, s.Type(), + fmt.Sprintf("%s-Median(w: %d)", s.Name, s.window)) return newS } @@ -168,7 +172,8 @@ func (s rollingSeries) StdDev() Series { eles[index] = ele index++ } - newS := New(eles, Float, "") + newS := New(eles, Float, + fmt.Sprintf("%s-StdDev(w: %d)", s.Name, s.window)) return newS } From 7a5c902142703e24f78a93f61df2f2998ecaf594 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Fri, 18 Jun 2021 18:03:36 +0800 Subject: [PATCH 11/60] fix:series name --- series/rolling.go | 12 ++++++------ series/series.go | 29 +++++++++++++++++++++-------- series/series_test.go | 2 +- 3 files changed, 28 insertions(+), 15 deletions(-) diff --git a/series/rolling.go b/series/rolling.go index b696f09..46ce3bf 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -53,7 +53,7 @@ func (s rollingSeries) Max() Series { eles[index] = ele index++ } - newS := New(eles, s.Type(), fmt.Sprintf("%s-Max(w: %d)", s.Name, s.window)) + newS := New(eles, s.Type(), fmt.Sprintf("%s_RMax[w:%d]", s.Name, s.window)) return newS } @@ -74,7 +74,7 @@ func (s rollingSeries) Min() Series { eles[index] = ele index++ } - newS := New(eles, s.Type(), fmt.Sprintf("%s-Min(w: %d)", s.Name, s.window)) + newS := New(eles, s.Type(), fmt.Sprintf("%s_RMin[w:%d]", s.Name, s.window)) return newS } @@ -107,7 +107,7 @@ func (s rollingSeries) Mean() Series { floats.Add(eles[s.window - 1 : ], sf1) floats.Div(eles[s.window - 1 : ], windows) newS := New(eles, Float, - fmt.Sprintf("%s-Mean(w: %d, p:%d)", s.Name, s.window, s.minPeriods)) + fmt.Sprintf("%s_RMean[w:%d, p:%d]", s.Name, s.window, s.minPeriods)) return newS } @@ -129,7 +129,7 @@ func (s rollingSeries) Quantile(p float64) Series { index++ } newS := New(eles, s.Type(), - fmt.Sprintf("%s-Quantile(w: %d, p:%f)", s.Name, s.window, p)) + fmt.Sprintf("%s_RQuantile[w:%d, p:%f]", s.Name, s.window, p)) return newS } @@ -151,7 +151,7 @@ func (s rollingSeries) Median() Series { index++ } newS := New(eles, s.Type(), - fmt.Sprintf("%s-Median(w: %d)", s.Name, s.window)) + fmt.Sprintf("%s_RMedian[w:%d]", s.Name, s.window)) return newS } @@ -173,7 +173,7 @@ func (s rollingSeries) StdDev() Series { index++ } newS := New(eles, Float, - fmt.Sprintf("%s-StdDev(w: %d)", s.Name, s.window)) + fmt.Sprintf("%s_RStdDev[w:%d]", s.Name, s.window)) return newS } diff --git a/series/series.go b/series/series.go index 46d44f9..a642653 100644 --- a/series/series.go +++ b/series/series.go @@ -835,7 +835,7 @@ func (s Series) Map(f MapFunction) Series { } //Shift series by desired number of periods and returning a new Series object. -func (s Series) Shift(periods int, newName string) Series { +func (s Series) Shift(periods int) Series { if s.Len() == 0 { return s.Empty() } @@ -858,13 +858,13 @@ func (s Series) Shift(periods int, newName string) Series { shiftElements[i + periods] = s.Elem(i).Copy() } } - return New(shiftElements, s.Type(), newName) + return New(shiftElements, s.Type(), fmt.Sprintf("%s_Shift_%d", s.Name, periods)) } // CumProd finds the cumulative product of the first i elements in s and returning a new Series object. func(s Series) CumProd() Series { dst := make([]float64, s.Len()) floats.CumProd(dst, s.Float()) - return New(dst,s.Type(), "") + return New(dst,s.Type(), fmt.Sprintf("%s_CumProd", s.Name)) } // Prod returns the product of the elements of the Series. Returns 1 if len(s) = 0. @@ -876,11 +876,12 @@ func(s Series) Prod() float64 { func(s Series) AddConst(c float64) Series { dst := s.Float() floats.AddConst(c, dst) - return New(dst,s.Type(), "") + return New(dst,s.Type(), fmt.Sprintf("(%s + %v)", s.Name, c)) } // AddConst multiply the scalar c to all of the values in Series and returning a new Series object. func(s Series) MulConst(c float64) Series { + s.Name = fmt.Sprintf("(%s * %v)", s.Name, c) sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() @@ -892,6 +893,7 @@ func(s Series) MulConst(c float64) Series { // DivConst Div the scalar c to all of the values in Series and returning a new Series object. func(s Series) DivConst(c float64) Series { + s.Name = fmt.Sprintf("(%s / %v)", s.Name, c) sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() @@ -906,7 +908,7 @@ func(s Series) Add(c Series) Series { cf := c.Float() dst := make([]float64, s.Len()) floats.AddTo(dst, sf, cf) - return New(dst, Float, "") + return New(dst, Float, fmt.Sprintf("(%s + %s)", s.Name, c.Name)) } func(s Series) Sub(c Series) Series { @@ -914,7 +916,7 @@ func(s Series) Sub(c Series) Series { cf := c.Float() dst := make([]float64, s.Len()) floats.SubTo(dst, sf, cf) - return New(dst, Float, "") + return New(dst, Float, fmt.Sprintf("(%s - %s)", s.Name, c.Name)) } func(s Series) Mul(c Series) Series { @@ -922,7 +924,7 @@ func(s Series) Mul(c Series) Series { cf := c.Float() dst := make([]float64, s.Len()) floats.MulTo(dst, sf, cf) - return New(dst, Float, "") + return New(dst, Float, fmt.Sprintf("(%s * %s)", s.Name, c.Name)) } func(s Series) Div(c Series) Series { @@ -930,7 +932,18 @@ func(s Series) Div(c Series) Series { cf := c.Float() dst := make([]float64, s.Len()) floats.DivTo(dst, sf, cf) - return New(dst, Float, "") + return New(dst, Float, fmt.Sprintf("(%s / %s)", s.Name, c.Name)) +} + +func(s Series) Abs() Series { + s.Name = fmt.Sprintf("Abs(%s)", s.Name) + sm := s.Map(func(e Element, index int) Element { + result := e.Copy() + f := result.Float() + result.Set(math.Abs(f)) + return Element(result) + }) + return sm } diff --git a/series/series_test.go b/series/series_test.go index c9c83c0..bb8f522 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1715,7 +1715,7 @@ func TestSeries_Shift(t *testing.T) { for testnum, test := range tests { expected := test.expected.Records() - b := test.series.Shift(test.shift, "") + b := test.series.Shift(test.shift) received := b.Records() if !reflect.DeepEqual(expected, received) { t.Errorf( From 49bffe289bc34dcc76ed8dc9485eef59fb38f998 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Mon, 21 Jun 2021 15:57:55 +0800 Subject: [PATCH 12/60] add method: series.Operation --- series/series.go | 44 +++++++++++++++++++++++++++++++++ series/series_logic.go | 55 +++++++++++++++++------------------------- 2 files changed, 66 insertions(+), 33 deletions(-) diff --git a/series/series.go b/series/series.go index a642653..98cad42 100644 --- a/series/series.go +++ b/series/series.go @@ -1,6 +1,7 @@ package series import ( + "errors" "fmt" "reflect" "sort" @@ -187,6 +188,12 @@ func New(values interface{}, t Type, name string) Series { for i := 0; i < l; i++ { ret.elements.Elem(i).Set(v[i]) } + case []Element: + l := len(vt) + preAlloc(l) + for i := 0; i < l; i++ { + ret.elements.Elem(i).Set(vt[i]) + } case Series: v := vt l := v.Len() @@ -990,3 +997,40 @@ func(s Series) FillNaNBackward() { func(s Series) Rolling(window int, minPeriods int) Rolling { return NewRollingSeries(window, minPeriods, s) } + + +func Operation(operate func(index int, eles ...Element) interface{}, seriess ...Series) (Series, error) { + if len(seriess) == 0 { + return Series{}, errors.New("seriess num must > 0") + } + sl := seriess[0].Len() + maxLen := sl + for i := 1; i < len(seriess); i++ { + slen := seriess[i].Len() + if sl != slen && slen != 1 { + return Series{}, errors.New("seriess length must be 1 or same") + } + if slen > maxLen { + maxLen = slen + } + } + + eles := make([]Element, maxLen) + baseEle := seriess[0].Elem(0) + for i := 0; i < maxLen; i++ { + operateParam := make([]Element, len(seriess)) + for j := 0; j < len(seriess); j++ { + if seriess[j].Len() == 1 { + operateParam[j] = seriess[j].Elem(0) + } else { + operateParam[j] = seriess[j].Elem(i) + } + } + res := operate(i, operateParam...) + e := baseEle.NA() + e.Set(res) + eles[i] = e + } + result := New(eles, seriess[0].t,"") + return result, nil +} \ No newline at end of file diff --git a/series/series_logic.go b/series/series_logic.go index 984d657..09cdb34 100644 --- a/series/series_logic.go +++ b/series/series_logic.go @@ -1,43 +1,32 @@ package series -import "fmt" +import ( + "log" +) + func (s Series) And(in interface{}) Series { - result := seriesLogic(s, in, func(e1, e2 Element) bool { - e1b,_ := e1.Bool() - e2b,_ := e2.Bool() - return e1b && e2b - }) - return result + inSeries := New(in, s.t, "") + result, err := Operation(func(index int, eles ...Element) interface{} { + e0b,_ := eles[0].Bool() + e1b,_ := eles[1].Bool() + return e0b && e1b + }, s, inSeries) + if err != nil { + log.Panic(err) + } + return result } func (s Series) Or(in interface{}) Series { - result := seriesLogic(s, in, func(e1, e2 Element) bool { - e1b,_ := e1.Bool() - e2b,_ := e2.Bool() - return e1b || e2b - }) - return result -} - -func seriesLogic (s Series, in interface{}, elementLogic func(e1, e2 Element) bool) Series { inSeries := New(in, s.t, "") - if inSeries.Len() != 1 && inSeries.Len() != s.Len() { - s := s.Empty() - s.Err = fmt.Errorf("length mismatch") - return s - } - bools := make([]bool, s.Len()) - - if inSeries.Len() == 1 { - for i := 0; i < s.Len(); i++ { - bools[i] = elementLogic(s.elements.Elem(i), inSeries.elements.Elem(0)) - } - return Bools(bools) - } else { - for i := 0; i < s.Len(); i++ { - bools[i] = elementLogic(s.elements.Elem(i), inSeries.elements.Elem(i)) - } - return Bools(bools) + result, err := Operation(func(index int, eles ...Element) interface{} { + e0b,_ := eles[0].Bool() + e1b,_ := eles[1].Bool() + return e0b || e1b + }, s, inSeries) + if err != nil { + log.Panic(err) } + return result } \ No newline at end of file From 6be74a6712cb5732e5597d296432529363b42982 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 22 Jun 2021 17:17:12 +0800 Subject: [PATCH 13/60] =?UTF-8?q?Number.Sub=E3=80=81Div=E3=80=81Mod?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series/numberoperation.go | 35 ++++++++++ series/numberoperation_test.go | 119 +++++++++++++++++++++++++++++++++ series/type-float.go | 2 +- 3 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 series/numberoperation.go create mode 100644 series/numberoperation_test.go diff --git a/series/numberoperation.go b/series/numberoperation.go new file mode 100644 index 0000000..a97be92 --- /dev/null +++ b/series/numberoperation.go @@ -0,0 +1,35 @@ +package series + +import "math" + +type Number float64 + +func (n Number) Sub(s Series) Series { + result := s.Map(func(e Element, i int) Element { + ele := e.NA() + v := float64(n) - e.Float() + ele.Set(v) + return ele + }) + return result +} + +func (n Number) Div(s Series) Series { + result := s.Map(func(e Element, i int) Element { + ele := e.NA() + v := float64(n) / e.Float() + ele.Set(v) + return ele + }) + return result +} + +func (n Number) Mod(s Series) Series { + result := s.Map(func(e Element, i int) Element { + ele := e.NA() + v := math.Mod(float64(n), e.Float()) + ele.Set(v) + return ele + }) + return result +} diff --git a/series/numberoperation_test.go b/series/numberoperation_test.go new file mode 100644 index 0000000..c4846f2 --- /dev/null +++ b/series/numberoperation_test.go @@ -0,0 +1,119 @@ +package series_test + +import ( + "reflect" + "testing" + + "github.com/go-gota/gota/series" +) + + +func Test_Sub(t *testing.T) { + tests := []struct { + number series.Number + s series.Series + expect series.Series + }{ + { + 5, + series.Floats([]string{series.NaN, "1.5", "1.5", "-0.3", "1.6", "34."}), + series.Floats([]string{series.NaN, "3.5", "3.5", "5.3", "3.4", "-29"}), + }, + { + 5, + series.Ints([]string{series.NaN, "1", "2", "3", "4", "34"}), + series.Ints([]string{series.NaN, "4", "3", "2", "1", "-29"}), + }, + } + for testnum, test := range tests { + expected := test.expect.Records() + b := test.number.Sub(test.s) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Sub:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + if test.expect.Type() != b.Type() { + t.Errorf( + "Test-Sub-typeError:%v\nExpected:\n%v\nReceived:\n%v", + testnum, test.expect.Type(), b.Type(), + ) + } + } +} + + +func Test_Div(t *testing.T) { + tests := []struct { + number series.Number + s series.Series + expect series.Series + }{ + { + 5, + series.Floats([]string{series.NaN, "1.5", "1.5", "-0.3", "1.6", "34."}), + series.Floats([]string{series.NaN, "3.333333", "3.333333", "-16.666667", "3.125000", "0.147059"}), + }, + { + 5, + series.Ints([]string{series.NaN, "1", "2", "3", "4", "34"}), + series.Ints([]string{series.NaN, "5", "2", "1", "1", "0"}), + }, + } + for testnum, test := range tests { + expected := test.expect.Records() + b := test.number.Div(test.s) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Div:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + if test.expect.Type() != b.Type() { + t.Errorf( + "Test-Div-typeError:%v\nExpected:\n%v\nReceived:\n%v", + testnum, test.expect.Type(), b.Type(), + ) + } + } +} + + +func Test_Mod(t *testing.T) { + tests := []struct { + number series.Number + s series.Series + expect series.Series + }{ + { + 5, + series.Floats([]string{series.NaN, "1.5", "1.5", "-0.3", "1.6", "34."}), + series.Floats([]string{series.NaN, "0.500000", "0.500000", "0.200000", "0.200000", "5.000000"}), + }, + { + 5, + series.Ints([]string{series.NaN, "1", "2", "3", "4", "34"}), + series.Ints([]string{series.NaN, "0", "1", "2", "1", "5"}), + }, + } + for testnum, test := range tests { + expected := test.expect.Records() + b := test.number.Mod(test.s) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Mod:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + if test.expect.Type() != b.Type() { + t.Errorf( + "Test-Mod-typeError:%v\nExpected:\n%v\nReceived:\n%v", + testnum, test.expect.Type(), b.Type(), + ) + } + } +} diff --git a/series/type-float.go b/series/type-float.go index 9c59a60..08e5f65 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -51,7 +51,7 @@ func (e floatElement) Copy() Element { } func (e floatElement) NA() Element { - return &floatElement{0.0, true} + return &floatElement{math.NaN(), true} } func (e floatElement) IsNA() bool { From e49182b0456c8eaec53e33c8fa60a719588854c9 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 14 Jul 2021 15:53:51 +0800 Subject: [PATCH 14/60] modify go.mod --- dataframe/benchmark_test.go | 4 +-- dataframe/dataframe.go | 2 +- dataframe/dataframe_test.go | 2 +- dataframe/examples_test.go | 4 +-- go.mod | 2 +- series/benchmarks_test.go | 2 +- series/numberoperation_test.go | 11 +++---- series/rolling.go | 53 ++++++++++++++-------------------- 8 files changed, 34 insertions(+), 46 deletions(-) diff --git a/dataframe/benchmark_test.go b/dataframe/benchmark_test.go index 2b00a94..60db99a 100644 --- a/dataframe/benchmark_test.go +++ b/dataframe/benchmark_test.go @@ -5,8 +5,8 @@ import ( "strconv" "testing" - "github.com/go-gota/gota/dataframe" - "github.com/go-gota/gota/series" + "github.com/mengqingyan/gota/dataframe" + "github.com/mengqingyan/gota/series" ) func generateSeries(n, rep int) (data []series.Series) { diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 6b6e9d3..a770f64 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -13,7 +13,7 @@ import ( "strings" "unicode/utf8" - "github.com/go-gota/gota/series" + "github.com/mengqingyan/gota/series" ) // DataFrame is a data structure designed for operating on table like data (Such diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 7de91af..40012b0 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -9,7 +9,7 @@ import ( "math" - "github.com/go-gota/gota/series" + "github.com/mengqingyan/gota/series" ) // compareFloats compares floating point values up to the number of digits specified. diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go index 8cdb36c..9f5c8fd 100644 --- a/dataframe/examples_test.go +++ b/dataframe/examples_test.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "github.com/go-gota/gota/dataframe" - "github.com/go-gota/gota/series" + "github.com/mengqingyan/gota/dataframe" + "github.com/mengqingyan/gota/series" ) func ExampleNew() { diff --git a/go.mod b/go.mod index 47f7068..b9342a0 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/go-gota/gota +module github.com/mengqingyan/gota go 1.14 diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index 9c7bb8f..aba61b3 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -5,7 +5,7 @@ import ( "strconv" "testing" - "github.com/go-gota/gota/series" + "github.com/mengqingyan/gota/series" ) func generateInts(n int) (data []int) { diff --git a/series/numberoperation_test.go b/series/numberoperation_test.go index c4846f2..adb9d6b 100644 --- a/series/numberoperation_test.go +++ b/series/numberoperation_test.go @@ -4,14 +4,13 @@ import ( "reflect" "testing" - "github.com/go-gota/gota/series" + "github.com/mengqingyan/gota/series" ) - func Test_Sub(t *testing.T) { tests := []struct { number series.Number - s series.Series + s series.Series expect series.Series }{ { @@ -44,11 +43,10 @@ func Test_Sub(t *testing.T) { } } - func Test_Div(t *testing.T) { tests := []struct { number series.Number - s series.Series + s series.Series expect series.Series }{ { @@ -81,11 +79,10 @@ func Test_Div(t *testing.T) { } } - func Test_Mod(t *testing.T) { tests := []struct { number series.Number - s series.Series + s series.Series expect series.Series }{ { diff --git a/series/rolling.go b/series/rolling.go index 46ce3bf..fd4fc5f 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -4,7 +4,7 @@ import ( "fmt" "math" - "github.com/go-gota/gota/util" + "github.com/mengqingyan/gota/util" "gonum.org/v1/gonum/floats" ) @@ -50,14 +50,13 @@ func (s rollingSeries) Max() Series { for frw.HasNext() { ele := s.Elem(0).NA() ele.Set(frw.Next().Max()) - eles[index] = ele + eles[index] = ele index++ } newS := New(eles, s.Type(), fmt.Sprintf("%s_RMax[w:%d]", s.Name, s.window)) return newS } - func (s rollingSeries) Min() Series { if s.Len() == 0 { return s.Empty() @@ -71,14 +70,13 @@ func (s rollingSeries) Min() Series { for frw.HasNext() { ele := s.Elem(0).NA() ele.Set(frw.Next().Min()) - eles[index] = ele + eles[index] = ele index++ } newS := New(eles, s.Type(), fmt.Sprintf("%s_RMin[w:%d]", s.Name, s.window)) return newS } - func (s rollingSeries) Mean() Series { if s.Len() == 0 { return s.Empty() @@ -86,32 +84,31 @@ func (s rollingSeries) Mean() Series { sf := s.Float() sum := make([]float64, s.Len()) floats.CumSum(sum, sf) - + eles := make([]float64, s.Len()) for i := 0; i < s.minPeriods-1; i++ { eles[i] = math.NaN() } // sum0 / sfIndex0 - sum0 := sum[s.minPeriods-1 : s.window - 1] - sfIndex0 := util.MakeFloatSliceRange(s.window - s.minPeriods, float64(s.minPeriods), 1) - floats.DivTo(eles[s.minPeriods-1 : s.window - 1], sum0, sfIndex0) + sum0 := sum[s.minPeriods-1 : s.window-1] + sfIndex0 := util.MakeFloatSliceRange(s.window-s.minPeriods, float64(s.minPeriods), 1) + floats.DivTo(eles[s.minPeriods-1:s.window-1], sum0, sfIndex0) - sum1 := sum[0 : s.Len() - s.window + 1] - sum2 := sum[s.window - 1 :] - sf1 := sf[0 : s.Len() - s.window + 1] + sum1 := sum[0 : s.Len()-s.window+1] + sum2 := sum[s.window-1:] + sf1 := sf[0 : s.Len()-s.window+1] // (sum2 - sum1 + sf1) / window - windows := util.MakeFloatSlice(s.Len() - s.window + 1, float64(s.window)) - floats.SubTo(eles[s.window - 1 : ], sum2, sum1) - floats.Add(eles[s.window - 1 : ], sf1) - floats.Div(eles[s.window - 1 : ], windows) - newS := New(eles, Float, + windows := util.MakeFloatSlice(s.Len()-s.window+1, float64(s.window)) + floats.SubTo(eles[s.window-1:], sum2, sum1) + floats.Add(eles[s.window-1:], sf1) + floats.Div(eles[s.window-1:], windows) + newS := New(eles, Float, fmt.Sprintf("%s_RMean[w:%d, p:%d]", s.Name, s.window, s.minPeriods)) return newS } - func (s rollingSeries) Quantile(p float64) Series { if s.Len() == 0 { return s.Empty() @@ -125,11 +122,11 @@ func (s rollingSeries) Quantile(p float64) Series { for frw.HasNext() { ele := s.Elem(0).NA() ele.Set(frw.Next().Quantile(p)) - eles[index] = ele + eles[index] = ele index++ } - newS := New(eles, s.Type(), - fmt.Sprintf("%s_RQuantile[w:%d, p:%f]", s.Name, s.window, p)) + newS := New(eles, s.Type(), + fmt.Sprintf("%s_RQuantile[w:%d, p:%f]", s.Name, s.window, p)) return newS } @@ -147,15 +144,14 @@ func (s rollingSeries) Median() Series { for frw.HasNext() { ele := s.Elem(0).NA() ele.Set(frw.Next().Median()) - eles[index] = ele + eles[index] = ele index++ } - newS := New(eles, s.Type(), - fmt.Sprintf("%s_RMedian[w:%d]", s.Name, s.window)) + newS := New(eles, s.Type(), + fmt.Sprintf("%s_RMedian[w:%d]", s.Name, s.window)) return newS } - func (s rollingSeries) StdDev() Series { if s.Len() == 0 { return s.Empty() @@ -169,15 +165,10 @@ func (s rollingSeries) StdDev() Series { for frw.HasNext() { ele := &floatElement{0.0, false} ele.Set(frw.Next().StdDev()) - eles[index] = ele + eles[index] = ele index++ } newS := New(eles, Float, fmt.Sprintf("%s_RStdDev[w:%d]", s.Name, s.window)) return newS } - - - - - From 63dd7f3a16ed57272fba2cae376146ea7f30621a Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 22 Jul 2021 11:41:29 +0800 Subject: [PATCH 15/60] modify mod path --- dataframe/benchmark_test.go | 4 ++-- dataframe/dataframe.go | 2 +- dataframe/dataframe_test.go | 2 +- dataframe/examples_test.go | 4 ++-- go.mod | 2 +- series/benchmarks_test.go | 2 +- series/numberoperation_test.go | 2 +- series/rolling.go | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dataframe/benchmark_test.go b/dataframe/benchmark_test.go index 60db99a..1a545d2 100644 --- a/dataframe/benchmark_test.go +++ b/dataframe/benchmark_test.go @@ -5,8 +5,8 @@ import ( "strconv" "testing" - "github.com/mengqingyan/gota/dataframe" - "github.com/mengqingyan/gota/series" + "github.com/mqy527/gota/dataframe" + "github.com/mqy527/gota/series" ) func generateSeries(n, rep int) (data []series.Series) { diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index a770f64..2c6cd91 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -13,7 +13,7 @@ import ( "strings" "unicode/utf8" - "github.com/mengqingyan/gota/series" + "github.com/mqy527/gota/series" ) // DataFrame is a data structure designed for operating on table like data (Such diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 40012b0..64095a9 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -9,7 +9,7 @@ import ( "math" - "github.com/mengqingyan/gota/series" + "github.com/mqy527/gota/series" ) // compareFloats compares floating point values up to the number of digits specified. diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go index 9f5c8fd..870e0f6 100644 --- a/dataframe/examples_test.go +++ b/dataframe/examples_test.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "github.com/mengqingyan/gota/dataframe" - "github.com/mengqingyan/gota/series" + "github.com/mqy527/gota/dataframe" + "github.com/mqy527/gota/series" ) func ExampleNew() { diff --git a/go.mod b/go.mod index b9342a0..a92c549 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/mengqingyan/gota +module github.com/mqy527/gota go 1.14 diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index aba61b3..b33a2e7 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -5,7 +5,7 @@ import ( "strconv" "testing" - "github.com/mengqingyan/gota/series" + "github.com/mqy527/gota/series" ) func generateInts(n int) (data []int) { diff --git a/series/numberoperation_test.go b/series/numberoperation_test.go index adb9d6b..f741bc9 100644 --- a/series/numberoperation_test.go +++ b/series/numberoperation_test.go @@ -4,7 +4,7 @@ import ( "reflect" "testing" - "github.com/mengqingyan/gota/series" + "github.com/mqy527/gota/series" ) func Test_Sub(t *testing.T) { diff --git a/series/rolling.go b/series/rolling.go index fd4fc5f..e45a0b4 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -4,7 +4,7 @@ import ( "fmt" "math" - "github.com/mengqingyan/gota/util" + "github.com/mqy527/gota/util" "gonum.org/v1/gonum/floats" ) From dcc240197c70b9ff5858340db87c6458ba8690e5 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sun, 26 Sep 2021 15:05:31 +0800 Subject: [PATCH 16/60] optimize Series.Elem --- series/series.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/series/series.go b/series/series.go index 98cad42..cc7761d 100644 --- a/series/series.go +++ b/series/series.go @@ -623,7 +623,11 @@ func (s Series) Val(i int) interface{} { // Elem returns the element of a series for the given index. Will panic if the // index is out of bounds. +// The index could be less than 0. When the index equals -1, Elem returns the last element of a series. func (s Series) Elem(i int) Element { + if i < 0 { + return s.elements.Elem(s.Len() + i) + } return s.elements.Elem(i) } From e5e5bed327dd4c1d66884d3bd5391345170d0f47 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 9 Nov 2021 15:19:57 +0800 Subject: [PATCH 17/60] optimize DataFrame --- dataframe/dataframe.go | 35 ++++++++++++++++++++++++++++------- dataframe/dataframe_test.go | 22 +++++++++++++++++++--- 2 files changed, 47 insertions(+), 10 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 2c6cd91..f7c2a5f 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -429,21 +429,32 @@ func (df DataFrame) RBind(dfb DataFrame) DataFrame { // Mutate changes a column of the DataFrame with the given Series or adds it as // a new column if the column name does not exist. -func (df DataFrame) Mutate(s series.Series) DataFrame { - if df.Err != nil { +func (df DataFrame) Mutate(ss ...series.Series) DataFrame { + if df.Err != nil || len(ss) == 0{ return df } - if s.Len() != df.nrows { + + slen := ss[0].Len() + for i := 1; i < len(ss); i++ { + if slen != ss[i].Len() { + return DataFrame{Err: fmt.Errorf("mutate: serieses length not equal")} + } + } + if slen != df.nrows { return DataFrame{Err: fmt.Errorf("mutate: wrong dimensions")} } df = df.Copy() // Check that colname exist on dataframe columns := df.columns - if idx := findInStringSlice(s.Name, df.Names()); idx != -1 { - columns[idx] = s - } else { - columns = append(columns, s) + dfNames := df.Names() + for i := 0; i < len(ss); i++ { + if idx := findInStringSlice(ss[i].Name, dfNames); idx != -1 { + columns[idx] = ss[i] + } else { + columns = append(columns, ss[i]) + } } + nrows, ncols, err := checkColumnsDimensions(columns...) if err != nil { return DataFrame{Err: err} @@ -1714,6 +1725,16 @@ func (df DataFrame) Elem(r, c int) series.Element { return df.columns[c].Elem(r) } +// Elem returns the element on row `r` and column `c`. Will panic if the index is +// out of bounds. +func (df DataFrame) ElemByRowAndColName(row int, columnName string) series.Element { + colIndex := df.colIndex(columnName) + if colIndex < 0 { + return nil + } + return df.columns[colIndex].Elem(row) +} + // fixColnames assigns a name to the missing column names and makes it so that the // column names are unique. func fixColnames(colnames []string) { diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 64095a9..a8c6a17 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -582,36 +582,52 @@ func TestDataFrame_Mutate(t *testing.T) { ) table := []struct { s series.Series + s1 series.Series expDf DataFrame }{ { series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.1"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.3"), New( series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.1"), series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"), - series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.3"), ), }, { series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.2"), + series.New([]string{"w", "e", "r", "t", "y"}, series.String, "COL.1"), New( - series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"), + series.New([]string{"w", "e", "r", "t", "y"}, series.String, "COL.1"), series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.2"), series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), ), }, { series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.5"), + New( + series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"), + series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"), + series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.5"), + ), + }, + { + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]float64{3.3, 4.3, 5.3, 5.5, 6.4}, series.Float, "COL.5"), New( series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"), series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"), series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]float64{3.3, 4.3, 5.3, 5.5, 6.4}, series.Float, "COL.5"), ), }, } for i, tc := range table { - b := a.Mutate(tc.s) + b := a.Mutate(tc.s, tc.s1) if b.Err != nil { t.Errorf("Test: %d\nError:%v", i, b.Err) From b7bcff4b48a63c29633da0646c3e62f753d4cc16 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 22 Feb 2022 16:18:09 +0800 Subject: [PATCH 18/60] =?UTF-8?q?Rolling,=20add=20method:=20Apply=E3=80=81?= =?UTF-8?q?MeanByWeights?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series/rolling.go | 48 +++++++++++++++++++++++ series/rolling_test.go | 88 ++++++++++++++++++++++++++++++++++++++++++ series/window.go | 7 +++- 3 files changed, 142 insertions(+), 1 deletion(-) diff --git a/series/rolling.go b/series/rolling.go index e45a0b4..9ed4b88 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -12,9 +12,11 @@ type Rolling interface { Max() Series Min() Series Mean() Series + MeanByWeights(weights []float64) Series Quantile(p float64) Series Median() Series StdDev() Series + Apply(f func(windowFloats []float64, windowEles []Element) interface{}) Series } type rollingSeries struct { @@ -109,6 +111,31 @@ func (s rollingSeries) Mean() Series { return newS } +func (s rollingSeries) MeanByWeights(weights []float64) Series { + if s.window != len(weights) { + panic("window must be equal to weights length") + } + weightSum := floats.Sum(weights) + weightLen := len(weights) + ma := s.Apply( + func(windowFloats []float64, windowEles []Element) interface{} { + weightsUse := weights + weightSumUse := weightSum + wfL := len(windowFloats) + if wfL < weightLen { + weightsUse = weights[weightLen - wfL:] + weightSumUse = floats.Sum(weightsUse) + } + totalSum := 0.0 + for i := 0; i < wfL; i++ { + totalSum += weightsUse[i] * windowFloats[i] + } + return totalSum / weightSumUse + }) + return ma +} + + func (s rollingSeries) Quantile(p float64) Series { if s.Len() == 0 { return s.Empty() @@ -172,3 +199,24 @@ func (s rollingSeries) StdDev() Series { fmt.Sprintf("%s_RStdDev[w:%d]", s.Name, s.window)) return newS } + + +func (s rollingSeries) Apply(f func(windowFloats []float64, windowEles []Element) interface{}) Series { + if s.Len() == 0 { + return s.Empty() + } + eles := make([]Element, s.Len()) + var index int + for index = 0; index < s.minPeriods-1; index++ { + eles[index] = s.Elem(0).NA() + } + frw := NewRollingWindow(s.Series, s.window, s.minPeriods) + for frw.HasNext() { + ele := s.Elem(0).NA() + ele.Set(frw.Next().Apply(f)) + eles[index] = ele + index++ + } + newS := New(eles, s.Type(), fmt.Sprintf("%s_RApply[w:%d]", s.Name, s.window)) + return newS +} \ No newline at end of file diff --git a/series/rolling_test.go b/series/rolling_test.go index a0ea8cb..cec03c4 100644 --- a/series/rolling_test.go +++ b/series/rolling_test.go @@ -131,3 +131,91 @@ func TestSeries_Rolling(t *testing.T) { } } + +func TestSeries_MeanByWeights(t *testing.T) { + tests := []struct { + series Series + window int + minPeriod int + weights []float64 + meanExpected Series + }{ + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + []float64{0.5, 0.3, 0.2}, + Floats([]string{NaN, "-0.392", "-0.2864794", "-1.7922349", "0.0392358", "7.0928975"}), + }, + { + Floats([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + []float64{5, 3, 2}, + Floats([]string{"23", "19", "35.6", "24", "30.7"}), + }, + } + + for testnum, test := range tests { + expected := test.meanExpected.Records() + b := test.series.Rolling(test.window, test.minPeriod).MeanByWeights(test.weights) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-MeanByWeights:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_RollingApply(t *testing.T) { + tests := []struct { + series Series + window int + minPeriod int + applyExpected Series + applyFunc func([]float64, []Element) interface{} + }{ + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + Floats([]string{NaN, "2.5", "2.5", "-2.23", "0.662603", "0.619921"}), + func(f []float64, e []Element) interface{} { + return f[0] + 1 + }, + }, + { + Strings([]string{"20210618", "20200909", "20200910", "20200912", "20200911"}), + 3, + 2, + Strings([]string{NaN, "20210618-", "20210618-", "20200909-", "20200910-"}), + func(f []float64, e []Element) interface{} { + return e[0].String() + "-" + }, + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + Ints([]string{"24", "14", "102", "-63", "-2"}), + func(f []float64, e []Element) interface{} { + return f[len(f)-1]+1 + }, + }, + } + + for testnum, test := range tests { + expected := test.applyExpected.Records() + b := test.series.Rolling(test.window, test.minPeriod).Apply(test.applyFunc) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Apply:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + diff --git a/series/window.go b/series/window.go index 6394ea6..b50d1e1 100644 --- a/series/window.go +++ b/series/window.go @@ -18,6 +18,7 @@ type Window interface { Quantile(p float64) float64 Median() float64 StdDev() float64 + Apply(func(windowFloats []float64,windowEles []Element) interface{}) interface{} } type rollingWindow struct { @@ -33,7 +34,7 @@ func NewRollingWindow(s Series, windowSize int, minPeriods int) RollingWindow { eles := make([]Element, s.Len()) for i := 0; i < s.Len(); i++ { - eles[i] = s.Elem(i) + eles[i] = s.Elem(i).Copy() } return &rollingWindow{ @@ -106,6 +107,10 @@ func (ew elementsWindow) StdDev() float64 { return stat.StdDev(ew.floats, nil) } +func (ew elementsWindow) Apply(f func(windowFloats []float64, windowEles []Element) interface{}) interface{}{ + return f(ew.floats, ew.eles) +} + func findMax(eles []Element) Element { max := eles[0] From 7e733e34d0201d8543c78a86a5c82ab8ba35d0e4 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 3 Mar 2022 16:09:31 +0800 Subject: [PATCH 19/60] refactor rolling --- series/rolling.go | 236 ++++++++++++++++++----------------------- series/rolling_test.go | 29 ++--- series/series.go | 34 +++++- series/window.go | 133 ----------------------- 4 files changed, 153 insertions(+), 279 deletions(-) delete mode 100644 series/window.go diff --git a/series/rolling.go b/series/rolling.go index 9ed4b88..b0b6598 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -2,13 +2,10 @@ package series import ( "fmt" - "math" - - "github.com/mqy527/gota/util" "gonum.org/v1/gonum/floats" ) -type Rolling interface { +type RollingSeries interface { Max() Series Min() Series Mean() Series @@ -16,7 +13,7 @@ type Rolling interface { Quantile(p float64) Series Median() Series StdDev() Series - Apply(f func(windowFloats []float64, windowEles []Element) interface{}) Series + Apply(f func(window Series, windowIndex int) interface{}, t Type) Series } type rollingSeries struct { @@ -25,7 +22,47 @@ type rollingSeries struct { minPeriods int } -func NewRollingSeries(window int, minPeriods int, s Series) Rolling { +type RollingWindow interface { + HasNext() bool + NextWindow() Series +} + +type rollingWindow struct { + startIndex int + endIndexExclude int + windowSize int + s Series +} + +func NewRollingWindow(s Series, windowSize int) RollingWindow { + return &rollingWindow{ + startIndex: 0, + endIndexExclude: 1, + windowSize: windowSize, + s: s.Copy(), + } +} + +func (rw *rollingWindow) HasNext() bool { + return rw.endIndexExclude <= rw.s.Len() +} + +func (rw *rollingWindow) NextWindow() Series { + window := Series{ + Name: rw.s.Name, + t: rw.s.t, + } + window.elements = rw.s.elements.Slice(rw.startIndex, rw.endIndexExclude) + rw.endIndexExclude++ + startIndex := rw.endIndexExclude - rw.windowSize + if startIndex > rw.startIndex { + rw.startIndex = startIndex + } + return window +} + + +func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { if window < 1 { panic("window must >= 1") } @@ -40,74 +77,45 @@ func NewRollingSeries(window int, minPeriods int, s Series) Rolling { } func (s rollingSeries) Max() Series { - if s.Len() == 0 { - return s.Empty() - } - eles := make([]Element, s.Len()) - var index int - for index = 0; index < s.minPeriods-1; index++ { - eles[index] = s.Elem(0).NA() - } - frw := NewRollingWindow(s.Series, s.window, s.minPeriods) - for frw.HasNext() { - ele := s.Elem(0).NA() - ele.Set(frw.Next().Max()) - eles[index] = ele - index++ + + var maxFunc func(window Series, windowIndex int) interface{} + if s.Type() == String { + maxFunc = func(window Series, windowIndex int) interface{} { + return window.MaxStr() + } + } else { + maxFunc = func(window Series, windowIndex int) interface{} { + return window.Max() + } } - newS := New(eles, s.Type(), fmt.Sprintf("%s_RMax[w:%d]", s.Name, s.window)) + + newS := s.Apply(maxFunc, "") + newS.Name = fmt.Sprintf("%s_RMax[w:%d]", s.Name, s.window) return newS } func (s rollingSeries) Min() Series { - if s.Len() == 0 { - return s.Empty() - } - eles := make([]Element, s.Len()) - var index int - for index = 0; index < s.minPeriods-1; index++ { - eles[index] = s.Elem(0).NA() - } - frw := NewRollingWindow(s.Series, s.window, s.minPeriods) - for frw.HasNext() { - ele := s.Elem(0).NA() - ele.Set(frw.Next().Min()) - eles[index] = ele - index++ - } - newS := New(eles, s.Type(), fmt.Sprintf("%s_RMin[w:%d]", s.Name, s.window)) + var minFunc func(window Series, windowIndex int) interface{} + if s.Type() == String { + minFunc = func(window Series, windowIndex int) interface{} { + return window.MinStr() + } + } else { + minFunc = func(window Series, windowIndex int) interface{} { + return window.Min() + } + } + + newS := s.Apply(minFunc, "") + newS.Name = fmt.Sprintf("%s_RMin[w:%d]", s.Name, s.window) return newS } func (s rollingSeries) Mean() Series { - if s.Len() == 0 { - return s.Empty() - } - sf := s.Float() - sum := make([]float64, s.Len()) - floats.CumSum(sum, sf) - - eles := make([]float64, s.Len()) - for i := 0; i < s.minPeriods-1; i++ { - eles[i] = math.NaN() - } - - // sum0 / sfIndex0 - sum0 := sum[s.minPeriods-1 : s.window-1] - sfIndex0 := util.MakeFloatSliceRange(s.window-s.minPeriods, float64(s.minPeriods), 1) - floats.DivTo(eles[s.minPeriods-1:s.window-1], sum0, sfIndex0) - - sum1 := sum[0 : s.Len()-s.window+1] - sum2 := sum[s.window-1:] - sf1 := sf[0 : s.Len()-s.window+1] - - // (sum2 - sum1 + sf1) / window - windows := util.MakeFloatSlice(s.Len()-s.window+1, float64(s.window)) - floats.SubTo(eles[s.window-1:], sum2, sum1) - floats.Add(eles[s.window-1:], sf1) - floats.Div(eles[s.window-1:], windows) - newS := New(eles, Float, - fmt.Sprintf("%s_RMean[w:%d, p:%d]", s.Name, s.window, s.minPeriods)) + newS := s.Apply(func(window Series, windowIndex int) interface{} { + return window.Mean() + }, Float) + newS.Name = fmt.Sprintf("%s_RMean[w:%d]", s.Name, s.window) return newS } @@ -118,105 +126,67 @@ func (s rollingSeries) MeanByWeights(weights []float64) Series { weightSum := floats.Sum(weights) weightLen := len(weights) ma := s.Apply( - func(windowFloats []float64, windowEles []Element) interface{} { + func(window Series, windowIndex int) interface{} { weightsUse := weights weightSumUse := weightSum - wfL := len(windowFloats) + wfL := window.Len() if wfL < weightLen { - weightsUse = weights[weightLen - wfL:] + weightsUse = weights[weightLen-wfL:] weightSumUse = floats.Sum(weightsUse) } totalSum := 0.0 + windowFloats := window.Float() for i := 0; i < wfL; i++ { totalSum += weightsUse[i] * windowFloats[i] } return totalSum / weightSumUse - }) + }, Float) return ma } - func (s rollingSeries) Quantile(p float64) Series { - if s.Len() == 0 { - return s.Empty() - } - eles := make([]Element, s.Len()) - var index int - for index = 0; index < s.minPeriods-1; index++ { - eles[index] = s.Elem(0).NA() - } - frw := NewRollingWindow(s.Series, s.window, s.minPeriods) - for frw.HasNext() { - ele := s.Elem(0).NA() - ele.Set(frw.Next().Quantile(p)) - eles[index] = ele - index++ - } - newS := New(eles, s.Type(), - fmt.Sprintf("%s_RQuantile[w:%d, p:%f]", s.Name, s.window, p)) + newS := s.Apply(func(window Series, windowIndex int) interface{} { + return window.Quantile(p) + }, Float) + newS.Name = fmt.Sprintf("%s_RQuantile[w:%d, p:%f]", s.Name, s.window, p) return newS } func (s rollingSeries) Median() Series { - - if s.Len() == 0 { - return s.Empty() - } - eles := make([]Element, s.Len()) - var index int - for index = 0; index < s.minPeriods-1; index++ { - eles[index] = s.Elem(0).NA() - } - frw := NewRollingWindow(s.Series, s.window, s.minPeriods) - for frw.HasNext() { - ele := s.Elem(0).NA() - ele.Set(frw.Next().Median()) - eles[index] = ele - index++ - } - newS := New(eles, s.Type(), - fmt.Sprintf("%s_RMedian[w:%d]", s.Name, s.window)) + newS := s.Apply(func(window Series, windowIndex int) interface{} { + return window.Median() + }, Float) + newS.Name = fmt.Sprintf("%s_RMedian[w:%d]", s.Name, s.window) return newS } func (s rollingSeries) StdDev() Series { - if s.Len() == 0 { - return s.Empty() - } - eles := make([]Element, s.Len()) - var index int - for index = 0; index < s.minPeriods-1; index++ { - eles[index] = &floatElement{0.0, true} - } - frw := NewRollingWindow(s.Series, s.window, s.minPeriods) - for frw.HasNext() { - ele := &floatElement{0.0, false} - ele.Set(frw.Next().StdDev()) - eles[index] = ele - index++ - } - newS := New(eles, Float, - fmt.Sprintf("%s_RStdDev[w:%d]", s.Name, s.window)) + newS := s.Apply(func(window Series, windowIndex int) interface{} { + return window.StdDev() + }, Float) + newS.Name = fmt.Sprintf("%s_RStdDev[w:%d]", s.Name, s.window) return newS } - -func (s rollingSeries) Apply(f func(windowFloats []float64, windowEles []Element) interface{}) Series { +func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series { if s.Len() == 0 { return s.Empty() } - eles := make([]Element, s.Len()) - var index int - for index = 0; index < s.minPeriods-1; index++ { - eles[index] = s.Elem(0).NA() + if len(t) == 0 { + t = s.t } - frw := NewRollingWindow(s.Series, s.window, s.minPeriods) - for frw.HasNext() { - ele := s.Elem(0).NA() - ele.Set(frw.Next().Apply(f)) + eles := make([]Element, s.Len()) + index := 0 + rw := NewRollingWindow(s.Series, s.window) + for rw.HasNext() { + window := rw.NextWindow() + ele := NaNElementByType(t) + if window.Len() >= s.minPeriods { + ele.Set(f(window, index)) + } eles[index] = ele index++ } - newS := New(eles, s.Type(), fmt.Sprintf("%s_RApply[w:%d]", s.Name, s.window)) + newS := New(eles, t, fmt.Sprintf("%s_RApply[w:%d]", s.Name, s.window)) return newS } \ No newline at end of file diff --git a/series/rolling_test.go b/series/rolling_test.go index cec03c4..2a2dd0d 100644 --- a/series/rolling_test.go +++ b/series/rolling_test.go @@ -26,7 +26,7 @@ func TestSeries_Rolling(t *testing.T) { Bools([]string{"false", "false", "false", "false", "false"}), Floats([]string{"0.000000", "0.500000", "0.500000", "0.000000", "0.500000"}), 0.8, - Bools([]string{"false", "true", "true", "false", "true"}), + Floats([]string{"0.000000", "1.000000", "1.000000", "0.000000", "1.000000"}), Bools([]string{NaN, NaN, NaN, NaN, NaN}), Floats([]string{NaN, "0.707106781", "0.707106781", "0.000000", "0.707106781"}), }, @@ -50,7 +50,7 @@ func TestSeries_Rolling(t *testing.T) { Strings([]string{NaN, "20200909", "20200909", "20200909", "20200910"}), Floats([]string{NaN, "20205763.500000", "20204145.666667", "20200910.333333", "20200911.000000"}), 0.8, - Strings([]string{NaN, "20210618.000000", "20210618.000000", "20200912.000000", "20200912.000000"}), + Strings([]string{NaN, NaN, NaN, NaN, NaN}), Strings([]string{NaN, NaN, NaN, NaN, NaN}), Strings([]string{NaN, "6865.299739", "5605.205111", "1.527525", "1.000000"}), }, @@ -62,8 +62,8 @@ func TestSeries_Rolling(t *testing.T) { Ints([]string{"23", "13", "13", "-64", "-64"}), Floats([]string{"23.000000", "18.000000", "45.666667", "16.666667", "11.333333"}), 0.8, - Ints([]string{"23", "23", "101", "101", "101"}), - Ints([]string{"23", "18", "23", "13", "-3"}), + Floats([]string{"23", "23", "101", "101", "101"}), + Floats([]string{"23", "18", "23", "13", "-3"}), Floats([]string{NaN, "7.071067812", "48.18021724", "82.56108849", "83.4286122"}), }, } @@ -175,40 +175,45 @@ func TestSeries_RollingApply(t *testing.T) { window int minPeriod int applyExpected Series - applyFunc func([]float64, []Element) interface{} + applyFunc func(window Series, windowIndex int) interface{} + t Type }{ { Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), 3, 2, Floats([]string{NaN, "2.5", "2.5", "-2.23", "0.662603", "0.619921"}), - func(f []float64, e []Element) interface{} { - return f[0] + 1 + func(window Series, windowIndex int) interface{} { + return window.Float()[0] + 1 }, + "", }, { Strings([]string{"20210618", "20200909", "20200910", "20200912", "20200911"}), 3, 2, Strings([]string{NaN, "20210618-", "20210618-", "20200909-", "20200910-"}), - func(f []float64, e []Element) interface{} { - return e[0].String() + "-" + func(window Series, windowIndex int) interface{} { + return window.Elem(0).String() + "-" }, + String, }, { Ints([]string{"23", "13", "101", "-64", "-3"}), 3, 1, Ints([]string{"24", "14", "102", "-63", "-2"}), - func(f []float64, e []Element) interface{} { - return f[len(f)-1]+1 + func(window Series, windowIndex int) interface{} { + i, _ := window.Elem(-1).Int() + return i+1 }, + Int, }, } for testnum, test := range tests { expected := test.applyExpected.Records() - b := test.series.Rolling(test.window, test.minPeriod).Apply(test.applyFunc) + b := test.series.Rolling(test.window, test.minPeriod).Apply(test.applyFunc, test.t) received := b.Records() if !reflect.DeepEqual(expected, received) { t.Errorf( diff --git a/series/series.go b/series/series.go index cc7761d..37088ac 100644 --- a/series/series.go +++ b/series/series.go @@ -30,6 +30,7 @@ type Series struct { type Elements interface { Elem(int) Element Len() int + Slice(start, end int) Elements } // Element is the interface that defines the types of methods to be present for @@ -66,24 +67,28 @@ type intElements []intElement func (e intElements) Len() int { return len(e) } func (e intElements) Elem(i int) Element { return &e[i] } +func (e intElements) Slice(start, end int) Elements { return e[start : end] } // stringElements is the concrete implementation of Elements for String elements. type stringElements []stringElement func (e stringElements) Len() int { return len(e) } func (e stringElements) Elem(i int) Element { return &e[i] } +func (e stringElements) Slice(start, end int) Elements { return e[start : end] } // floatElements is the concrete implementation of Elements for Float elements. type floatElements []floatElement func (e floatElements) Len() int { return len(e) } func (e floatElements) Elem(i int) Element { return &e[i] } +func (e floatElements) Slice(start, end int) Elements { return e[start : end] } // boolElements is the concrete implementation of Elements for Bool elements. type boolElements []boolElement func (e boolElements) Len() int { return len(e) } func (e boolElements) Elem(i int) Element { return &e[i] } +func (e boolElements) Slice(start, end int) Elements { return e[start : end] } // ElementValue represents the value that can be used for marshaling or // unmarshaling Elements. @@ -998,7 +1003,7 @@ func(s Series) FillNaNBackward() { } } -func(s Series) Rolling(window int, minPeriods int) Rolling { +func(s Series) Rolling(window int, minPeriods int) RollingSeries { return NewRollingSeries(window, minPeriods, s) } @@ -1037,4 +1042,31 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... } result := New(eles, seriess[0].t,"") return result, nil +} + +func NaNElementByType(t Type) Element { + switch t { + case String: + return &stringElement{ + e: NaN, + nan: true, + } + case Float: + return &floatElement{ + e: math.NaN(), + nan: true, + } + case Bool: + return &boolElement{ + e: false, + nan: true, + } + case Int: + return &intElement{ + e: 0, + nan: true, + } + default: + panic("not supported type:" + t) + } } \ No newline at end of file diff --git a/series/window.go b/series/window.go deleted file mode 100644 index b50d1e1..0000000 --- a/series/window.go +++ /dev/null @@ -1,133 +0,0 @@ -package series - -import ( - "math" - "sort" - - "gonum.org/v1/gonum/stat" -) - -type RollingWindow interface { - HasNext() bool - Next() Window -} - -type Window interface { - Max() interface{} - Min() interface{} - Quantile(p float64) float64 - Median() float64 - StdDev() float64 - Apply(func(windowFloats []float64,windowEles []Element) interface{}) interface{} -} - -type rollingWindow struct { - floats []float64 - eles []Element - startIndex int - endIndexExclude int - windowSize int - eleType Type -} - -func NewRollingWindow(s Series, windowSize int, minPeriods int) RollingWindow { - - eles := make([]Element, s.Len()) - for i := 0; i < s.Len(); i++ { - eles[i] = s.Elem(i).Copy() - } - - return &rollingWindow{ - floats: s.Float(), - eles: eles, - startIndex: 0, - endIndexExclude: minPeriods, - windowSize: windowSize, - eleType: s.t, - } -} - -func (rw *rollingWindow) HasNext() bool { - return rw.endIndexExclude <= len(rw.eles) -} - -func (rw *rollingWindow) Next() Window { - fw := elementsWindow { - rw.floats[rw.startIndex:rw.endIndexExclude], - rw.eles[rw.startIndex:rw.endIndexExclude], - } - rw.endIndexExclude++ - startIndex := rw.endIndexExclude - rw.windowSize - if startIndex > rw.startIndex { - rw.startIndex = startIndex - } - - return fw -} - -type elementsWindow struct { - floats []float64 - eles []Element -} - -func (ew elementsWindow) Max() interface{} { - return findMax(ew.eles).Val() -} - -func (ew elementsWindow) Min() interface{} { - return findMin(ew.eles).Val() -} - -func (ew elementsWindow) Quantile(p float64) float64 { - fs := make([]float64, len(ew.floats)) - copy(fs, ew.floats) - sort.Float64s(fs) - return stat.Quantile(p, stat.Empirical, fs, nil) -} - -func (ew elementsWindow) Median() float64 { - if len(ew.eles) == 0 || - ew.eles[0].Type() == String || - ew.eles[0].Type() == Bool { - return math.NaN() - } - - fs := make([]float64, len(ew.floats)) - copy(fs, ew.floats) - sort.Float64s(fs) - - if len(ew.floats) %2 != 0 { - return fs[len(ew.floats)/2] - } - return (ew.floats[(len(ew.floats)/2)-1] + - ew.floats[len(ew.floats)/2]) * 0.5 -} - -func (ew elementsWindow) StdDev() float64 { - return stat.StdDev(ew.floats, nil) -} - -func (ew elementsWindow) Apply(f func(windowFloats []float64, windowEles []Element) interface{}) interface{}{ - return f(ew.floats, ew.eles) -} - - -func findMax(eles []Element) Element { - max := eles[0] - for i := 1; i < len(eles); i++ { - if eles[i].Greater(max) { - max = eles[i] - } - } - return max -} - -func findMin(eles []Element) Element { - min := eles[0] - for i := 1; i < len(eles); i++ { - if eles[i].Less(min) { - min = eles[i] - } - } - return min -} From 284747625caec38e5979933a39cfea7476132a9d Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 8 Mar 2022 13:45:16 +0800 Subject: [PATCH 20/60] optimize rolling --- series/rolling.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/series/rolling.go b/series/rolling.go index b0b6598..f766097 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -69,7 +69,7 @@ func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { if minPeriods < 1 || minPeriods > window { panic("minPeriods must >= 1 && minPeriods must <= window") } - return &rollingSeries{ + return rollingSeries{ Series: s, window: window, minPeriods: minPeriods, From c1c2ba80949f551bbfab816d2be36e5bfc74df75 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 8 Mar 2022 15:22:19 +0800 Subject: [PATCH 21/60] Remove redundant code --- series/series.go | 2 +- util/util.go | 17 ----------------- 2 files changed, 1 insertion(+), 18 deletions(-) delete mode 100644 util/util.go diff --git a/series/series.go b/series/series.go index 394cf73..6b9775a 100644 --- a/series/series.go +++ b/series/series.go @@ -96,7 +96,7 @@ func (e boolElements) Slice(start, end int) Elements { return e[start : end] } // unmarshaling Elements. type ElementValue interface{} -type MapFunction func(Element, int) Element +type MapFunction func(ele Element, index int) Element // Comparator is a convenience alias that can be used for a more type safe way of // reason and use comparators. diff --git a/util/util.go b/util/util.go deleted file mode 100644 index f0fb50f..0000000 --- a/util/util.go +++ /dev/null @@ -1,17 +0,0 @@ -package util - -func MakeFloatSlice(size int, defaultValue float64) []float64 { - fs := make([]float64, size) - for i := 0; i < size; i++ { - fs[i] = defaultValue - } - return fs -} - -func MakeFloatSliceRange(size int, start float64, step float64) []float64 { - fs := make([]float64, size) - for i := 0; i < size; i++ { - fs[i] = start + step * float64(i) - } - return fs -} \ No newline at end of file From b8b189817f143b2d09a1792ac969ab3fe3075d29 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 9 Mar 2022 09:58:29 +0800 Subject: [PATCH 22/60] =?UTF-8?q?optimize=EF=BC=9ASeries.Slice?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series/series.go | 28 ++++++++++++++-------------- series/series_test.go | 6 ++++++ 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/series/series.go b/series/series.go index 6b9775a..396b997 100644 --- a/series/series.go +++ b/series/series.go @@ -918,25 +918,25 @@ func(s Series) AddConst(c float64) Series { // AddConst multiply the scalar c to all of the values in Series and returning a new Series object. func(s Series) MulConst(c float64) Series { - s.Name = fmt.Sprintf("(%s * %v)", s.Name, c) sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(f * c) - return Element(result) + return result }) + sm.Name = fmt.Sprintf("(%s * %v)", s.Name, c) return sm } // DivConst Div the scalar c to all of the values in Series and returning a new Series object. func(s Series) DivConst(c float64) Series { - s.Name = fmt.Sprintf("(%s / %v)", s.Name, c) sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(f / c) - return Element(result) + return result }) + sm.Name = fmt.Sprintf("(%s / %v)", s.Name, c) return sm } @@ -973,13 +973,13 @@ func(s Series) Div(c Series) Series { } func(s Series) Abs() Series { - s.Name = fmt.Sprintf("Abs(%s)", s.Name) sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(math.Abs(f)) - return Element(result) + return result }) + sm.Name = fmt.Sprintf("Abs(%s)", s.Name) return sm } @@ -1104,22 +1104,22 @@ func (s Series) Sum() float64 { return sum } -// Slice slices Series from j to k-1 index. -func (s Series) Slice(j, k int) Series { +// Slice slices Series from start to end-1 index. +func (s Series) Slice(start, end int) Series { if s.Err != nil { return s } - if j > k || j < 0 || k >= s.Len() { + if start > end || start < 0 || end > s.Len() { empty := s.Empty() empty.Err = fmt.Errorf("slice index out of bounds") return empty } - idxs := make([]int, k-j) - for i := 0; j+i < k; i++ { - idxs[i] = j + i + ret := Series{ + Name: s.Name, + t: s.t, } - - return s.Subset(idxs) + ret.elements = s.elements.Slice(start, end) + return ret } diff --git a/series/series_test.go b/series/series_test.go index c7d9b46..2a40bc2 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -2110,6 +2110,12 @@ func TestSeries_Slice(t *testing.T) { 0, 5, Ints([]int{1, 2, 3, 4, 5}), + Ints([]int{1, 2, 3, 4, 5}), + }, + { + 0, + 6, + Ints([]int{1, 2, 3, 4, 5}), seriesWithErr, }, } From 9c5bef10292eab6234919293c25e752a2e86577b Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 9 Mar 2022 10:19:10 +0800 Subject: [PATCH 23/60] rolling: add descriptions --- series/rolling.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/series/rolling.go b/series/rolling.go index f766097..847b1ad 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -5,6 +5,7 @@ import ( "gonum.org/v1/gonum/floats" ) +//RollingSeries define rolling methods type RollingSeries interface { Max() Series Min() Series @@ -22,6 +23,7 @@ type rollingSeries struct { minPeriods int } +//RollingWindow define rolling window type RollingWindow interface { HasNext() bool NextWindow() Series @@ -61,7 +63,7 @@ func (rw *rollingWindow) NextWindow() Series { return window } - +//NewRollingSeries establish a rolling series func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { if window < 1 { panic("window must >= 1") @@ -76,6 +78,7 @@ func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { } } +// Max return the biggest element in the rollingSeries func (s rollingSeries) Max() Series { var maxFunc func(window Series, windowIndex int) interface{} @@ -94,6 +97,7 @@ func (s rollingSeries) Max() Series { return newS } +// Min return the lowest element in the rollingSeries func (s rollingSeries) Min() Series { var minFunc func(window Series, windowIndex int) interface{} if s.Type() == String { @@ -111,6 +115,7 @@ func (s rollingSeries) Min() Series { return newS } +// Mean calculates the average value of a rollingSeries func (s rollingSeries) Mean() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Mean() @@ -119,6 +124,7 @@ func (s rollingSeries) Mean() Series { return newS } +// MeanByWeights calculates the weighted average value of a rollingSeries func (s rollingSeries) MeanByWeights(weights []float64) Series { if s.window != len(weights) { panic("window must be equal to weights length") @@ -144,6 +150,7 @@ func (s rollingSeries) MeanByWeights(weights []float64) Series { return ma } +// Quantile calculates the quantile value of a rollingSeries func (s rollingSeries) Quantile(p float64) Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Quantile(p) @@ -152,6 +159,7 @@ func (s rollingSeries) Quantile(p float64) Series { return newS } +// Median calculates the median value of a rollingSeries func (s rollingSeries) Median() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Median() @@ -160,6 +168,7 @@ func (s rollingSeries) Median() Series { return newS } +// StdDev calculates the standard deviation of a rollingSeries func (s rollingSeries) StdDev() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.StdDev() @@ -168,6 +177,7 @@ func (s rollingSeries) StdDev() Series { return newS } +// Apply for extend the computation func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series { if s.Len() == 0 { return s.Empty() From 4292dbd9a245c7affb4127b7b8ed5997923eb817 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 9 Mar 2022 10:37:52 +0800 Subject: [PATCH 24/60] delete Element.NA() --- series/numberoperation.go | 6 +++--- series/series.go | 14 ++++++-------- series/type-bool.go | 4 ---- series/type-float.go | 4 ---- series/type-int.go | 4 ---- series/type-string.go | 4 ---- 6 files changed, 9 insertions(+), 27 deletions(-) diff --git a/series/numberoperation.go b/series/numberoperation.go index a97be92..26d3870 100644 --- a/series/numberoperation.go +++ b/series/numberoperation.go @@ -6,7 +6,7 @@ type Number float64 func (n Number) Sub(s Series) Series { result := s.Map(func(e Element, i int) Element { - ele := e.NA() + ele := e.Copy() v := float64(n) - e.Float() ele.Set(v) return ele @@ -16,7 +16,7 @@ func (n Number) Sub(s Series) Series { func (n Number) Div(s Series) Series { result := s.Map(func(e Element, i int) Element { - ele := e.NA() + ele := e.Copy() v := float64(n) / e.Float() ele.Set(v) return ele @@ -26,7 +26,7 @@ func (n Number) Div(s Series) Series { func (n Number) Mod(s Series) Series { result := s.Map(func(e Element, i int) Element { - ele := e.NA() + ele := e.Copy() v := math.Mod(float64(n), e.Float()) ele.Set(v) return ele diff --git a/series/series.go b/series/series.go index 396b997..9e9a791 100644 --- a/series/series.go +++ b/series/series.go @@ -60,8 +60,6 @@ type Element interface { // Information methods IsNA() bool Type() Type - - NA() Element } // intElements is the concrete implementation of Elements for Int elements. @@ -885,11 +883,12 @@ func (s Series) Shift(periods int) Series { shiftElements[i] = s.Elem(i - periods).Copy() } for i := s.Len() + periods; i < s.Len(); i++ { - shiftElements[i] = s.Elem(0).NA() + shiftElements[i] = NaNElementByType(s.t) + } } else if periods > 0 { for i := 0; i < periods; i++ { - shiftElements[i] = s.Elem(0).NA() + shiftElements[i] = NaNElementByType(s.t) } for i := 0 ; i + periods < s.Len(); i++ { shiftElements[i + periods] = s.Elem(i).Copy() @@ -1009,7 +1008,7 @@ func(s Series) FillNaNForward() { } } -// FillNaNBackward Fill NaN values using the next non-NaN value +// FillNaNBackward fill NaN values using the next non-NaN value func(s Series) FillNaNBackward() { var lastNotNaNValue ElementValue = nil for i := s.Len() - 1 ; i >= 0; i-- { @@ -1028,7 +1027,7 @@ func(s Series) Rolling(window int, minPeriods int) RollingSeries { return NewRollingSeries(window, minPeriods, s) } - +//Operation for multiple series calculation func Operation(operate func(index int, eles ...Element) interface{}, seriess ...Series) (Series, error) { if len(seriess) == 0 { return Series{}, errors.New("seriess num must > 0") @@ -1046,7 +1045,6 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... } eles := make([]Element, maxLen) - baseEle := seriess[0].Elem(0) for i := 0; i < maxLen; i++ { operateParam := make([]Element, len(seriess)) for j := 0; j < len(seriess); j++ { @@ -1057,7 +1055,7 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... } } res := operate(i, operateParam...) - e := baseEle.NA() + e := NaNElementByType(seriess[0].t) e.Set(res) eles[i] = e } diff --git a/series/type-bool.go b/series/type-bool.go index 17d7893..b977510 100644 --- a/series/type-bool.go +++ b/series/type-bool.go @@ -72,10 +72,6 @@ func (e boolElement) Copy() Element { return &boolElement{e.e, false} } -func (e boolElement) NA() Element { - return &boolElement{false, true} -} - func (e boolElement) IsNA() bool { return e.nan } diff --git a/series/type-float.go b/series/type-float.go index 8c8987b..7b19cc9 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -53,10 +53,6 @@ func (e floatElement) Copy() Element { return &floatElement{e.e, false} } -func (e floatElement) NA() Element { - return &floatElement{math.NaN(), true} -} - func (e floatElement) IsNA() bool { if e.nan || math.IsNaN(e.e) { return true diff --git a/series/type-int.go b/series/type-int.go index 5dd2197..446b35c 100644 --- a/series/type-int.go +++ b/series/type-int.go @@ -65,10 +65,6 @@ func (e intElement) Copy() Element { return &intElement{e.e, false} } -func (e intElement) NA() Element { - return &intElement{0, true} -} - func (e intElement) IsNA() bool { return e.nan } diff --git a/series/type-string.go b/series/type-string.go index 1f0c951..803096f 100644 --- a/series/type-string.go +++ b/series/type-string.go @@ -48,10 +48,6 @@ func (e stringElement) Copy() Element { return &stringElement{e.e, false} } -func (e stringElement) NA() Element { - return &stringElement{"", true} -} - func (e stringElement) IsNA() bool { return e.nan } From c0fe7cd2943638514ecc1ca268bb57ccc8203211 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 9 Mar 2022 10:58:17 +0800 Subject: [PATCH 25/60] modify module name --- dataframe/benchmark_test.go | 4 ++-- dataframe/dataframe.go | 4 +--- dataframe/dataframe_test.go | 4 ++-- dataframe/examples_test.go | 4 ++-- go.mod | 2 +- series/benchmarks_test.go | 2 +- series/numberoperation_test.go | 2 +- 7 files changed, 10 insertions(+), 12 deletions(-) diff --git a/dataframe/benchmark_test.go b/dataframe/benchmark_test.go index 1a545d2..2b00a94 100644 --- a/dataframe/benchmark_test.go +++ b/dataframe/benchmark_test.go @@ -5,8 +5,8 @@ import ( "strconv" "testing" - "github.com/mqy527/gota/dataframe" - "github.com/mqy527/gota/series" + "github.com/go-gota/gota/dataframe" + "github.com/go-gota/gota/series" ) func generateSeries(n, rep int) (data []series.Series) { diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 6e8d77c..7c72111 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -13,11 +13,9 @@ import ( "strings" "unicode/utf8" - - "github.com/mqy527/gota/series" + "github.com/go-gota/gota/series" "golang.org/x/net/html" "golang.org/x/net/html/atom" - ) // DataFrame is a data structure designed for operating on table like data (Such diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 2f9087a..19e2157 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -10,7 +10,7 @@ import ( "math" - "github.com/mqy527/gota/series" + "github.com/go-gota/gota/series" ) // compareFloats compares floating point values up to the number of digits specified. @@ -694,7 +694,7 @@ func TestDataFrame_Mutate(t *testing.T) { ) table := []struct { s series.Series - s1 series.Series + s1 series.Series expDf DataFrame }{ { diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go index fe17324..86d076a 100644 --- a/dataframe/examples_test.go +++ b/dataframe/examples_test.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "github.com/mqy527/gota/dataframe" - "github.com/mqy527/gota/series" + "github.com/go-gota/gota/dataframe" + "github.com/go-gota/gota/series" ) func ExampleNew() { diff --git a/go.mod b/go.mod index 13022cf..6af1705 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/mqy527/gota +module github.com/go-gota/gota go 1.16 diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index b33a2e7..9c7bb8f 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -5,7 +5,7 @@ import ( "strconv" "testing" - "github.com/mqy527/gota/series" + "github.com/go-gota/gota/series" ) func generateInts(n int) (data []int) { diff --git a/series/numberoperation_test.go b/series/numberoperation_test.go index f741bc9..aacc147 100644 --- a/series/numberoperation_test.go +++ b/series/numberoperation_test.go @@ -4,7 +4,7 @@ import ( "reflect" "testing" - "github.com/mqy527/gota/series" + "github.com/go-gota/gota/series" ) func Test_Sub(t *testing.T) { From e3829e0beb1fdcb4cbf12407b7a3c73123ead510 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 9 Mar 2022 11:42:22 +0800 Subject: [PATCH 26/60] =?UTF-8?q?optimize=EF=BC=9Aseries=20logic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series/series_logic.go | 24 ++++++++++++++++++------ series/series_logic_test.go | 6 ++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/series/series_logic.go b/series/series_logic.go index 09cdb34..c1163e9 100644 --- a/series/series_logic.go +++ b/series/series_logic.go @@ -6,10 +6,16 @@ import ( func (s Series) And(in interface{}) Series { - inSeries := New(in, s.t, "") + inSeries := New(in, Bool, "") result, err := Operation(func(index int, eles ...Element) interface{} { - e0b,_ := eles[0].Bool() - e1b,_ := eles[1].Bool() + e0b, err := eles[0].Bool() + if err != nil { + return nil + } + e1b, err := eles[1].Bool() + if err != nil { + return nil + } return e0b && e1b }, s, inSeries) if err != nil { @@ -19,10 +25,16 @@ func (s Series) And(in interface{}) Series { } func (s Series) Or(in interface{}) Series { - inSeries := New(in, s.t, "") + inSeries := New(in, Bool, "") result, err := Operation(func(index int, eles ...Element) interface{} { - e0b,_ := eles[0].Bool() - e1b,_ := eles[1].Bool() + e0b, err := eles[0].Bool() + if err != nil { + return nil + } + e1b, err := eles[1].Bool() + if err != nil { + return nil + } return e0b || e1b }, s, inSeries) if err != nil { diff --git a/series/series_logic_test.go b/series/series_logic_test.go index 7944a70..7218cfd 100644 --- a/series/series_logic_test.go +++ b/series/series_logic_test.go @@ -48,6 +48,12 @@ func TestSeries_Logic(t *testing.T) { Bools([]string{"false", "false", "false", "false", "false"}), Bools([]string{"true", "true", "true", "false", "true"}), }, + { + Bools([]string{"false", "true", "false", "false", "123"}), + []int {7, 0, 1, 0, 0}, + Bools([]string{"NaN", "false", "false", "false", "NaN"}), + Bools([]string{"NaN", "true", "true", "false", "NaN"}), + }, } for testnum, test := range tests { From ca5da43d9cc6aceb4e143c7d92020543638df8d4 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 15 Mar 2022 18:09:29 +0800 Subject: [PATCH 27/60] refactor some method --- series/benchmarks_test.go | 36 ++++++ series/series.go | 265 ++++++++++++++++++++------------------ 2 files changed, 175 insertions(+), 126 deletions(-) diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index 9c7bb8f..935da87 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -214,6 +214,42 @@ func BenchmarkSeries_Subset(b *testing.B) { } } + +func BenchmarkSeries_Append(b *testing.B) { + rand.Seed(100) + table := []struct { + name string + series series.Series + }{ + { + "[]int(100000)_Int", + series.Ints(generateInts(100000)), + }, + { + "[]int(100000)_String", + series.Strings(generateInts(100000)), + }, + { + "[]int(100000)_Bool", + series.Bools(generateInts(100000)), + }, + { + "[]int(100000)_Float", + series.Floats(generateInts(100000)), + }, + } + for _, test := range table { + origin := test.series.Copy() + b.Run(test.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + test.series.Append(test.series) + test.series = origin + } + }) + } +} + + func BenchmarkSeries_Set(b *testing.B) { rand.Seed(100) table := []struct { diff --git a/series/series.go b/series/series.go index 9e9a791..f9c76d7 100644 --- a/series/series.go +++ b/series/series.go @@ -33,6 +33,9 @@ type Elements interface { Elem(int) Element Len() int Slice(start, end int) Elements + Get(indexs ...int) Elements + Append(Elements) Elements + Copy() Elements } // Element is the interface that defines the types of methods to be present for @@ -65,30 +68,98 @@ type Element interface { // intElements is the concrete implementation of Elements for Int elements. type intElements []intElement -func (e intElements) Len() int { return len(e) } -func (e intElements) Elem(i int) Element { return &e[i] } -func (e intElements) Slice(start, end int) Elements { return e[start : end] } +func (e intElements) Len() int { return len(e) } +func (e intElements) Elem(i int) Element { return &e[i] } +func (e intElements) Slice(start, end int) Elements { return e[start:end] } +func (e intElements) Get(indexs ...int) Elements { + elements := make(intElements, len(indexs)) + for k, i := range indexs { + elements[k] = e[i] + } + return elements +} +func (e intElements) Append(elements Elements) Elements { + eles := elements.(intElements) + ret := append(e, eles...) + return ret +} +func (e intElements) Copy() Elements { + elements := make(intElements, len(e)) + copy(elements, e) + return elements +} // stringElements is the concrete implementation of Elements for String elements. type stringElements []stringElement -func (e stringElements) Len() int { return len(e) } -func (e stringElements) Elem(i int) Element { return &e[i] } -func (e stringElements) Slice(start, end int) Elements { return e[start : end] } +func (e stringElements) Len() int { return len(e) } +func (e stringElements) Elem(i int) Element { return &e[i] } +func (e stringElements) Slice(start, end int) Elements { return e[start:end] } +func (e stringElements) Get(indexs ...int) Elements { + elements := make(stringElements, len(indexs)) + for k, i := range indexs { + elements[k] = e[i] + } + return elements +} +func (e stringElements) Append(elements Elements) Elements { + eles := elements.(stringElements) + ret := append(e, eles...) + return ret +} +func (e stringElements) Copy() Elements { + elements := make(stringElements, len(e)) + copy(elements, e) + return elements +} // floatElements is the concrete implementation of Elements for Float elements. type floatElements []floatElement -func (e floatElements) Len() int { return len(e) } -func (e floatElements) Elem(i int) Element { return &e[i] } -func (e floatElements) Slice(start, end int) Elements { return e[start : end] } +func (e floatElements) Len() int { return len(e) } +func (e floatElements) Elem(i int) Element { return &e[i] } +func (e floatElements) Slice(start, end int) Elements { return e[start:end] } +func (e floatElements) Get(indexs ...int) Elements { + elements := make(floatElements, len(indexs)) + for k, i := range indexs { + elements[k] = e[i] + } + return elements +} +func (e floatElements) Append(elements Elements) Elements { + eles := elements.(floatElements) + ret := append(e, eles...) + return ret +} +func (e floatElements) Copy() Elements { + elements := make(floatElements, len(e)) + copy(elements, e) + return elements +} // boolElements is the concrete implementation of Elements for Bool elements. type boolElements []boolElement -func (e boolElements) Len() int { return len(e) } -func (e boolElements) Elem(i int) Element { return &e[i] } -func (e boolElements) Slice(start, end int) Elements { return e[start : end] } +func (e boolElements) Len() int { return len(e) } +func (e boolElements) Elem(i int) Element { return &e[i] } +func (e boolElements) Slice(start, end int) Elements { return e[start:end] } +func (e boolElements) Get(indexs ...int) Elements { + elements := make(boolElements, len(indexs)) + for k, i := range indexs { + elements[k] = e[i] + } + return elements +} +func (e boolElements) Append(elements Elements) Elements { + eles := elements.(boolElements) + ret := append(e, eles...) + return ret +} +func (e boolElements) Copy() Elements { + elements := make(boolElements, len(e)) + copy(elements, e) + return elements +} // ElementValue represents the value that can be used for marshaling or // unmarshaling Elements. @@ -127,6 +198,23 @@ const ( Bool Type = "bool" ) +func (t Type) emptyElements(n int) Elements { + var elements Elements + switch t { + case String: + elements = make(stringElements, n) + case Int: + elements = make(intElements, n) + case Float: + elements = make(floatElements, n) + case Bool: + elements = make(boolElements, n) + default: + panic(fmt.Sprintf("unknown type %v", t)) + } + return elements +} + const NaN = "NaN" // Indexes represent the elements that can be used for selecting a subset of @@ -148,18 +236,7 @@ func New(values interface{}, t Type, name string) Series { // Pre-allocate elements preAlloc := func(n int) { - switch t { - case String: - ret.elements = make(stringElements, n) - case Int: - ret.elements = make(intElements, n) - case Float: - ret.elements = make(floatElements, n) - case Bool: - ret.elements = make(boolElements, n) - default: - panic(fmt.Sprintf("unknown type %v", t)) - } + ret.elements = t.emptyElements(n) } if values == nil { @@ -234,18 +311,7 @@ func NewDefault(defaultValue interface{}, t Type, name string, len int) Series { // Pre-allocate elements preAlloc := func(n int) { - switch t { - case String: - ret.elements = make(stringElements, n) - case Int: - ret.elements = make(intElements, n) - case Float: - ret.elements = make(floatElements, n) - case Bool: - ret.elements = make(boolElements, n) - default: - panic(fmt.Sprintf("unknown type %v", t)) - } + ret.elements = t.emptyElements(n) } if defaultValue == nil { @@ -297,16 +363,7 @@ func (s *Series) Append(values interface{}) { return } news := New(values, s.t, s.Name) - switch s.t { - case String: - s.elements = append(s.elements.(stringElements), news.elements.(stringElements)...) - case Int: - s.elements = append(s.elements.(intElements), news.elements.(intElements)...) - case Float: - s.elements = append(s.elements.(floatElements), news.elements.(floatElements)...) - case Bool: - s.elements = append(s.elements.(boolElements), news.elements.(boolElements)...) - } + s.elements = s.elements.Append(news.elements) } // Concat concatenates two series together. It will return a new Series with the @@ -335,36 +392,9 @@ func (s Series) Subset(indexes Indexes) Series { return s } ret := Series{ - Name: s.Name, - t: s.t, - } - switch s.t { - case String: - elements := make(stringElements, len(idx)) - for k, i := range idx { - elements[k] = s.elements.(stringElements)[i] - } - ret.elements = elements - case Int: - elements := make(intElements, len(idx)) - for k, i := range idx { - elements[k] = s.elements.(intElements)[i] - } - ret.elements = elements - case Float: - elements := make(floatElements, len(idx)) - for k, i := range idx { - elements[k] = s.elements.(floatElements)[i] - } - ret.elements = elements - case Bool: - elements := make(boolElements, len(idx)) - for k, i := range idx { - elements[k] = s.elements.(boolElements)[i] - } - ret.elements = elements - default: - panic("unknown series type") + Name: s.Name, + t: s.t, + elements: s.elements.Get(idx...), } return ret } @@ -531,29 +561,11 @@ func (s Series) Compare(comparator Comparator, comparando interface{}) Series { // Copy will return a copy of the Series. func (s Series) Copy() Series { - name := s.Name - t := s.t - err := s.Err - var elements Elements - switch s.t { - case String: - elements = make(stringElements, s.Len()) - copy(elements.(stringElements), s.elements.(stringElements)) - case Float: - elements = make(floatElements, s.Len()) - copy(elements.(floatElements), s.elements.(floatElements)) - case Bool: - elements = make(boolElements, s.Len()) - copy(elements.(boolElements), s.elements.(boolElements)) - case Int: - elements = make(intElements, s.Len()) - copy(elements.(intElements), s.elements.(intElements)) - } ret := Series{ - Name: name, - t: t, - elements: elements, - Err: err, + Name: s.Name, + t: s.t, + elements: s.elements.Copy(), + Err: s.Err, } return ret } @@ -879,48 +891,49 @@ func (s Series) Shift(periods int) Series { } shiftElements := make([]Element, s.Len()) if periods < 0 { - for i := 0; i - periods < s.Len(); i++ { + for i := 0; i-periods < s.Len(); i++ { shiftElements[i] = s.Elem(i - periods).Copy() } for i := s.Len() + periods; i < s.Len(); i++ { shiftElements[i] = NaNElementByType(s.t) - - } + + } } else if periods > 0 { for i := 0; i < periods; i++ { shiftElements[i] = NaNElementByType(s.t) } - for i := 0 ; i + periods < s.Len(); i++ { - shiftElements[i + periods] = s.Elem(i).Copy() + for i := 0; i+periods < s.Len(); i++ { + shiftElements[i+periods] = s.Elem(i).Copy() } } return New(shiftElements, s.Type(), fmt.Sprintf("%s_Shift_%d", s.Name, periods)) } + // CumProd finds the cumulative product of the first i elements in s and returning a new Series object. -func(s Series) CumProd() Series { +func (s Series) CumProd() Series { dst := make([]float64, s.Len()) floats.CumProd(dst, s.Float()) - return New(dst,s.Type(), fmt.Sprintf("%s_CumProd", s.Name)) + return New(dst, s.Type(), fmt.Sprintf("%s_CumProd", s.Name)) } // Prod returns the product of the elements of the Series. Returns 1 if len(s) = 0. -func(s Series) Prod() float64 { +func (s Series) Prod() float64 { return floats.Prod(s.Float()) } // AddConst adds the scalar c to all of the values in Series and returning a new Series object. -func(s Series) AddConst(c float64) Series { +func (s Series) AddConst(c float64) Series { dst := s.Float() floats.AddConst(c, dst) - return New(dst,s.Type(), fmt.Sprintf("(%s + %v)", s.Name, c)) + return New(dst, s.Type(), fmt.Sprintf("(%s + %v)", s.Name, c)) } // AddConst multiply the scalar c to all of the values in Series and returning a new Series object. -func(s Series) MulConst(c float64) Series { +func (s Series) MulConst(c float64) Series { sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() - result.Set(f * c) + result.Set(f * c) return result }) sm.Name = fmt.Sprintf("(%s * %v)", s.Name, c) @@ -928,18 +941,18 @@ func(s Series) MulConst(c float64) Series { } // DivConst Div the scalar c to all of the values in Series and returning a new Series object. -func(s Series) DivConst(c float64) Series { +func (s Series) DivConst(c float64) Series { sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() - result.Set(f / c) + result.Set(f / c) return result }) sm.Name = fmt.Sprintf("(%s / %v)", s.Name, c) return sm } -func(s Series) Add(c Series) Series { +func (s Series) Add(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) @@ -947,7 +960,7 @@ func(s Series) Add(c Series) Series { return New(dst, Float, fmt.Sprintf("(%s + %s)", s.Name, c.Name)) } -func(s Series) Sub(c Series) Series { +func (s Series) Sub(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) @@ -955,7 +968,7 @@ func(s Series) Sub(c Series) Series { return New(dst, Float, fmt.Sprintf("(%s - %s)", s.Name, c.Name)) } -func(s Series) Mul(c Series) Series { +func (s Series) Mul(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) @@ -963,7 +976,7 @@ func(s Series) Mul(c Series) Series { return New(dst, Float, fmt.Sprintf("(%s * %s)", s.Name, c.Name)) } -func(s Series) Div(c Series) Series { +func (s Series) Div(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) @@ -971,20 +984,19 @@ func(s Series) Div(c Series) Series { return New(dst, Float, fmt.Sprintf("(%s / %s)", s.Name, c.Name)) } -func(s Series) Abs() Series { +func (s Series) Abs() Series { sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() - result.Set(math.Abs(f)) + result.Set(math.Abs(f)) return result }) sm.Name = fmt.Sprintf("Abs(%s)", s.Name) return sm } - // FillNaN Fill NaN values using the specified value. -func(s Series) FillNaN(value ElementValue) { +func (s Series) FillNaN(value ElementValue) { for i := 0; i < s.Len(); i++ { ele := s.Elem(i) if ele.IsNA() { @@ -994,7 +1006,7 @@ func(s Series) FillNaN(value ElementValue) { } // FillNaNForward Fill NaN values using the last non-NaN value -func(s Series) FillNaNForward() { +func (s Series) FillNaNForward() { var lastNotNaNValue ElementValue = nil for i := 0; i < s.Len(); i++ { ele := s.Elem(i) @@ -1009,9 +1021,9 @@ func(s Series) FillNaNForward() { } // FillNaNBackward fill NaN values using the next non-NaN value -func(s Series) FillNaNBackward() { +func (s Series) FillNaNBackward() { var lastNotNaNValue ElementValue = nil - for i := s.Len() - 1 ; i >= 0; i-- { + for i := s.Len() - 1; i >= 0; i-- { ele := s.Elem(i) if !ele.IsNA() { lastNotNaNValue = ele.Val() @@ -1023,11 +1035,11 @@ func(s Series) FillNaNBackward() { } } -func(s Series) Rolling(window int, minPeriods int) RollingSeries { +func (s Series) Rolling(window int, minPeriods int) RollingSeries { return NewRollingSeries(window, minPeriods, s) } -//Operation for multiple series calculation +//Operation for multiple series calculation func Operation(operate func(index int, eles ...Element) interface{}, seriess ...Series) (Series, error) { if len(seriess) == 0 { return Series{}, errors.New("seriess num must > 0") @@ -1059,7 +1071,7 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... e.Set(res) eles[i] = e } - result := New(eles, seriess[0].t,"") + result := New(eles, seriess[0].t, "") return result, nil } @@ -1089,6 +1101,7 @@ func NaNElementByType(t Type) Element { panic("not supported type:" + t) } } + // Sum calculates the sum value of a series func (s Series) Sum() float64 { if s.elements.Len() == 0 || s.Type() == String || s.Type() == Bool { From fd768271698ff5391cfe289c6160754d9101d494 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 15 Mar 2022 21:36:22 +0800 Subject: [PATCH 28/60] optimize series.Shift --- series/rolling.go | 15 ++++++--- series/series.go | 78 +++++++++++++++++-------------------------- series/type-bool.go | 4 +-- series/type-float.go | 4 +-- series/type-int.go | 4 +-- series/type-string.go | 4 +-- 6 files changed, 49 insertions(+), 60 deletions(-) diff --git a/series/rolling.go b/series/rolling.go index 847b1ad..ec77aa4 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -185,18 +185,23 @@ func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, if len(t) == 0 { t = s.t } - eles := make([]Element, s.Len()) + eles := t.emptyElements(s.Len()) index := 0 rw := NewRollingWindow(s.Series, s.window) for rw.HasNext() { window := rw.NextWindow() - ele := NaNElementByType(t) if window.Len() >= s.minPeriods { - ele.Set(f(window, index)) + eles.Elem(index).Set(f(window, index)) + } else { + eles.Elem(index).Set(NaN) } - eles[index] = ele index++ } - newS := New(eles, t, fmt.Sprintf("%s_RApply[w:%d]", s.Name, s.window)) + newS := Series{ + Name: fmt.Sprintf("%s_RApply[w:%d]", s.Name, s.window), + elements: eles, + t: t, + Err: nil, + } return newS } \ No newline at end of file diff --git a/series/series.go b/series/series.go index f9c76d7..242c710 100644 --- a/series/series.go +++ b/series/series.go @@ -889,24 +889,31 @@ func (s Series) Shift(periods int) Series { if periods == 0 { return s.Copy() } - shiftElements := make([]Element, s.Len()) - if periods < 0 { - for i := 0; i-periods < s.Len(); i++ { - shiftElements[i] = s.Elem(i - periods).Copy() - } - for i := s.Len() + periods; i < s.Len(); i++ { - shiftElements[i] = NaNElementByType(s.t) - } + naLen := periods + if naLen < 0 { + naLen = -naLen + } + naEles := s.t.emptyElements(naLen) + for i := 0; i < naLen; i++ { + naEles.Elem(i).Set(NaN) + } + + var shiftElements Elements + if periods < 0 { + //shift up + shiftElements = s.elements.Slice(-periods, s.Len()).Copy().Append(naEles) } else if periods > 0 { - for i := 0; i < periods; i++ { - shiftElements[i] = NaNElementByType(s.t) - } - for i := 0; i+periods < s.Len(); i++ { - shiftElements[i+periods] = s.Elem(i).Copy() - } + //move down + shiftElements = naEles.Append(s.elements.Slice(0, s.Len()-periods).Copy()) } - return New(shiftElements, s.Type(), fmt.Sprintf("%s_Shift_%d", s.Name, periods)) + ret := Series{ + Name: fmt.Sprintf("%s_Shift_%d", s.Name, periods), + elements: shiftElements, + t: s.t, + Err: nil, + } + return ret } // CumProd finds the cumulative product of the first i elements in s and returning a new Series object. @@ -1056,7 +1063,8 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... } } - eles := make([]Element, maxLen) + t := seriess[0].t + eles := t.emptyElements(maxLen) for i := 0; i < maxLen; i++ { operateParam := make([]Element, len(seriess)) for j := 0; j < len(seriess); j++ { @@ -1067,39 +1075,15 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... } } res := operate(i, operateParam...) - e := NaNElementByType(seriess[0].t) - e.Set(res) - eles[i] = e + eles.Elem(i).Set(res) } - result := New(eles, seriess[0].t, "") - return result, nil -} - -func NaNElementByType(t Type) Element { - switch t { - case String: - return &stringElement{ - e: NaN, - nan: true, - } - case Float: - return &floatElement{ - e: math.NaN(), - nan: true, - } - case Bool: - return &boolElement{ - e: false, - nan: true, - } - case Int: - return &intElement{ - e: 0, - nan: true, - } - default: - panic("not supported type:" + t) + result := Series{ + Name: "", + elements: eles, + t: t, + Err: nil, } + return result, nil } // Sum calculates the sum value of a series diff --git a/series/type-bool.go b/series/type-bool.go index b977510..2ce3449 100644 --- a/series/type-bool.go +++ b/series/type-bool.go @@ -18,7 +18,7 @@ func (e *boolElement) Set(value interface{}) { e.nan = false switch val := value.(type) { case string: - if val == "NaN" { + if val == NaN { e.nan = true return } @@ -89,7 +89,7 @@ func (e boolElement) Val() ElementValue { func (e boolElement) String() string { if e.IsNA() { - return "NaN" + return NaN } if e.e { return "true" diff --git a/series/type-float.go b/series/type-float.go index 7b19cc9..70eb704 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -18,7 +18,7 @@ func (e *floatElement) Set(value interface{}) { e.nan = false switch val := value.(type) { case string: - if val == "NaN" { + if val == NaN { e.nan = true return } @@ -73,7 +73,7 @@ func (e floatElement) Val() ElementValue { func (e floatElement) String() string { if e.IsNA() { - return "NaN" + return NaN } return fmt.Sprintf("%f", e.e) } diff --git a/series/type-int.go b/series/type-int.go index 446b35c..b5ed2fd 100644 --- a/series/type-int.go +++ b/series/type-int.go @@ -18,7 +18,7 @@ func (e *intElement) Set(value interface{}) { e.nan = false switch val := value.(type) { case string: - if val == "NaN" { + if val == NaN { e.nan = true return } @@ -82,7 +82,7 @@ func (e intElement) Val() ElementValue { func (e intElement) String() string { if e.IsNA() { - return "NaN" + return NaN } return fmt.Sprint(e.e) } diff --git a/series/type-string.go b/series/type-string.go index 803096f..4d1b6bd 100644 --- a/series/type-string.go +++ b/series/type-string.go @@ -20,7 +20,7 @@ func (e *stringElement) Set(value interface{}) { switch val := value.(type) { case string: e.e = val - if e.e == "NaN" { + if e.e == NaN { e.nan = true return } @@ -65,7 +65,7 @@ func (e stringElement) Val() ElementValue { func (e stringElement) String() string { if e.IsNA() { - return "NaN" + return NaN } return string(e.e) } From e0847fe7f5dacb5fe24ef9224ca74ea712fda06d Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 16 Mar 2022 10:19:36 +0800 Subject: [PATCH 29/60] optimize series.Shift --- series/series.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/series/series.go b/series/series.go index 242c710..77ec440 100644 --- a/series/series.go +++ b/series/series.go @@ -905,7 +905,7 @@ func (s Series) Shift(periods int) Series { shiftElements = s.elements.Slice(-periods, s.Len()).Copy().Append(naEles) } else if periods > 0 { //move down - shiftElements = naEles.Append(s.elements.Slice(0, s.Len()-periods).Copy()) + shiftElements = naEles.Append(s.elements.Slice(0, s.Len()-periods)) } ret := Series{ Name: fmt.Sprintf("%s_Shift_%d", s.Name, periods), From 5913f0c61891eb67174f8872ead292c7593033d8 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 16 Mar 2022 14:43:53 +0800 Subject: [PATCH 30/60] Modify comments --- series/series.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/series/series.go b/series/series.go index 77ec440..645b774 100644 --- a/series/series.go +++ b/series/series.go @@ -447,7 +447,7 @@ func (s Series) IsNaN() []bool { return ret } -// IsNaN returns an array that identifies which of the elements are not NaN. +// IsNotNaN returns an array that identifies which of the elements are not NaN. func (s Series) IsNotNaN() []bool { ret := make([]bool, s.Len()) for i := 0; i < s.Len(); i++ { From d426b3e574a4735aaaea008511eb1a4b633901b2 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 17 Mar 2022 18:12:25 +0800 Subject: [PATCH 31/60] optimize --- series/series.go | 59 ++++++++++++++++++++------ series/type-bool.go | 98 +++++++++++++++++++++++++------------------ series/type-float.go | 59 +++++++++++++++++--------- series/type-int.go | 83 ++++++++++++++++++++++-------------- series/type-string.go | 47 ++++++++++++++------- 5 files changed, 228 insertions(+), 118 deletions(-) diff --git a/series/series.go b/series/series.go index 645b774..259c509 100644 --- a/series/series.go +++ b/series/series.go @@ -43,6 +43,11 @@ type Elements interface { type Element interface { // Setter method Set(interface{}) + SetElement(val Element) + SetBool(val bool) + SetFloat(val float64) + SetInt(val int) + SetString(val string) // Comparation methods Eq(Element) bool @@ -250,37 +255,37 @@ func New(values interface{}, t Type, name string) Series { l := len(v) preAlloc(l) for i := 0; i < l; i++ { - ret.elements.Elem(i).Set(v[i]) + ret.elements.Elem(i).SetString(v[i]) } case []float64: l := len(v) preAlloc(l) for i := 0; i < l; i++ { - ret.elements.Elem(i).Set(v[i]) + ret.elements.Elem(i).SetFloat(v[i]) } case []int: l := len(v) preAlloc(l) for i := 0; i < l; i++ { - ret.elements.Elem(i).Set(v[i]) + ret.elements.Elem(i).SetInt(v[i]) } case []bool: l := len(v) preAlloc(l) for i := 0; i < l; i++ { - ret.elements.Elem(i).Set(v[i]) + ret.elements.Elem(i).SetBool(v[i]) } case []Element: l := len(v) preAlloc(l) for i := 0; i < l; i++ { - ret.elements.Elem(i).Set(v[i]) + ret.elements.Elem(i).SetElement(v[i]) } case Series: l := v.Len() preAlloc(l) for i := 0; i < l; i++ { - ret.elements.Elem(i).Set(v.elements.Elem(i)) + ret.elements.Elem(i).SetElement(v.elements.Elem(i)) } default: switch reflect.TypeOf(values).Kind() { @@ -320,8 +325,32 @@ func NewDefault(defaultValue interface{}, t Type, name string, len int) Series { return ret } preAlloc(len) - for i := 0; i < len; i++ { - ret.elements.Elem(i).Set(defaultValue) + + switch v := defaultValue.(type) { + case string: + for i := 0; i < len; i++ { + ret.elements.Elem(i).SetString(v) + } + case float64: + for i := 0; i < len; i++ { + ret.elements.Elem(i).SetFloat(v) + } + case int: + for i := 0; i < len; i++ { + ret.elements.Elem(i).SetInt(v) + } + case bool: + for i := 0; i < len; i++ { + ret.elements.Elem(i).SetBool(v) + } + case Element: + for i := 0; i < len; i++ { + ret.elements.Elem(i).SetElement(v) + } + default: + for i := 0; i < len; i++ { + ret.elements.Elem(i).Set(defaultValue) + } } return ret } @@ -423,7 +452,7 @@ func (s Series) Set(indexes Indexes, newvalues Series) Series { s.Err = fmt.Errorf("set error: index out of range") return s } - s.elements.Elem(i).Set(newvalues.elements.Elem(k)) + s.elements.Elem(i).SetElement(newvalues.elements.Elem(k)) } return s } @@ -873,12 +902,18 @@ func (s Series) Quantile(p float64) float64 { // the function passed in via argument `f` will not expect another type, but // instead expects to handle Element(s) of type Float. func (s Series) Map(f MapFunction) Series { - mappedValues := make([]Element, s.Len()) + eles := s.Type().emptyElements(s.Len()) for i := 0; i < s.Len(); i++ { value := f(s.elements.Elem(i), i) - mappedValues[i] = value + eles.Elem(i).SetElement(value) } - return New(mappedValues, s.Type(), s.Name) + ret := Series{ + Name: s.Name, + elements: eles, + t: s.Type(), + Err: nil, + } + return ret } //Shift series by desired number of periods and returning a new Series object. diff --git a/series/type-bool.go b/series/type-bool.go index 2ce3449..5a8a5cb 100644 --- a/series/type-bool.go +++ b/series/type-bool.go @@ -15,51 +15,70 @@ type boolElement struct { var _ Element = (*boolElement)(nil) func (e *boolElement) Set(value interface{}) { - e.nan = false switch val := value.(type) { case string: - if val == NaN { - e.nan = true - return - } - switch strings.ToLower(val) { - case "true", "t", "1": - e.e = true - case "false", "f", "0": - e.e = false - default: - e.nan = true - return - } + e.SetString(val) case int: - switch val { - case 1: - e.e = true - case 0: - e.e = false - default: - e.nan = true - return - } + e.SetInt(val) case float64: - switch val { - case 1: - e.e = true - case 0: - e.e = false - default: - e.nan = true - return - } + e.SetFloat(val) case bool: - e.e = val + e.SetBool(val) case Element: - b, err := val.Bool() - if err != nil { - e.nan = true - return - } - e.e = b + e.SetElement(val) + default: + e.nan = true + } +} + +func (e *boolElement) SetElement(val Element) { + e.nan = false + b, err := val.Bool() + if err != nil { + e.nan = true + return + } + e.e = b +} + +func (e *boolElement) SetBool(val bool) { + e.nan = false + e.e = val +} + +func (e *boolElement) SetFloat(val float64) { + e.nan = false + switch val { + case 1: + e.e = true + case 0: + e.e = false + default: + e.nan = true + } +} +func (e *boolElement) SetInt(val int) { + e.nan = false + switch val { + case 1: + e.e = true + case 0: + e.e = false + default: + e.nan = true + } +} +func (e *boolElement) SetString(val string) { + e.nan = false + if val == NaN { + e.nan = true + return + } + switch strings.ToLower(val) { + case "true", "t", "1": + e.e = true + case "false", "f", "0": + e.e = false default: e.nan = true } @@ -171,4 +190,3 @@ func (e boolElement) GreaterEq(elem Element) bool { } return e.e || !b } - diff --git a/series/type-float.go b/series/type-float.go index 70eb704..0bd827b 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -15,37 +15,56 @@ type floatElement struct { var _ Element = (*floatElement)(nil) func (e *floatElement) Set(value interface{}) { - e.nan = false switch val := value.(type) { case string: - if val == NaN { - e.nan = true - return - } - f, err := strconv.ParseFloat(val, 64) - if err != nil { - e.nan = true - return - } - e.e = f + e.SetString(val) case int: - e.e = float64(val) + e.SetInt(val) case float64: - e.e = val + e.SetFloat(val) case bool: - b := val - if b { - e.e = 1 - } else { - e.e = 0 - } + e.SetBool(val) case Element: - e.e = val.Float() + e.SetElement(val) default: e.nan = true } } +func (e *floatElement) SetElement(val Element) { + e.nan = false + e.e = val.Float() +} +func (e *floatElement) SetBool(val bool) { + e.nan = false + if val { + e.e = 1 + } else { + e.e = 0 + } +} +func (e *floatElement) SetFloat(val float64) { + e.nan = false + e.e = val +} +func (e *floatElement) SetInt(val int) { + e.nan = false + e.e = float64(val) +} +func (e *floatElement) SetString(val string) { + e.nan = false + if val == NaN { + e.nan = true + return + } + f, err := strconv.ParseFloat(val, 64) + if err != nil { + e.nan = true + return + } + e.e = f +} + func (e floatElement) Copy() Element { if e.IsNA() { return &floatElement{0.0, true} diff --git a/series/type-int.go b/series/type-int.go index b5ed2fd..5ef44e7 100644 --- a/series/type-int.go +++ b/series/type-int.go @@ -15,49 +15,68 @@ type intElement struct { var _ Element = (*intElement)(nil) func (e *intElement) Set(value interface{}) { - e.nan = false switch val := value.(type) { case string: - if val == NaN { - e.nan = true - return - } - i, err := strconv.Atoi(val) - if err != nil { - e.nan = true - return - } - e.e = i + e.SetString(val) case int: - e.e = val + e.SetInt(val) case float64: - f := val - if math.IsNaN(f) || - math.IsInf(f, 0) || - math.IsInf(f, 1) { - e.nan = true - return - } - e.e = int(f) + e.SetFloat(val) case bool: - b := val - if b { - e.e = 1 - } else { - e.e = 0 - } + e.SetBool(val) case Element: - v, err := val.Int() - if err != nil { - e.nan = true - return - } - e.e = v + e.SetElement(val) default: e.nan = true } } +func (e *intElement) SetElement(val Element) { + e.nan = false + v, err := val.Int() + if err != nil { + e.nan = true + return + } + e.e = v +} +func (e *intElement) SetBool(val bool) { + e.nan = false + if val { + e.e = 1 + } else { + e.e = 0 + } +} +func (e *intElement) SetFloat(val float64) { + e.nan = false + f := val + if math.IsNaN(f) || + math.IsInf(f, 0) || + math.IsInf(f, 1) { + e.nan = true + return + } + e.e = int(f) +} +func (e *intElement) SetInt(val int) { + e.nan = false + e.e = val +} +func (e *intElement) SetString(val string) { + e.nan = false + if val == NaN { + e.nan = true + return + } + i, err := strconv.Atoi(val) + if err != nil { + e.nan = true + return + } + e.e = i +} + func (e intElement) Copy() Element { if e.IsNA() { return &intElement{0, true} diff --git a/series/type-string.go b/series/type-string.go index 4d1b6bd..9d6b6be 100644 --- a/series/type-string.go +++ b/series/type-string.go @@ -16,31 +16,50 @@ type stringElement struct { var _ Element = (*stringElement)(nil) func (e *stringElement) Set(value interface{}) { - e.nan = false switch val := value.(type) { case string: - e.e = val - if e.e == NaN { - e.nan = true - return - } + e.SetString(val) case int: - e.e = strconv.Itoa(val) + e.SetInt(val) case float64: - e.e = strconv.FormatFloat(val, 'f', 6, 64) + e.SetFloat(val) case bool: - if val { - e.e = "true" - } else { - e.e = "false" - } + e.SetBool(val) case Element: - e.e = val.String() + e.SetElement(val) default: e.nan = true } } +func (e *stringElement) SetElement(val Element) { + e.nan = false + e.e = val.String() +} +func (e *stringElement) SetBool(val bool) { + e.nan = false + if val { + e.e = "true" + } else { + e.e = "false" + } +} +func (e *stringElement) SetFloat(val float64) { + e.nan = false + e.e = strconv.FormatFloat(val, 'f', 6, 64) +} +func (e *stringElement) SetInt(val int) { + e.nan = false + e.e = strconv.Itoa(val) +} +func (e *stringElement) SetString(val string) { + e.nan = false + e.e = val + if e.e == NaN { + e.nan = true + } +} + func (e stringElement) Copy() Element { if e.IsNA() { return &stringElement{"", true} From e4b68c69e4bfe705247cd7cf9e1188ccbc2cccc1 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Fri, 18 Mar 2022 14:38:20 +0800 Subject: [PATCH 32/60] modify module --- dataframe/benchmark_test.go | 4 ++-- dataframe/dataframe.go | 10 +++++----- dataframe/dataframe_test.go | 2 +- dataframe/examples_test.go | 4 ++-- go.mod | 2 +- series/benchmarks_test.go | 8 +++----- series/numberoperation_test.go | 2 +- 7 files changed, 15 insertions(+), 17 deletions(-) diff --git a/dataframe/benchmark_test.go b/dataframe/benchmark_test.go index 2b00a94..1a545d2 100644 --- a/dataframe/benchmark_test.go +++ b/dataframe/benchmark_test.go @@ -5,8 +5,8 @@ import ( "strconv" "testing" - "github.com/go-gota/gota/dataframe" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/dataframe" + "github.com/mqy527/gota/series" ) func generateSeries(n, rep int) (data []series.Series) { diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 7c72111..24a0afe 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -13,7 +13,7 @@ import ( "strings" "unicode/utf8" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) @@ -636,14 +636,14 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame { // Mutate changes a column of the DataFrame with the given Series or adds it as // a new column if the column name does not exist. func (df DataFrame) Mutate(ss ...series.Series) DataFrame { - if df.Err != nil || len(ss) == 0{ + if df.Err != nil || len(ss) == 0 { return df } - + slen := ss[0].Len() for i := 1; i < len(ss); i++ { if slen != ss[i].Len() { - return DataFrame{Err: fmt.Errorf("mutate: serieses length not equal")} + return DataFrame{Err: fmt.Errorf("mutate: serieses length not equal")} } } if slen != df.nrows { @@ -660,7 +660,7 @@ func (df DataFrame) Mutate(ss ...series.Series) DataFrame { columns = append(columns, ss[i]) } } - + nrows, ncols, err := checkColumnsDimensions(columns...) if err != nil { return DataFrame{Err: err} diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 19e2157..4238621 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -10,7 +10,7 @@ import ( "math" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" ) // compareFloats compares floating point values up to the number of digits specified. diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go index 86d076a..fe17324 100644 --- a/dataframe/examples_test.go +++ b/dataframe/examples_test.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "github.com/go-gota/gota/dataframe" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/dataframe" + "github.com/mqy527/gota/series" ) func ExampleNew() { diff --git a/go.mod b/go.mod index 6af1705..13022cf 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/go-gota/gota +module github.com/mqy527/gota go 1.16 diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index 935da87..d9bc8a5 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -5,7 +5,7 @@ import ( "strconv" "testing" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" ) func generateInts(n int) (data []int) { @@ -214,12 +214,11 @@ func BenchmarkSeries_Subset(b *testing.B) { } } - func BenchmarkSeries_Append(b *testing.B) { rand.Seed(100) table := []struct { - name string - series series.Series + name string + series series.Series }{ { "[]int(100000)_Int", @@ -249,7 +248,6 @@ func BenchmarkSeries_Append(b *testing.B) { } } - func BenchmarkSeries_Set(b *testing.B) { rand.Seed(100) table := []struct { diff --git a/series/numberoperation_test.go b/series/numberoperation_test.go index aacc147..f741bc9 100644 --- a/series/numberoperation_test.go +++ b/series/numberoperation_test.go @@ -4,7 +4,7 @@ import ( "reflect" "testing" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" ) func Test_Sub(t *testing.T) { From 7fbfed26b736a79a22dfc30d60b05a9bbddb9f97 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sun, 20 Mar 2022 16:50:44 +0800 Subject: [PATCH 33/60] modify module name --- dataframe/examples_test.go | 4 ++-- series/numberoperation_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dataframe/examples_test.go b/dataframe/examples_test.go index 86d076a..fe17324 100644 --- a/dataframe/examples_test.go +++ b/dataframe/examples_test.go @@ -4,8 +4,8 @@ import ( "fmt" "strings" - "github.com/go-gota/gota/dataframe" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/dataframe" + "github.com/mqy527/gota/series" ) func ExampleNew() { diff --git a/series/numberoperation_test.go b/series/numberoperation_test.go index aacc147..f741bc9 100644 --- a/series/numberoperation_test.go +++ b/series/numberoperation_test.go @@ -4,7 +4,7 @@ import ( "reflect" "testing" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" ) func Test_Sub(t *testing.T) { From 9289c823921d221db8afbc3db87296097a03168d Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sun, 20 Mar 2022 16:51:02 +0800 Subject: [PATCH 34/60] modify module name --- dataframe/benchmark_test.go | 4 ++-- dataframe/dataframe.go | 10 +++++----- dataframe/dataframe_test.go | 2 +- go.mod | 2 +- series/benchmarks_test.go | 8 +++----- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/dataframe/benchmark_test.go b/dataframe/benchmark_test.go index 2b00a94..1a545d2 100644 --- a/dataframe/benchmark_test.go +++ b/dataframe/benchmark_test.go @@ -5,8 +5,8 @@ import ( "strconv" "testing" - "github.com/go-gota/gota/dataframe" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/dataframe" + "github.com/mqy527/gota/series" ) func generateSeries(n, rep int) (data []series.Series) { diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 7c72111..24a0afe 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -13,7 +13,7 @@ import ( "strings" "unicode/utf8" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) @@ -636,14 +636,14 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame { // Mutate changes a column of the DataFrame with the given Series or adds it as // a new column if the column name does not exist. func (df DataFrame) Mutate(ss ...series.Series) DataFrame { - if df.Err != nil || len(ss) == 0{ + if df.Err != nil || len(ss) == 0 { return df } - + slen := ss[0].Len() for i := 1; i < len(ss); i++ { if slen != ss[i].Len() { - return DataFrame{Err: fmt.Errorf("mutate: serieses length not equal")} + return DataFrame{Err: fmt.Errorf("mutate: serieses length not equal")} } } if slen != df.nrows { @@ -660,7 +660,7 @@ func (df DataFrame) Mutate(ss ...series.Series) DataFrame { columns = append(columns, ss[i]) } } - + nrows, ncols, err := checkColumnsDimensions(columns...) if err != nil { return DataFrame{Err: err} diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 19e2157..4238621 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -10,7 +10,7 @@ import ( "math" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" ) // compareFloats compares floating point values up to the number of digits specified. diff --git a/go.mod b/go.mod index 6af1705..13022cf 100644 --- a/go.mod +++ b/go.mod @@ -1,4 +1,4 @@ -module github.com/go-gota/gota +module github.com/mqy527/gota go 1.16 diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index 935da87..d9bc8a5 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -5,7 +5,7 @@ import ( "strconv" "testing" - "github.com/go-gota/gota/series" + "github.com/mqy527/gota/series" ) func generateInts(n int) (data []int) { @@ -214,12 +214,11 @@ func BenchmarkSeries_Subset(b *testing.B) { } } - func BenchmarkSeries_Append(b *testing.B) { rand.Seed(100) table := []struct { - name string - series series.Series + name string + series series.Series }{ { "[]int(100000)_Int", @@ -249,7 +248,6 @@ func BenchmarkSeries_Append(b *testing.B) { } } - func BenchmarkSeries_Set(b *testing.B) { rand.Seed(100) table := []struct { From 07dca744ead339c9cedb659c13b752cd9b57dfef Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Mon, 28 Mar 2022 18:01:27 +0800 Subject: [PATCH 35/60] rolling cache --- go.mod | 1 + go.sum | 2 + series/benchmarks_test.go | 44 +++++++ series/rollingcache.go | 141 ++++++++++++++++++++ series/rollingcache_test.go | 255 ++++++++++++++++++++++++++++++++++++ series/series.go | 3 + 6 files changed, 446 insertions(+) create mode 100644 series/rollingcache.go create mode 100644 series/rollingcache_test.go diff --git a/go.mod b/go.mod index 13022cf..413b47b 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/mqy527/gota go 1.16 require ( + github.com/patrickmn/go-cache v2.1.0+incompatible golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6 gonum.org/v1/gonum v0.9.1 ) diff --git a/go.sum b/go.sum index 20a4dc3..c4e8d38 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2C github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= +github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index d9bc8a5..ca13d3c 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -1,6 +1,7 @@ package series_test import ( + "fmt" "math/rand" "strconv" "testing" @@ -290,3 +291,46 @@ func BenchmarkSeries_Set(b *testing.B) { }) } } + +func BenchmarkSeries_RollingCacheMeanByWeights(b *testing.B) { + tests := []struct { + series series.Series + window int + minPeriod int + weights []float64 + }{ + { + series.Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + []float64{0.5, 0.3, 0.2}, + }, + { + series.Floats([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + []float64{5, 3, 2}, + }, + } + + b.ResetTimer() + for testnum, test := range tests { + test.series.Name = fmt.Sprintf("Name-%d", testnum) + r := test.series.Rolling(test.window, test.minPeriod) + b.Run("Rolling-" + test.series.Name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + r.MeanByWeights(test.weights) + } + }) + } + b.ResetTimer() + for testnum, test := range tests { + test.series.Name = fmt.Sprintf("Name-%d", testnum) + rs := series.NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) + b.Run("CacheRolling-" + test.series.Name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + rs.MeanByWeights(test.weights) + } + }) + } +} diff --git a/series/rollingcache.go b/series/rollingcache.go new file mode 100644 index 0000000..290526d --- /dev/null +++ b/series/rollingcache.go @@ -0,0 +1,141 @@ +package series + +import ( + "fmt" + "sync" + "time" + "unsafe" + + "github.com/patrickmn/go-cache" +) + +var CacheAble = false + +var c Cache + +var once sync.Once + +//Cache define rolling cache +type Cache interface { + Set(k string, x interface{}) + Get(k string) (interface{}, bool) +} + +type defaultCache struct { + c *cache.Cache +} + +func (dc *defaultCache) Set(k string, v interface{}) { + dc.c.SetDefault(k, v) +} + +func (dc *defaultCache) Get(k string) (interface{}, bool) { + return dc.c.Get(k) +} + +//InitCache +func InitCache(f func() Cache) { + once.Do(func() { + CacheAble = true + if f == nil { + c = &defaultCache{ + c: cache.New(5*time.Minute, 10*time.Minute), + } + } else { + c = f() + } + }) +} + +type cacheAbleRollingSeries struct { + RollingSeries + cacheKey string +} + +func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeries { + if len(s.Name) == 0 { + panic("series must have a name") + } + if c == nil { + InitCache(nil) + } + cr := cacheAbleRollingSeries{ + RollingSeries: NewRollingSeries(window, minPeriods, s), + cacheKey: fmt.Sprintf("%s|%d|%d|%d", s.Name, s.Len(), window, minPeriods), + } + return cr +} + +func(rc cacheAbleRollingSeries) Max() Series{ + cacheKey := rc.cacheKey + "_max" + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.Max() + c.Set(cacheKey, ret) + return ret +} +func(rc cacheAbleRollingSeries) Min() Series{ + cacheKey := rc.cacheKey + "_min" + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.Min() + c.Set(cacheKey, ret) + return ret + +} +func(rc cacheAbleRollingSeries) Mean() Series{ + cacheKey := rc.cacheKey + "_mean" + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.Mean() + c.Set(cacheKey, ret) + return ret +} +func(rc cacheAbleRollingSeries) MeanByWeights(weights []float64) Series{ + cacheKey := fmt.Sprintf("%s_meanByWeights(%v)", rc.cacheKey, weights) + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.MeanByWeights(weights) + c.Set(cacheKey, ret) + return ret +} +func(rc cacheAbleRollingSeries) Quantile(p float64) Series{ + cacheKey := fmt.Sprintf("%s_quantile(%f)", rc.cacheKey, p) + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.Quantile(p) + c.Set(cacheKey, ret) + return ret +} +func(rc cacheAbleRollingSeries) Median() Series{ + cacheKey := rc.cacheKey + "_median" + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.Median() + c.Set(cacheKey, ret) + return ret +} +func(rc cacheAbleRollingSeries) StdDev() Series{ + cacheKey := rc.cacheKey + "_stdDev" + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.StdDev() + c.Set(cacheKey, ret) + return ret +} +func(rc cacheAbleRollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series{ + cacheKey := fmt.Sprintf("%s_apply(%v, %s)", rc.cacheKey, (*(*int64)(unsafe.Pointer(&f))), t) + if ret, found := c.Get(cacheKey); found { + return ret.(Series) + } + ret := rc.RollingSeries.Apply(f, t) + c.Set(cacheKey, ret) + return ret +} \ No newline at end of file diff --git a/series/rollingcache_test.go b/series/rollingcache_test.go new file mode 100644 index 0000000..4900ad9 --- /dev/null +++ b/series/rollingcache_test.go @@ -0,0 +1,255 @@ +package series + +import ( + "fmt" + "reflect" + "testing" + "unsafe" +) + +func TestSeries_RollingCache(t *testing.T) { + + tests := []struct { + series Series + window int + minPeriod int + maxExpected Series + minExpected Series + meanExpected Series + quantile float64 + quantileExpected Series + medianExpected Series + stdDevExpected Series + }{ + { + Bools([]string{"false", "true", "false", "false", "true"}), + 2, + 1, + Bools([]string{"false", "true", "true", "false", "true"}), + Bools([]string{"false", "false", "false", "false", "false"}), + Floats([]string{"0.000000", "0.500000", "0.500000", "0.000000", "0.500000"}), + 0.8, + Floats([]string{"0.000000", "1.000000", "1.000000", "0.000000", "1.000000"}), + Bools([]string{NaN, NaN, NaN, NaN, NaN}), + Floats([]string{NaN, "0.707106781", "0.707106781", "0.000000", "0.707106781"}), + }, + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + Floats([]string{NaN, "1.5", "1.5", "-0.337397", "1.60979", "34."}), + Floats([]string{NaN, "-3.23", "-3.23", "-3.23", "-0.380079", "-0.380079"}), + Floats([]string{NaN, "-0.865", "-0.689132333", "-1.315825333", "0.297438", "11.743237"}), + 0.7, + Floats([]string{NaN, "1.500000", "1.500000", "-0.337397", "1.609790", "34.000000"}), + Floats([]string{NaN, "-0.865", "-0.337397", "-0.380079", "-0.337397", "1.60979"}), + Floats([]string{NaN, "3.344615075", "2.384536288", "1.657861251", "1.136730517", "19.30058339"}), + }, + { + Strings([]string{"20210618", "20200909", "20200910", "20200912", "20200911"}), + 3, + 2, + Strings([]string{NaN, "20210618", "20210618", "20200912", "20200912"}), + Strings([]string{NaN, "20200909", "20200909", "20200909", "20200910"}), + Floats([]string{NaN, "20205763.500000", "20204145.666667", "20200910.333333", "20200911.000000"}), + 0.8, + Strings([]string{NaN, NaN, NaN, NaN, NaN}), + Strings([]string{NaN, NaN, NaN, NaN, NaN}), + Strings([]string{NaN, "6865.299739", "5605.205111", "1.527525", "1.000000"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + Ints([]string{"23", "23", "101", "101", "101"}), + Ints([]string{"23", "13", "13", "-64", "-64"}), + Floats([]string{"23.000000", "18.000000", "45.666667", "16.666667", "11.333333"}), + 0.8, + Floats([]string{"23", "23", "101", "101", "101"}), + Floats([]string{"23", "18", "23", "13", "-3"}), + Floats([]string{NaN, "7.071067812", "48.18021724", "82.56108849", "83.4286122"}), + }, + } + + for testnum, test := range tests { + var b Series + test.series.Name = fmt.Sprintf("Name-%d", testnum) + expected := test.maxExpected.Records() + rs := NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) + b = rs.Max() + b = rs.Max() + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Max:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.minExpected.Records() + b = rs.Min() + b = rs.Min() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Min:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.meanExpected.Records() + b = rs.Mean() + b = rs.Mean() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Mean:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.quantileExpected.Records() + b = rs.Quantile(test.quantile) + b = rs.Quantile(test.quantile) + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Quantile:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.medianExpected.Records() + b = rs.Median() + b = rs.Median() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Median:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.stdDevExpected.Records() + b = rs.StdDev() + b = rs.StdDev() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-StdDev:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_RollingCacheMeanByWeights(t *testing.T) { + tests := []struct { + series Series + window int + minPeriod int + weights []float64 + meanExpected Series + }{ + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + []float64{0.5, 0.3, 0.2}, + Floats([]string{NaN, "-0.392", "-0.2864794", "-1.7922349", "0.0392358", "7.0928975"}), + }, + { + Floats([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + []float64{5, 3, 2}, + Floats([]string{"23", "19", "35.6", "24", "30.7"}), + }, + } + + for testnum, test := range tests { + var b Series + test.series.Name = fmt.Sprintf("Name-%d", testnum) + expected := test.meanExpected.Records() + rs := NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) + b = rs.MeanByWeights(test.weights) + b = rs.MeanByWeights(test.weights) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-MeanByWeights:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} + +func TestSeries_F(t *testing.T) { + f := func() {} + fmt.Println("f: ", &f) + vf := f + fmt.Println("vfd: ", &vf) + + fmt.Println(*(*int64)(unsafe.Pointer(&f))) + fmt.Println(*(*int64)(unsafe.Pointer(&vf))) + +} + +func TestSeries_RollingCacheApply(t *testing.T) { + tests := []struct { + series Series + window int + minPeriod int + applyExpected Series + applyFunc func(window Series, windowIndex int) interface{} + t Type + }{ + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + Floats([]string{NaN, "2.5", "2.5", "-2.23", "0.662603", "0.619921"}), + func(window Series, windowIndex int) interface{} { + return window.Float()[0] + 1 + }, + "", + }, + { + Strings([]string{"20210618", "20200909", "20200910", "20200912", "20200911"}), + 3, + 2, + Strings([]string{NaN, "20210618-", "20210618-", "20200909-", "20200910-"}), + func(window Series, windowIndex int) interface{} { + return window.Elem(0).String() + "-" + }, + String, + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + Ints([]string{"24", "14", "102", "-63", "-2"}), + func(window Series, windowIndex int) interface{} { + i, _ := window.Elem(-1).Int() + return i + 1 + }, + Int, + }, + } + + for testnum, test := range tests { + var b Series + test.series.Name = fmt.Sprintf("Name-%d", testnum) + expected := test.applyExpected.Records() + rs := NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) + b = rs.Apply(test.applyFunc, test.t) + b = rs.Apply(test.applyFunc, test.t) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Apply:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} diff --git a/series/series.go b/series/series.go index 259c509..f847471 100644 --- a/series/series.go +++ b/series/series.go @@ -1078,6 +1078,9 @@ func (s Series) FillNaNBackward() { } func (s Series) Rolling(window int, minPeriods int) RollingSeries { + if CacheAble { + return NewCacheAbleRollingSeries(window, minPeriods, s) + } return NewRollingSeries(window, minPeriods, s) } From 789925a8551c1c98d6bc1dacbcee940e397f1575 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 29 Mar 2022 18:07:58 +0800 Subject: [PATCH 36/60] cacheAble --- series/cache.go | 43 +++ series/{rollingcache.go => cacherolling.go} | 46 +-- ...lingcache_test.go => cacherolling_test.go} | 0 series/cacheseries.go | 345 ++++++++++++++++++ series/series.go | 8 +- 5 files changed, 395 insertions(+), 47 deletions(-) create mode 100644 series/cache.go rename series/{rollingcache.go => cacherolling.go} (77%) rename series/{rollingcache_test.go => cacherolling_test.go} (100%) create mode 100644 series/cacheseries.go diff --git a/series/cache.go b/series/cache.go new file mode 100644 index 0000000..eea3259 --- /dev/null +++ b/series/cache.go @@ -0,0 +1,43 @@ +package series + +import ( + "sync" + "time" + + "github.com/patrickmn/go-cache" +) + +var c Cache + +var once sync.Once + +//Cache define rolling cache +type Cache interface { + Set(k string, x interface{}) + Get(k string) (interface{}, bool) +} + +type defaultCache struct { + c *cache.Cache +} + +func (dc *defaultCache) Set(k string, v interface{}) { + dc.c.SetDefault(k, v) +} + +func (dc *defaultCache) Get(k string) (interface{}, bool) { + return dc.c.Get(k) +} + +//InitCache +func InitCache(f func() Cache) { + once.Do(func() { + if f == nil { + c = &defaultCache{ + c: cache.New(5*time.Minute, 10*time.Minute), + } + } else { + c = f() + } + }) +} \ No newline at end of file diff --git a/series/rollingcache.go b/series/cacherolling.go similarity index 77% rename from series/rollingcache.go rename to series/cacherolling.go index 290526d..665b1de 100644 --- a/series/rollingcache.go +++ b/series/cacherolling.go @@ -2,56 +2,14 @@ package series import ( "fmt" - "sync" - "time" "unsafe" - - "github.com/patrickmn/go-cache" ) -var CacheAble = false - -var c Cache - -var once sync.Once - -//Cache define rolling cache -type Cache interface { - Set(k string, x interface{}) - Get(k string) (interface{}, bool) -} - -type defaultCache struct { - c *cache.Cache -} - -func (dc *defaultCache) Set(k string, v interface{}) { - dc.c.SetDefault(k, v) -} - -func (dc *defaultCache) Get(k string) (interface{}, bool) { - return dc.c.Get(k) -} - -//InitCache -func InitCache(f func() Cache) { - once.Do(func() { - CacheAble = true - if f == nil { - c = &defaultCache{ - c: cache.New(5*time.Minute, 10*time.Minute), - } - } else { - c = f() - } - }) -} - type cacheAbleRollingSeries struct { RollingSeries cacheKey string } - +// NewCacheAbleRollingSeries. You should make sure that the Series will not be modified. func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeries { if len(s.Name) == 0 { panic("series must have a name") @@ -60,7 +18,7 @@ func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeri InitCache(nil) } cr := cacheAbleRollingSeries{ - RollingSeries: NewRollingSeries(window, minPeriods, s), + RollingSeries: NewRollingSeries(window, minPeriods, s.Copy()), cacheKey: fmt.Sprintf("%s|%d|%d|%d", s.Name, s.Len(), window, minPeriods), } return cr diff --git a/series/rollingcache_test.go b/series/cacherolling_test.go similarity index 100% rename from series/rollingcache_test.go rename to series/cacherolling_test.go diff --git a/series/cacheseries.go b/series/cacheseries.go new file mode 100644 index 0000000..9b1437e --- /dev/null +++ b/series/cacheseries.go @@ -0,0 +1,345 @@ +package series + +import ( + "fmt" + "unsafe" +) + +var _ CacheAbleSeries = cacheAbleSeries{} + +type CacheAbleSeries interface { + Rolling(window int, minPeriods int) RollingSeries + HasNaN() bool + IsNaN() []bool + IsNotNaN() []bool + Compare(comparator Comparator, comparando interface{}) Series + Float() []float64 + Order(reverse bool) []int + StdDev() float64 + Mean() float64 + Median() float64 + Max() float64 + MaxStr() string + Min() float64 + MinStr() string + Quantile(p float64) float64 + Map(f MapFunction) Series + Shift(periods int) Series + CumProd() Series + Prod() float64 + AddConst(c float64) Series + MulConst(c float64) Series + DivConst(c float64) Series + Add(c Series) Series + Sub(c Series) Series + Mul(c Series) Series + Div(c Series) Series + Abs() Series + Sum() float64 + +} + +type cacheAbleSeries struct { + s Series + cacheKey string +} + +func newCacheAbleSeries(s Series) CacheAbleSeries { + if len(s.Name) == 0 { + panic("series must have a name") + } + if c == nil { + InitCache(nil) + } + + ret := &cacheAbleSeries{ + s: s.Copy(), + cacheKey: fmt.Sprintf("%s|%d", s.Name, s.Len()), + } + return ret +} + +func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { + cr := cacheAbleRollingSeries{ + RollingSeries: NewRollingSeries(window, minPeriods, cs.s), + cacheKey: fmt.Sprintf("%s|%d|%d", cs.cacheKey, window, minPeriods), + } + return cr +} + +func (cs cacheAbleSeries) HasNaN() bool { + cacheKey := cs.cacheKey + "_HasNaN" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.HasNaN() + return ret, nil + }) + return ret.(bool) +} + +func cacheOrExecuted(cacheKey string, f func() (interface{}, error)) (interface{}, error) { + if ret, found := c.Get(cacheKey); found { + return ret, nil + } + ret, err := f() + if err != nil { + c.Set(cacheKey, ret) + } + return ret, err +} + +func (cs cacheAbleSeries) IsNaN() []bool { + cacheKey := cs.cacheKey + "_IsNaN" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.IsNaN() + return ret, nil + }) + return ret.([]bool) +} + +func (cs cacheAbleSeries) IsNotNaN() []bool { + cacheKey := cs.cacheKey + "_IsNotNaN" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.IsNotNaN() + return ret, nil + }) + return ret.([]bool) +} + +func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) Series { + var cacheKey string + if comparator == CompFunc { + f, ok := comparando.(compFunc) + if !ok { + panic("comparando is not a comparison function of type func(el Element) bool") + } + cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, (*(*int64)(unsafe.Pointer(&f)))) + } else { + return cs.s.Compare(comparator, comparando) + } + + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Compare(comparator, comparando) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Float() []float64 { + cacheKey := cs.cacheKey + "_Float" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Float() + return ret, nil + }) + return ret.([]float64) +} + +func (cs cacheAbleSeries) Order(reverse bool) []int { + cacheKey := fmt.Sprintf("%s_Order(%v)", cs.cacheKey, reverse) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Order(reverse) + return ret, nil + }) + return ret.([]int) +} + +func (cs cacheAbleSeries) StdDev() float64 { + cacheKey := cs.cacheKey + "_StdDev" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.StdDev() + return ret, nil + }) + return ret.(float64) +} + +func (cs cacheAbleSeries) Mean() float64 { + cacheKey := cs.cacheKey + "_Mean" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Mean() + return ret, nil + }) + return ret.(float64) +} + +func (cs cacheAbleSeries) Median() float64 { + cacheKey := cs.cacheKey + "_Median" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Median() + return ret, nil + }) + return ret.(float64) +} + +func (cs cacheAbleSeries) Max() float64 { + cacheKey := cs.cacheKey + "_Max" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Max() + return ret, nil + }) + return ret.(float64) +} + +func (cs cacheAbleSeries) MaxStr() string { + cacheKey := cs.cacheKey + "_MaxStr" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.MaxStr() + return ret, nil + }) + return ret.(string) +} + +func (cs cacheAbleSeries) Min() float64 { + cacheKey := cs.cacheKey + "_Min" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Min() + return ret, nil + }) + return ret.(float64) +} + +func (cs cacheAbleSeries) MinStr() string { + cacheKey := cs.cacheKey + "_MinStr" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.MinStr() + return ret, nil + }) + return ret.(string) +} + +func (cs cacheAbleSeries) Quantile(p float64) float64 { + cacheKey := fmt.Sprintf("%s_Quantile(%f)", cs.cacheKey, p) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Quantile(p) + return ret, nil + }) + return ret.(float64) +} + +func (cs cacheAbleSeries) Map(f MapFunction) Series { + cacheKey := fmt.Sprintf("%s_Map(%v)", cs.cacheKey, (*(*int64)(unsafe.Pointer(&f)))) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Map(f) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Shift(periods int) Series { + cacheKey := fmt.Sprintf("%s_Shift(%d)", cs.cacheKey, periods) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Shift(periods) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) CumProd() Series { + cacheKey := cs.cacheKey + "_CumProd" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.CumProd() + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Prod() float64 { + cacheKey := cs.cacheKey + "_Prod" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Prod() + return ret, nil + }) + return ret.(float64) +} + +func (cs cacheAbleSeries) AddConst(c float64) Series { + cacheKey := fmt.Sprintf("%s_AddConst(%f)", cs.cacheKey, c) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.AddConst(c) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) MulConst(c float64) Series { + cacheKey := fmt.Sprintf("%s_MulConst(%f)", cs.cacheKey, c) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.MulConst(c) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) DivConst(c float64) Series { + cacheKey := fmt.Sprintf("%s_DivConst(%f)", cs.cacheKey, c) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.DivConst(c) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Add(c Series) Series { + if len(c.Name) == 0 { + panic("series c must have a name") + } + cacheKey := fmt.Sprintf("%s_Add(%s|%d)", cs.cacheKey, c.Name, c.Len()) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Add(c) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Sub(c Series) Series { + if len(c.Name) == 0 { + panic("series c must have a name") + } + cacheKey := fmt.Sprintf("%s_Sub(%s|%d)", cs.cacheKey, c.Name, c.Len()) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Sub(c) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Mul(c Series) Series { + if len(c.Name) == 0 { + panic("series c must have a name") + } + cacheKey := fmt.Sprintf("%s_Mul(%s|%d)", cs.cacheKey, c.Name, c.Len()) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Mul(c) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Div(c Series) Series { + if len(c.Name) == 0 { + panic("series c must have a name") + } + cacheKey := fmt.Sprintf("%s_Div(%s|%d)", cs.cacheKey, c.Name, c.Len()) + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Div(c) + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Abs() Series { + cacheKey := cs.cacheKey + "_Abs" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Abs() + return ret, nil + }) + return ret.(Series) +} + +func (cs cacheAbleSeries) Sum() float64 { + cacheKey := cs.cacheKey + "_Sum" + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret := cs.s.Sum() + return ret, nil + }) + return ret.(float64) +} + diff --git a/series/series.go b/series/series.go index f847471..9ae9fa2 100644 --- a/series/series.go +++ b/series/series.go @@ -1078,12 +1078,14 @@ func (s Series) FillNaNBackward() { } func (s Series) Rolling(window int, minPeriods int) RollingSeries { - if CacheAble { - return NewCacheAbleRollingSeries(window, minPeriods, s) - } return NewRollingSeries(window, minPeriods, s) } +// CacheAble +func (s Series) CacheAble() CacheAbleSeries { + return newCacheAbleSeries(s) +} + //Operation for multiple series calculation func Operation(operate func(index int, eles ...Element) interface{}, seriess ...Series) (Series, error) { if len(seriess) == 0 { From 9f25bdb0681909e0f98600b2b750edea9ce064fc Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 30 Mar 2022 18:17:27 +0800 Subject: [PATCH 37/60] unstable --- dataframe/dataframe.go | 64 ++-- go.mod | 1 + go.sum | 2 + series/benchmarks_test.go | 8 +- series/cache.go | 13 +- series/cacherolling.go | 4 +- series/cacherolling_test.go | 12 +- series/cacheseries.go | 219 ++++++++----- series/cacheseries_test.go | 598 ++++++++++++++++++++++++++++++++++++ series/readonly.go | 62 ++++ series/rolling.go | 33 +- series/series.go | 288 +++++++++-------- series/series_logic.go | 8 +- series/series_test.go | 34 +- 14 files changed, 1054 insertions(+), 292 deletions(-) create mode 100644 series/cacheseries_test.go create mode 100644 series/readonly.go diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 24a0afe..189a764 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -63,7 +63,7 @@ func New(se ...series.Series) DataFrame { colnames := df.Names() fixColnames(colnames) for i, colname := range colnames { - df.columns[i].Name = colname + df.columns[i].SetName(colname) } return df } @@ -76,8 +76,8 @@ func checkColumnsDimensions(se ...series.Series) (nrows, ncols int, err error) { return } for i, s := range se { - if s.Err != nil { - err = fmt.Errorf("error on series %d: %v", i, s.Err) + if s.Error() != nil { + err = fmt.Errorf("error on series %d: %v", i, s.Error()) return } if nrows == -1 { @@ -273,8 +273,8 @@ func (df DataFrame) Set(indexes series.Indexes, newvalues DataFrame) DataFrame { columns := make([]series.Series, df.ncols) for i, s := range df.columns { columns[i] = s.Set(indexes, newvalues.columns[i]) - if columns[i].Err != nil { - df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Err)} + if columns[i].Error() != nil { + df = DataFrame{Err: fmt.Errorf("setting error on column %d: %v", i, columns[i].Error())} return df } } @@ -343,7 +343,7 @@ func (df DataFrame) Select(indexes SelectIndexes) DataFrame { colnames := df.Names() fixColnames(colnames) for i, colname := range colnames { - df.columns[i].Name = colname + df.columns[i].SetName(colname) } return df } @@ -375,7 +375,7 @@ func (df DataFrame) Drop(indexes SelectIndexes) DataFrame { colnames := df.Names() fixColnames(colnames) for i, colname := range colnames { - df.columns[i].Name = colname + df.columns[i].SetName(colname) } return df } @@ -424,7 +424,7 @@ func (df DataFrame) GroupBy(colnames ...string) *Groups { // Save column types colTypes := map[string]series.Type{} for _, c := range df.columns { - colTypes[c.Name] = c.Type() + colTypes[c.Name()] = c.Type() } for k, cMaps := range groupSeries { @@ -542,7 +542,7 @@ func (df DataFrame) Rename(newname, oldname string) DataFrame { } copy := df.Copy() - copy.columns[idx].Name = newname + copy.columns[idx].SetName(newname) return copy } @@ -577,7 +577,7 @@ func (df DataFrame) RBind(dfb DataFrame) DataFrame { originalSeries := df.columns[k] addedSeries := dfb.columns[idx] newSeries := originalSeries.Concat(addedSeries) - if err := newSeries.Err; err != nil { + if err := newSeries.Error(); err != nil { return DataFrame{Err: fmt.Errorf("rbind: %v", err)} } expandedSeries[k] = newSeries @@ -617,15 +617,15 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame { a = df.columns[aidx] } else { bb := dfb.columns[bidx] - a = series.New(make([]struct{}, df.nrows), bb.Type(), bb.Name) + a = series.New(make([]struct{}, df.nrows), bb.Type(), bb.Name()) } if bidx != -1 { b = dfb.columns[bidx] } else { - b = series.New(make([]struct{}, dfb.nrows), a.Type(), a.Name) + b = series.New(make([]struct{}, dfb.nrows), a.Type(), a.Name()) } newSeries := a.Concat(b) - if err := newSeries.Err; err != nil { + if err := newSeries.Error(); err != nil { return DataFrame{Err: fmt.Errorf("concat: %v", err)} } expandedSeries[k] = newSeries @@ -654,7 +654,7 @@ func (df DataFrame) Mutate(ss ...series.Series) DataFrame { columns := df.columns dfNames := df.Names() for i := 0; i < len(ss); i++ { - if idx := findInStringSlice(ss[i].Name, dfNames); idx != -1 { + if idx := findInStringSlice(ss[i].Name(), dfNames); idx != -1 { columns[idx] = ss[i] } else { columns = append(columns, ss[i]) @@ -673,7 +673,7 @@ func (df DataFrame) Mutate(ss ...series.Series) DataFrame { colnames := df.Names() fixColnames(colnames) for i, colname := range colnames { - df.columns[i].Name = colname + df.columns[i].SetName(colname) } return df } @@ -734,7 +734,7 @@ func (df DataFrame) FilterAggregation(agg Aggregation, filters ...F) DataFrame { } } res := df.columns[idx].Compare(f.Comparator, f.Comparando) - if err := res.Err; err != nil { + if err := res.Error(); err != nil { return DataFrame{Err: fmt.Errorf("filter: %v", err)} } compResults[i] = res @@ -834,7 +834,7 @@ func (df DataFrame) Capply(f func(series.Series) series.Series) DataFrame { columns := make([]series.Series, df.ncols) for i, s := range df.columns { applied := f(s) - applied.Name = s.Name + applied.SetName(s.Name()) columns[i] = applied } return New(columns...) @@ -890,8 +890,8 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame { row.Append(col.Elem(i)) } row = f(row) - if row.Err != nil { - return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Err)} + if row.Error() != nil { + return DataFrame{Err: fmt.Errorf("error applying function on row %d: %v", i, row.Error())} } if rowlen != -1 && rowlen != row.Len() { @@ -933,7 +933,7 @@ func (df DataFrame) Rapply(f func(series.Series) series.Series) DataFrame { colnames := df.Names() fixColnames(colnames) for i, colname := range colnames { - df.columns[i].Name = colname + df.columns[i].SetName(colname) } return df } @@ -1254,8 +1254,8 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame { columns := make([]series.Series, len(headers)) for i, colname := range headers { col := series.New(rawcols[i], types[i], colname) - if col.Err != nil { - return DataFrame{Err: col.Err} + if col.Error() != nil { + return DataFrame{Err: col.Error()} } columns[i] = col } @@ -1272,7 +1272,7 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame { colnames := df.Names() fixColnames(colnames) for i, colname := range colnames { - df.columns[i].Name = colname + df.columns[i].SetName(colname) } return df } @@ -1342,7 +1342,7 @@ func LoadMatrix(mat Matrix) DataFrame { colnames := df.Names() fixColnames(colnames) for i, colname := range colnames { - df.columns[i].Name = colname + df.columns[i].SetName(colname) } return df } @@ -1563,7 +1563,7 @@ func ReadHTML(r io.Reader, options ...LoadOption) []DataFrame { func (df DataFrame) Names() []string { colnames := make([]string, df.ncols) for i, s := range df.columns { - colnames[i] = s.Name + colnames[i] = s.Name() } return colnames } @@ -1587,7 +1587,7 @@ func (df DataFrame) SetNames(colnames ...string) error { return fmt.Errorf("setting names: wrong dimensions") } for k, s := range colnames { - df.columns[k].Name = s + df.columns[k].SetName(s) } return nil } @@ -1610,12 +1610,12 @@ func (df DataFrame) Ncol() int { // Col returns a copy of the Series with the given column name contained in the DataFrame. func (df DataFrame) Col(colname string) series.Series { if df.Err != nil { - return series.Series{Err: df.Err} + return series.Err(df.Err) } // Check that colname exist on dataframe idx := findInStringSlice(colname, df.Names()) if idx < 0 { - return series.Series{Err: fmt.Errorf("unknown column name")} + return series.Err(fmt.Errorf("unknown column name")) } return df.columns[idx].Copy() } @@ -2226,7 +2226,7 @@ func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, } case series.Series: s := indexes.(series.Series) - if err := s.Err; err != nil { + if err := s.Error(); err != nil { return nil, fmt.Errorf("indexing error: new values has errors: %v", err) } if s.HasNaN() { @@ -2332,7 +2332,7 @@ func (df DataFrame) Describe() DataFrame { "75%", "max", }) - labels.Name = "column" + labels.SetName("column") ss := []series.Series{labels} @@ -2351,7 +2351,7 @@ func (df DataFrame) Describe() DataFrame { col.MaxStr(), }, col.Type(), - col.Name, + col.Name(), ) case series.Bool: fallthrough @@ -2369,7 +2369,7 @@ func (df DataFrame) Describe() DataFrame { col.Max(), }, series.Float, - col.Name, + col.Name(), ) } ss = append(ss, newCol) diff --git a/go.mod b/go.mod index 413b47b..3b84a90 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.16 require ( github.com/patrickmn/go-cache v2.1.0+incompatible + github.com/satori/go.uuid v1.2.0 golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6 gonum.org/v1/gonum v0.9.1 ) diff --git a/go.sum b/go.sum index c4e8d38..532c9ae 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= +github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= +github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index ca13d3c..b8f30f6 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -315,9 +315,9 @@ func BenchmarkSeries_RollingCacheMeanByWeights(b *testing.B) { b.ResetTimer() for testnum, test := range tests { - test.series.Name = fmt.Sprintf("Name-%d", testnum) + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) r := test.series.Rolling(test.window, test.minPeriod) - b.Run("Rolling-" + test.series.Name, func(b *testing.B) { + b.Run("Rolling-" + test.series.Name(), func(b *testing.B) { for i := 0; i < b.N; i++ { r.MeanByWeights(test.weights) } @@ -325,9 +325,9 @@ func BenchmarkSeries_RollingCacheMeanByWeights(b *testing.B) { } b.ResetTimer() for testnum, test := range tests { - test.series.Name = fmt.Sprintf("Name-%d", testnum) + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) rs := series.NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) - b.Run("CacheRolling-" + test.series.Name, func(b *testing.B) { + b.Run("CacheRolling-" + test.series.Name(), func(b *testing.B) { for i := 0; i < b.N; i++ { rs.MeanByWeights(test.weights) } diff --git a/series/cache.go b/series/cache.go index eea3259..6c041b5 100644 --- a/series/cache.go +++ b/series/cache.go @@ -15,6 +15,7 @@ var once sync.Once type Cache interface { Set(k string, x interface{}) Get(k string) (interface{}, bool) + Clear() } type defaultCache struct { @@ -29,6 +30,10 @@ func (dc *defaultCache) Get(k string) (interface{}, bool) { return dc.c.Get(k) } +func (dc *defaultCache) Clear() { + dc.c.Flush() +} + //InitCache func InitCache(f func() Cache) { once.Do(func() { @@ -40,4 +45,10 @@ func InitCache(f func() Cache) { c = f() } }) -} \ No newline at end of file +} + +func ClearCache() { + if c != nil { + c.Clear() + } +} diff --git a/series/cacherolling.go b/series/cacherolling.go index 665b1de..c1b0c33 100644 --- a/series/cacherolling.go +++ b/series/cacherolling.go @@ -11,7 +11,7 @@ type cacheAbleRollingSeries struct { } // NewCacheAbleRollingSeries. You should make sure that the Series will not be modified. func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeries { - if len(s.Name) == 0 { + if len(s.Name()) == 0 { panic("series must have a name") } if c == nil { @@ -19,7 +19,7 @@ func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeri } cr := cacheAbleRollingSeries{ RollingSeries: NewRollingSeries(window, minPeriods, s.Copy()), - cacheKey: fmt.Sprintf("%s|%d|%d|%d", s.Name, s.Len(), window, minPeriods), + cacheKey: fmt.Sprintf("%s|%d|%d|%d", s.Name(), s.Len(), window, minPeriods), } return cr } diff --git a/series/cacherolling_test.go b/series/cacherolling_test.go index 4900ad9..f47efad 100644 --- a/series/cacherolling_test.go +++ b/series/cacherolling_test.go @@ -73,9 +73,9 @@ func TestSeries_RollingCache(t *testing.T) { for testnum, test := range tests { var b Series - test.series.Name = fmt.Sprintf("Name-%d", testnum) + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) expected := test.maxExpected.Records() - rs := NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) + rs := test.series.CacheAble().Rolling(test.window, test.minPeriod) b = rs.Max() b = rs.Max() received := b.Records() @@ -169,9 +169,9 @@ func TestSeries_RollingCacheMeanByWeights(t *testing.T) { for testnum, test := range tests { var b Series - test.series.Name = fmt.Sprintf("Name-%d", testnum) + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) expected := test.meanExpected.Records() - rs := NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) + rs := test.series.CacheAble().Rolling(test.window, test.minPeriod) b = rs.MeanByWeights(test.weights) b = rs.MeanByWeights(test.weights) received := b.Records() @@ -239,9 +239,9 @@ func TestSeries_RollingCacheApply(t *testing.T) { for testnum, test := range tests { var b Series - test.series.Name = fmt.Sprintf("Name-%d", testnum) + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) expected := test.applyExpected.Records() - rs := NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) + rs := test.series.CacheAble().Rolling(test.window, test.minPeriod) b = rs.Apply(test.applyFunc, test.t) b = rs.Apply(test.applyFunc, test.t) received := b.Records() diff --git a/series/cacheseries.go b/series/cacheseries.go index 9b1437e..e1c8779 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -2,50 +2,22 @@ package series import ( "fmt" + "reflect" "unsafe" + + uuid "github.com/satori/go.uuid" ) -var _ CacheAbleSeries = cacheAbleSeries{} - -type CacheAbleSeries interface { - Rolling(window int, minPeriods int) RollingSeries - HasNaN() bool - IsNaN() []bool - IsNotNaN() []bool - Compare(comparator Comparator, comparando interface{}) Series - Float() []float64 - Order(reverse bool) []int - StdDev() float64 - Mean() float64 - Median() float64 - Max() float64 - MaxStr() string - Min() float64 - MinStr() string - Quantile(p float64) float64 - Map(f MapFunction) Series - Shift(periods int) Series - CumProd() Series - Prod() float64 - AddConst(c float64) Series - MulConst(c float64) Series - DivConst(c float64) Series - Add(c Series) Series - Sub(c Series) Series - Mul(c Series) Series - Div(c Series) Series - Abs() Series - Sum() float64 +var _ Series = (*cacheAbleSeries)(nil) -} type cacheAbleSeries struct { - s Series + Series cacheKey string } -func newCacheAbleSeries(s Series) CacheAbleSeries { - if len(s.Name) == 0 { +func newCacheAbleSeries(s Series) Series { + if len(s.Name()) == 0 { panic("series must have a name") } if c == nil { @@ -53,15 +25,15 @@ func newCacheAbleSeries(s Series) CacheAbleSeries { } ret := &cacheAbleSeries{ - s: s.Copy(), - cacheKey: fmt.Sprintf("%s|%d", s.Name, s.Len()), + Series: s, + cacheKey: fmt.Sprintf("%s|%d", s.Name(), s.Len()), } return ret } func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { cr := cacheAbleRollingSeries{ - RollingSeries: NewRollingSeries(window, minPeriods, cs.s), + RollingSeries: NewRollingSeries(window, minPeriods, cs.Series), cacheKey: fmt.Sprintf("%s|%d|%d", cs.cacheKey, window, minPeriods), } return cr @@ -70,7 +42,7 @@ func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { func (cs cacheAbleSeries) HasNaN() bool { cacheKey := cs.cacheKey + "_HasNaN" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.HasNaN() + ret := cs.Series.HasNaN() return ret, nil }) return ret.(bool) @@ -81,7 +53,7 @@ func cacheOrExecuted(cacheKey string, f func() (interface{}, error)) (interface{ return ret, nil } ret, err := f() - if err != nil { + if err == nil { c.Set(cacheKey, ret) } return ret, err @@ -90,7 +62,7 @@ func cacheOrExecuted(cacheKey string, f func() (interface{}, error)) (interface{ func (cs cacheAbleSeries) IsNaN() []bool { cacheKey := cs.cacheKey + "_IsNaN" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.IsNaN() + ret := cs.Series.IsNaN() return ret, nil }) return ret.([]bool) @@ -99,12 +71,13 @@ func (cs cacheAbleSeries) IsNaN() []bool { func (cs cacheAbleSeries) IsNotNaN() []bool { cacheKey := cs.cacheKey + "_IsNotNaN" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.IsNotNaN() + ret := cs.Series.IsNotNaN() return ret, nil }) return ret.([]bool) } +//Compare. The result will be cached if comparando is 'compFunc', 'Series' or not a slice. func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) Series { var cacheKey string if comparator == CompFunc { @@ -114,11 +87,24 @@ func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) } cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, (*(*int64)(unsafe.Pointer(&f)))) } else { - return cs.s.Compare(comparator, comparando) + switch v := comparando.(type) { + case series: + if len(v.name) == 0 { + panic("series must have a name") + } + cacheKey = fmt.Sprintf("%s_Compare(%s, %s|%d)", cs.cacheKey, comparator, v.name, v.Len()) + default: + switch reflect.TypeOf(comparando).Kind() { + case reflect.Slice: + return cs.Series.Compare(comparator, comparando) + default: + cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, comparando) + } + } } ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Compare(comparator, comparando) + ret := cs.Series.Compare(comparator, comparando) return ret, nil }) return ret.(Series) @@ -127,7 +113,7 @@ func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) func (cs cacheAbleSeries) Float() []float64 { cacheKey := cs.cacheKey + "_Float" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Float() + ret := cs.Series.Float() return ret, nil }) return ret.([]float64) @@ -136,7 +122,7 @@ func (cs cacheAbleSeries) Float() []float64 { func (cs cacheAbleSeries) Order(reverse bool) []int { cacheKey := fmt.Sprintf("%s_Order(%v)", cs.cacheKey, reverse) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Order(reverse) + ret := cs.Series.Order(reverse) return ret, nil }) return ret.([]int) @@ -145,7 +131,7 @@ func (cs cacheAbleSeries) Order(reverse bool) []int { func (cs cacheAbleSeries) StdDev() float64 { cacheKey := cs.cacheKey + "_StdDev" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.StdDev() + ret := cs.Series.StdDev() return ret, nil }) return ret.(float64) @@ -154,7 +140,7 @@ func (cs cacheAbleSeries) StdDev() float64 { func (cs cacheAbleSeries) Mean() float64 { cacheKey := cs.cacheKey + "_Mean" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Mean() + ret := cs.Series.Mean() return ret, nil }) return ret.(float64) @@ -163,7 +149,7 @@ func (cs cacheAbleSeries) Mean() float64 { func (cs cacheAbleSeries) Median() float64 { cacheKey := cs.cacheKey + "_Median" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Median() + ret := cs.Series.Median() return ret, nil }) return ret.(float64) @@ -172,7 +158,7 @@ func (cs cacheAbleSeries) Median() float64 { func (cs cacheAbleSeries) Max() float64 { cacheKey := cs.cacheKey + "_Max" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Max() + ret := cs.Series.Max() return ret, nil }) return ret.(float64) @@ -181,7 +167,7 @@ func (cs cacheAbleSeries) Max() float64 { func (cs cacheAbleSeries) MaxStr() string { cacheKey := cs.cacheKey + "_MaxStr" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.MaxStr() + ret := cs.Series.MaxStr() return ret, nil }) return ret.(string) @@ -190,7 +176,7 @@ func (cs cacheAbleSeries) MaxStr() string { func (cs cacheAbleSeries) Min() float64 { cacheKey := cs.cacheKey + "_Min" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Min() + ret := cs.Series.Min() return ret, nil }) return ret.(float64) @@ -199,7 +185,7 @@ func (cs cacheAbleSeries) Min() float64 { func (cs cacheAbleSeries) MinStr() string { cacheKey := cs.cacheKey + "_MinStr" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.MinStr() + ret := cs.Series.MinStr() return ret, nil }) return ret.(string) @@ -208,7 +194,7 @@ func (cs cacheAbleSeries) MinStr() string { func (cs cacheAbleSeries) Quantile(p float64) float64 { cacheKey := fmt.Sprintf("%s_Quantile(%f)", cs.cacheKey, p) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Quantile(p) + ret := cs.Series.Quantile(p) return ret, nil }) return ret.(float64) @@ -217,7 +203,7 @@ func (cs cacheAbleSeries) Quantile(p float64) float64 { func (cs cacheAbleSeries) Map(f MapFunction) Series { cacheKey := fmt.Sprintf("%s_Map(%v)", cs.cacheKey, (*(*int64)(unsafe.Pointer(&f)))) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Map(f) + ret := cs.Series.Map(f) return ret, nil }) return ret.(Series) @@ -226,7 +212,9 @@ func (cs cacheAbleSeries) Map(f MapFunction) Series { func (cs cacheAbleSeries) Shift(periods int) Series { cacheKey := fmt.Sprintf("%s_Shift(%d)", cs.cacheKey, periods) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Shift(periods) + res := cs.Series.Shift(periods) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) @@ -235,7 +223,9 @@ func (cs cacheAbleSeries) Shift(periods int) Series { func (cs cacheAbleSeries) CumProd() Series { cacheKey := cs.cacheKey + "_CumProd" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.CumProd() + res := cs.Series.CumProd() + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) @@ -244,7 +234,7 @@ func (cs cacheAbleSeries) CumProd() Series { func (cs cacheAbleSeries) Prod() float64 { cacheKey := cs.cacheKey + "_Prod" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Prod() + ret := cs.Series.Prod() return ret, nil }) return ret.(float64) @@ -253,7 +243,9 @@ func (cs cacheAbleSeries) Prod() float64 { func (cs cacheAbleSeries) AddConst(c float64) Series { cacheKey := fmt.Sprintf("%s_AddConst(%f)", cs.cacheKey, c) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.AddConst(c) + res := cs.Series.AddConst(c) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) @@ -262,7 +254,9 @@ func (cs cacheAbleSeries) AddConst(c float64) Series { func (cs cacheAbleSeries) MulConst(c float64) Series { cacheKey := fmt.Sprintf("%s_MulConst(%f)", cs.cacheKey, c) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.MulConst(c) + res := cs.Series.MulConst(c) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) @@ -271,55 +265,65 @@ func (cs cacheAbleSeries) MulConst(c float64) Series { func (cs cacheAbleSeries) DivConst(c float64) Series { cacheKey := fmt.Sprintf("%s_DivConst(%f)", cs.cacheKey, c) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.DivConst(c) + res := cs.Series.DivConst(c) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) } func (cs cacheAbleSeries) Add(c Series) Series { - if len(c.Name) == 0 { + if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Add(%s|%d)", cs.cacheKey, c.Name, c.Len()) + cacheKey := fmt.Sprintf("%s_Add(%s|%d)", cs.cacheKey, c.Name(), c.Len()) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Add(c) + res := cs.Series.Add(c) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) } func (cs cacheAbleSeries) Sub(c Series) Series { - if len(c.Name) == 0 { + if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Sub(%s|%d)", cs.cacheKey, c.Name, c.Len()) + cacheKey := fmt.Sprintf("%s_Sub(%s|%d)", cs.cacheKey, c.Name(), c.Len()) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Sub(c) + res := cs.Series.Sub(c) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) } func (cs cacheAbleSeries) Mul(c Series) Series { - if len(c.Name) == 0 { + if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Mul(%s|%d)", cs.cacheKey, c.Name, c.Len()) + cacheKey := fmt.Sprintf("%s_Mul(%s|%d)", cs.cacheKey, c.Name(), c.Len()) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Mul(c) + res := cs.Series.Mul(c) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) } func (cs cacheAbleSeries) Div(c Series) Series { - if len(c.Name) == 0 { + if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Div(%s|%d)", cs.cacheKey, c.Name, c.Len()) + cacheKey := fmt.Sprintf("%s_Div(%s|%d)", cs.cacheKey, c.Name(), c.Len()) ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Div(c) + res := cs.Series.Div(c) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) @@ -328,7 +332,9 @@ func (cs cacheAbleSeries) Div(c Series) Series { func (cs cacheAbleSeries) Abs() Series { cacheKey := cs.cacheKey + "_Abs" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Abs() + res := cs.Series.Abs() + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) return ret, nil }) return ret.(Series) @@ -337,9 +343,72 @@ func (cs cacheAbleSeries) Abs() Series { func (cs cacheAbleSeries) Sum() float64 { cacheKey := cs.cacheKey + "_Sum" ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { - ret := cs.s.Sum() + ret := cs.Series.Sum() return ret, nil }) return ret.(float64) } +func (cs cacheAbleSeries) Empty() Series { + return cs.Series.Empty() +} + +func (cs cacheAbleSeries) Error() error { + return cs.Series.Error() +} + +func (cs cacheAbleSeries) Subset(indexes Indexes) Series { + return cs.Series.Subset(indexes) +} + +func (cs cacheAbleSeries) Concat(x Series) Series { + if len(x.Name()) == 0 { + panic("series x must have a name") + } + cacheKey := fmt.Sprintf("%s_Concat(%s|%d)", cs.cacheKey, x.Name(), x.Len()) + res := cs.Series.Concat(x) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) + return ret +} + +func (cs cacheAbleSeries) Copy() Series { + cacheKey := fmt.Sprintf("%s_Copy{%s}", cs.cacheKey, uuid.NewV4().String()) + res := cs.Series.Copy() + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) + return ret +} + +func (cs cacheAbleSeries) Records() []string { + return cs.Series.Records() +} + +func (cs cacheAbleSeries) Type() Type { + return cs.Series.Type() +} + +func (cs cacheAbleSeries) Len() int { + return cs.Series.Len() +} + +func (cs cacheAbleSeries) String() string { + return cs.Series.String() +} + +func (cs cacheAbleSeries) Str() string { + return cs.Series.Str() +} + +func (cs cacheAbleSeries) Val(i int) interface{} { + return cs.Series.Val(i) +} + +func (cs cacheAbleSeries) Elem(i int) Element { + return cs.Series.Elem(i) +} + +func (cs cacheAbleSeries) Slice(start int, end int) Series { + return cs.Series.Slice(start, end) +} + diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go new file mode 100644 index 0000000..10c13da --- /dev/null +++ b/series/cacheseries_test.go @@ -0,0 +1,598 @@ +package series + +import ( + "fmt" + "math" + "reflect" + "strings" + "testing" + "time" + + "github.com/patrickmn/go-cache" +) + +type mockCache struct { + setCount int + getCount int + hitCount int + innerCache Cache +} + +func (mc *mockCache) Set(k string, v interface{}) { + mc.innerCache.Set(k, v) + mc.setCount++ +} + +func (mc *mockCache) Get(k string) (interface{}, bool) { + mc.getCount++ + v, ok := mc.innerCache.Get(k) + if ok { + mc.hitCount++ + } + return v, ok +} + +func (mc *mockCache) Clear() { + mc.innerCache.Clear() + mc.setCount = 0 + mc.getCount = 0 + mc.hitCount = 0 +} + +var testCache = &mockCache{ + innerCache: &defaultCache{ + c: cache.New(5*time.Minute, 10*time.Minute), + }, +} + +func TestMain(m *testing.M) { + InitCache(func() Cache { + return testCache + }) + m.Run() + ClearCache() +} + +func TestCacheSeries_Map(t *testing.T) { + tests := []struct { + series Series + expected Series + }{ + { + Bools([]bool{false, true, false, false, true}), + Bools([]bool{false, true, false, false, true}), + }, + { + Floats([]float64{1.5, -3.23, -0.337397, -0.380079, 1.60979, 34.}), + Floats([]float64{3, -6.46, -0.674794, -0.760158, 3.21958, 68.}), + }, + { + Floats([]float64{math.Pi, math.Phi, math.SqrtE, math.Cbrt(64)}), + Floats([]float64{2 * math.Pi, 2 * math.Phi, 2 * math.SqrtE, 2 * math.Cbrt(64)}), + }, + { + Strings([]string{"XyZApple", "XyZBanana", "XyZCitrus", "XyZDragonfruit"}), + Strings([]string{"Apple", "Banana", "Citrus", "Dragonfruit"}), + }, + { + Strings([]string{"San Francisco", "XyZTokyo", "MoscowXyZ", "XyzSydney"}), + Strings([]string{"San Francisco", "Tokyo", "MoscowXyZ", "XyzSydney"}), + }, + { + Ints([]int{23, 13, 101, -64, -3}), + Ints([]int{28, 18, 106, -59, 2}), + }, + { + Ints([]string{"morning", "noon", "afternoon", "evening", "night"}), + Ints([]int{5, 5, 5, 5, 5}), + }, + } + + doubleFloat64 := func(e Element, index int) Element { + result := e.Copy() + result.Set(result.Float() * 2) + return Element(result) + } + + // and two booleans + and := func(e Element, index int) Element { + result := e.Copy() + b, err := result.Bool() + if err != nil { + t.Errorf("%v", err) + return Element(nil) + } + result.Set(b && true) + return Element(result) + } + + // add constant (+5) to value (v) + add5Int := func(e Element, index int) Element { + result := e.Copy() + i, err := result.Int() + if err != nil { + return Element(&intElement{ + e: +5, + nan: false, + }) + } + result.Set(i + 5) + return Element(result) + } + + // trim (XyZ) prefix from string + trimXyZPrefix := func(e Element, index int) Element { + result := e.Copy() + result.Set(strings.TrimPrefix(result.String(), "XyZ")) + return Element(result) + } + + setCount := 0 + getCount := 0 + hitCount := 0 + + for testnum, test := range tests { + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) + tmpSeries := test.series.CacheAble() + var received Series + switch test.series.Type() { + case Bool: + expected := test.expected + received = tmpSeries.Map(and) + getCount++ + setCount++ + received = tmpSeries.Map(and) + getCount++ + hitCount++ + for i := 0; i < expected.Len(); i++ { + e, _ := expected.Elem(i).Bool() + r, _ := received.Elem(i).Bool() + + if e != r { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + + case Float: + expected := test.expected + received = tmpSeries.Map(doubleFloat64) + getCount++ + setCount++ + received = tmpSeries.Map(doubleFloat64) + getCount++ + hitCount++ + for i := 0; i < expected.Len(); i++ { + if !compareFloats(expected.Elem(i).Float(), + received.Elem(i).Float(), 6) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + case Int: + expected := test.expected + received = tmpSeries.Map(add5Int) + getCount++ + setCount++ + received = tmpSeries.Map(add5Int) + getCount++ + hitCount++ + for i := 0; i < expected.Len(); i++ { + e, _ := expected.Elem(i).Int() + r, _ := received.Elem(i).Int() + if e != r { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + case String: + expected := test.expected + received = tmpSeries.Map(trimXyZPrefix) + getCount++ + setCount++ + received = tmpSeries.Map(trimXyZPrefix) + getCount++ + hitCount++ + for i := 0; i < expected.Len(); i++ { + if strings.Compare(expected.Elem(i).String(), + received.Elem(i).String()) != 0 { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + default: + } + } + if setCount != testCache.setCount { + t.Errorf("CacheInfo[setCount]:\nsetExpected:%v\nActual:%v", setCount, testCache.setCount) + } + if getCount != testCache.getCount { + t.Errorf("CacheInfo[getCount]:\nExpected:%v\nActual:%v", getCount, testCache.getCount) + } + if hitCount != testCache.hitCount { + t.Errorf("CacheInfo[hitCount]:\nExpected:%v\nActual:%v", hitCount, testCache.hitCount) + } + +} + + +func TestCacheSeries_Compare(t *testing.T) { + table := []struct { + series Series + comparator Comparator + comparando interface{} + expected Series + }{ + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Eq, + "B", + Bools([]bool{false, true, false, true, false, false}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Eq, + []string{"B", "B", "C", "D", "A", "A"}, + Bools([]bool{false, true, true, false, false, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Eq, + "2", + Bools([]bool{false, true, false, false, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Eq, + []int{0, 2, 0, 5, 10}, + Bools([]bool{true, true, false, true, false}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Eq, + "2", + Bools([]bool{false, true, false, false, false}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Eq, + []float64{0.1, 2, 0, 5, 10}, + Bools([]bool{true, true, false, true, false}), + }, + { + Bools([]bool{true, true, false}), + Eq, + "true", + Bools([]bool{true, true, false}), + }, + { + Bools([]bool{true, true, false}), + Eq, + []bool{true, false, false}, + Bools([]bool{true, false, true}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Neq, + "B", + Bools([]bool{true, false, true, false, true, true}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Neq, + []string{"B", "B", "C", "D", "A", "A"}, + Bools([]bool{true, false, false, true, true, true}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Neq, + "2", + Bools([]bool{true, false, true, true, true}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Neq, + []int{0, 2, 0, 5, 10}, + Bools([]bool{false, false, true, false, true}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Neq, + "2", + Bools([]bool{true, false, true, true, true}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Neq, + []float64{0.1, 2, 0, 5, 10}, + Bools([]bool{false, false, true, false, true}), + }, + { + Bools([]bool{true, true, false}), + Neq, + "true", + Bools([]bool{false, false, true}), + }, + { + Bools([]bool{true, true, false}), + Neq, + []bool{true, false, false}, + Bools([]bool{false, true, false}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Greater, + "B", + Bools([]bool{false, false, true, false, true, true}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Greater, + []string{"B", "B", "C", "D", "A", "A"}, + Bools([]bool{false, false, false, false, true, true}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Greater, + "2", + Bools([]bool{false, false, false, true, true}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Greater, + []int{0, 2, 0, 5, 10}, + Bools([]bool{false, false, true, false, false}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Greater, + "2", + Bools([]bool{false, false, false, true, true}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Greater, + []float64{0.1, 2, 0, 5, 10}, + Bools([]bool{false, false, true, false, false}), + }, + { + Bools([]bool{true, true, false}), + Greater, + "true", + Bools([]bool{false, false, false}), + }, + { + Bools([]bool{true, true, false}), + Greater, + []bool{true, false, false}, + Bools([]bool{false, true, false}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + GreaterEq, + "B", + Bools([]bool{false, true, true, true, true, true}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + GreaterEq, + []string{"B", "B", "C", "D", "A", "A"}, + Bools([]bool{false, true, true, false, true, true}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + GreaterEq, + "2", + Bools([]bool{false, true, false, true, true}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + GreaterEq, + []int{0, 2, 0, 5, 10}, + Bools([]bool{true, true, true, true, false}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + GreaterEq, + "2", + Bools([]bool{false, true, false, true, true}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + GreaterEq, + []float64{0.1, 2, 0, 5, 10}, + Bools([]bool{true, true, true, true, false}), + }, + { + Bools([]bool{true, true, false}), + GreaterEq, + "true", + Bools([]bool{true, true, false}), + }, + { + Bools([]bool{true, true, false}), + GreaterEq, + []bool{true, false, false}, + Bools([]bool{true, true, true}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Less, + "B", + Bools([]bool{true, false, false, false, false, false}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + Less, + []string{"B", "B", "C", "D", "A", "A"}, + Bools([]bool{true, false, false, true, false, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Less, + "2", + Bools([]bool{true, false, true, false, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + Less, + []int{0, 2, 0, 5, 10}, + Bools([]bool{false, false, false, false, true}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Less, + "2", + Bools([]bool{true, false, true, false, false}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + Less, + []float64{0.1, 2, 0, 5, 10}, + Bools([]bool{false, false, false, false, true}), + }, + { + Bools([]bool{true, true, false}), + Less, + "true", + Bools([]bool{false, false, true}), + }, + { + Bools([]bool{true, true, false}), + Less, + []bool{true, false, false}, + Bools([]bool{false, false, false}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + LessEq, + "B", + Bools([]bool{true, true, false, true, false, false}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + LessEq, + []string{"B", "B", "C", "D", "A", "A"}, + Bools([]bool{true, true, true, true, false, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + LessEq, + "2", + Bools([]bool{true, true, true, false, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + LessEq, + []int{0, 2, 0, 5, 10}, + Bools([]bool{true, true, false, true, true}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + LessEq, + "2", + Bools([]bool{true, true, true, false, false}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + LessEq, + []float64{0.1, 2, 0, 5, 10}, + Bools([]bool{true, true, false, true, true}), + }, + { + Bools([]bool{true, true, false}), + LessEq, + "true", + Bools([]bool{true, true, true}), + }, + { + Bools([]bool{true, true, false}), + LessEq, + []bool{true, false, false}, + Bools([]bool{true, false, true}), + }, + { + Strings([]string{"A", "B", "C", "B", "D", "BADA"}), + In, + "B", + Bools([]bool{false, true, false, true, false, false}), + }, + { + Strings([]string{"Hello", "world", "this", "is", "a", "test"}), + In, + []string{"cat", "world", "hello", "a"}, + Bools([]bool{false, true, false, false, true, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + In, + "2", + Bools([]bool{false, true, false, false, false}), + }, + { + Ints([]int{0, 2, 1, 5, 9}), + In, + []int{2, 99, 1234, 9}, + Bools([]bool{false, true, false, false, true}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + In, + "2", + Bools([]bool{false, true, false, false, false}), + }, + { + Floats([]float64{0.1, 2, 1, 5, 9}), + In, + []float64{2, 99, 1234, 9}, + Bools([]bool{false, true, false, false, true}), + }, + { + Bools([]bool{true, true, false}), + In, + "true", + Bools([]bool{true, true, false}), + }, + { + Bools([]bool{true, true, false}), + In, + []bool{false, false, false}, + Bools([]bool{false, false, true}), + }, + } + for testnum, test := range table { + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) + a := test.series.CacheAble() + b := a.Compare(test.comparator, test.comparando) + b = a.Compare(test.comparator, test.comparando) + if err := b.Error(); err != nil { + t.Errorf("Test:%v\nError:%v", testnum, err) + } + expected := test.expected.Records() + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + if err := checkTypes(b); err != nil { + t.Errorf( + "Test:%v\nError:%v", + testnum, err, + ) + } + } + + fmt.Printf("getCount:%d, setCount:%d, hitCount:%d \n", testCache.getCount, testCache.setCount, testCache.hitCount) +} + + diff --git a/series/readonly.go b/series/readonly.go new file mode 100644 index 0000000..3700b83 --- /dev/null +++ b/series/readonly.go @@ -0,0 +1,62 @@ +package series + +type Series interface { + Rolling(window int, minPeriods int) RollingSeries + HasNaN() bool + IsNaN() []bool + IsNotNaN() []bool + Compare(comparator Comparator, comparando interface{}) Series + Float() []float64 + Bool() ([]bool, error) + Int() ([]int, error) + + Order(reverse bool) []int + StdDev() float64 + Mean() float64 + Median() float64 + Max() float64 + MaxStr() string + Min() float64 + MinStr() string + Quantile(p float64) float64 + Map(f MapFunction) Series + Shift(periods int) Series + CumProd() Series + Prod() float64 + AddConst(c float64) Series + MulConst(c float64) Series + DivConst(c float64) Series + Add(c Series) Series + Sub(c Series) Series + Mul(c Series) Series + Div(c Series) Series + Abs() Series + Sum() float64 + + Empty() Series + Error() error + Subset(indexes Indexes) Series + + Concat(x Series) Series + Copy() Series + + Records() []string + Type() Type + Len() int + String() string + Str() string + Val(i int) interface{} + Elem(i int) Element + Slice(start, end int) Series + FillNaN(value ElementValue) + FillNaNForward() + FillNaNBackward() + CacheAble() Series + Set(indexes Indexes, newvalues Series) Series + Append(values interface{}) + Name() string + SetName(name string) + SetErr(err error) + And(in interface{}) Series + Or(in interface{}) Series +} diff --git a/series/rolling.go b/series/rolling.go index ec77aa4..38f5ce6 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -50,11 +50,7 @@ func (rw *rollingWindow) HasNext() bool { } func (rw *rollingWindow) NextWindow() Series { - window := Series{ - Name: rw.s.Name, - t: rw.s.t, - } - window.elements = rw.s.elements.Slice(rw.startIndex, rw.endIndexExclude) + window := rw.s.Slice(rw.startIndex, rw.endIndexExclude) rw.endIndexExclude++ startIndex := rw.endIndexExclude - rw.windowSize if startIndex > rw.startIndex { @@ -63,7 +59,7 @@ func (rw *rollingWindow) NextWindow() Series { return window } -//NewRollingSeries establish a rolling series +//NewRollingSeries establish a rolling Series func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { if window < 1 { panic("window must >= 1") @@ -93,7 +89,7 @@ func (s rollingSeries) Max() Series { } newS := s.Apply(maxFunc, "") - newS.Name = fmt.Sprintf("%s_RMax[w:%d]", s.Name, s.window) + newS.SetName(fmt.Sprintf("%s_RMax[w:%d]", s.Name(), s.window)) return newS } @@ -111,7 +107,7 @@ func (s rollingSeries) Min() Series { } newS := s.Apply(minFunc, "") - newS.Name = fmt.Sprintf("%s_RMin[w:%d]", s.Name, s.window) + newS.SetName(fmt.Sprintf("%s_RMin[w:%d]", s.Name(), s.window)) return newS } @@ -120,7 +116,7 @@ func (s rollingSeries) Mean() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Mean() }, Float) - newS.Name = fmt.Sprintf("%s_RMean[w:%d]", s.Name, s.window) + newS.SetName(fmt.Sprintf("%s_RMean[w:%d]", s.Name(), s.window)) return newS } @@ -131,7 +127,7 @@ func (s rollingSeries) MeanByWeights(weights []float64) Series { } weightSum := floats.Sum(weights) weightLen := len(weights) - ma := s.Apply( + newS := s.Apply( func(window Series, windowIndex int) interface{} { weightsUse := weights weightSumUse := weightSum @@ -147,7 +143,8 @@ func (s rollingSeries) MeanByWeights(weights []float64) Series { } return totalSum / weightSumUse }, Float) - return ma + newS.SetName(fmt.Sprintf("%s_MeanByWeights[w:%d,%v]", s.Name(), s.window, weights)) + return newS } // Quantile calculates the quantile value of a rollingSeries @@ -155,7 +152,7 @@ func (s rollingSeries) Quantile(p float64) Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Quantile(p) }, Float) - newS.Name = fmt.Sprintf("%s_RQuantile[w:%d, p:%f]", s.Name, s.window, p) + newS.SetName(fmt.Sprintf("%s_RQuantile[w:%d,p:%f]", s.Name(), s.window, p)) return newS } @@ -164,7 +161,7 @@ func (s rollingSeries) Median() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Median() }, Float) - newS.Name = fmt.Sprintf("%s_RMedian[w:%d]", s.Name, s.window) + newS.SetName(fmt.Sprintf("%s_RMedian[w:%d]", s.Name(), s.window)) return newS } @@ -173,7 +170,7 @@ func (s rollingSeries) StdDev() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.StdDev() }, Float) - newS.Name = fmt.Sprintf("%s_RStdDev[w:%d]", s.Name, s.window) + newS.SetName(fmt.Sprintf("%s_RStdDev[w:%d]", s.Name(), s.window)) return newS } @@ -183,7 +180,7 @@ func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, return s.Empty() } if len(t) == 0 { - t = s.t + t = s.Type() } eles := t.emptyElements(s.Len()) index := 0 @@ -197,11 +194,11 @@ func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, } index++ } - newS := Series{ - Name: fmt.Sprintf("%s_RApply[w:%d]", s.Name, s.window), + newS := &series{ + name: fmt.Sprintf("%s_RApply[w:%d]", s.Name(), s.window), elements: eles, t: t, - Err: nil, + err: nil, } return newS } \ No newline at end of file diff --git a/series/series.go b/series/series.go index 9ae9fa2..d152b65 100644 --- a/series/series.go +++ b/series/series.go @@ -13,18 +13,18 @@ import ( "gonum.org/v1/gonum/stat" ) -// Series is a data structure designed for operating on arrays of elements that +// series is a data structure designed for operating on arrays of elements that // should comply with a certain type structure. They are flexible enough that can -// be transformed to other Series types and account for missing or non valid -// elements. Most of the power of Series resides on the ability to compare and -// subset Series of different types. -type Series struct { - Name string // The name of the series +// be transformed to other series types and account for missing or non valid +// elements. Most of the power of series resides on the ability to compare and +// subset series of different types. +type series struct { + name string // The name of the series elements Elements // The values of the elements t Type // The type of the series // deprecated: use Error() instead - Err error + err error } // Elements is the interface that represents the array of elements contained on @@ -232,10 +232,19 @@ const NaN = "NaN" // Series [Bool] // Same as []bool type Indexes interface{} +var _ Series = (*series)(nil) + +func Err(err error) Series { + return &series{err: err} +} // New is the generic Series constructor func New(values interface{}, t Type, name string) Series { - ret := Series{ - Name: name, + ret := newSeries(values, t, name) + return &ret +} +func newSeries(values interface{}, t Type, name string) series { + ret := series{ + name: name, t: t, } @@ -281,7 +290,7 @@ func New(values interface{}, t Type, name string) Series { for i := 0; i < l; i++ { ret.elements.Elem(i).SetElement(v[i]) } - case Series: + case series: l := v.Len() preAlloc(l) for i := 0; i < l; i++ { @@ -308,9 +317,10 @@ func New(values interface{}, t Type, name string) Series { return ret } + func NewDefault(defaultValue interface{}, t Type, name string, len int) Series { - ret := Series{ - Name: name, + ret := &series{ + name: name, t: t, } @@ -376,34 +386,38 @@ func Bools(values interface{}) Series { } // Empty returns an empty Series of the same type -func (s Series) Empty() Series { - return New([]int{}, s.t, s.Name) +func (s series) Empty() Series { + return New([]int{}, s.t, s.name) } // Returns Error or nil if no error occured -func (s *Series) Error() error { - return s.Err +func (s *series) Error() error { + return s.err +} + +func (s *series) SetErr(err error) { + s.err = err } // Append adds new elements to the end of the Series. When using Append, the // Series is modified in place. -func (s *Series) Append(values interface{}) { - if err := s.Err; err != nil { +func (s *series) Append(values interface{}) { + if err := s.err; err != nil { return } - news := New(values, s.t, s.Name) + news := newSeries(values, s.t, s.name) s.elements = s.elements.Append(news.elements) } // Concat concatenates two series together. It will return a new Series with the // combined elements of both Series. -func (s Series) Concat(x Series) Series { - if err := s.Err; err != nil { - return s +func (s series) Concat(x Series) Series { + if err := s.err; err != nil { + return &s } - if err := x.Err; err != nil { - s.Err = fmt.Errorf("concat error: argument has errors: %v", err) - return s + if err := x.Error(); err != nil { + s.err = fmt.Errorf("concat error: argument has errors: %v", err) + return &s } y := s.Copy() y.Append(x) @@ -411,17 +425,17 @@ func (s Series) Concat(x Series) Series { } // Subset returns a subset of the series based on the given Indexes. -func (s Series) Subset(indexes Indexes) Series { - if err := s.Err; err != nil { - return s +func (s series) Subset(indexes Indexes) Series { + if err := s.err; err != nil { + return &s } idx, err := parseIndexes(s.Len(), indexes) if err != nil { - s.Err = err - return s + s.err = err + return &s } - ret := Series{ - Name: s.Name, + ret := &series{ + name: s.name, t: s.t, elements: s.elements.Get(idx...), } @@ -430,35 +444,35 @@ func (s Series) Subset(indexes Indexes) Series { // Set sets the values on the indexes of a Series and returns the reference // for itself. The original Series is modified. -func (s Series) Set(indexes Indexes, newvalues Series) Series { - if err := s.Err; err != nil { +func (s *series) Set(indexes Indexes, newvalues Series) Series { + if err := s.err; err != nil { return s } - if err := newvalues.Err; err != nil { - s.Err = fmt.Errorf("set error: argument has errors: %v", err) + if err := newvalues.Error(); err != nil { + s.err = fmt.Errorf("set error: argument has errors: %v", err) return s } idx, err := parseIndexes(s.Len(), indexes) if err != nil { - s.Err = err + s.err = err return s } if len(idx) != newvalues.Len() { - s.Err = fmt.Errorf("set error: dimensions mismatch") + s.err = fmt.Errorf("set error: dimensions mismatch") return s } for k, i := range idx { if i < 0 || i >= s.Len() { - s.Err = fmt.Errorf("set error: index out of range") + s.err = fmt.Errorf("set error: index out of range") return s } - s.elements.Elem(i).SetElement(newvalues.elements.Elem(k)) + s.elements.Elem(i).SetElement(newvalues.Elem(k)) } return s } // HasNaN checks whether the Series contain NaN elements. -func (s Series) HasNaN() bool { +func (s series) HasNaN() bool { for i := 0; i < s.Len(); i++ { if s.elements.Elem(i).IsNA() { return true @@ -468,7 +482,7 @@ func (s Series) HasNaN() bool { } // IsNaN returns an array that identifies which of the elements are NaN. -func (s Series) IsNaN() []bool { +func (s series) IsNaN() []bool { ret := make([]bool, s.Len()) for i := 0; i < s.Len(); i++ { ret[i] = s.elements.Elem(i).IsNA() @@ -477,7 +491,7 @@ func (s Series) IsNaN() []bool { } // IsNotNaN returns an array that identifies which of the elements are not NaN. -func (s Series) IsNotNaN() []bool { +func (s series) IsNotNaN() []bool { ret := make([]bool, s.Len()) for i := 0; i < s.Len(); i++ { ret[i] = !s.elements.Elem(i).IsNA() @@ -488,9 +502,9 @@ func (s Series) IsNotNaN() []bool { // Compare compares the values of a Series with other elements. To do so, the // elements with are to be compared are first transformed to a Series of the same // type as the caller. -func (s Series) Compare(comparator Comparator, comparando interface{}) Series { - if err := s.Err; err != nil { - return s +func (s series) Compare(comparator Comparator, comparando interface{}) Series { + if err := s.err; err != nil { + return &s } compareElements := func(a, b Element, c Comparator) (bool, error) { var ret bool @@ -530,7 +544,7 @@ func (s Series) Compare(comparator Comparator, comparando interface{}) Series { return Bools(bools) } - comp := New(comparando, s.t, "") + comp := newSeries(comparando, s.t, "") // In comparator comparison if comparator == In { for i := 0; i < s.Len(); i++ { @@ -540,9 +554,9 @@ func (s Series) Compare(comparator Comparator, comparando interface{}) Series { m := comp.elements.Elem(j) c, err := compareElements(e, m, Eq) if err != nil { - s = s.Empty() - s.Err = err - return s + s1 := s.Empty() + s1.SetErr(err) + return s1 } if c { b = true @@ -560,9 +574,9 @@ func (s Series) Compare(comparator Comparator, comparando interface{}) Series { e := s.elements.Elem(i) c, err := compareElements(e, comp.elements.Elem(0), comparator) if err != nil { - s = s.Empty() - s.Err = err - return s + s1 := s.Empty() + s1.SetErr(err) + return s1 } bools[i] = c } @@ -571,17 +585,17 @@ func (s Series) Compare(comparator Comparator, comparando interface{}) Series { // Multiple element comparison if s.Len() != comp.Len() { - s := s.Empty() - s.Err = fmt.Errorf("can't compare: length mismatch") - return s + s1 := s.Empty() + s1.SetErr(fmt.Errorf("can't compare: length mismatch")) + return s1 } for i := 0; i < s.Len(); i++ { e := s.elements.Elem(i) c, err := compareElements(e, comp.elements.Elem(i), comparator) if err != nil { - s = s.Empty() - s.Err = err - return s + s1 := s.Empty() + s1.SetErr(err) + return s1 } bools[i] = c } @@ -589,18 +603,18 @@ func (s Series) Compare(comparator Comparator, comparando interface{}) Series { } // Copy will return a copy of the Series. -func (s Series) Copy() Series { - ret := Series{ - Name: s.Name, +func (s series) Copy() Series { + ret := &series{ + name: s.name, t: s.t, elements: s.elements.Copy(), - Err: s.Err, + err: s.err, } return ret } // Records returns the elements of a Series as a []string -func (s Series) Records() []string { +func (s series) Records() []string { ret := make([]string, s.Len()) for i := 0; i < s.Len(); i++ { e := s.elements.Elem(i) @@ -612,7 +626,7 @@ func (s Series) Records() []string { // Float returns the elements of a Series as a []float64. If the elements can not // be converted to float64 or contains a NaN returns the float representation of // NaN. -func (s Series) Float() []float64 { +func (s series) Float() []float64 { ret := make([]float64, s.Len()) for i := 0; i < s.Len(); i++ { e := s.elements.Elem(i) @@ -623,7 +637,7 @@ func (s Series) Float() []float64 { // Int returns the elements of a Series as a []int or an error if the // transformation is not possible. -func (s Series) Int() ([]int, error) { +func (s series) Int() ([]int, error) { ret := make([]int, s.Len()) for i := 0; i < s.Len(); i++ { e := s.elements.Elem(i) @@ -638,7 +652,7 @@ func (s Series) Int() ([]int, error) { // Bool returns the elements of a Series as a []bool or an error if the // transformation is not possible. -func (s Series) Bool() ([]bool, error) { +func (s series) Bool() ([]bool, error) { ret := make([]bool, s.Len()) for i := 0; i < s.Len(); i++ { e := s.elements.Elem(i) @@ -652,26 +666,26 @@ func (s Series) Bool() ([]bool, error) { } // Type returns the type of a given series -func (s Series) Type() Type { +func (s series) Type() Type { return s.t } // Len returns the length of a given Series -func (s Series) Len() int { +func (s series) Len() int { return s.elements.Len() } // String implements the Stringer interface for Series -func (s Series) String() string { +func (s series) String() string { return fmt.Sprint(s.elements) } // Str prints some extra information about a given series -func (s Series) Str() string { +func (s series) Str() string { var ret []string // If name exists print name - if s.Name != "" { - ret = append(ret, "Name: "+s.Name) + if s.name != "" { + ret = append(ret, "Name: "+s.name) } ret = append(ret, "Type: "+fmt.Sprint(s.t)) ret = append(ret, "Length: "+fmt.Sprint(s.Len())) @@ -683,14 +697,14 @@ func (s Series) Str() string { // Val returns the value of a series for the given index. Will panic if the index // is out of bounds. -func (s Series) Val(i int) interface{} { +func (s series) Val(i int) interface{} { return s.elements.Elem(i).Val() } // Elem returns the element of a series for the given index. Will panic if the // index is out of bounds. // The index could be less than 0. When the index equals -1, Elem returns the last element of a series. -func (s Series) Elem(i int) Element { +func (s series) Elem(i int) Element { if i < 0 { return s.elements.Elem(s.Len() + i) } @@ -716,9 +730,9 @@ func parseIndexes(l int, indexes Indexes) ([]int, error) { idx = append(idx, i) } } - case Series: + case series: s := idxs - if err := s.Err; err != nil { + if err := s.err; err != nil { return nil, fmt.Errorf("indexing error: new values has errors: %v", err) } if s.HasNaN() { @@ -744,7 +758,7 @@ func parseIndexes(l int, indexes Indexes) ([]int, error) { // Order returns the indexes for sorting a Series. NaN elements are pushed to the // end by order of appearance. -func (s Series) Order(reverse bool) []int { +func (s series) Order(reverse bool) []int { var ie indexedElements var nasIdx []int for i := 0; i < s.Len(); i++ { @@ -780,20 +794,20 @@ func (e indexedElements) Less(i, j int) bool { return e[i].element.Less(e[j].ele func (e indexedElements) Swap(i, j int) { e[i], e[j] = e[j], e[i] } // StdDev calculates the standard deviation of a series -func (s Series) StdDev() float64 { +func (s series) StdDev() float64 { stdDev := stat.StdDev(s.Float(), nil) return stdDev } // Mean calculates the average value of a series -func (s Series) Mean() float64 { +func (s series) Mean() float64 { stdDev := stat.Mean(s.Float(), nil) return stdDev } // Median calculates the middle or median value, as opposed to // mean, and there is less susceptible to being affected by outliers. -func (s Series) Median() float64 { +func (s series) Median() float64 { if s.elements.Len() == 0 || s.Type() == String || s.Type() == Bool { @@ -818,7 +832,7 @@ func (s Series) Median() float64 { } // Max return the biggest element in the series -func (s Series) Max() float64 { +func (s series) Max() float64 { if s.elements.Len() == 0 || s.Type() == String { return math.NaN() } @@ -834,7 +848,7 @@ func (s Series) Max() float64 { } // MaxStr return the biggest element in a series of type String -func (s Series) MaxStr() string { +func (s series) MaxStr() string { if s.elements.Len() == 0 || s.Type() != String { return "" } @@ -850,7 +864,7 @@ func (s Series) MaxStr() string { } // Min return the lowest element in the series -func (s Series) Min() float64 { +func (s series) Min() float64 { if s.elements.Len() == 0 || s.Type() == String { return math.NaN() } @@ -866,7 +880,7 @@ func (s Series) Min() float64 { } // MinStr return the lowest element in a series of type String -func (s Series) MinStr() string { +func (s series) MinStr() string { if s.elements.Len() == 0 || s.Type() != String { return "" } @@ -884,7 +898,7 @@ func (s Series) MinStr() string { // Quantile returns the sample of x such that x is greater than or // equal to the fraction p of samples. // Note: gonum/stat panics when called with strings -func (s Series) Quantile(p float64) float64 { +func (s series) Quantile(p float64) float64 { if s.Type() == String || s.Len() == 0 { return math.NaN() } @@ -901,23 +915,23 @@ func (s Series) Quantile(p float64) float64 { // In other words it is expected that when working with a Float Series, that // the function passed in via argument `f` will not expect another type, but // instead expects to handle Element(s) of type Float. -func (s Series) Map(f MapFunction) Series { +func (s series) Map(f MapFunction) Series { eles := s.Type().emptyElements(s.Len()) for i := 0; i < s.Len(); i++ { value := f(s.elements.Elem(i), i) eles.Elem(i).SetElement(value) } - ret := Series{ - Name: s.Name, + ret := &series{ + name: s.name, elements: eles, t: s.Type(), - Err: nil, + err: nil, } return ret } //Shift series by desired number of periods and returning a new Series object. -func (s Series) Shift(periods int) Series { +func (s series) Shift(periods int) Series { if s.Len() == 0 { return s.Empty() } @@ -942,103 +956,103 @@ func (s Series) Shift(periods int) Series { //move down shiftElements = naEles.Append(s.elements.Slice(0, s.Len()-periods)) } - ret := Series{ - Name: fmt.Sprintf("%s_Shift_%d", s.Name, periods), + ret := &series{ + name: fmt.Sprintf("%s_Shift_%d", s.name, periods), elements: shiftElements, t: s.t, - Err: nil, + err: nil, } return ret } // CumProd finds the cumulative product of the first i elements in s and returning a new Series object. -func (s Series) CumProd() Series { +func (s series) CumProd() Series { dst := make([]float64, s.Len()) floats.CumProd(dst, s.Float()) - return New(dst, s.Type(), fmt.Sprintf("%s_CumProd", s.Name)) + return New(dst, s.Type(), fmt.Sprintf("%s_CumProd", s.name)) } // Prod returns the product of the elements of the Series. Returns 1 if len(s) = 0. -func (s Series) Prod() float64 { +func (s series) Prod() float64 { return floats.Prod(s.Float()) } // AddConst adds the scalar c to all of the values in Series and returning a new Series object. -func (s Series) AddConst(c float64) Series { +func (s series) AddConst(c float64) Series { dst := s.Float() floats.AddConst(c, dst) - return New(dst, s.Type(), fmt.Sprintf("(%s + %v)", s.Name, c)) + return New(dst, s.Type(), fmt.Sprintf("(%s + %v)", s.name, c)) } // AddConst multiply the scalar c to all of the values in Series and returning a new Series object. -func (s Series) MulConst(c float64) Series { +func (s series) MulConst(c float64) Series { sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(f * c) return result }) - sm.Name = fmt.Sprintf("(%s * %v)", s.Name, c) + sm.SetName(fmt.Sprintf("(%s * %v)", s.name, c)) return sm } // DivConst Div the scalar c to all of the values in Series and returning a new Series object. -func (s Series) DivConst(c float64) Series { +func (s series) DivConst(c float64) Series { sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(f / c) return result }) - sm.Name = fmt.Sprintf("(%s / %v)", s.Name, c) + sm.SetName(fmt.Sprintf("(%s / %v)", s.name, c)) return sm } -func (s Series) Add(c Series) Series { +func (s series) Add(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) floats.AddTo(dst, sf, cf) - return New(dst, Float, fmt.Sprintf("(%s + %s)", s.Name, c.Name)) + return New(dst, Float, fmt.Sprintf("(%s + %s)", s.name, c.Name())) } -func (s Series) Sub(c Series) Series { +func (s series) Sub(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) floats.SubTo(dst, sf, cf) - return New(dst, Float, fmt.Sprintf("(%s - %s)", s.Name, c.Name)) + return New(dst, Float, fmt.Sprintf("(%s - %s)", s.name, c.Name())) } -func (s Series) Mul(c Series) Series { +func (s series) Mul(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) floats.MulTo(dst, sf, cf) - return New(dst, Float, fmt.Sprintf("(%s * %s)", s.Name, c.Name)) + return New(dst, Float, fmt.Sprintf("(%s * %s)", s.name, c.Name())) } -func (s Series) Div(c Series) Series { +func (s series) Div(c Series) Series { sf := s.Float() cf := c.Float() dst := make([]float64, s.Len()) floats.DivTo(dst, sf, cf) - return New(dst, Float, fmt.Sprintf("(%s / %s)", s.Name, c.Name)) + return New(dst, Float, fmt.Sprintf("(%s / %s)", s.name, c.Name())) } -func (s Series) Abs() Series { +func (s series) Abs() Series { sm := s.Map(func(e Element, index int) Element { result := e.Copy() f := result.Float() result.Set(math.Abs(f)) return result }) - sm.Name = fmt.Sprintf("Abs(%s)", s.Name) + sm.SetName(fmt.Sprintf("Abs(%s)", s.name)) return sm } // FillNaN Fill NaN values using the specified value. -func (s Series) FillNaN(value ElementValue) { +func (s series) FillNaN(value ElementValue) { for i := 0; i < s.Len(); i++ { ele := s.Elem(i) if ele.IsNA() { @@ -1048,7 +1062,7 @@ func (s Series) FillNaN(value ElementValue) { } // FillNaNForward Fill NaN values using the last non-NaN value -func (s Series) FillNaNForward() { +func (s series) FillNaNForward() { var lastNotNaNValue ElementValue = nil for i := 0; i < s.Len(); i++ { ele := s.Elem(i) @@ -1063,7 +1077,7 @@ func (s Series) FillNaNForward() { } // FillNaNBackward fill NaN values using the next non-NaN value -func (s Series) FillNaNBackward() { +func (s series) FillNaNBackward() { var lastNotNaNValue ElementValue = nil for i := s.Len() - 1; i >= 0; i-- { ele := s.Elem(i) @@ -1077,33 +1091,33 @@ func (s Series) FillNaNBackward() { } } -func (s Series) Rolling(window int, minPeriods int) RollingSeries { - return NewRollingSeries(window, minPeriods, s) +func (s series) Rolling(window int, minPeriods int) RollingSeries { + return NewRollingSeries(window, minPeriods, &s) } // CacheAble -func (s Series) CacheAble() CacheAbleSeries { - return newCacheAbleSeries(s) +func (s series) CacheAble() Series { + return newCacheAbleSeries(&s) } //Operation for multiple series calculation func Operation(operate func(index int, eles ...Element) interface{}, seriess ...Series) (Series, error) { if len(seriess) == 0 { - return Series{}, errors.New("seriess num must > 0") + return nil, errors.New("seriess num must > 0") } sl := seriess[0].Len() maxLen := sl for i := 1; i < len(seriess); i++ { slen := seriess[i].Len() if sl != slen && slen != 1 { - return Series{}, errors.New("seriess length must be 1 or same") + return nil, errors.New("seriess length must be 1 or same") } if slen > maxLen { maxLen = slen } } - t := seriess[0].t + t := seriess[0].Type() eles := t.emptyElements(maxLen) for i := 0; i < maxLen; i++ { operateParam := make([]Element, len(seriess)) @@ -1117,17 +1131,17 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... res := operate(i, operateParam...) eles.Elem(i).Set(res) } - result := Series{ - Name: "", + result := &series{ + name: "", elements: eles, t: t, - Err: nil, + err: nil, } return result, nil } // Sum calculates the sum value of a series -func (s Series) Sum() float64 { +func (s series) Sum() float64 { if s.elements.Len() == 0 || s.Type() == String || s.Type() == Bool { return math.NaN() } @@ -1140,21 +1154,29 @@ func (s Series) Sum() float64 { } // Slice slices Series from start to end-1 index. -func (s Series) Slice(start, end int) Series { - if s.Err != nil { - return s +func (s series) Slice(start, end int) Series { + if s.err != nil { + return &s } if start > end || start < 0 || end > s.Len() { empty := s.Empty() - empty.Err = fmt.Errorf("slice index out of bounds") + empty.SetErr(fmt.Errorf("slice index out of bounds")) return empty } - ret := Series{ - Name: s.Name, + ret := &series{ + name: s.name, t: s.t, } ret.elements = s.elements.Slice(start, end) return ret } + +func (s *series) SetName(name string) { + s.name = name +} + +func (s series) Name() string { + return s.name +} \ No newline at end of file diff --git a/series/series_logic.go b/series/series_logic.go index c1163e9..8a8ee54 100644 --- a/series/series_logic.go +++ b/series/series_logic.go @@ -5,7 +5,7 @@ import ( ) -func (s Series) And(in interface{}) Series { +func (s series) And(in interface{}) Series { inSeries := New(in, Bool, "") result, err := Operation(func(index int, eles ...Element) interface{} { e0b, err := eles[0].Bool() @@ -17,14 +17,14 @@ func (s Series) And(in interface{}) Series { return nil } return e0b && e1b - }, s, inSeries) + }, &s, inSeries) if err != nil { log.Panic(err) } return result } -func (s Series) Or(in interface{}) Series { +func (s series) Or(in interface{}) Series { inSeries := New(in, Bool, "") result, err := Operation(func(index int, eles ...Element) interface{} { e0b, err := eles[0].Bool() @@ -36,7 +36,7 @@ func (s Series) Or(in interface{}) Series { return nil } return e0b || e1b - }, s, inSeries) + }, &s, inSeries) if err != nil { log.Panic(err) } diff --git a/series/series_test.go b/series/series_test.go index 2a40bc2..3b33af5 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -29,12 +29,12 @@ import ( func checkTypes(s Series) error { var types []Type for i := 0; i < s.Len(); i++ { - e := s.elements.Elem(i) + e := s.Elem(i) types = append(types, e.Type()) } for _, t := range types { - if t != s.t { - return fmt.Errorf("bad types for %v Series:\n%v", s.t, types) + if t != s.Type() { + return fmt.Errorf("bad types for %v Series:\n%v", s.Type(), types) } } return nil @@ -399,7 +399,7 @@ func TestSeries_Compare(t *testing.T) { for testnum, test := range table { a := test.series b := a.Compare(test.comparator, test.comparando) - if err := b.Err; err != nil { + if err := b.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected.Records() @@ -471,7 +471,7 @@ func TestSeries_Compare_CompFunc(t *testing.T) { a := test.series b := a.Compare(test.comparator, test.comparando) - if err := b.Err; err != nil { + if err := b.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected.Records() @@ -537,7 +537,7 @@ func TestSeries_Subset(t *testing.T) { for testnum, test := range table { a := test.series b := a.Subset(test.indexes) - if err := b.Err; err != nil { + if err := b.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected @@ -594,7 +594,7 @@ func TestSeries_Set(t *testing.T) { } for testnum, test := range table { b := test.series.Set(test.indexes, test.values) - if err := b.Err; err != nil { + if err := b.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected @@ -696,7 +696,7 @@ func TestStrings(t *testing.T) { }, } for testnum, test := range table { - if err := test.series.Err; err != nil { + if err := test.series.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected @@ -796,7 +796,7 @@ func TestInts(t *testing.T) { }, } for testnum, test := range table { - if err := test.series.Err; err != nil { + if err := test.series.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected @@ -892,7 +892,7 @@ func TestFloats(t *testing.T) { }, } for testnum, test := range table { - if err := test.series.Err; err != nil { + if err := test.series.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected @@ -988,7 +988,7 @@ func TestBools(t *testing.T) { }, } for testnum, test := range table { - if err := test.series.Err; err != nil { + if err := test.series.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } expected := test.expected @@ -1018,7 +1018,7 @@ func TestSeries_Copy(t *testing.T) { if fmt.Sprint(a) != fmt.Sprint(b) { t.Error("Different values when copying String elements") } - if err := b.Err; err != nil { + if err := b.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } if err := checkTypes(b); err != nil { @@ -1141,7 +1141,7 @@ func TestSeries_Concat(t *testing.T) { } for testnum, test := range tests { ab := test.a.Concat(test.b) - if err := ab.Err; err != nil { + if err := ab.Error(); err != nil { t.Errorf("Test:%v\nError:%v", testnum, err) } received := ab.Records() @@ -2080,7 +2080,7 @@ func TestSeries_Sum(t *testing.T) { func TestSeries_Slice(t *testing.T) { seriesWithErr := Ints([]int{}) - seriesWithErr.Err = fmt.Errorf("slice index out of bounds") + seriesWithErr.SetErr(fmt.Errorf("slice index out of bounds")) tests := []struct { j int @@ -2134,11 +2134,11 @@ func TestSeries_Slice(t *testing.T) { } } - if expected.Err != nil { - if received.Err == nil || expected.Err.Error() != received.Err.Error() { + if expected.Error() != nil { + if received.Error() == nil || expected.Error().Error() != received.Error().Error() { t.Errorf( "Test:%v\nExpected error:\n%v\nReceived:\n%v", - testnum, expected.Err, received.Err, + testnum, expected.Error(), received.Error(), ) } } From fae510611c795ccf9a0641ffb150ec9807702268 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 30 Mar 2022 23:12:22 +0800 Subject: [PATCH 38/60] cacheAbleRollingSeries --- series/cache.go | 29 +++++++++- series/cacherolling.go | 108 ++++++++++++++++++------------------ series/cacherolling_test.go | 98 ++++++++++++++++++++++++++++++++ series/cacheseries.go | 98 +++++++++++++++++++++++++++----- series/cacheseries_test.go | 13 +++-- series/readonly.go | 62 --------------------- series/series.go | 71 ++++++++++++++++++++++-- 7 files changed, 338 insertions(+), 141 deletions(-) delete mode 100644 series/readonly.go diff --git a/series/cache.go b/series/cache.go index 6c041b5..a223155 100644 --- a/series/cache.go +++ b/series/cache.go @@ -1,6 +1,7 @@ package series import ( + "strings" "sync" "time" @@ -16,14 +17,22 @@ type Cache interface { Set(k string, x interface{}) Get(k string) (interface{}, bool) Clear() + DelByKeyPrefix(keyPrefix string) } type defaultCache struct { c *cache.Cache + keys map[string]struct{} + mu sync.RWMutex } func (dc *defaultCache) Set(k string, v interface{}) { - dc.c.SetDefault(k, v) + err := dc.c.Add(k, v, cache.DefaultExpiration) + if err != nil { + dc.mu.Lock() + dc.keys[k] = struct{}{} + dc.mu.Unlock() + } } func (dc *defaultCache) Get(k string) (interface{}, bool) { @@ -32,6 +41,20 @@ func (dc *defaultCache) Get(k string) (interface{}, bool) { func (dc *defaultCache) Clear() { dc.c.Flush() + dc.mu.Lock() + dc.keys = map[string]struct{}{} + dc.mu.Unlock() +} + +func (dc *defaultCache) DelByKeyPrefix(keyPrefix string) { + dc.mu.Lock() + for key, _ := range dc.keys { + if strings.HasPrefix(key, keyPrefix) { + delete(dc.keys, key) + dc.c.Delete(key) + } + } + dc.mu.Unlock() } //InitCache @@ -39,7 +62,9 @@ func InitCache(f func() Cache) { once.Do(func() { if f == nil { c = &defaultCache{ - c: cache.New(5*time.Minute, 10*time.Minute), + c: cache.New(5*time.Minute, 10*time.Minute), + keys: map[string]struct{}{}, + mu: sync.RWMutex{}, } } else { c = f() diff --git a/series/cacherolling.go b/series/cacherolling.go index c1b0c33..73b2fb9 100644 --- a/series/cacherolling.go +++ b/series/cacherolling.go @@ -9,6 +9,7 @@ type cacheAbleRollingSeries struct { RollingSeries cacheKey string } + // NewCacheAbleRollingSeries. You should make sure that the Series will not be modified. func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeries { if len(s.Name()) == 0 { @@ -19,81 +20,80 @@ func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeri } cr := cacheAbleRollingSeries{ RollingSeries: NewRollingSeries(window, minPeriods, s.Copy()), - cacheKey: fmt.Sprintf("%s|%d|%d|%d", s.Name(), s.Len(), window, minPeriods), + cacheKey: fmt.Sprintf("%s(%d)|[w%d,p%d]", s.Name(), s.Len(), window, minPeriods), } return cr } -func(rc cacheAbleRollingSeries) Max() Series{ - cacheKey := rc.cacheKey + "_max" +func cacheOrExecuteRolling(cacheKey string, f func() Series) Series { if ret, found := c.Get(cacheKey); found { return ret.(Series) } - ret := rc.RollingSeries.Max() + res := f() + if res == nil { + return nil + } + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) c.Set(cacheKey, ret) return ret } -func(rc cacheAbleRollingSeries) Min() Series{ - cacheKey := rc.cacheKey + "_min" - if ret, found := c.Get(cacheKey); found { - return ret.(Series) - } - ret := rc.RollingSeries.Min() - c.Set(cacheKey, ret) + +func (rc cacheAbleRollingSeries) Max() Series { + cacheKey := rc.cacheKey + "_RMax" + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.Max() + }) + return ret +} +func (rc cacheAbleRollingSeries) Min() Series { + cacheKey := rc.cacheKey + "_RMin" + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.Min() + }) return ret } -func(rc cacheAbleRollingSeries) Mean() Series{ - cacheKey := rc.cacheKey + "_mean" - if ret, found := c.Get(cacheKey); found { - return ret.(Series) - } - ret := rc.RollingSeries.Mean() - c.Set(cacheKey, ret) +func (rc cacheAbleRollingSeries) Mean() Series { + cacheKey := rc.cacheKey + "_RMean" + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.Mean() + }) return ret } -func(rc cacheAbleRollingSeries) MeanByWeights(weights []float64) Series{ - cacheKey := fmt.Sprintf("%s_meanByWeights(%v)", rc.cacheKey, weights) - if ret, found := c.Get(cacheKey); found { - return ret.(Series) - } - ret := rc.RollingSeries.MeanByWeights(weights) - c.Set(cacheKey, ret) +func (rc cacheAbleRollingSeries) MeanByWeights(weights []float64) Series { + cacheKey := fmt.Sprintf("%s_RMeanByWeights(%v)", rc.cacheKey, weights) + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.MeanByWeights(weights) + }) return ret } -func(rc cacheAbleRollingSeries) Quantile(p float64) Series{ - cacheKey := fmt.Sprintf("%s_quantile(%f)", rc.cacheKey, p) - if ret, found := c.Get(cacheKey); found { - return ret.(Series) - } - ret := rc.RollingSeries.Quantile(p) - c.Set(cacheKey, ret) +func (rc cacheAbleRollingSeries) Quantile(p float64) Series { + cacheKey := fmt.Sprintf("%s_RQuantile(%f)", rc.cacheKey, p) + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.Quantile(p) + }) return ret } -func(rc cacheAbleRollingSeries) Median() Series{ - cacheKey := rc.cacheKey + "_median" - if ret, found := c.Get(cacheKey); found { - return ret.(Series) - } - ret := rc.RollingSeries.Median() - c.Set(cacheKey, ret) +func (rc cacheAbleRollingSeries) Median() Series { + cacheKey := rc.cacheKey + "_RMedian" + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.Median() + }) return ret } -func(rc cacheAbleRollingSeries) StdDev() Series{ - cacheKey := rc.cacheKey + "_stdDev" - if ret, found := c.Get(cacheKey); found { - return ret.(Series) - } - ret := rc.RollingSeries.StdDev() - c.Set(cacheKey, ret) +func (rc cacheAbleRollingSeries) StdDev() Series { + cacheKey := rc.cacheKey + "_RStdDev" + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.StdDev() + }) return ret } -func(rc cacheAbleRollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series{ - cacheKey := fmt.Sprintf("%s_apply(%v, %s)", rc.cacheKey, (*(*int64)(unsafe.Pointer(&f))), t) - if ret, found := c.Get(cacheKey); found { - return ret.(Series) - } - ret := rc.RollingSeries.Apply(f, t) - c.Set(cacheKey, ret) +func (rc cacheAbleRollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series { + cacheKey := fmt.Sprintf("%s_RApply(%v, %s)", rc.cacheKey, (*(*int64)(unsafe.Pointer(&f))), t) + + ret := cacheOrExecuteRolling(cacheKey, func() Series { + return rc.RollingSeries.Apply(f, t) + }) return ret -} \ No newline at end of file +} diff --git a/series/cacherolling_test.go b/series/cacherolling_test.go index f47efad..d2d507f 100644 --- a/series/cacherolling_test.go +++ b/series/cacherolling_test.go @@ -253,3 +253,101 @@ func TestSeries_RollingCacheApply(t *testing.T) { } } } + + +func TestSeries_RollingRollingCache(t *testing.T) { + + tests := []struct { + series Series + window int + minPeriod int + maxExpected Series + maxExpectedRolling Series + minExpected Series + minExpectedRolling Series + + }{ + { + Bools([]string{"false", "true", "false", "false", "true"}), + 2, + 1, + Bools([]string{"false", "true", "true", "false", "true"}), + Bools([]string{"false", "true", "true", "true", "true"}), + Bools([]string{"false", "false", "false", "false", "false"}), + Bools([]string{"false", "false", "false", "false", "false"}), + }, + { + Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + 3, + 2, + Floats([]string{NaN, "1.5", "1.5", "-0.337397", "1.60979", "34."}), + Floats([]string{NaN, NaN, NaN, "1.5", "1.60979", "34."}), + Floats([]string{NaN, "-3.23", "-3.23", "-3.23", "-0.380079", "-0.380079"}), + Floats([]string{NaN, NaN, NaN, "-3.23", "-3.23", "-3.23"}), + }, + { + Strings([]string{"20210618", "20200909", "20200910", "20200912", "20200911"}), + 3, + 2, + Strings([]string{NaN, "20210618", "20210618", "20200912", "20200912"}), + Strings([]string{NaN, NaN, NaN, "20210618", "20210618"}), + Strings([]string{NaN, "20200909", "20200909", "20200909", "20200910"}), + Strings([]string{NaN, NaN, NaN, "20200909", "20200909"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + 3, + 1, + Ints([]string{"23", "23", "101", "101", "101"}), + Ints([]string{"23", "23", "101", "101", "101"}), + Ints([]string{"23", "13", "13", "-64", "-64"}), + Ints([]string{"23", "13", "13", "-64", "-64"}), + }, + } + + for testnum, test := range tests { + var b Series + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) + expected := test.maxExpected.Records() + rs := test.series.CacheAble().Rolling(test.window, test.minPeriod) + b = rs.Max() + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Max:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + br := b.Rolling(test.window, test.minPeriod) + b = br.Max() + b = br.Max() + received = b.Records() + expected = test.maxExpectedRolling.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-MaxRolling:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + expected = test.minExpected.Records() + b = rs.Min() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Min:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + b = b.Rolling(test.window, test.minPeriod).Min() + received = b.Records() + expected = test.minExpectedRolling.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-MinRolling:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + + } +} \ No newline at end of file diff --git a/series/cacheseries.go b/series/cacheseries.go index e1c8779..b7a6ccd 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -10,7 +10,6 @@ import ( var _ Series = (*cacheAbleSeries)(nil) - type cacheAbleSeries struct { Series cacheKey string @@ -25,8 +24,8 @@ func newCacheAbleSeries(s Series) Series { } ret := &cacheAbleSeries{ - Series: s, - cacheKey: fmt.Sprintf("%s|%d", s.Name(), s.Len()), + Series: s, + cacheKey: fmt.Sprintf("%s(%d)", s.Name(), s.Len()), } return ret } @@ -34,7 +33,7 @@ func newCacheAbleSeries(s Series) Series { func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { cr := cacheAbleRollingSeries{ RollingSeries: NewRollingSeries(window, minPeriods, cs.Series), - cacheKey: fmt.Sprintf("%s|%d|%d", cs.cacheKey, window, minPeriods), + cacheKey: fmt.Sprintf("%s|[w%d,p%d]", cs.cacheKey, window, minPeriods), } return cr } @@ -88,21 +87,21 @@ func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, (*(*int64)(unsafe.Pointer(&f)))) } else { switch v := comparando.(type) { - case series: - if len(v.name) == 0 { + case Series: + if len(v.Name()) == 0 { panic("series must have a name") } - cacheKey = fmt.Sprintf("%s_Compare(%s, %s|%d)", cs.cacheKey, comparator, v.name, v.Len()) + cacheKey = fmt.Sprintf("%s_Compare(%s, %s|%d)", cs.cacheKey, comparator, v.Name(), v.Len()) default: switch reflect.TypeOf(comparando).Kind() { - case reflect.Slice: - return cs.Series.Compare(comparator, comparando) - default: - cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, comparando) + case reflect.Slice: + return cs.Series.Compare(comparator, comparando) + default: + cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, comparando) } } } - + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { ret := cs.Series.Compare(comparator, comparando) return ret, nil @@ -409,6 +408,79 @@ func (cs cacheAbleSeries) Elem(i int) Element { } func (cs cacheAbleSeries) Slice(start int, end int) Series { - return cs.Series.Slice(start, end) + cacheKey := fmt.Sprintf("%s_Slice(%d,%d)", cs.cacheKey, start, end) + res := cs.Series.Slice(start, end) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) + return ret +} + +func (cs *cacheAbleSeries) CacheAble() Series { + return cs +} + +func (cs *cacheAbleSeries) Set(indexes Indexes, newvalues Series) Series { + c.DelByKeyPrefix(cs.cacheKey) + return cs.Series.Set(indexes, newvalues) } +func (cs *cacheAbleSeries) Append(values interface{}) { + c.DelByKeyPrefix(cs.cacheKey) + cs.Series.Append(values) +} +func (cs *cacheAbleSeries) And(in interface{}) Series { + var cacheKey string + switch v := in.(type) { + case Series: + if len(v.Name()) == 0 { + panic("series must have a name") + } + cacheKey = fmt.Sprintf("%s_And(%s|%d)", cs.cacheKey, v.Name(), v.Len()) + default: + switch reflect.TypeOf(in).Kind() { + case reflect.Slice: + res := cs.Series.And(in) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) + return ret + default: + cacheKey = fmt.Sprintf("%s_And(%v)", cs.cacheKey, in) + } + } + + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + res := cs.Series.And(in) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) + return ret, nil + }) + return ret.(Series) +} +func (cs *cacheAbleSeries) Or(in interface{}) Series { +var cacheKey string + switch v := in.(type) { + case Series: + if len(v.Name()) == 0 { + panic("series must have a name") + } + cacheKey = fmt.Sprintf("%s_Or(%s|%d)", cs.cacheKey, v.Name(), v.Len()) + default: + switch reflect.TypeOf(in).Kind() { + case reflect.Slice: + res := cs.Series.Or(in) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) + return ret + default: + cacheKey = fmt.Sprintf("%s_Or(%v)", cs.cacheKey, in) + } + } + + ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + res := cs.Series.Or(in) + res.SetName(cacheKey) + ret := newCacheAbleSeries(res) + return ret, nil + }) + return ret.(Series) +} diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go index 10c13da..8089ef3 100644 --- a/series/cacheseries_test.go +++ b/series/cacheseries_test.go @@ -39,6 +39,10 @@ func (mc *mockCache) Clear() { mc.hitCount = 0 } +func (mc *mockCache) DelByKeyPrefix(keyPrefix string) { + mc.innerCache.DelByKeyPrefix(keyPrefix) +} + var testCache = &mockCache{ innerCache: &defaultCache{ c: cache.New(5*time.Minute, 10*time.Minute), @@ -46,6 +50,7 @@ var testCache = &mockCache{ } func TestMain(m *testing.M) { + ClearCache() InitCache(func() Cache { return testCache }) @@ -130,7 +135,7 @@ func TestCacheSeries_Map(t *testing.T) { setCount := 0 getCount := 0 hitCount := 0 - + ClearCache() for testnum, test := range tests { test.series.SetName(fmt.Sprintf("Name-%d", testnum)) tmpSeries := test.series.CacheAble() @@ -212,7 +217,7 @@ func TestCacheSeries_Map(t *testing.T) { } } if setCount != testCache.setCount { - t.Errorf("CacheInfo[setCount]:\nsetExpected:%v\nActual:%v", setCount, testCache.setCount) + t.Errorf("CacheInfo[setCount]:\nExpected:%v\nActual:%v", setCount, testCache.setCount) } if getCount != testCache.getCount { t.Errorf("CacheInfo[getCount]:\nExpected:%v\nActual:%v", getCount, testCache.getCount) @@ -223,7 +228,6 @@ func TestCacheSeries_Map(t *testing.T) { } - func TestCacheSeries_Compare(t *testing.T) { table := []struct { series Series @@ -568,6 +572,7 @@ func TestCacheSeries_Compare(t *testing.T) { Bools([]bool{false, false, true}), }, } + ClearCache() for testnum, test := range table { test.series.SetName(fmt.Sprintf("Name-%d", testnum)) a := test.series.CacheAble() @@ -594,5 +599,3 @@ func TestCacheSeries_Compare(t *testing.T) { fmt.Printf("getCount:%d, setCount:%d, hitCount:%d \n", testCache.getCount, testCache.setCount, testCache.hitCount) } - - diff --git a/series/readonly.go b/series/readonly.go deleted file mode 100644 index 3700b83..0000000 --- a/series/readonly.go +++ /dev/null @@ -1,62 +0,0 @@ -package series - -type Series interface { - Rolling(window int, minPeriods int) RollingSeries - HasNaN() bool - IsNaN() []bool - IsNotNaN() []bool - Compare(comparator Comparator, comparando interface{}) Series - Float() []float64 - Bool() ([]bool, error) - Int() ([]int, error) - - Order(reverse bool) []int - StdDev() float64 - Mean() float64 - Median() float64 - Max() float64 - MaxStr() string - Min() float64 - MinStr() string - Quantile(p float64) float64 - Map(f MapFunction) Series - Shift(periods int) Series - CumProd() Series - Prod() float64 - AddConst(c float64) Series - MulConst(c float64) Series - DivConst(c float64) Series - Add(c Series) Series - Sub(c Series) Series - Mul(c Series) Series - Div(c Series) Series - Abs() Series - Sum() float64 - - Empty() Series - Error() error - Subset(indexes Indexes) Series - - Concat(x Series) Series - Copy() Series - - Records() []string - Type() Type - Len() int - String() string - Str() string - Val(i int) interface{} - Elem(i int) Element - Slice(start, end int) Series - FillNaN(value ElementValue) - FillNaNForward() - FillNaNBackward() - CacheAble() Series - Set(indexes Indexes, newvalues Series) Series - Append(values interface{}) - Name() string - SetName(name string) - SetErr(err error) - And(in interface{}) Series - Or(in interface{}) Series -} diff --git a/series/series.go b/series/series.go index d152b65..77898a7 100644 --- a/series/series.go +++ b/series/series.go @@ -70,6 +70,67 @@ type Element interface { Type() Type } +type Series interface { + Rolling(window int, minPeriods int) RollingSeries + HasNaN() bool + IsNaN() []bool + IsNotNaN() []bool + Compare(comparator Comparator, comparando interface{}) Series + Float() []float64 + Bool() ([]bool, error) + Int() ([]int, error) + + Order(reverse bool) []int + StdDev() float64 + Mean() float64 + Median() float64 + Max() float64 + MaxStr() string + Min() float64 + MinStr() string + Quantile(p float64) float64 + Map(f MapFunction) Series + Shift(periods int) Series + CumProd() Series + Prod() float64 + AddConst(c float64) Series + MulConst(c float64) Series + DivConst(c float64) Series + Add(c Series) Series + Sub(c Series) Series + Mul(c Series) Series + Div(c Series) Series + Abs() Series + Sum() float64 + + Empty() Series + Error() error + Subset(indexes Indexes) Series + + Concat(x Series) Series + Copy() Series + + Records() []string + Type() Type + Len() int + String() string + Str() string + Val(i int) interface{} + Elem(i int) Element + Slice(start, end int) Series + FillNaN(value ElementValue) + FillNaNForward() + FillNaNBackward() + CacheAble() Series + Set(indexes Indexes, newvalues Series) Series + Append(values interface{}) + Name() string + SetName(name string) + SetErr(err error) + And(in interface{}) Series + Or(in interface{}) Series +} + // intElements is the concrete implementation of Elements for Int elements. type intElements []intElement @@ -290,11 +351,11 @@ func newSeries(values interface{}, t Type, name string) series { for i := 0; i < l; i++ { ret.elements.Elem(i).SetElement(v[i]) } - case series: + case Series: l := v.Len() preAlloc(l) for i := 0; i < l; i++ { - ret.elements.Elem(i).SetElement(v.elements.Elem(i)) + ret.elements.Elem(i).SetElement(v.Elem(i)) } default: switch reflect.TypeOf(values).Kind() { @@ -730,15 +791,15 @@ func parseIndexes(l int, indexes Indexes) ([]int, error) { idx = append(idx, i) } } - case series: + case Series: s := idxs - if err := s.err; err != nil { + if err := s.Error(); err != nil { return nil, fmt.Errorf("indexing error: new values has errors: %v", err) } if s.HasNaN() { return nil, fmt.Errorf("indexing error: indexes contain NaN") } - switch s.t { + switch s.Type() { case Int: return s.Int() case Bool: From 5ac7a3d97ad3b7ddb903ccaeac8b2d46e6e7f585 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 31 Mar 2022 18:04:15 +0800 Subject: [PATCH 39/60] cacheAbleSeries --- series/cache.go | 43 +++++++++++++++++++++++++------------- series/cacheseries.go | 15 ++++++++++++- series/cacheseries_test.go | 15 +++++++------ 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/series/cache.go b/series/cache.go index a223155..cd85399 100644 --- a/series/cache.go +++ b/series/cache.go @@ -12,21 +12,31 @@ var c Cache var once sync.Once -//Cache define rolling cache +//Cache define series cache type Cache interface { Set(k string, x interface{}) Get(k string) (interface{}, bool) Clear() - DelByKeyPrefix(keyPrefix string) + DelByKeyPrefix(keyPrefix string) int + Size() int } -type defaultCache struct { - c *cache.Cache +type seriesCache struct { + c *cache.Cache keys map[string]struct{} - mu sync.RWMutex + mu sync.RWMutex } -func (dc *defaultCache) Set(k string, v interface{}) { +func NewDefaultCache() Cache { + ch := &seriesCache{ + c: cache.New(5*time.Minute, 10*time.Minute), + keys: map[string]struct{}{}, + mu: sync.RWMutex{}, + } + return ch +} + +func (dc *seriesCache) Set(k string, v interface{}) { err := dc.c.Add(k, v, cache.DefaultExpiration) if err != nil { dc.mu.Lock() @@ -35,37 +45,40 @@ func (dc *defaultCache) Set(k string, v interface{}) { } } -func (dc *defaultCache) Get(k string) (interface{}, bool) { +func (dc *seriesCache) Size() int { + return dc.c.ItemCount() +} + +func (dc *seriesCache) Get(k string) (interface{}, bool) { return dc.c.Get(k) } -func (dc *defaultCache) Clear() { +func (dc *seriesCache) Clear() { dc.c.Flush() dc.mu.Lock() dc.keys = map[string]struct{}{} dc.mu.Unlock() } -func (dc *defaultCache) DelByKeyPrefix(keyPrefix string) { +func (dc *seriesCache) DelByKeyPrefix(keyPrefix string) int { + delCount := 0 dc.mu.Lock() - for key, _ := range dc.keys { + for key := range dc.keys { if strings.HasPrefix(key, keyPrefix) { delete(dc.keys, key) dc.c.Delete(key) + delCount++ } } dc.mu.Unlock() + return delCount } //InitCache func InitCache(f func() Cache) { once.Do(func() { if f == nil { - c = &defaultCache{ - c: cache.New(5*time.Minute, 10*time.Minute), - keys: map[string]struct{}{}, - mu: sync.RWMutex{}, - } + c = NewDefaultCache() } else { c = f() } diff --git a/series/cacheseries.go b/series/cacheseries.go index b7a6ccd..bc2226b 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -424,6 +424,19 @@ func (cs *cacheAbleSeries) Set(indexes Indexes, newvalues Series) Series { return cs.Series.Set(indexes, newvalues) } +func (cs *cacheAbleSeries) FillNaN(value ElementValue) { + c.DelByKeyPrefix(cs.cacheKey) + cs.Series.FillNaN(value) +} +func (cs *cacheAbleSeries) FillNaNForward() { + c.DelByKeyPrefix(cs.cacheKey) + cs.Series.FillNaNForward() +} +func (cs *cacheAbleSeries) FillNaNBackward() { + c.DelByKeyPrefix(cs.cacheKey) + cs.Series.FillNaNBackward() +} + func (cs *cacheAbleSeries) Append(values interface{}) { c.DelByKeyPrefix(cs.cacheKey) cs.Series.Append(values) @@ -457,7 +470,7 @@ func (cs *cacheAbleSeries) And(in interface{}) Series { return ret.(Series) } func (cs *cacheAbleSeries) Or(in interface{}) Series { -var cacheKey string + var cacheKey string switch v := in.(type) { case Series: if len(v.Name()) == 0 { diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go index 8089ef3..90e805a 100644 --- a/series/cacheseries_test.go +++ b/series/cacheseries_test.go @@ -6,9 +6,6 @@ import ( "reflect" "strings" "testing" - "time" - - "github.com/patrickmn/go-cache" ) type mockCache struct { @@ -39,14 +36,16 @@ func (mc *mockCache) Clear() { mc.hitCount = 0 } -func (mc *mockCache) DelByKeyPrefix(keyPrefix string) { - mc.innerCache.DelByKeyPrefix(keyPrefix) +func (mc *mockCache) DelByKeyPrefix(keyPrefix string) int { + return mc.innerCache.DelByKeyPrefix(keyPrefix) +} + +func (dc *mockCache) Size() int { + return dc.innerCache.Size() } var testCache = &mockCache{ - innerCache: &defaultCache{ - c: cache.New(5*time.Minute, 10*time.Minute), - }, + innerCache: NewDefaultCache(), } func TestMain(m *testing.M) { From 1b91cf9d4639aabce7c9cb6a6a0a307bb7ab090d Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 31 Mar 2022 22:20:16 +0800 Subject: [PATCH 40/60] cacheAbleSeries --- series/cacherolling.go | 4 +- series/cacheseries.go | 116 ++++++++++++++++++------------------- series/cacheseries_test.go | 64 +++++++++++++++++++- series/series.go | 4 +- 4 files changed, 124 insertions(+), 64 deletions(-) diff --git a/series/cacherolling.go b/series/cacherolling.go index 73b2fb9..653512a 100644 --- a/series/cacherolling.go +++ b/series/cacherolling.go @@ -20,7 +20,7 @@ func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeri } cr := cacheAbleRollingSeries{ RollingSeries: NewRollingSeries(window, minPeriods, s.Copy()), - cacheKey: fmt.Sprintf("%s(%d)|[w%d,p%d]", s.Name(), s.Len(), window, minPeriods), + cacheKey: fmt.Sprintf("%s[w%d,p%d]", s.Name(), window, minPeriods), } return cr } @@ -34,7 +34,7 @@ func cacheOrExecuteRolling(cacheKey string, f func() Series) Series { return nil } res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() c.Set(cacheKey, ret) return ret } diff --git a/series/cacheseries.go b/series/cacheseries.go index bc2226b..2de457f 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -25,7 +25,7 @@ func newCacheAbleSeries(s Series) Series { ret := &cacheAbleSeries{ Series: s, - cacheKey: fmt.Sprintf("%s(%d)", s.Name(), s.Len()), + cacheKey: s.Name(), } return ret } @@ -40,14 +40,14 @@ func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { func (cs cacheAbleSeries) HasNaN() bool { cacheKey := cs.cacheKey + "_HasNaN" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.HasNaN() return ret, nil }) return ret.(bool) } -func cacheOrExecuted(cacheKey string, f func() (interface{}, error)) (interface{}, error) { +func cacheOrExecute(cacheKey string, f func() (interface{}, error)) (interface{}, error) { if ret, found := c.Get(cacheKey); found { return ret, nil } @@ -60,7 +60,7 @@ func cacheOrExecuted(cacheKey string, f func() (interface{}, error)) (interface{ func (cs cacheAbleSeries) IsNaN() []bool { cacheKey := cs.cacheKey + "_IsNaN" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.IsNaN() return ret, nil }) @@ -69,7 +69,7 @@ func (cs cacheAbleSeries) IsNaN() []bool { func (cs cacheAbleSeries) IsNotNaN() []bool { cacheKey := cs.cacheKey + "_IsNotNaN" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.IsNotNaN() return ret, nil }) @@ -102,7 +102,7 @@ func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) } } - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Compare(comparator, comparando) return ret, nil }) @@ -111,7 +111,7 @@ func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) func (cs cacheAbleSeries) Float() []float64 { cacheKey := cs.cacheKey + "_Float" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Float() return ret, nil }) @@ -120,7 +120,7 @@ func (cs cacheAbleSeries) Float() []float64 { func (cs cacheAbleSeries) Order(reverse bool) []int { cacheKey := fmt.Sprintf("%s_Order(%v)", cs.cacheKey, reverse) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Order(reverse) return ret, nil }) @@ -129,7 +129,7 @@ func (cs cacheAbleSeries) Order(reverse bool) []int { func (cs cacheAbleSeries) StdDev() float64 { cacheKey := cs.cacheKey + "_StdDev" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.StdDev() return ret, nil }) @@ -138,7 +138,7 @@ func (cs cacheAbleSeries) StdDev() float64 { func (cs cacheAbleSeries) Mean() float64 { cacheKey := cs.cacheKey + "_Mean" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Mean() return ret, nil }) @@ -147,7 +147,7 @@ func (cs cacheAbleSeries) Mean() float64 { func (cs cacheAbleSeries) Median() float64 { cacheKey := cs.cacheKey + "_Median" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Median() return ret, nil }) @@ -156,7 +156,7 @@ func (cs cacheAbleSeries) Median() float64 { func (cs cacheAbleSeries) Max() float64 { cacheKey := cs.cacheKey + "_Max" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Max() return ret, nil }) @@ -165,7 +165,7 @@ func (cs cacheAbleSeries) Max() float64 { func (cs cacheAbleSeries) MaxStr() string { cacheKey := cs.cacheKey + "_MaxStr" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.MaxStr() return ret, nil }) @@ -174,7 +174,7 @@ func (cs cacheAbleSeries) MaxStr() string { func (cs cacheAbleSeries) Min() float64 { cacheKey := cs.cacheKey + "_Min" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Min() return ret, nil }) @@ -183,7 +183,7 @@ func (cs cacheAbleSeries) Min() float64 { func (cs cacheAbleSeries) MinStr() string { cacheKey := cs.cacheKey + "_MinStr" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.MinStr() return ret, nil }) @@ -192,7 +192,7 @@ func (cs cacheAbleSeries) MinStr() string { func (cs cacheAbleSeries) Quantile(p float64) float64 { cacheKey := fmt.Sprintf("%s_Quantile(%f)", cs.cacheKey, p) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Quantile(p) return ret, nil }) @@ -201,7 +201,7 @@ func (cs cacheAbleSeries) Quantile(p float64) float64 { func (cs cacheAbleSeries) Map(f MapFunction) Series { cacheKey := fmt.Sprintf("%s_Map(%v)", cs.cacheKey, (*(*int64)(unsafe.Pointer(&f)))) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Map(f) return ret, nil }) @@ -210,10 +210,10 @@ func (cs cacheAbleSeries) Map(f MapFunction) Series { func (cs cacheAbleSeries) Shift(periods int) Series { cacheKey := fmt.Sprintf("%s_Shift(%d)", cs.cacheKey, periods) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Shift(periods) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -221,10 +221,10 @@ func (cs cacheAbleSeries) Shift(periods int) Series { func (cs cacheAbleSeries) CumProd() Series { cacheKey := cs.cacheKey + "_CumProd" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.CumProd() res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -232,7 +232,7 @@ func (cs cacheAbleSeries) CumProd() Series { func (cs cacheAbleSeries) Prod() float64 { cacheKey := cs.cacheKey + "_Prod" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Prod() return ret, nil }) @@ -241,10 +241,10 @@ func (cs cacheAbleSeries) Prod() float64 { func (cs cacheAbleSeries) AddConst(c float64) Series { cacheKey := fmt.Sprintf("%s_AddConst(%f)", cs.cacheKey, c) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.AddConst(c) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -252,10 +252,10 @@ func (cs cacheAbleSeries) AddConst(c float64) Series { func (cs cacheAbleSeries) MulConst(c float64) Series { cacheKey := fmt.Sprintf("%s_MulConst(%f)", cs.cacheKey, c) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.MulConst(c) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -263,10 +263,10 @@ func (cs cacheAbleSeries) MulConst(c float64) Series { func (cs cacheAbleSeries) DivConst(c float64) Series { cacheKey := fmt.Sprintf("%s_DivConst(%f)", cs.cacheKey, c) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.DivConst(c) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -276,11 +276,11 @@ func (cs cacheAbleSeries) Add(c Series) Series { if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Add(%s|%d)", cs.cacheKey, c.Name(), c.Len()) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("%s_Add(%s)", cs.cacheKey, c.Name()) + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Add(c) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -290,11 +290,11 @@ func (cs cacheAbleSeries) Sub(c Series) Series { if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Sub(%s|%d)", cs.cacheKey, c.Name(), c.Len()) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("%s_Sub(%s)", cs.cacheKey, c.Name()) + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Sub(c) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -304,11 +304,11 @@ func (cs cacheAbleSeries) Mul(c Series) Series { if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Mul(%s|%d)", cs.cacheKey, c.Name(), c.Len()) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("%s_Mul(%s)", cs.cacheKey, c.Name()) + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Mul(c) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -318,11 +318,11 @@ func (cs cacheAbleSeries) Div(c Series) Series { if len(c.Name()) == 0 { panic("series c must have a name") } - cacheKey := fmt.Sprintf("%s_Div(%s|%d)", cs.cacheKey, c.Name(), c.Len()) - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("%s_Div(%s)", cs.cacheKey, c.Name()) + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Div(c) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -330,10 +330,10 @@ func (cs cacheAbleSeries) Div(c Series) Series { func (cs cacheAbleSeries) Abs() Series { cacheKey := cs.cacheKey + "_Abs" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Abs() res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -341,7 +341,7 @@ func (cs cacheAbleSeries) Abs() Series { func (cs cacheAbleSeries) Sum() float64 { cacheKey := cs.cacheKey + "_Sum" - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Sum() return ret, nil }) @@ -364,10 +364,10 @@ func (cs cacheAbleSeries) Concat(x Series) Series { if len(x.Name()) == 0 { panic("series x must have a name") } - cacheKey := fmt.Sprintf("%s_Concat(%s|%d)", cs.cacheKey, x.Name(), x.Len()) + cacheKey := fmt.Sprintf("%s_Concat(%s)", cs.cacheKey, x.Name()) res := cs.Series.Concat(x) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret } @@ -375,7 +375,7 @@ func (cs cacheAbleSeries) Copy() Series { cacheKey := fmt.Sprintf("%s_Copy{%s}", cs.cacheKey, uuid.NewV4().String()) res := cs.Series.Copy() res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret } @@ -411,7 +411,7 @@ func (cs cacheAbleSeries) Slice(start int, end int) Series { cacheKey := fmt.Sprintf("%s_Slice(%d,%d)", cs.cacheKey, start, end) res := cs.Series.Slice(start, end) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret } @@ -441,6 +441,7 @@ func (cs *cacheAbleSeries) Append(values interface{}) { c.DelByKeyPrefix(cs.cacheKey) cs.Series.Append(values) } + func (cs *cacheAbleSeries) And(in interface{}) Series { var cacheKey string switch v := in.(type) { @@ -448,27 +449,26 @@ func (cs *cacheAbleSeries) And(in interface{}) Series { if len(v.Name()) == 0 { panic("series must have a name") } - cacheKey = fmt.Sprintf("%s_And(%s|%d)", cs.cacheKey, v.Name(), v.Len()) + cacheKey = fmt.Sprintf("%s_And(%s)", cs.cacheKey, v.Name()) default: switch reflect.TypeOf(in).Kind() { case reflect.Slice: res := cs.Series.And(in) - res.SetName(cacheKey) - ret := newCacheAbleSeries(res) - return ret + return res default: cacheKey = fmt.Sprintf("%s_And(%v)", cs.cacheKey, in) } } - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.And(in) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) } + func (cs *cacheAbleSeries) Or(in interface{}) Series { var cacheKey string switch v := in.(type) { @@ -476,23 +476,21 @@ func (cs *cacheAbleSeries) Or(in interface{}) Series { if len(v.Name()) == 0 { panic("series must have a name") } - cacheKey = fmt.Sprintf("%s_Or(%s|%d)", cs.cacheKey, v.Name(), v.Len()) + cacheKey = fmt.Sprintf("%s_Or(%s)", cs.cacheKey, v.Name()) default: switch reflect.TypeOf(in).Kind() { case reflect.Slice: res := cs.Series.Or(in) - res.SetName(cacheKey) - ret := newCacheAbleSeries(res) - return ret + return res default: cacheKey = fmt.Sprintf("%s_Or(%v)", cs.cacheKey, in) } } - ret, _ := cacheOrExecuted(cacheKey, func() (interface{}, error) { + ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Or(in) res.SetName(cacheKey) - ret := newCacheAbleSeries(res) + ret := res.CacheAble() return ret, nil }) return ret.(Series) diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go index 90e805a..69d8f7d 100644 --- a/series/cacheseries_test.go +++ b/series/cacheseries_test.go @@ -40,7 +40,7 @@ func (mc *mockCache) DelByKeyPrefix(keyPrefix string) int { return mc.innerCache.DelByKeyPrefix(keyPrefix) } -func (dc *mockCache) Size() int { +func (dc *mockCache) Size() int { return dc.innerCache.Size() } @@ -598,3 +598,65 @@ func TestCacheSeries_Compare(t *testing.T) { fmt.Printf("getCount:%d, setCount:%d, hitCount:%d \n", testCache.getCount, testCache.setCount, testCache.hitCount) } + +func TestCacheSeries_Add(t *testing.T) { + tests := []struct { + series Series + addSeries Series + addConst float64 + expected Series + }{ + { + Floats([]float64{1.5, -3.23, -0.33, -0.38, 1.6, 34.}), + Floats([]float64{3, -6.46, -0.67, -0.76, 3.2, 68.}), + 1, + Floats([]float64{5.5, -8.69, 0, -0.14, 5.8, 103.}), + }, + { + Ints([]int{23, 13, 101, -6, -3}), + Ints([]int{28, 18, 106, -5, 2}), + 2, + Ints([]int{53, 33, 209, -9, 1}), + }, + } + + setCount := 0 + getCount := 0 + hitCount := 0 + ClearCache() + for testnum, test := range tests { + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) + test.addSeries.SetName(fmt.Sprintf("AddName-%d", testnum)) + tmpSeries := test.series.CacheAble() + var received Series + + expected := test.expected + _ = tmpSeries.Add(test.addSeries).AddConst(test.addConst) + setCount = setCount + 2 + getCount = getCount + 2 + + received = tmpSeries.Add(test.addSeries).AddConst(test.addConst) + getCount = getCount + 2 + hitCount = hitCount + 2 + + for i := 0; i < expected.Len(); i++ { + if !compareFloats(expected.Elem(i).Float(), + received.Elem(i).Float(), 6) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + } + if setCount != testCache.setCount { + t.Errorf("CacheInfo[setCount]:\nExpected:%v\nActual:%v", setCount, testCache.setCount) + } + if getCount != testCache.getCount { + t.Errorf("CacheInfo[getCount]:\nExpected:%v\nActual:%v", getCount, testCache.getCount) + } + if hitCount != testCache.hitCount { + t.Errorf("CacheInfo[hitCount]:\nExpected:%v\nActual:%v", hitCount, testCache.hitCount) + } + +} diff --git a/series/series.go b/series/series.go index 77898a7..6fc92a9 100644 --- a/series/series.go +++ b/series/series.go @@ -1018,7 +1018,7 @@ func (s series) Shift(periods int) Series { shiftElements = naEles.Append(s.elements.Slice(0, s.Len()-periods)) } ret := &series{ - name: fmt.Sprintf("%s_Shift_%d", s.name, periods), + name: fmt.Sprintf("%s_Shift(%d)", s.name, periods), elements: shiftElements, t: s.t, err: nil, @@ -1227,7 +1227,7 @@ func (s series) Slice(start, end int) Series { } ret := &series{ - name: s.name, + name: fmt.Sprintf("%s_Slice(%d,%d)", s.name, start, end), t: s.t, } ret.elements = s.elements.Slice(start, end) From 54fe127bebb8eadc1f72f68f795b8224c6663a11 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Fri, 1 Apr 2022 18:15:14 +0800 Subject: [PATCH 41/60] cacheAbleSeries --- series/cacherolling.go | 6 +-- series/cacheseries.go | 34 +++++++++----- series/series.go | 89 ++++++++++++++++++++++++++++++++++--- series/series_logic.go | 15 +++++++ series/series_logic_test.go | 17 +++++++ 5 files changed, 141 insertions(+), 20 deletions(-) diff --git a/series/cacherolling.go b/series/cacherolling.go index 653512a..7ad7e67 100644 --- a/series/cacherolling.go +++ b/series/cacherolling.go @@ -13,13 +13,13 @@ type cacheAbleRollingSeries struct { // NewCacheAbleRollingSeries. You should make sure that the Series will not be modified. func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeries { if len(s.Name()) == 0 { - panic("series must have a name") + return NewRollingSeries(window, minPeriods, s) } if c == nil { InitCache(nil) } cr := cacheAbleRollingSeries{ - RollingSeries: NewRollingSeries(window, minPeriods, s.Copy()), + RollingSeries: NewRollingSeries(window, minPeriods, s), cacheKey: fmt.Sprintf("%s[w%d,p%d]", s.Name(), window, minPeriods), } return cr @@ -91,7 +91,7 @@ func (rc cacheAbleRollingSeries) StdDev() Series { } func (rc cacheAbleRollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series { cacheKey := fmt.Sprintf("%s_RApply(%v, %s)", rc.cacheKey, (*(*int64)(unsafe.Pointer(&f))), t) - + ret := cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.Apply(f, t) }) diff --git a/series/cacheseries.go b/series/cacheseries.go index 2de457f..4c74976 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -17,7 +17,7 @@ type cacheAbleSeries struct { func newCacheAbleSeries(s Series) Series { if len(s.Name()) == 0 { - panic("series must have a name") + return s } if c == nil { InitCache(nil) @@ -33,7 +33,7 @@ func newCacheAbleSeries(s Series) Series { func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { cr := cacheAbleRollingSeries{ RollingSeries: NewRollingSeries(window, minPeriods, cs.Series), - cacheKey: fmt.Sprintf("%s|[w%d,p%d]", cs.cacheKey, window, minPeriods), + cacheKey: fmt.Sprintf("%s[w%d,p%d]", cs.cacheKey, window, minPeriods), } return cr } @@ -89,9 +89,9 @@ func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) switch v := comparando.(type) { case Series: if len(v.Name()) == 0 { - panic("series must have a name") + return cs.Series.Compare(comparator, comparando) } - cacheKey = fmt.Sprintf("%s_Compare(%s, %s|%d)", cs.cacheKey, comparator, v.Name(), v.Len()) + cacheKey = fmt.Sprintf("%s_Compare(%s, %s)", cs.cacheKey, comparator, v.Name()) default: switch reflect.TypeOf(comparando).Kind() { case reflect.Slice: @@ -103,7 +103,9 @@ func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) } ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - ret := cs.Series.Compare(comparator, comparando) + res := cs.Series.Compare(comparator, comparando) + res.SetName(cacheKey) + ret := res.CacheAble() return ret, nil }) return ret.(Series) @@ -274,7 +276,7 @@ func (cs cacheAbleSeries) DivConst(c float64) Series { func (cs cacheAbleSeries) Add(c Series) Series { if len(c.Name()) == 0 { - panic("series c must have a name") + return cs.Series.Add(c) } cacheKey := fmt.Sprintf("%s_Add(%s)", cs.cacheKey, c.Name()) ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { @@ -288,7 +290,7 @@ func (cs cacheAbleSeries) Add(c Series) Series { func (cs cacheAbleSeries) Sub(c Series) Series { if len(c.Name()) == 0 { - panic("series c must have a name") + return cs.Series.Sub(c) } cacheKey := fmt.Sprintf("%s_Sub(%s)", cs.cacheKey, c.Name()) ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { @@ -302,7 +304,7 @@ func (cs cacheAbleSeries) Sub(c Series) Series { func (cs cacheAbleSeries) Mul(c Series) Series { if len(c.Name()) == 0 { - panic("series c must have a name") + return cs.Series.Mul(c) } cacheKey := fmt.Sprintf("%s_Mul(%s)", cs.cacheKey, c.Name()) ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { @@ -316,7 +318,7 @@ func (cs cacheAbleSeries) Mul(c Series) Series { func (cs cacheAbleSeries) Div(c Series) Series { if len(c.Name()) == 0 { - panic("series c must have a name") + return cs.Series.Div(c) } cacheKey := fmt.Sprintf("%s_Div(%s)", cs.cacheKey, c.Name()) ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { @@ -362,7 +364,7 @@ func (cs cacheAbleSeries) Subset(indexes Indexes) Series { func (cs cacheAbleSeries) Concat(x Series) Series { if len(x.Name()) == 0 { - panic("series x must have a name") + return cs.Series.Concat(x) } cacheKey := fmt.Sprintf("%s_Concat(%s)", cs.cacheKey, x.Name()) res := cs.Series.Concat(x) @@ -447,7 +449,7 @@ func (cs *cacheAbleSeries) And(in interface{}) Series { switch v := in.(type) { case Series: if len(v.Name()) == 0 { - panic("series must have a name") + return cs.Series.And(in) } cacheKey = fmt.Sprintf("%s_And(%s)", cs.cacheKey, v.Name()) default: @@ -474,7 +476,7 @@ func (cs *cacheAbleSeries) Or(in interface{}) Series { switch v := in.(type) { case Series: if len(v.Name()) == 0 { - panic("series must have a name") + return cs.Series.Or(in) } cacheKey = fmt.Sprintf("%s_Or(%s)", cs.cacheKey, v.Name()) default: @@ -495,3 +497,11 @@ func (cs *cacheAbleSeries) Or(in interface{}) Series { }) return ret.(Series) } + +func (cs cacheAbleSeries) Not() Series { + cacheKey := cs.cacheKey + "_Not" + res := cs.Series.Not() + res.SetName(cacheKey) + ret := res.CacheAble() + return ret +} diff --git a/series/series.go b/series/series.go index 6fc92a9..aa61092 100644 --- a/series/series.go +++ b/series/series.go @@ -72,63 +72,134 @@ type Element interface { type Series interface { Rolling(window int, minPeriods int) RollingSeries + // HasNaN checks whether the Series contain NaN elements. HasNaN() bool + // IsNaN returns an array that identifies which of the elements are NaN. IsNaN() []bool + // IsNotNaN returns an array that identifies which of the elements are not NaN. IsNotNaN() []bool + // Compare compares the values of a Series with other elements. To do so, the + // elements with are to be compared are first transformed to a Series of the same + // type as the caller. Compare(comparator Comparator, comparando interface{}) Series + // Float returns the elements of a Series as a []float64. If the elements can not + // be converted to float64 or contains a NaN returns the float representation of + // NaN. Float() []float64 + // Bool returns the elements of a Series as a []bool or an error if the + // transformation is not possible. Bool() ([]bool, error) + // Int returns the elements of a Series as a []int or an error if the + // transformation is not possible. Int() ([]int, error) - + // Order returns the indexes for sorting a Series. NaN elements are pushed to the + // end by order of appearance. Order(reverse bool) []int + // StdDev calculates the standard deviation of a series StdDev() float64 + // Mean calculates the average value of a series Mean() float64 + // Median calculates the middle or median value, as opposed to + // mean, and there is less susceptible to being affected by outliers. Median() float64 + // Max return the biggest element in the series Max() float64 + // MaxStr return the biggest element in a series of type String MaxStr() string + // Min return the lowest element in the series Min() float64 + // MinStr return the lowest element in a series of type String MinStr() string + // Quantile returns the sample of x such that x is greater than or + // equal to the fraction p of samples. + // Note: gonum/stat panics when called with strings Quantile(p float64) float64 + // Map applies a function matching MapFunction signature, which itself + // allowing for a fairly flexible MAP implementation, intended for mapping + // the function over each element in Series and returning a new Series object. + // Function must be compatible with the underlying type of data in the Series. + // In other words it is expected that when working with a Float Series, that + // the function passed in via argument `f` will not expect another type, but + // instead expects to handle Element(s) of type Float. Map(f MapFunction) Series + //Shift series by desired number of periods and returning a new Series object. Shift(periods int) Series + // CumProd finds the cumulative product of the first i elements in s and returning a new Series object. CumProd() Series + // Prod returns the product of the elements of the Series. Returns 1 if len(s) = 0. Prod() float64 + // AddConst adds the scalar c to all of the values in Series and returning a new Series object. AddConst(c float64) Series + // AddConst multiply the scalar c to all of the values in Series and returning a new Series object. MulConst(c float64) Series + // DivConst Div the scalar c to all of the values in Series and returning a new Series object. DivConst(c float64) Series Add(c Series) Series Sub(c Series) Series Mul(c Series) Series Div(c Series) Series Abs() Series + // Sum calculates the sum value of a series Sum() float64 + // Empty returns an empty Series of the same type Empty() Series + // Returns Error or nil if no error occured Error() error + // Subset returns a subset of the series based on the given Indexes. Subset(indexes Indexes) Series - + // Concat concatenates two series together. It will return a new Series with the + // combined elements of both Series. Concat(x Series) Series + // Copy will return a copy of the Series. Copy() Series - + // Records returns the elements of a Series as a []string Records() []string + // Type returns the type of a given series Type() Type + // Len returns the length of a given Series Len() int + // String implements the Stringer interface for Series String() string + // Str prints some extra information about a given series Str() string + // Val returns the value of a series for the given index. Will panic if the index + // is out of bounds. Val(i int) interface{} + // Elem returns the element of a series for the given index. Will panic if the + // index is out of bounds. + // The index could be less than 0. When the index equals -1, Elem returns the last element of a series. Elem(i int) Element + // Slice slices Series from start to end-1 index. Slice(start, end int) Series + // FillNaN Fill NaN values using the specified value. FillNaN(value ElementValue) + // FillNaNForward Fill NaN values using the last non-NaN value FillNaNForward() + // FillNaNBackward fill NaN values using the next non-NaN value FillNaNBackward() + // CacheAble CacheAble() Series + // Set sets the values on the indexes of a Series and returns the reference + // for itself. The original Series is modified. Set(indexes Indexes, newvalues Series) Series + // Append adds new elements to the end of the Series. When using Append, the + // Series is modified in place. Append(values interface{}) Name() string SetName(name string) SetErr(err error) + //And logical operation And(in interface{}) Series + //Or logical operation Or(in interface{}) Series + //Not logical operation + Not() Series + + //Wrap define special operations for multiple Series + Wrap(ss ...Series) Wrapper + //When define conditional computation + When(whenF WhenFilterFunction) When } // intElements is the concrete implementation of Elements for Int elements. @@ -298,6 +369,7 @@ var _ Series = (*series)(nil) func Err(err error) Series { return &series{err: err} } + // New is the generic Series constructor func New(values interface{}, t Type, name string) Series { ret := newSeries(values, t, name) @@ -378,7 +450,6 @@ func newSeries(values interface{}, t Type, name string) series { return ret } - func NewDefault(defaultValue interface{}, t Type, name string, len int) Series { ret := &series{ name: name, @@ -1240,4 +1311,12 @@ func (s *series) SetName(name string) { func (s series) Name() string { return s.name -} \ No newline at end of file +} + +func (s *series) Wrap(ss ...Series) Wrapper { + return newWrapper(s, ss) +} + +func (s *series) When(whenF WhenFilterFunction) When { + return newWhen(whenF, s) +} diff --git a/series/series_logic.go b/series/series_logic.go index 8a8ee54..7084f3c 100644 --- a/series/series_logic.go +++ b/series/series_logic.go @@ -41,4 +41,19 @@ func (s series) Or(in interface{}) Series { log.Panic(err) } return result +} + +func (s series) Not() Series { + result := s.Map(func(ele Element, index int) Element { + ret := ele.Copy() + b, err := ele.Bool() + if err != nil { + ret.Set(nil) + return ret + } else { + ret.SetBool(!b) + } + return ret + }) + return result } \ No newline at end of file diff --git a/series/series_logic_test.go b/series/series_logic_test.go index 7218cfd..0186172 100644 --- a/series/series_logic_test.go +++ b/series/series_logic_test.go @@ -11,48 +11,56 @@ func TestSeries_Logic(t *testing.T) { another interface{} andExpected Series orExpected Series + notExpected Series }{ { Bools([]string{"false", "true", "false", "false", "true"}), "true", Bools([]string{"false", "true", "false", "false", "true"}), Bools([]string{"true", "true", "true", "true", "true"}), + Bools([]string{"true", "false", "true", "true", "false"}), }, { Bools([]string{"false", "true", "false", "false", "true"}), []string {"true", "false", "true", "false", "false"}, Bools([]string{"false", "false", "false", "false", "false"}), Bools([]string{"true", "true", "true", "false", "true"}), + Bools([]string{"true", "false", "true", "true", "false"}), }, { Bools([]string{"false", "true", "false", "false", "true"}), Bools([]string{"true", "false", "true", "false", "false"}), Bools([]string{"false", "false", "false", "false", "false"}), Bools([]string{"true", "true", "true", "false", "true"}), + Bools([]string{"true", "false", "true", "true", "false"}), }, { Bools([]string{"false", "true", "false", "false", "true"}), []string {"1", "0", "1", "0", "0"}, Bools([]string{"false", "false", "false", "false", "false"}), Bools([]string{"true", "true", "true", "false", "true"}), + Bools([]string{"true", "false", "true", "true", "false"}), }, { Bools([]string{"false", "true", "false", "false", "true"}), []float64 {1, 0, 1, 0, 0}, Bools([]string{"false", "false", "false", "false", "false"}), Bools([]string{"true", "true", "true", "false", "true"}), + Bools([]string{"true", "false", "true", "true", "false"}), }, { Bools([]string{"false", "true", "false", "false", "true"}), []int {1, 0, 1, 0, 0}, Bools([]string{"false", "false", "false", "false", "false"}), Bools([]string{"true", "true", "true", "false", "true"}), + Bools([]string{"true", "false", "true", "true", "false"}), }, { Bools([]string{"false", "true", "false", "false", "123"}), []int {7, 0, 1, 0, 0}, Bools([]string{"NaN", "false", "false", "false", "NaN"}), Bools([]string{"NaN", "true", "true", "false", "NaN"}), + Bools([]string{"true", "false", "true", "true", "NaN"}), }, } @@ -76,6 +84,15 @@ func TestSeries_Logic(t *testing.T) { testnum, expected, received, ) } + expected = test.notExpected.Records() + b = test.series.Not() + received = b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test-Not:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } } } From 6a0ce859363f2b072c54efb14847941d12122dee Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Fri, 1 Apr 2022 18:16:21 +0800 Subject: [PATCH 42/60] when, wrap: for special operations --- series/when.go | 34 ++++++++++++++++ series/when_test.go | 53 ++++++++++++++++++++++++ series/wrap.go | 92 ++++++++++++++++++++++++++++++++++++++++++ series/wrap_test.go | 98 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 277 insertions(+) create mode 100644 series/when.go create mode 100644 series/when_test.go create mode 100644 series/wrap.go create mode 100644 series/wrap_test.go diff --git a/series/when.go b/series/when.go new file mode 100644 index 0000000..77dc118 --- /dev/null +++ b/series/when.go @@ -0,0 +1,34 @@ +package series + +type WhenFilterFunction func(ele Element, index int) bool +type WhenApplyFunction func(newEle Element, index int) + +//When defines a conditional computation +type When interface { + //We do the operation on the elements that satisfy the condition and do nothing on the elements that dose not satisfy the condition. + Apply(f WhenApplyFunction) Series +} + +func newWhen(whenF WhenFilterFunction, s Series) When { + w := when{ + whenF: whenF, + s: s, + } + return w +} + +type when struct { + whenF WhenFilterFunction + s Series +} + +func (e when) Apply(f WhenApplyFunction) Series { + ret := e.s.Map(func(ele Element, index int) Element { + newEle := ele.Copy() + if e.whenF(ele, index) { + f(newEle, index) + } + return newEle + }) + return ret +} diff --git a/series/when_test.go b/series/when_test.go new file mode 100644 index 0000000..d0851d1 --- /dev/null +++ b/series/when_test.go @@ -0,0 +1,53 @@ +package series + +import ( + "testing" +) + +func TestSeries_When(t *testing.T) { + tests := []struct { + series Series + whenF WhenFilterFunction + whenApplyF WhenApplyFunction + expected Series + }{ + { + Floats([]float64{1.5, -3.23, -0.33, -0.38, 1.6, 34.}), + func(ele Element, index int) bool { + return index%2 == 0 + }, + func(newEle Element, index int) { + newEle.SetFloat(formatFloat(newEle.Float()+1, "%.6f")) + }, + Floats([]float64{2.5, -3.23, 0.67, -0.38, 2.6, 34.}), + }, + { + Ints([]int{23, 13, 101, -6, -3}), + func(ele Element, index int) bool { + v, _ := ele.Int() + return v < 0 + }, + func(newEle Element, index int) { + v, _ := newEle.Int() + newEle.SetInt(v + 1) + }, + Ints([]int{23, 13, 101, -5, -2}), + }, + } + + for testnum, test := range tests { + expected := test.expected + received := test.series.When(test.whenF).Apply(test.whenApplyF) + + for i := 0; i < expected.Len(); i++ { + if !compareFloats(expected.Elem(i).Float(), + received.Elem(i).Float(), 6) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + } + +} diff --git a/series/wrap.go b/series/wrap.go new file mode 100644 index 0000000..5c9cbbc --- /dev/null +++ b/series/wrap.go @@ -0,0 +1,92 @@ +package series + +//Wrapper define special operations for multiple Series +type Wrapper interface { + FloatApply(f func(thisValue float64, wrapValues []float64) float64) Series + BoolApply(f func(thisValue bool, wrapValues []bool) bool) Series +} + +//wrapper implements Wrapper +type wrapper struct { + thisSeries Series + ss []Series +} + +func newWrapper(this Series, ss []Series) Wrapper { + if len(ss) > 0 { + expectedLen := this.Len() + for i := 0; i < len(ss); i++ { + if expectedLen != ss[i].Len() { + panic("wrappered series must have the same length") + } + } + } + w := wrapper{thisSeries: this, ss: ss} + return w +} + +func (w wrapper) FloatApply(f func(thisValue float64, wrapValues []float64) float64) Series { + length := w.thisSeries.Len() + elements := make(floatElements, length) + for i := 0; i < length; i++ { + elements[i].SetFloat(f(w.thisSeries.Elem(i).Float(), rowFloats(i, w.ss))) + } + ret := &series{ + name: "", + elements: elements, + t: Float, + err: nil, + } + return ret +} + +func (w wrapper) BoolApply(f func(thisValue bool, wrapValues []bool) bool) Series { + length := w.thisSeries.Len() + elements := make(boolElements, length) + for i := 0; i < length; i++ { + thisB, err := w.thisSeries.Elem(i).Bool() + if err != nil { + return Err(err) + } + wrapBs, err := rowBools(i, w.ss) + if err != nil { + return Err(err) + } + elements[i].SetBool(f(thisB, wrapBs)) + } + ret := &series{ + name: "", + elements: elements, + t: Bool, + err: nil, + } + return ret +} + +func rowBools(index int, ss []Series) ([]bool, error) { + length := len(ss) + if length == 0 { + return nil, nil + } + ret := make([]bool, length) + var err error + for i := 0; i < length; i++ { + ret[i], err = ss[i].Elem(index).Bool() + if err != nil { + return nil, err + } + } + return ret, nil +} + +func rowFloats(index int, ss []Series) []float64 { + length := len(ss) + if length == 0 { + return nil + } + ret := make([]float64, length) + for i := 0; i < length; i++ { + ret[i] = ss[i].Elem(index).Float() + } + return ret +} diff --git a/series/wrap_test.go b/series/wrap_test.go new file mode 100644 index 0000000..3d048e2 --- /dev/null +++ b/series/wrap_test.go @@ -0,0 +1,98 @@ +package series + +import ( + "fmt" + "reflect" + "strconv" + "testing" +) + +func TestSeries_Wrap_FloatApply(t *testing.T) { + tests := []struct { + series Series + addSeries Series + subSeries Series + addConst float64 + expected Series + }{ + { + Floats([]float64{1.5, -3.23, -0.33, -0.38, 1.6, 34.}), + Floats([]float64{3, -6.46, -0.67, -0.76, 3.2, 68.}), + Floats([]float64{1, -2, -3, -4, 3.2, 5.}), + 1, + Floats([]float64{4.5, -6.69, 3, 3.86, 2.6, 98.}), + }, + { + Ints([]int{23, 13, 101, -6, -3}), + Ints([]int{28, 18, 106, -5, 2}), + Ints([]int{1, 2, 3, -4, 5}), + 2, + Ints([]int{52, 31, 206, -5, -4}), + }, + } + + for testnum, test := range tests { + expected := test.expected + received := test.series.Wrap(test.addSeries, test.subSeries).FloatApply(func(thisValue float64, wrapValues []float64) float64 { + res := thisValue + wrapValues[0] - wrapValues[1] + test.addConst + ret := formatFloat(res, "%.6f") + return ret + }) + + for i := 0; i < expected.Len(); i++ { + if !compareFloats(expected.Elem(i).Float(), + received.Elem(i).Float(), 6) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + } + +} + +func TestSeries_Wrap_BoolApply(t *testing.T) { + tests := []struct { + series Series + andSeries Series + orSeries Series + expected Series + }{ + { + Bools([]bool{false, true, false, true, false, false}), + Bools([]bool{true, false, false, true, true, false}), + Bools([]bool{true, true, false, false, false, true}), + Bools([]bool{true, true, false, true, false, true}), + }, + } + + for testnum, test := range tests { + expected := test.expected + received := test.series.Wrap(test.andSeries, test.orSeries).BoolApply(func(thisValue bool, wrapValues []bool) bool { + return thisValue && wrapValues[0] || wrapValues[1] + }) + + for i := 0; i < expected.Len(); i++ { + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + if err := checkTypes(received); err != nil { + t.Errorf( + "Test:%v\nError:%v", + testnum, err, + ) + } + } + } + +} + +func formatFloat(f float64, format string) float64 { + f1 := fmt.Sprintf(format, f) + f2, _ := strconv.ParseFloat(f1, 64) + return f2 +} From 2ac0c4462c190a537003d5f81fab6979dcb36a30 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sat, 2 Apr 2022 18:02:19 +0800 Subject: [PATCH 43/60] add comment --- series/benchmarks_test.go | 54 +++++++++++++++++++++++++++++++++---- series/cacherolling.go | 15 ----------- series/cacherolling_test.go | 20 +++++++------- series/cacheseries_test.go | 12 ++++----- series/rolling.go | 29 ++++++++++---------- series/series.go | 6 +++-- series/series_test.go | 31 +++++++++++---------- 7 files changed, 98 insertions(+), 69 deletions(-) diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index b8f30f6..da2fa1f 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -300,13 +300,13 @@ func BenchmarkSeries_RollingCacheMeanByWeights(b *testing.B) { weights []float64 }{ { - series.Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), + series.Floats(generateFloats(100000)), 3, 2, []float64{0.5, 0.3, 0.2}, }, { - series.Floats([]string{"23", "13", "101", "-64", "-3"}), + series.Floats(generateFloats(100000)), 3, 1, []float64{5, 3, 2}, @@ -316,9 +316,10 @@ func BenchmarkSeries_RollingCacheMeanByWeights(b *testing.B) { b.ResetTimer() for testnum, test := range tests { test.series.SetName(fmt.Sprintf("Name-%d", testnum)) - r := test.series.Rolling(test.window, test.minPeriod) + b.Run("Rolling-" + test.series.Name(), func(b *testing.B) { for i := 0; i < b.N; i++ { + r := test.series.Rolling(test.window, test.minPeriod) r.MeanByWeights(test.weights) } }) @@ -326,10 +327,53 @@ func BenchmarkSeries_RollingCacheMeanByWeights(b *testing.B) { b.ResetTimer() for testnum, test := range tests { test.series.SetName(fmt.Sprintf("Name-%d", testnum)) - rs := series.NewCacheAbleRollingSeries(test.window, test.minPeriod, test.series) b.Run("CacheRolling-" + test.series.Name(), func(b *testing.B) { for i := 0; i < b.N; i++ { - rs.MeanByWeights(test.weights) + r := test.series.CacheAble().Rolling(test.window, test.minPeriod) + r.MeanByWeights(test.weights) + } + }) + } +} + +func BenchmarkSeries_RollingCacheQuantile(b *testing.B) { + tests := []struct { + series series.Series + window int + minPeriod int + quantile float64 + }{ + { + series.Floats(generateFloats(2000)), + 100, + 2, + 0.15, + }, + { + series.Floats(generateFloats(2000)), + 300, + 1, + 0.93, + }, + } + + b.ResetTimer() + for testnum, test := range tests { + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) + b.Run("Rolling-" + test.series.Name(), func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := test.series.Rolling(test.window, test.minPeriod) + r.Quantile(test.quantile) + } + }) + } + b.ResetTimer() + for testnum, test := range tests { + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) + b.Run("CacheRolling-" + test.series.Name(), func(b *testing.B) { + r := test.series.CacheAble().Rolling(test.window, test.minPeriod) + for i := 0; i < b.N; i++ { + r.Quantile(test.quantile) } }) } diff --git a/series/cacherolling.go b/series/cacherolling.go index 7ad7e67..21d7549 100644 --- a/series/cacherolling.go +++ b/series/cacherolling.go @@ -10,21 +10,6 @@ type cacheAbleRollingSeries struct { cacheKey string } -// NewCacheAbleRollingSeries. You should make sure that the Series will not be modified. -func NewCacheAbleRollingSeries(window int, minPeriods int, s Series) RollingSeries { - if len(s.Name()) == 0 { - return NewRollingSeries(window, minPeriods, s) - } - if c == nil { - InitCache(nil) - } - cr := cacheAbleRollingSeries{ - RollingSeries: NewRollingSeries(window, minPeriods, s), - cacheKey: fmt.Sprintf("%s[w%d,p%d]", s.Name(), window, minPeriods), - } - return cr -} - func cacheOrExecuteRolling(cacheKey string, f func() Series) Series { if ret, found := c.Get(cacheKey); found { return ret.(Series) diff --git a/series/cacherolling_test.go b/series/cacherolling_test.go index d2d507f..7601c04 100644 --- a/series/cacherolling_test.go +++ b/series/cacherolling_test.go @@ -169,7 +169,7 @@ func TestSeries_RollingCacheMeanByWeights(t *testing.T) { for testnum, test := range tests { var b Series - test.series.SetName(fmt.Sprintf("Name-%d", testnum)) + test.series.SetName(fmt.Sprintf("Name-%d", testnum)) expected := test.meanExpected.Records() rs := test.series.CacheAble().Rolling(test.window, test.minPeriod) b = rs.MeanByWeights(test.weights) @@ -254,18 +254,16 @@ func TestSeries_RollingCacheApply(t *testing.T) { } } - func TestSeries_RollingRollingCache(t *testing.T) { tests := []struct { - series Series - window int - minPeriod int - maxExpected Series - maxExpectedRolling Series - minExpected Series - minExpectedRolling Series - + series Series + window int + minPeriod int + maxExpected Series + maxExpectedRolling Series + minExpected Series + minExpectedRolling Series }{ { Bools([]string{"false", "true", "false", "false", "true"}), @@ -350,4 +348,4 @@ func TestSeries_RollingRollingCache(t *testing.T) { } } -} \ No newline at end of file +} diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go index 69d8f7d..6707e55 100644 --- a/series/cacheseries_test.go +++ b/series/cacheseries_test.go @@ -601,10 +601,10 @@ func TestCacheSeries_Compare(t *testing.T) { func TestCacheSeries_Add(t *testing.T) { tests := []struct { - series Series - addSeries Series - addConst float64 - expected Series + series Series + addSeries Series + addConst float64 + expected Series }{ { Floats([]float64{1.5, -3.23, -0.33, -0.38, 1.6, 34.}), @@ -634,11 +634,11 @@ func TestCacheSeries_Add(t *testing.T) { _ = tmpSeries.Add(test.addSeries).AddConst(test.addConst) setCount = setCount + 2 getCount = getCount + 2 - + received = tmpSeries.Add(test.addSeries).AddConst(test.addConst) getCount = getCount + 2 hitCount = hitCount + 2 - + for i := 0; i < expected.Len(); i++ { if !compareFloats(expected.Elem(i).Float(), received.Elem(i).Float(), 6) { diff --git a/series/rolling.go b/series/rolling.go index 38f5ce6..5f1acad 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -5,15 +5,24 @@ import ( "gonum.org/v1/gonum/floats" ) -//RollingSeries define rolling methods +//RollingSeries defines methods of a rolling series type RollingSeries interface { + // Max return the biggest element in the rolling series Max() Series + // Min return the lowest element in the rolling series Min() Series + // Mean calculates the average value of the rolling series Mean() Series + // Mean calculates the weighted average value of the rolling series MeanByWeights(weights []float64) Series + // Quantile returns the sample of x such that x is greater than or + // equal to the fraction p of samples. Quantile(p float64) Series + // Median calculates the middle or median value of the rolling series Median() Series + // StdDev calculates the standard deviation of the rolling series StdDev() Series + // Apply applies a function for the rolling series Apply(f func(window Series, windowIndex int) interface{}, t Type) Series } @@ -30,10 +39,10 @@ type RollingWindow interface { } type rollingWindow struct { - startIndex int - endIndexExclude int - windowSize int - s Series + startIndex int + endIndexExclude int + windowSize int + s Series } func NewRollingWindow(s Series, windowSize int) RollingWindow { @@ -74,7 +83,6 @@ func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { } } -// Max return the biggest element in the rollingSeries func (s rollingSeries) Max() Series { var maxFunc func(window Series, windowIndex int) interface{} @@ -93,7 +101,6 @@ func (s rollingSeries) Max() Series { return newS } -// Min return the lowest element in the rollingSeries func (s rollingSeries) Min() Series { var minFunc func(window Series, windowIndex int) interface{} if s.Type() == String { @@ -111,7 +118,6 @@ func (s rollingSeries) Min() Series { return newS } -// Mean calculates the average value of a rollingSeries func (s rollingSeries) Mean() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Mean() @@ -120,7 +126,6 @@ func (s rollingSeries) Mean() Series { return newS } -// MeanByWeights calculates the weighted average value of a rollingSeries func (s rollingSeries) MeanByWeights(weights []float64) Series { if s.window != len(weights) { panic("window must be equal to weights length") @@ -147,7 +152,6 @@ func (s rollingSeries) MeanByWeights(weights []float64) Series { return newS } -// Quantile calculates the quantile value of a rollingSeries func (s rollingSeries) Quantile(p float64) Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Quantile(p) @@ -156,7 +160,6 @@ func (s rollingSeries) Quantile(p float64) Series { return newS } -// Median calculates the median value of a rollingSeries func (s rollingSeries) Median() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Median() @@ -165,7 +168,6 @@ func (s rollingSeries) Median() Series { return newS } -// StdDev calculates the standard deviation of a rollingSeries func (s rollingSeries) StdDev() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.StdDev() @@ -174,7 +176,6 @@ func (s rollingSeries) StdDev() Series { return newS } -// Apply for extend the computation func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series { if s.Len() == 0 { return s.Empty() @@ -201,4 +202,4 @@ func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, err: nil, } return newS -} \ No newline at end of file +} diff --git a/series/series.go b/series/series.go index aa61092..839d922 100644 --- a/series/series.go +++ b/series/series.go @@ -178,7 +178,8 @@ type Series interface { FillNaNForward() // FillNaNBackward fill NaN values using the next non-NaN value FillNaNBackward() - // CacheAble + // CacheAble returns a cacheable series and the returned series's calculation will be cached in case of repeate calcution. + // You should make sure that the series will not be modified and has a unique name. CacheAble() Series // Set sets the values on the indexes of a Series and returns the reference // for itself. The original Series is modified. @@ -1227,7 +1228,8 @@ func (s series) Rolling(window int, minPeriods int) RollingSeries { return NewRollingSeries(window, minPeriods, &s) } -// CacheAble +// CacheAble returns a cacheable series and the returned series's calculation will be cached in case of repeate calcution. +// You should make sure that the series will not be modified and has a unique name. func (s series) CacheAble() Series { return newCacheAbleSeries(&s) } diff --git a/series/series_test.go b/series/series_test.go index 3b33af5..e82d282 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1633,11 +1633,11 @@ func TestSeries_Map(t *testing.T) { doubleFloat64 := func(e Element, index int) Element { result := e.Copy() - result.Set(result.Float() * 2) + result.Set(result.Float() * 2) return Element(result) } - // and two booleans + // and two booleans and := func(e Element, index int) Element { result := e.Copy() b, err := result.Bool() @@ -1729,9 +1729,9 @@ func TestSeries_Map(t *testing.T) { } } func TestSeries_Shift(t *testing.T) { - tests := []struct { + tests := []struct { series Series - shift int + shift int expected Series }{ { @@ -1752,9 +1752,9 @@ func TestSeries_Shift(t *testing.T) { { Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), 1, - Floats([]string{ "NaN", "1.5", "-3.23", "-0.337397", "-0.380079", "1.60979"}), + Floats([]string{"NaN", "1.5", "-3.23", "-0.337397", "-0.380079", "1.60979"}), }, - + { Strings([]string{"XyZApple", "XyZBanana", "XyZCitrus", "XyZDragonfruit"}), 2, @@ -1802,7 +1802,7 @@ func TestSeries_Shift(t *testing.T) { } func TestSeries_CumProd(t *testing.T) { - tests := []struct { + tests := []struct { series Series expected Series }{ @@ -1829,7 +1829,7 @@ func TestSeries_CumProd(t *testing.T) { } } func TestSeries_Prod(t *testing.T) { - tests := []struct { + tests := []struct { series Series expected float64 }{ @@ -1856,9 +1856,9 @@ func TestSeries_Prod(t *testing.T) { } func TestSeries_AddConst(t *testing.T) { - tests := []struct { + tests := []struct { series Series - c float64 + c float64 expected Series }{ { @@ -1887,9 +1887,9 @@ func TestSeries_AddConst(t *testing.T) { } func TestSeries_MulConst(t *testing.T) { - tests := []struct { + tests := []struct { series Series - c float64 + c float64 expected Series }{ { @@ -1918,7 +1918,7 @@ func TestSeries_MulConst(t *testing.T) { } func TestSeries_FillNA(t *testing.T) { - tests := []struct { + tests := []struct { series Series nanValue ElementValue expected Series @@ -1959,7 +1959,7 @@ func TestSeries_FillNA(t *testing.T) { } func TestSeries_FillNaNForward(t *testing.T) { - tests := []struct { + tests := []struct { series Series expected Series }{ @@ -1995,7 +1995,7 @@ func TestSeries_FillNaNForward(t *testing.T) { } func TestSeries_FillNaNBackward(t *testing.T) { - tests := []struct { + tests := []struct { series Series expected Series }{ @@ -2030,7 +2030,6 @@ func TestSeries_FillNaNBackward(t *testing.T) { } } - func TestSeries_Sum(t *testing.T) { tests := []struct { series Series From d1a94cd29180fa7059e3a92fc43069e013fc974a Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Mon, 4 Apr 2022 22:40:48 +0800 Subject: [PATCH 44/60] fix bug:seriesCache.set --- series/cache.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/series/cache.go b/series/cache.go index cd85399..4eead4c 100644 --- a/series/cache.go +++ b/series/cache.go @@ -38,7 +38,7 @@ func NewDefaultCache() Cache { func (dc *seriesCache) Set(k string, v interface{}) { err := dc.c.Add(k, v, cache.DefaultExpiration) - if err != nil { + if err == nil { dc.mu.Lock() dc.keys[k] = struct{}{} dc.mu.Unlock() From d85a663279249b27c9a3c30731c66fd5246780b6 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Mon, 9 May 2022 17:53:25 +0800 Subject: [PATCH 45/60] Series.Filter --- series/series.go | 46 ++++++++++++++++++++++++++++++++++ series/series_test.go | 57 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/series/series.go b/series/series.go index 839d922..d29ff0b 100644 --- a/series/series.go +++ b/series/series.go @@ -35,6 +35,7 @@ type Elements interface { Slice(start, end int) Elements Get(indexs ...int) Elements Append(Elements) Elements + AppendOne(Element) Elements Copy() Elements } @@ -201,6 +202,9 @@ type Series interface { Wrap(ss ...Series) Wrapper //When define conditional computation When(whenF WhenFilterFunction) When + + //Filter Select the elements that match the FilterFunction + Filter(ff FilterFunction) Series } // intElements is the concrete implementation of Elements for Int elements. @@ -221,6 +225,13 @@ func (e intElements) Append(elements Elements) Elements { ret := append(e, eles...) return ret } +func (e intElements) AppendOne(element Element) Elements { + ele := element.(*intElement) + ret := append(e, *ele) + return ret +} + + func (e intElements) Copy() Elements { elements := make(intElements, len(e)) copy(elements, e) @@ -245,6 +256,11 @@ func (e stringElements) Append(elements Elements) Elements { ret := append(e, eles...) return ret } +func (e stringElements) AppendOne(element Element) Elements { + ele := element.(*stringElement) + ret := append(e, *ele) + return ret +} func (e stringElements) Copy() Elements { elements := make(stringElements, len(e)) copy(elements, e) @@ -269,6 +285,11 @@ func (e floatElements) Append(elements Elements) Elements { ret := append(e, eles...) return ret } +func (e floatElements) AppendOne(element Element) Elements { + ele := element.(*floatElement) + ret := append(e, *ele) + return ret +} func (e floatElements) Copy() Elements { elements := make(floatElements, len(e)) copy(elements, e) @@ -293,6 +314,11 @@ func (e boolElements) Append(elements Elements) Elements { ret := append(e, eles...) return ret } +func (e boolElements) AppendOne(element Element) Elements { + ele := element.(*boolElement) + ret := append(e, *ele) + return ret +} func (e boolElements) Copy() Elements { elements := make(boolElements, len(e)) copy(elements, e) @@ -1322,3 +1348,23 @@ func (s *series) Wrap(ss ...Series) Wrapper { func (s *series) When(whenF WhenFilterFunction) When { return newWhen(whenF, s) } + +//FilterFunction Select the elements that match the FilterFunction +type FilterFunction func(ele Element, index int) bool + +func (s *series) Filter(ff FilterFunction) Series { + eles := s.Type().emptyElements(0) + for i := 0; i < s.Len(); i++ { + ele := s.elements.Elem(i) + if ff(ele, i) { + eles = eles.AppendOne(ele) + } + } + ret := &series{ + name: s.name, + elements: eles, + t: s.Type(), + err: nil, + } + return ret +} \ No newline at end of file diff --git a/series/series_test.go b/series/series_test.go index e82d282..33589b9 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -2143,3 +2143,60 @@ func TestSeries_Slice(t *testing.T) { } } } + + +func TestSeries_Filter(t *testing.T) { + tests := []struct { + ff FilterFunction + series Series + expected Series + }{ + { + func(ele Element, index int) bool { + v, _ := ele.Int() + return v <= 3 + }, + Ints([]int{1, 2, 3, 4, 5}), + Ints([]int{1, 2, 3}), + }, + { + func(ele Element, index int) bool { + v, _ := ele.Int() + return v == 3 + }, + Ints([]int{1, 2, 3, 4, 5}), + Ints([]int{3}), + }, + { + func(ele Element, index int) bool { + return index % 2 == 0 + }, + Ints([]int{1, 2, 3, 4, 5}), + Ints([]int{1, 3, 5}), + }, + { + func(ele Element, index int) bool { + return !ele.IsNA() + }, + Ints([]string{"1", NaN, "3", NaN, "5"}), + Ints([]int{1, 3, 5}), + }, + } + + for testnum, test := range tests { + expected := test.expected + received := test.series.Filter(test.ff) + + for i := 0; i < expected.Len(); i++ { + if strings.Compare(expected.Elem(i).String(), + received.Elem(i).String()) != 0 { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + } +} + + From 029d698ed40999601d0ac370e6e92f6b5eded130 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 11 May 2022 18:01:40 +0800 Subject: [PATCH 46/60] add:DataQuantile() --- series/series.go | 26 ++++++++++++++++++-- series/series_test.go | 56 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/series/series.go b/series/series.go index d29ff0b..5932a36 100644 --- a/series/series.go +++ b/series/series.go @@ -115,6 +115,8 @@ type Series interface { // equal to the fraction p of samples. // Note: gonum/stat panics when called with strings Quantile(p float64) float64 + // DataQuantile returns the data quantile in the series + DataQuantile(data float64) float64 // Map applies a function matching MapFunction signature, which itself // allowing for a fairly flexible MAP implementation, intended for mapping // the function over each element in Series and returning a new Series object. @@ -231,7 +233,6 @@ func (e intElements) AppendOne(element Element) Elements { return ret } - func (e intElements) Copy() Elements { elements := make(intElements, len(e)) copy(elements, e) @@ -1067,6 +1068,27 @@ func (s series) Quantile(p float64) float64 { return stat.Quantile(p, stat.Empirical, ordered, nil) } +// DataQuantile returns the data quantile in the series +func (s series) DataQuantile(data float64) float64 { + if s.Type() == String || s.Len() == 0 { + return math.NaN() + } + + ordered := s.Subset(s.Order(false)).Float() + + length := len(ordered) + if length%2 == 1 { + length = length + 1 + } + + for i, d := range ordered { + if data < d { + return float64(i) / float64(length) + } + } + return 1 +} + // Map applies a function matching MapFunction signature, which itself // allowing for a fairly flexible MAP implementation, intended for mapping // the function over each element in Series and returning a new Series object. @@ -1367,4 +1389,4 @@ func (s *series) Filter(ff FilterFunction) Series { err: nil, } return ret -} \ No newline at end of file +} diff --git a/series/series_test.go b/series/series_test.go index 33589b9..d26aa32 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1596,6 +1596,62 @@ func TestSeries_Quantile(t *testing.T) { } } + +func TestSeries_DataQuantile(t *testing.T) { + tests := []struct { + series Series + data float64 + expected float64 + }{ + { + Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}), + 9, + 0.9, + }, + { + Floats([]float64{3.141592, math.Sqrt(3), 2.718281, math.Sqrt(2)}), + 3.141592, + 1, + }, + { + Floats([]float64{1.0, 2.0, 3.0}), + 2.0, + 0.5, + }, + { + Floats([]float64{1.0, 2.0, 3.0, 4.0}), + 2.0, + 0.5, + }, + { + Strings([]string{"A", "B", "C", "D"}), + 0.25, + math.NaN(), + }, + { + Bools([]bool{false, false, false, true}), + 0.0, + 0.75, + }, + { + Floats([]float64{}), + 0.50, + math.NaN(), + }, + } + + for testnum, test := range tests { + received := test.series.DataQuantile(test.data) + expected := test.expected + if !compareFloats(received, expected, 6) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v\nSeries:\n%v", + testnum, expected, received, test.series.Float(), + ) + } + } +} + func TestSeries_Map(t *testing.T) { tests := []struct { series Series From 3b9f6be10fe519b3ed0d720109f60e9fd531dda5 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 12 May 2022 17:33:51 +0800 Subject: [PATCH 47/60] add: rolling Quantile --- series/rolling.go | 82 ++++++++++++++++++++++++- series/series.go | 60 ++++++++++++++++++ series/series_test.go | 140 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 273 insertions(+), 9 deletions(-) diff --git a/series/rolling.go b/series/rolling.go index 5f1acad..4670083 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -15,15 +15,24 @@ type RollingSeries interface { Mean() Series // Mean calculates the weighted average value of the rolling series MeanByWeights(weights []float64) Series - // Quantile returns the sample of x such that x is greater than or - // equal to the fraction p of samples. + // Quantile returns the quantile of the window of the rolling series. Quantile(p float64) Series + // Quantiles can be computed in batches. + Quantiles(ps ...float64) []Series + // QuantileRolling scrolls to calculate the quantile in the rolling series. + // the p's element corresponds to the window of the rolling series one by one. + QuantileRolling(p Series) Series + // DataQuantile scrolls to calculate the current data's quantile in the rolling series. + // the data's element corresponds to the window of the rolling series one by one. + DataQuantileRolling(data Series) Series // Median calculates the middle or median value of the rolling series Median() Series // StdDev calculates the standard deviation of the rolling series StdDev() Series // Apply applies a function for the rolling series Apply(f func(window Series, windowIndex int) interface{}, t Type) Series + //Iterate iterates the rolling series, the window series is nil when minPeriods is less than the window size + Iterate(f func(window Series, windowIndex int)) } type rollingSeries struct { @@ -148,7 +157,7 @@ func (s rollingSeries) MeanByWeights(weights []float64) Series { } return totalSum / weightSumUse }, Float) - newS.SetName(fmt.Sprintf("%s_MeanByWeights[w:%d,%v]", s.Name(), s.window, weights)) + newS.SetName(fmt.Sprintf("%s_RMeanByWeights[w:%d,%v]", s.Name(), s.window, weights)) return newS } @@ -160,6 +169,59 @@ func (s rollingSeries) Quantile(p float64) Series { return newS } +func (s rollingSeries) Quantiles(ps ...float64) []Series { + ret := make([]Series, len(ps)) + + for i := 0; i < len(ps); i++ { + ret[i] = &series{ + name: fmt.Sprintf("%s_RQuantile[w:%d,p:%f]", s.Name(), s.window, ps[i]), + elements: Float.emptyElements(s.Len()), + t: Float, + err: nil, + } + } + + s.Iterate(func(window Series, windowIndex int) { + if window == nil { + for i := 0; i < len(ps); i++ { + ret[i].Elem(windowIndex).SetString(NaN) + } + } else { + qs := window.Quantiles(ps...) + for i := 0; i < len(ps); i++ { + ret[i].Elem(windowIndex).SetFloat(qs[i]) + } + } + }) + return ret +} + +func (s rollingSeries) QuantileRolling(p Series) Series { + newS := s.Apply(func(window Series, windowIndex int) interface{} { + ele := p.Elem(windowIndex) + if ele.IsNA() { + return NaN + } + thisP := ele.Float() + return window.Quantile(thisP) + }, Float) + newS.SetName(fmt.Sprintf("%s_RQuantileRolling[w:%d,p:%s]", s.Name(), s.window, p.Name())) + return newS +} + +func (s rollingSeries) DataQuantileRolling(data Series) Series { + newS := s.Apply(func(window Series, windowIndex int) interface{} { + ele := data.Elem(windowIndex) + if ele.IsNA() { + return NaN + } + thisData := ele.Float() + return window.DataQuantile(thisData) + }, Float) + newS.SetName(fmt.Sprintf("%s_RDataQuantileRolling[w:%d,d:%s]", s.Name(), s.window, data.Name())) + return newS +} + func (s rollingSeries) Median() Series { newS := s.Apply(func(window Series, windowIndex int) interface{} { return window.Median() @@ -203,3 +265,17 @@ func (s rollingSeries) Apply(f func(window Series, windowIndex int) interface{}, } return newS } + +func (s rollingSeries) Iterate(f func(window Series, windowIndex int)) { + index := 0 + rw := NewRollingWindow(s.Series, s.window) + for rw.HasNext() { + window := rw.NextWindow() + if window.Len() >= s.minPeriods { + f(window, index) + } else { + f(nil, index) + } + index++ + } +} diff --git a/series/series.go b/series/series.go index 5932a36..eb2b854 100644 --- a/series/series.go +++ b/series/series.go @@ -115,8 +115,10 @@ type Series interface { // equal to the fraction p of samples. // Note: gonum/stat panics when called with strings Quantile(p float64) float64 + Quantiles(ps ...float64) []float64 // DataQuantile returns the data quantile in the series DataQuantile(data float64) float64 + DataQuantiles(datas ...float64) []float64 // Map applies a function matching MapFunction signature, which itself // allowing for a fairly flexible MAP implementation, intended for mapping // the function over each element in Series and returning a new Series object. @@ -1062,12 +1064,44 @@ func (s series) Quantile(p float64) float64 { if s.Type() == String || s.Len() == 0 { return math.NaN() } + if p == 0 { + return s.Min() + } + if p == 1 { + return s.Max() + } ordered := s.Subset(s.Order(false)).Float() return stat.Quantile(p, stat.Empirical, ordered, nil) } +func (s series) Quantiles(ps ...float64) []float64 { + if s.Type() == String || s.Len() == 0 { + return nil + } + + ret := make([]float64, len(ps)) + + var ordered []float64 + for i := 0; i < len(ps); i++ { + if ps[i] == 0 { + ret[i] = s.Min() + continue + } + if ps[i] == 1 { + ret[i] = s.Max() + continue + } + if ordered == nil { + ordered = s.Subset(s.Order(false)).Float() + } + ret[i] = stat.Quantile(ps[i], stat.Empirical, ordered, nil) + } + + return ret +} + // DataQuantile returns the data quantile in the series func (s series) DataQuantile(data float64) float64 { if s.Type() == String || s.Len() == 0 { @@ -1081,6 +1115,32 @@ func (s series) DataQuantile(data float64) float64 { length = length + 1 } + ret := dataQuantile(data , ordered, length) + return ret +} + +func (s series) DataQuantiles(datas ...float64) []float64 { + if s.Type() == String || s.Len() == 0 { + return nil + } + + ordered := s.Subset(s.Order(false)).Float() + + length := len(ordered) + if length%2 == 1 { + length = length + 1 + } + + ret := make([]float64, len(datas)) + + for j := 0; j < len(datas); j++ { + ret[j] = dataQuantile(datas[j] , ordered, length) + } + + return ret +} + +func dataQuantile(data float64, ordered []float64, length int) float64 { for i, d := range ordered { if data < d { return float64(i) / float64(length) diff --git a/series/series_test.go b/series/series_test.go index d26aa32..0523da3 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1567,6 +1567,16 @@ func TestSeries_Quantile(t *testing.T) { 0.5, 2.0, }, + { + Floats([]float64{1.0, 2.0, 3.0}), + 0.0, + 1.0, + }, + { + Floats([]float64{1.0, 2.0, 3.0}), + 1.0, + 3.0, + }, { Strings([]string{"A", "B", "C", "D"}), 0.25, @@ -1596,11 +1606,69 @@ func TestSeries_Quantile(t *testing.T) { } } +func TestSeries_Quantiles(t *testing.T) { + tests := []struct { + series Series + ps []float64 + expected []float64 + }{ + { + Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}), + []float64{0.9, 0.8, 0.4, 0.6, 0.0}, + []float64{9, 8, 4, 6, 1}, + }, + { + Floats([]float64{3.141592, math.Sqrt(3), 2.718281, math.Sqrt(2)}), + []float64{0.8, 1}, + []float64{3.141592, 3.141592}, + }, + { + Floats([]float64{1.0, 2.0, 3.0}), + []float64{0.5, 0.1, 1.0}, + []float64{2.0, 1.0, 3.0}, + }, + { + Strings([]string{"A", "B", "C", "D"}), + []float64{0.25}, + nil, + }, + { + Bools([]bool{false, false, false, true}), + []float64{1.0, 0.75, 0.0}, + []float64{1.0, 0.0, 0.0}, + }, + { + Floats([]float64{}), + []float64{0.50}, + nil, + }, + } + + for testnum, test := range tests { + received := test.series.Quantiles(test.ps...) + expected := test.expected + if len(expected) != len(received) { + t.Errorf( + "Test:%v\nExpected length:\n%v\nReceived length:\n%v", + testnum, len(expected), len(received), + ) + } + for i := 0; i < len(received); i++ { + if !compareFloats(received[i], expected[i], 6) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } + + } +} func TestSeries_DataQuantile(t *testing.T) { tests := []struct { series Series - data float64 + data float64 expected float64 }{ { @@ -1652,6 +1720,69 @@ func TestSeries_DataQuantile(t *testing.T) { } } +func TestSeries_DataQuantiles(t *testing.T) { + tests := []struct { + series Series + datas []float64 + expected []float64 + }{ + { + Ints([]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}), + []float64{9, 8, 4, 6}, + []float64{0.9, 0.8, 0.4, 0.6}, + }, + { + Floats([]float64{3.141592, math.Sqrt(3), 2.718281, math.Sqrt(2)}), + []float64{3.141592, 2.718281}, + []float64{1, 0.75}, + }, + { + Floats([]float64{1.0, 2.0, 3.0}), + []float64{2.0, 3.0, 1.0}, + []float64{0.5, 1, 0.25}, + }, + { + Floats([]float64{1.0, 2.0, 3.0, 4.0}), + []float64{2.0, 3.0}, + []float64{0.5, 0.75}, + }, + { + Strings([]string{"A", "B", "C", "D"}), + []float64{0.25}, + nil, + }, + { + Bools([]bool{false, false, false, true}), + []float64{0.0, 1.0}, + []float64{0.75, 1}, + }, + { + Floats([]float64{}), + []float64{0.50}, + nil, + }, + } + + for testnum, test := range tests { + received := test.series.DataQuantiles(test.datas...) + expected := test.expected + if len(expected) != len(received) { + t.Errorf( + "Test:%v\nExpected length:\n%v\nReceived length:\n%v", + testnum, len(expected), len(received), + ) + } + for i := 0; i < len(received); i++ { + if !compareFloats(received[i], expected[i], 6) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v\nSeries:\n%v", + testnum, expected, received, test.series.Float(), + ) + } + } + } +} + func TestSeries_Map(t *testing.T) { tests := []struct { series Series @@ -2200,10 +2331,9 @@ func TestSeries_Slice(t *testing.T) { } } - func TestSeries_Filter(t *testing.T) { tests := []struct { - ff FilterFunction + ff FilterFunction series Series expected Series }{ @@ -2225,7 +2355,7 @@ func TestSeries_Filter(t *testing.T) { }, { func(ele Element, index int) bool { - return index % 2 == 0 + return index%2 == 0 }, Ints([]int{1, 2, 3, 4, 5}), Ints([]int{1, 3, 5}), @@ -2254,5 +2384,3 @@ func TestSeries_Filter(t *testing.T) { } } } - - From 400ca1d7cbffe318ff909e7621d09b280e7e5159 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Mon, 16 May 2022 18:11:26 +0800 Subject: [PATCH 48/60] DataFrame.Slice --- dataframe/dataframe.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 189a764..44b0428 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -303,6 +303,27 @@ func (df DataFrame) Subset(indexes series.Indexes) DataFrame { } } +func (df DataFrame) Slice(start, end int) DataFrame { + if df.Err != nil { + return df + } + columns := make([]series.Series, df.ncols) + for i, column := range df.columns { + s := column.Slice(start, end) + columns[i] = s + columns[i].SetName(column.Name()) + } + nrows, ncols, err := checkColumnsDimensions(columns...) + if err != nil { + return DataFrame{Err: err} + } + return DataFrame{ + columns: columns, + ncols: ncols, + nrows: nrows, + } +} + // SelectIndexes are the supported indexes used for the DataFrame.Select method. Currently supported are: // // int // Matches the given index number From 09e42af94b02195856be94704e76dbf7b658a421 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 17 May 2022 18:13:21 +0800 Subject: [PATCH 49/60] optimize Series.DataQuantile(s) --- series/series.go | 28 +++++++++++++++++++++------- series/type-bool.go | 2 +- series/type-float.go | 8 ++++++-- series/type-int.go | 5 ++--- series/type-string.go | 14 +++++++++++--- 5 files changed, 41 insertions(+), 16 deletions(-) diff --git a/series/series.go b/series/series.go index eb2b854..4c6b575 100644 --- a/series/series.go +++ b/series/series.go @@ -1082,15 +1082,15 @@ func (s series) Quantiles(ps ...float64) []float64 { } ret := make([]float64, len(ps)) - + var ordered []float64 for i := 0; i < len(ps); i++ { if ps[i] == 0 { - ret[i] = s.Min() + ret[i] = s.Min() continue } if ps[i] == 1 { - ret[i] = s.Max() + ret[i] = s.Max() continue } if ordered == nil { @@ -1108,14 +1108,21 @@ func (s series) DataQuantile(data float64) float64 { return math.NaN() } - ordered := s.Subset(s.Order(false)).Float() + tmpS := s.Filter(func(ele Element, index int) bool { + return !ele.IsNA() + }) + if tmpS.Len() == 0 { + return math.NaN() + } + + ordered := tmpS.Subset(tmpS.Order(false)).Float() length := len(ordered) if length%2 == 1 { length = length + 1 } - ret := dataQuantile(data , ordered, length) + ret := dataQuantile(data, ordered, length) return ret } @@ -1124,7 +1131,14 @@ func (s series) DataQuantiles(datas ...float64) []float64 { return nil } - ordered := s.Subset(s.Order(false)).Float() + tmpS := s.Filter(func(ele Element, index int) bool { + return !ele.IsNA() + }) + if tmpS.Len() == 0 { + return nil + } + + ordered := tmpS.Subset(tmpS.Order(false)).Float() length := len(ordered) if length%2 == 1 { @@ -1134,7 +1148,7 @@ func (s series) DataQuantiles(datas ...float64) []float64 { ret := make([]float64, len(datas)) for j := 0; j < len(datas); j++ { - ret[j] = dataQuantile(datas[j] , ordered, length) + ret[j] = dataQuantile(datas[j], ordered, length) } return ret diff --git a/series/type-bool.go b/series/type-bool.go index 5a8a5cb..d2527f8 100644 --- a/series/type-bool.go +++ b/series/type-bool.go @@ -32,7 +32,7 @@ func (e *boolElement) Set(value interface{}) { } func (e *boolElement) SetElement(val Element) { - e.nan = false + e.nan = val.IsNA() b, err := val.Bool() if err != nil { e.nan = true diff --git a/series/type-float.go b/series/type-float.go index 0bd827b..8b070f6 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -32,7 +32,7 @@ func (e *floatElement) Set(value interface{}) { } func (e *floatElement) SetElement(val Element) { - e.nan = false + e.nan = val.IsNA() e.e = val.Float() } func (e *floatElement) SetBool(val bool) { @@ -44,8 +44,12 @@ func (e *floatElement) SetBool(val bool) { } } func (e *floatElement) SetFloat(val float64) { - e.nan = false e.e = val + if math.IsNaN(val) { + e.nan = true + } else { + e.nan = false + } } func (e *floatElement) SetInt(val int) { e.nan = false diff --git a/series/type-int.go b/series/type-int.go index 5ef44e7..7fc040c 100644 --- a/series/type-int.go +++ b/series/type-int.go @@ -32,7 +32,7 @@ func (e *intElement) Set(value interface{}) { } func (e *intElement) SetElement(val Element) { - e.nan = false + e.nan = val.IsNA() v, err := val.Int() if err != nil { e.nan = true @@ -52,8 +52,7 @@ func (e *intElement) SetFloat(val float64) { e.nan = false f := val if math.IsNaN(f) || - math.IsInf(f, 0) || - math.IsInf(f, 1) { + math.IsInf(f, 0) { e.nan = true return } diff --git a/series/type-string.go b/series/type-string.go index 9d6b6be..7a2836d 100644 --- a/series/type-string.go +++ b/series/type-string.go @@ -33,9 +33,10 @@ func (e *stringElement) Set(value interface{}) { } func (e *stringElement) SetElement(val Element) { - e.nan = false + e.nan = val.IsNA() e.e = val.String() } + func (e *stringElement) SetBool(val bool) { e.nan = false if val { @@ -45,18 +46,25 @@ func (e *stringElement) SetBool(val bool) { } } func (e *stringElement) SetFloat(val float64) { - e.nan = false + if math.IsNaN(val) { + e.nan = true + } else { + e.nan = false + } e.e = strconv.FormatFloat(val, 'f', 6, 64) } + func (e *stringElement) SetInt(val int) { e.nan = false e.e = strconv.Itoa(val) } + func (e *stringElement) SetString(val string) { - e.nan = false e.e = val if e.e == NaN { e.nan = true + } else { + e.nan = false } } From e3b8e38f9b64d15deed0f96bd7146a8f6ded63f9 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Fri, 20 May 2022 18:09:13 +0800 Subject: [PATCH 50/60] self --- dataframe/dataframe.go | 2 +- dataframe/self.go | 57 +++++++++++++++++ dataframe/self_test.go | 139 +++++++++++++++++++++++++++++++++++++++++ series/self.go | 21 +++++++ series/self_test.go | 42 +++++++++++++ series/series.go | 3 + 6 files changed, 263 insertions(+), 1 deletion(-) create mode 100644 dataframe/self.go create mode 100644 dataframe/self_test.go create mode 100644 series/self.go create mode 100644 series/self_test.go diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 44b0428..3b4a176 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -303,7 +303,7 @@ func (df DataFrame) Subset(indexes series.Indexes) DataFrame { } } -func (df DataFrame) Slice(start, end int) DataFrame { +func (df DataFrame) SliceRow(start, end int) DataFrame { if df.Err != nil { return df } diff --git a/dataframe/self.go b/dataframe/self.go new file mode 100644 index 0000000..daa103f --- /dev/null +++ b/dataframe/self.go @@ -0,0 +1,57 @@ +package dataframe + +import ( + "fmt" + + "github.com/mqy527/gota/series" +) + +// All the operations on it will influence the DataFrame's content. +type Self struct { + this *DataFrame +} + +// All the operations on Self will influence the DataFrame's content. +func (df *DataFrame) Self() Self { + self := Self{ + this: df, + } + return self +} + +// AppendColumns Append columns on the DataFrame. +func (s Self) AppendColumns(cols ...series.Series) error { + if s.this.Err != nil || len(cols) == 0 { + return nil + } + slen := cols[0].Len() + for i := 1; i < len(cols); i++ { + if cols[i].Error() != nil { + return fmt.Errorf("AppendColumns: col[%s] has error: %v", cols[i].Name(), cols[i].Error()) + } + if slen != cols[i].Len() { + return fmt.Errorf("AppendColumns: serieses length not equal") + } + } + if slen != s.this.nrows { + return fmt.Errorf("AppendColumns: wrong dimensions") + } + s.this.columns = append(s.this.columns, cols...) + s.this.ncols = len(s.this.columns) + + colnames := s.this.Names() + fixColnames(colnames) + for i, colname := range colnames { + s.this.columns[i].SetName(colname) + } + return nil +} +// Capply applies the given function to the columns of a DataFrame, will influence the DataFrame's content. +func (s Self) Capply(f func(series.Series)) { + if s.this.Err != nil { + return + } + for _, s := range s.this.columns { + f(s) + } +} diff --git a/dataframe/self_test.go b/dataframe/self_test.go new file mode 100644 index 0000000..4fdac6e --- /dev/null +++ b/dataframe/self_test.go @@ -0,0 +1,139 @@ +package dataframe + +import ( + "reflect" + "testing" + + "github.com/mqy527/gota/series" +) + +func TestDataFrame_Self_AppendColumns(t *testing.T) { + a := New( + series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"), + series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"), + series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), + ) + table := []struct { + s series.Series + s1 series.Series + expDf DataFrame + }{ + { + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.1"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.3"), + New( + series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1_0"), + series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"), + series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3_0"), + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.1_1"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.3_1"), + ), + }, + { + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.2"), + series.New([]string{"w", "e", "r", "t", "y"}, series.String, "COL.1"), + New( + series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1_0"), + series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2_0"), + series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.2_1"), + series.New([]string{"w", "e", "r", "t", "y"}, series.String, "COL.1_1"), + ), + }, + { + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.5"), + New( + series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"), + series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"), + series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]int{2, 3, 5, 6, 7}, series.String, "COL.5"), + ), + }, + { + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]float64{3.3, 4.3, 5.3, 5.5, 6.4}, series.Float, "COL.5"), + New( + series.New([]string{"b", "a", "b", "c", "d"}, series.String, "COL.1"), + series.New([]int{1, 2, 4, 5, 4}, series.Int, "COL.2"), + series.New([]float64{3.0, 4.0, 5.3, 3.2, 1.2}, series.Float, "COL.3"), + series.New([]string{"A", "B", "A", "A", "A"}, series.String, "COL.4"), + series.New([]float64{3.3, 4.3, 5.3, 5.5, 6.4}, series.Float, "COL.5"), + ), + }, + } + for i, tc := range table { + b := a.Copy() + + b.Self().AppendColumns(tc.s, tc.s1) + + if b.Err != nil { + t.Errorf("Test: %d\nError:%v", i, b.Err) + } + // Check that the types are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Types(), b.Types()) { + t.Errorf("Test: %d\nDifferent types:\nA:%v\nB:%v", i, tc.expDf.Types(), b.Types()) + } + // Check that the colnames are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Names(), b.Names()) { + t.Errorf("Test: %d\nDifferent colnames:\nA:%v\nB:%v", i, tc.expDf.Names(), b.Names()) + } + // Check that the values are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Records(), b.Records()) { + t.Errorf("Test: %d\nDifferent values:\nA:%v\nB:%v", i, tc.expDf.Records(), b.Records()) + } + } +} + +func TestDataFrame_Self_Capply(t *testing.T) { + a := LoadRecords( + [][]string{ + {"A", "B", "C", "D"}, + {"a", "4", "5.1", series.NaN}, + {"b", series.NaN, "6.0", "true"}, + {"c", "3", "6.0", series.NaN}, + {series.NaN, "2", "7.1", "false"}, + }, + ) + fillNaNForward := func(s series.Series) { + s.FillNaNForward() + } + table := []struct { + fun func(series.Series) + expDf DataFrame + }{ + { + fillNaNForward, + LoadRecords( + [][]string{ + {"A", "B", "C", "D"}, + {"a", "4", "5.1", series.NaN}, + {"b", "4", "6.0", "true"}, + {"c", "3", "6.0", "true"}, + {"c", "2", "7.1", "false"}, + }, + ), + }, + } + for i, tc := range table { + b := a.Copy() + b.Self().Capply(tc.fun) + + if b.Err != nil { + t.Errorf("Test: %d\nError:%v", i, b.Err) + } + // Check that the types are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Types(), b.Types()) { + t.Errorf("Test: %d\nDifferent types:\nA:%v\nB:%v", i, tc.expDf.Types(), b.Types()) + } + // Check that the colnames are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Names(), b.Names()) { + t.Errorf("Test: %d\nDifferent colnames:\nA:%v\nB:%v", i, tc.expDf.Names(), b.Names()) + } + // Check that the values are the same between both DataFrames + if !reflect.DeepEqual(tc.expDf.Records(), b.Records()) { + t.Errorf("Test: %d\nDifferent values:\nA:%v\nB:%v", i, tc.expDf.Records(), b.Records()) + } + } +} \ No newline at end of file diff --git a/series/self.go b/series/self.go new file mode 100644 index 0000000..274d268 --- /dev/null +++ b/series/self.go @@ -0,0 +1,21 @@ +package series + +// All the operations on it will influence the DataFrame's content. +type Self struct { + this Series +} + +// All the operations on Self will influence the Series's content. +func (s *series) Self() Self { + self := Self{ + this: s, + } + return self +} + +// Apply applies the given function to the element of a Series, will influence the Series's content. +func (s Self) Apply(f func(ele Element, index int)) { + for i := 0; i < s.this.Len(); i++ { + f(s.this.Elem(i), i) + } +} diff --git a/series/self_test.go b/series/self_test.go new file mode 100644 index 0000000..1d103a5 --- /dev/null +++ b/series/self_test.go @@ -0,0 +1,42 @@ +package series + +import ( + "reflect" + "testing" +) + +func TestSeries_Self_Apply(t *testing.T) { + tests := []struct { + series Series + f func(ele Element, index int) + expected Series + }{ + { + Floats([]string{"1.5", "-3.23", "0.337397", "0.380079", "1.60979"}), + func(ele Element, index int) { + ele.SetFloat(ele.Float() + 2) + }, + Floats([]string{"3.5", "-1.23", "2.337397", "2.380079", "3.60979"}), + }, + { + Ints([]string{"23", "13", "101", "-64", "-3"}), + func(ele Element, index int) { + ele.SetFloat(ele.Float() - 2) + }, + Ints([]string{"21", "11", "99", "-66", "-5"}), + }, + } + + for testnum, test := range tests { + expected := test.expected.Records() + b := test.series.Copy() + b.Self().Apply(test.f) + received := b.Records() + if !reflect.DeepEqual(expected, received) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, expected, received, + ) + } + } +} \ No newline at end of file diff --git a/series/series.go b/series/series.go index 4c6b575..4881129 100644 --- a/series/series.go +++ b/series/series.go @@ -209,6 +209,9 @@ type Series interface { //Filter Select the elements that match the FilterFunction Filter(ff FilterFunction) Series + + // All the operations on Self will influence the Series's content. + Self() Self } // intElements is the concrete implementation of Elements for Int elements. From 7d91dc235975c2afffb83ecc874c3760116f8ade Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sun, 22 May 2022 23:06:48 +0800 Subject: [PATCH 51/60] DataFrame FromSeries --- dataframe/dataframe.go | 28 +++++++++++++++++++++++++++- series/self.go | 2 +- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 3b4a176..324bafb 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -68,6 +68,32 @@ func New(se ...series.Series) DataFrame { return df } +// New is the generic DataFrame constructor +func FromSeries(se ...series.Series) DataFrame { + if len(se) == 0 { + return DataFrame{Err: fmt.Errorf("empty DataFrame")} + } + + columns := se + nrows, ncols, err := checkColumnsDimensions(columns...) + if err != nil { + return DataFrame{Err: err} + } + + // Fill DataFrame base structure + df := DataFrame{ + columns: columns, + ncols: ncols, + nrows: nrows, + } + colnames := df.Names() + fixColnames(colnames) + for i, colname := range colnames { + df.columns[i].SetName(colname) + } + return df +} + func checkColumnsDimensions(se ...series.Series) (nrows, ncols int, err error) { ncols = len(se) nrows = -1 @@ -651,7 +677,7 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame { } expandedSeries[k] = newSeries } - return New(expandedSeries...) + return FromSeries(expandedSeries...) } // Mutate changes a column of the DataFrame with the given Series or adds it as diff --git a/series/self.go b/series/self.go index 274d268..e384bef 100644 --- a/series/self.go +++ b/series/self.go @@ -1,6 +1,6 @@ package series -// All the operations on it will influence the DataFrame's content. +// All the operations on it will influence the Series's content. type Self struct { this Series } From 26ec1781e3441845983f0eb612b27b2036530491 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 24 May 2022 18:15:50 +0800 Subject: [PATCH 52/60] optimize cache --- series/cache.go | 100 +++--- series/cacherolling.go | 45 ++- series/cacheseries.go | 401 +++++------------------- series/cacheseries_test.go | 625 +++---------------------------------- series/series.go | 2 +- series/series_logic.go | 2 + 6 files changed, 187 insertions(+), 988 deletions(-) diff --git a/series/cache.go b/series/cache.go index 4eead4c..569e96f 100644 --- a/series/cache.go +++ b/series/cache.go @@ -1,92 +1,86 @@ package series import ( - "strings" "sync" - "time" - - "github.com/patrickmn/go-cache" ) -var c Cache - -var once sync.Once +var CacheFactory func() Cache = nil //Cache define series cache type Cache interface { - Set(k string, x interface{}) - Get(k string) (interface{}, bool) + Set(key string, value interface{}) + Get(key string) (interface{}, bool) Clear() - DelByKeyPrefix(keyPrefix string) int Size() int + Delete(key string) + Copy() Cache } type seriesCache struct { - c *cache.Cache - keys map[string]struct{} - mu sync.RWMutex + c map[string]interface{} + mu sync.RWMutex } -func NewDefaultCache() Cache { +func newSeriesCache() Cache { + if CacheFactory != nil { + return CacheFactory() + } ch := &seriesCache{ - c: cache.New(5*time.Minute, 10*time.Minute), - keys: map[string]struct{}{}, - mu: sync.RWMutex{}, + c: map[string]interface{}{}, + mu: sync.RWMutex{}, } return ch } -func (dc *seriesCache) Set(k string, v interface{}) { - err := dc.c.Add(k, v, cache.DefaultExpiration) - if err == nil { - dc.mu.Lock() - dc.keys[k] = struct{}{} - dc.mu.Unlock() - } +func (dc *seriesCache) Set(key string, value interface{}) { + dc.mu.Lock() + dc.c[key] = value + dc.mu.Unlock() } -func (dc *seriesCache) Size() int { - return dc.c.ItemCount() +func (dc *seriesCache) Size() int { + dc.mu.RLock() + defer dc.mu.RUnlock() + return len(dc.c) } -func (dc *seriesCache) Get(k string) (interface{}, bool) { - return dc.c.Get(k) +func (dc *seriesCache) Get(key string) (interface{}, bool) { + dc.mu.RLock() + v, ok := dc.c[key] + dc.mu.RUnlock() + return v, ok } func (dc *seriesCache) Clear() { - dc.c.Flush() dc.mu.Lock() - dc.keys = map[string]struct{}{} + dc.c = make(map[string]interface{}) dc.mu.Unlock() } -func (dc *seriesCache) DelByKeyPrefix(keyPrefix string) int { - delCount := 0 +func (dc *seriesCache) Delete(key string) { dc.mu.Lock() - for key := range dc.keys { - if strings.HasPrefix(key, keyPrefix) { - delete(dc.keys, key) - dc.c.Delete(key) - delCount++ - } - } + delete(dc.c, key) dc.mu.Unlock() - return delCount } -//InitCache -func InitCache(f func() Cache) { - once.Do(func() { - if f == nil { - c = NewDefaultCache() - } else { - c = f() +func (dc *seriesCache) Copy() Cache { + nc := &seriesCache{ + c: map[string]interface{}{}, + mu: sync.RWMutex{}, + } + dc.mu.RLock() + defer dc.mu.RUnlock() + for k, v := range dc.c { + switch vt := v.(type) { + case Series: + nc.c[k] = vt.Copy() + case Element: + nc.c[k] = vt.Copy() + case string, float64, int, bool: + nc.c[k] = vt + default: + nc.c[k] = vt } - }) -} - -func ClearCache() { - if c != nil { - c.Clear() } + return nc } diff --git a/series/cacherolling.go b/series/cacherolling.go index 21d7549..64cf6c2 100644 --- a/series/cacherolling.go +++ b/series/cacherolling.go @@ -2,16 +2,15 @@ package series import ( "fmt" - "unsafe" ) type cacheAbleRollingSeries struct { RollingSeries - cacheKey string + c Cache } -func cacheOrExecuteRolling(cacheKey string, f func() Series) Series { - if ret, found := c.Get(cacheKey); found { +func (rc *cacheAbleRollingSeries) cacheOrExecuteRolling(cacheKey string, f func() Series) Series { + if ret, found := rc.c.Get(cacheKey); found { return ret.(Series) } res := f() @@ -20,65 +19,57 @@ func cacheOrExecuteRolling(cacheKey string, f func() Series) Series { } res.SetName(cacheKey) ret := res.CacheAble() - c.Set(cacheKey, ret) + rc.c.Set(cacheKey, ret) return ret } func (rc cacheAbleRollingSeries) Max() Series { - cacheKey := rc.cacheKey + "_RMax" - ret := cacheOrExecuteRolling(cacheKey, func() Series { + cacheKey := "RMax" + ret := rc.cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.Max() }) return ret } func (rc cacheAbleRollingSeries) Min() Series { - cacheKey := rc.cacheKey + "_RMin" - ret := cacheOrExecuteRolling(cacheKey, func() Series { + cacheKey := "RMin" + ret := rc.cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.Min() }) return ret } func (rc cacheAbleRollingSeries) Mean() Series { - cacheKey := rc.cacheKey + "_RMean" - ret := cacheOrExecuteRolling(cacheKey, func() Series { + cacheKey := "RMean" + ret := rc.cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.Mean() }) return ret } func (rc cacheAbleRollingSeries) MeanByWeights(weights []float64) Series { - cacheKey := fmt.Sprintf("%s_RMeanByWeights(%v)", rc.cacheKey, weights) - ret := cacheOrExecuteRolling(cacheKey, func() Series { + cacheKey := fmt.Sprintf("RMeanByWeights(%v)", weights) + ret := rc.cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.MeanByWeights(weights) }) return ret } func (rc cacheAbleRollingSeries) Quantile(p float64) Series { - cacheKey := fmt.Sprintf("%s_RQuantile(%f)", rc.cacheKey, p) - ret := cacheOrExecuteRolling(cacheKey, func() Series { + cacheKey := fmt.Sprintf("RQuantile(%f)", p) + ret := rc.cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.Quantile(p) }) return ret } func (rc cacheAbleRollingSeries) Median() Series { - cacheKey := rc.cacheKey + "_RMedian" - ret := cacheOrExecuteRolling(cacheKey, func() Series { + cacheKey := "RMedian" + ret := rc.cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.Median() }) return ret } func (rc cacheAbleRollingSeries) StdDev() Series { - cacheKey := rc.cacheKey + "_RStdDev" - ret := cacheOrExecuteRolling(cacheKey, func() Series { + cacheKey := "RStdDev" + ret := rc.cacheOrExecuteRolling(cacheKey, func() Series { return rc.RollingSeries.StdDev() }) return ret } -func (rc cacheAbleRollingSeries) Apply(f func(window Series, windowIndex int) interface{}, t Type) Series { - cacheKey := fmt.Sprintf("%s_RApply(%v, %s)", rc.cacheKey, (*(*int64)(unsafe.Pointer(&f))), t) - - ret := cacheOrExecuteRolling(cacheKey, func() Series { - return rc.RollingSeries.Apply(f, t) - }) - return ret -} diff --git a/series/cacheseries.go b/series/cacheseries.go index 4c74976..f48ed19 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -2,30 +2,19 @@ package series import ( "fmt" - "reflect" - "unsafe" - - uuid "github.com/satori/go.uuid" ) var _ Series = (*cacheAbleSeries)(nil) type cacheAbleSeries struct { Series - cacheKey string + c Cache } func newCacheAbleSeries(s Series) Series { - if len(s.Name()) == 0 { - return s - } - if c == nil { - InitCache(nil) - } - ret := &cacheAbleSeries{ - Series: s, - cacheKey: s.Name(), + Series: s, + c: newSeriesCache(), } return ret } @@ -33,34 +22,34 @@ func newCacheAbleSeries(s Series) Series { func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { cr := cacheAbleRollingSeries{ RollingSeries: NewRollingSeries(window, minPeriods, cs.Series), - cacheKey: fmt.Sprintf("%s[w%d,p%d]", cs.cacheKey, window, minPeriods), + c: newSeriesCache(), } return cr } func (cs cacheAbleSeries) HasNaN() bool { - cacheKey := cs.cacheKey + "_HasNaN" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "HasNaN" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.HasNaN() return ret, nil }) return ret.(bool) } -func cacheOrExecute(cacheKey string, f func() (interface{}, error)) (interface{}, error) { - if ret, found := c.Get(cacheKey); found { +func (cs *cacheAbleSeries) cacheOrExecute(cacheKey string, f func() (interface{}, error)) (interface{}, error) { + if ret, found := cs.c.Get(cacheKey); found { return ret, nil } ret, err := f() if err == nil { - c.Set(cacheKey, ret) + cs.c.Set(cacheKey, ret) } return ret, err } func (cs cacheAbleSeries) IsNaN() []bool { - cacheKey := cs.cacheKey + "_IsNaN" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "IsNaN" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.IsNaN() return ret, nil }) @@ -68,61 +57,53 @@ func (cs cacheAbleSeries) IsNaN() []bool { } func (cs cacheAbleSeries) IsNotNaN() []bool { - cacheKey := cs.cacheKey + "_IsNotNaN" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "IsNotNaN" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.IsNotNaN() return ret, nil }) return ret.([]bool) } -//Compare. The result will be cached if comparando is 'compFunc', 'Series' or not a slice. -func (cs cacheAbleSeries) Compare(comparator Comparator, comparando interface{}) Series { - var cacheKey string - if comparator == CompFunc { - f, ok := comparando.(compFunc) - if !ok { - panic("comparando is not a comparison function of type func(el Element) bool") - } - cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, (*(*int64)(unsafe.Pointer(&f)))) - } else { - switch v := comparando.(type) { - case Series: - if len(v.Name()) == 0 { - return cs.Series.Compare(comparator, comparando) - } - cacheKey = fmt.Sprintf("%s_Compare(%s, %s)", cs.cacheKey, comparator, v.Name()) - default: - switch reflect.TypeOf(comparando).Kind() { - case reflect.Slice: - return cs.Series.Compare(comparator, comparando) - default: - cacheKey = fmt.Sprintf("%s_Compare(%s, %v)", cs.cacheKey, comparator, comparando) - } - } - } - - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Compare(comparator, comparando) - res.SetName(cacheKey) - ret := res.CacheAble() +func (cs cacheAbleSeries) Float() []float64 { + cacheKey := "Float" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { + ret := cs.Series.Float() return ret, nil }) - return ret.(Series) + return ret.([]float64) } -func (cs cacheAbleSeries) Float() []float64 { - cacheKey := cs.cacheKey + "_Float" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - ret := cs.Series.Float() +func (cs cacheAbleSeries) Bool() ([]bool, error) { + cacheKey := "Bool" + ret, err := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { + ret, err := cs.Series.Bool() + return ret, err + }) + return ret.([]bool), err +} + +func (cs cacheAbleSeries) Int() ([]int, error) { + cacheKey := "Int" + ret, err := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { + ret, err := cs.Series.Int() + return ret, err + }) + return ret.([]int), err +} + +func (cs cacheAbleSeries) Records() []string { + cacheKey := "Records" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { + ret := cs.Series.Records() return ret, nil }) - return ret.([]float64) + return ret.([]string) } func (cs cacheAbleSeries) Order(reverse bool) []int { - cacheKey := fmt.Sprintf("%s_Order(%v)", cs.cacheKey, reverse) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("Order(%v)", reverse) + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Order(reverse) return ret, nil }) @@ -130,8 +111,8 @@ func (cs cacheAbleSeries) Order(reverse bool) []int { } func (cs cacheAbleSeries) StdDev() float64 { - cacheKey := cs.cacheKey + "_StdDev" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "StdDev" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.StdDev() return ret, nil }) @@ -139,8 +120,8 @@ func (cs cacheAbleSeries) StdDev() float64 { } func (cs cacheAbleSeries) Mean() float64 { - cacheKey := cs.cacheKey + "_Mean" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "Mean" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Mean() return ret, nil }) @@ -148,8 +129,8 @@ func (cs cacheAbleSeries) Mean() float64 { } func (cs cacheAbleSeries) Median() float64 { - cacheKey := cs.cacheKey + "_Median" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "Median" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Median() return ret, nil }) @@ -157,8 +138,8 @@ func (cs cacheAbleSeries) Median() float64 { } func (cs cacheAbleSeries) Max() float64 { - cacheKey := cs.cacheKey + "_Max" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "Max" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Max() return ret, nil }) @@ -166,8 +147,8 @@ func (cs cacheAbleSeries) Max() float64 { } func (cs cacheAbleSeries) MaxStr() string { - cacheKey := cs.cacheKey + "_MaxStr" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "MaxStr" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.MaxStr() return ret, nil }) @@ -175,8 +156,8 @@ func (cs cacheAbleSeries) MaxStr() string { } func (cs cacheAbleSeries) Min() float64 { - cacheKey := cs.cacheKey + "_Min" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "Min" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Min() return ret, nil }) @@ -184,8 +165,8 @@ func (cs cacheAbleSeries) Min() float64 { } func (cs cacheAbleSeries) MinStr() string { - cacheKey := cs.cacheKey + "_MinStr" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "MinStr" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.MinStr() return ret, nil }) @@ -193,39 +174,18 @@ func (cs cacheAbleSeries) MinStr() string { } func (cs cacheAbleSeries) Quantile(p float64) float64 { - cacheKey := fmt.Sprintf("%s_Quantile(%f)", cs.cacheKey, p) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("Quantile(%f)", p) + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Quantile(p) return ret, nil }) return ret.(float64) } -func (cs cacheAbleSeries) Map(f MapFunction) Series { - cacheKey := fmt.Sprintf("%s_Map(%v)", cs.cacheKey, (*(*int64)(unsafe.Pointer(&f)))) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - ret := cs.Series.Map(f) - return ret, nil - }) - return ret.(Series) -} - -func (cs cacheAbleSeries) Shift(periods int) Series { - cacheKey := fmt.Sprintf("%s_Shift(%d)", cs.cacheKey, periods) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Shift(periods) - res.SetName(cacheKey) - ret := res.CacheAble() - return ret, nil - }) - return ret.(Series) -} - func (cs cacheAbleSeries) CumProd() Series { - cacheKey := cs.cacheKey + "_CumProd" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "CumProd" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.CumProd() - res.SetName(cacheKey) ret := res.CacheAble() return ret, nil }) @@ -233,8 +193,8 @@ func (cs cacheAbleSeries) CumProd() Series { } func (cs cacheAbleSeries) Prod() float64 { - cacheKey := cs.cacheKey + "_Prod" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "Prod" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Prod() return ret, nil }) @@ -242,10 +202,9 @@ func (cs cacheAbleSeries) Prod() float64 { } func (cs cacheAbleSeries) AddConst(c float64) Series { - cacheKey := fmt.Sprintf("%s_AddConst(%f)", cs.cacheKey, c) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("AddConst(%f)", c) + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.AddConst(c) - res.SetName(cacheKey) ret := res.CacheAble() return ret, nil }) @@ -253,10 +212,9 @@ func (cs cacheAbleSeries) AddConst(c float64) Series { } func (cs cacheAbleSeries) MulConst(c float64) Series { - cacheKey := fmt.Sprintf("%s_MulConst(%f)", cs.cacheKey, c) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("MulConst(%f)", c) + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.MulConst(c) - res.SetName(cacheKey) ret := res.CacheAble() return ret, nil }) @@ -264,77 +222,29 @@ func (cs cacheAbleSeries) MulConst(c float64) Series { } func (cs cacheAbleSeries) DivConst(c float64) Series { - cacheKey := fmt.Sprintf("%s_DivConst(%f)", cs.cacheKey, c) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := fmt.Sprintf("DivConst(%f)", c) + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.DivConst(c) - res.SetName(cacheKey) ret := res.CacheAble() return ret, nil }) return ret.(Series) } -func (cs cacheAbleSeries) Add(c Series) Series { - if len(c.Name()) == 0 { - return cs.Series.Add(c) - } - cacheKey := fmt.Sprintf("%s_Add(%s)", cs.cacheKey, c.Name()) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Add(c) - res.SetName(cacheKey) - ret := res.CacheAble() - return ret, nil - }) - return ret.(Series) -} - -func (cs cacheAbleSeries) Sub(c Series) Series { - if len(c.Name()) == 0 { - return cs.Series.Sub(c) - } - cacheKey := fmt.Sprintf("%s_Sub(%s)", cs.cacheKey, c.Name()) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Sub(c) - res.SetName(cacheKey) - ret := res.CacheAble() - return ret, nil - }) - return ret.(Series) -} - -func (cs cacheAbleSeries) Mul(c Series) Series { - if len(c.Name()) == 0 { - return cs.Series.Mul(c) - } - cacheKey := fmt.Sprintf("%s_Mul(%s)", cs.cacheKey, c.Name()) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Mul(c) - res.SetName(cacheKey) - ret := res.CacheAble() - return ret, nil - }) - return ret.(Series) -} - -func (cs cacheAbleSeries) Div(c Series) Series { - if len(c.Name()) == 0 { - return cs.Series.Div(c) - } - cacheKey := fmt.Sprintf("%s_Div(%s)", cs.cacheKey, c.Name()) - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Div(c) - res.SetName(cacheKey) +func (cs cacheAbleSeries) Abs() Series { + cacheKey := "Abs" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { + res := cs.Series.Abs() ret := res.CacheAble() return ret, nil }) return ret.(Series) } -func (cs cacheAbleSeries) Abs() Series { - cacheKey := cs.cacheKey + "_Abs" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Abs() - res.SetName(cacheKey) +func (cs cacheAbleSeries) Not() Series { + cacheKey := "Not" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { + res := cs.Series.Not() ret := res.CacheAble() return ret, nil }) @@ -342,166 +252,23 @@ func (cs cacheAbleSeries) Abs() Series { } func (cs cacheAbleSeries) Sum() float64 { - cacheKey := cs.cacheKey + "_Sum" - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { + cacheKey := "Sum" + ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { ret := cs.Series.Sum() return ret, nil }) return ret.(float64) } -func (cs cacheAbleSeries) Empty() Series { - return cs.Series.Empty() -} - -func (cs cacheAbleSeries) Error() error { - return cs.Series.Error() -} - -func (cs cacheAbleSeries) Subset(indexes Indexes) Series { - return cs.Series.Subset(indexes) -} - -func (cs cacheAbleSeries) Concat(x Series) Series { - if len(x.Name()) == 0 { - return cs.Series.Concat(x) - } - cacheKey := fmt.Sprintf("%s_Concat(%s)", cs.cacheKey, x.Name()) - res := cs.Series.Concat(x) - res.SetName(cacheKey) - ret := res.CacheAble() - return ret -} - func (cs cacheAbleSeries) Copy() Series { - cacheKey := fmt.Sprintf("%s_Copy{%s}", cs.cacheKey, uuid.NewV4().String()) - res := cs.Series.Copy() - res.SetName(cacheKey) - ret := res.CacheAble() - return ret -} - -func (cs cacheAbleSeries) Records() []string { - return cs.Series.Records() -} - -func (cs cacheAbleSeries) Type() Type { - return cs.Series.Type() -} - -func (cs cacheAbleSeries) Len() int { - return cs.Series.Len() -} - -func (cs cacheAbleSeries) String() string { - return cs.Series.String() -} - -func (cs cacheAbleSeries) Str() string { - return cs.Series.Str() -} - -func (cs cacheAbleSeries) Val(i int) interface{} { - return cs.Series.Val(i) -} - -func (cs cacheAbleSeries) Elem(i int) Element { - return cs.Series.Elem(i) -} - -func (cs cacheAbleSeries) Slice(start int, end int) Series { - cacheKey := fmt.Sprintf("%s_Slice(%d,%d)", cs.cacheKey, start, end) - res := cs.Series.Slice(start, end) - res.SetName(cacheKey) - ret := res.CacheAble() + s := cs.Series.Copy() + ret := &cacheAbleSeries{ + Series: s, + c: cs.c.Copy(), + } return ret } func (cs *cacheAbleSeries) CacheAble() Series { return cs } - -func (cs *cacheAbleSeries) Set(indexes Indexes, newvalues Series) Series { - c.DelByKeyPrefix(cs.cacheKey) - return cs.Series.Set(indexes, newvalues) -} - -func (cs *cacheAbleSeries) FillNaN(value ElementValue) { - c.DelByKeyPrefix(cs.cacheKey) - cs.Series.FillNaN(value) -} -func (cs *cacheAbleSeries) FillNaNForward() { - c.DelByKeyPrefix(cs.cacheKey) - cs.Series.FillNaNForward() -} -func (cs *cacheAbleSeries) FillNaNBackward() { - c.DelByKeyPrefix(cs.cacheKey) - cs.Series.FillNaNBackward() -} - -func (cs *cacheAbleSeries) Append(values interface{}) { - c.DelByKeyPrefix(cs.cacheKey) - cs.Series.Append(values) -} - -func (cs *cacheAbleSeries) And(in interface{}) Series { - var cacheKey string - switch v := in.(type) { - case Series: - if len(v.Name()) == 0 { - return cs.Series.And(in) - } - cacheKey = fmt.Sprintf("%s_And(%s)", cs.cacheKey, v.Name()) - default: - switch reflect.TypeOf(in).Kind() { - case reflect.Slice: - res := cs.Series.And(in) - return res - default: - cacheKey = fmt.Sprintf("%s_And(%v)", cs.cacheKey, in) - } - } - - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.And(in) - res.SetName(cacheKey) - ret := res.CacheAble() - return ret, nil - }) - return ret.(Series) -} - -func (cs *cacheAbleSeries) Or(in interface{}) Series { - var cacheKey string - switch v := in.(type) { - case Series: - if len(v.Name()) == 0 { - return cs.Series.Or(in) - } - cacheKey = fmt.Sprintf("%s_Or(%s)", cs.cacheKey, v.Name()) - default: - switch reflect.TypeOf(in).Kind() { - case reflect.Slice: - res := cs.Series.Or(in) - return res - default: - cacheKey = fmt.Sprintf("%s_Or(%v)", cs.cacheKey, in) - } - } - - ret, _ := cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.Or(in) - res.SetName(cacheKey) - ret := res.CacheAble() - return ret, nil - }) - return ret.(Series) -} - -func (cs cacheAbleSeries) Not() Series { - cacheKey := cs.cacheKey + "_Not" - res := cs.Series.Not() - res.SetName(cacheKey) - ret := res.CacheAble() - return ret -} diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go index 6707e55..1663522 100644 --- a/series/cacheseries_test.go +++ b/series/cacheseries_test.go @@ -1,10 +1,8 @@ package series import ( - "fmt" - "math" "reflect" - "strings" + "sync" "testing" ) @@ -36,627 +34,74 @@ func (mc *mockCache) Clear() { mc.hitCount = 0 } -func (mc *mockCache) DelByKeyPrefix(keyPrefix string) int { - return mc.innerCache.DelByKeyPrefix(keyPrefix) +func (mc *mockCache) Delete(keyPrefix string) { + mc.innerCache.Delete(keyPrefix) } func (dc *mockCache) Size() int { return dc.innerCache.Size() } -var testCache = &mockCache{ - innerCache: NewDefaultCache(), +func (dc *mockCache) Copy() Cache { + nc := &mockCache{ + setCount: dc.setCount, + getCount: dc.getCount, + hitCount: dc.hitCount, + innerCache: dc.innerCache.Copy(), + } + return nc } func TestMain(m *testing.M) { - ClearCache() - InitCache(func() Cache { + CacheFactory = func() Cache { + testCache := &mockCache{ + innerCache: &seriesCache{ + c: map[string]interface{}{}, + mu: sync.RWMutex{}, + }, + } return testCache - }) + } m.Run() - ClearCache() } -func TestCacheSeries_Map(t *testing.T) { +func TestCacheSeries_Add(t *testing.T) { tests := []struct { series Series + addConst float64 expected Series - }{ - { - Bools([]bool{false, true, false, false, true}), - Bools([]bool{false, true, false, false, true}), - }, - { - Floats([]float64{1.5, -3.23, -0.337397, -0.380079, 1.60979, 34.}), - Floats([]float64{3, -6.46, -0.674794, -0.760158, 3.21958, 68.}), - }, - { - Floats([]float64{math.Pi, math.Phi, math.SqrtE, math.Cbrt(64)}), - Floats([]float64{2 * math.Pi, 2 * math.Phi, 2 * math.SqrtE, 2 * math.Cbrt(64)}), - }, - { - Strings([]string{"XyZApple", "XyZBanana", "XyZCitrus", "XyZDragonfruit"}), - Strings([]string{"Apple", "Banana", "Citrus", "Dragonfruit"}), - }, - { - Strings([]string{"San Francisco", "XyZTokyo", "MoscowXyZ", "XyzSydney"}), - Strings([]string{"San Francisco", "Tokyo", "MoscowXyZ", "XyzSydney"}), - }, - { - Ints([]int{23, 13, 101, -64, -3}), - Ints([]int{28, 18, 106, -59, 2}), - }, - { - Ints([]string{"morning", "noon", "afternoon", "evening", "night"}), - Ints([]int{5, 5, 5, 5, 5}), - }, - } - - doubleFloat64 := func(e Element, index int) Element { - result := e.Copy() - result.Set(result.Float() * 2) - return Element(result) - } - - // and two booleans - and := func(e Element, index int) Element { - result := e.Copy() - b, err := result.Bool() - if err != nil { - t.Errorf("%v", err) - return Element(nil) - } - result.Set(b && true) - return Element(result) - } - - // add constant (+5) to value (v) - add5Int := func(e Element, index int) Element { - result := e.Copy() - i, err := result.Int() - if err != nil { - return Element(&intElement{ - e: +5, - nan: false, - }) - } - result.Set(i + 5) - return Element(result) - } - - // trim (XyZ) prefix from string - trimXyZPrefix := func(e Element, index int) Element { - result := e.Copy() - result.Set(strings.TrimPrefix(result.String(), "XyZ")) - return Element(result) - } - - setCount := 0 - getCount := 0 - hitCount := 0 - ClearCache() - for testnum, test := range tests { - test.series.SetName(fmt.Sprintf("Name-%d", testnum)) - tmpSeries := test.series.CacheAble() - var received Series - switch test.series.Type() { - case Bool: - expected := test.expected - received = tmpSeries.Map(and) - getCount++ - setCount++ - received = tmpSeries.Map(and) - getCount++ - hitCount++ - for i := 0; i < expected.Len(); i++ { - e, _ := expected.Elem(i).Bool() - r, _ := received.Elem(i).Bool() - - if e != r { - t.Errorf( - "Test:%v\nExpected:\n%v\nReceived:\n%v", - testnum, expected, received, - ) - } - } - - case Float: - expected := test.expected - received = tmpSeries.Map(doubleFloat64) - getCount++ - setCount++ - received = tmpSeries.Map(doubleFloat64) - getCount++ - hitCount++ - for i := 0; i < expected.Len(); i++ { - if !compareFloats(expected.Elem(i).Float(), - received.Elem(i).Float(), 6) { - t.Errorf( - "Test:%v\nExpected:\n%v\nReceived:\n%v", - testnum, expected, received, - ) - } - } - case Int: - expected := test.expected - received = tmpSeries.Map(add5Int) - getCount++ - setCount++ - received = tmpSeries.Map(add5Int) - getCount++ - hitCount++ - for i := 0; i < expected.Len(); i++ { - e, _ := expected.Elem(i).Int() - r, _ := received.Elem(i).Int() - if e != r { - t.Errorf( - "Test:%v\nExpected:\n%v\nReceived:\n%v", - testnum, expected, received, - ) - } - } - case String: - expected := test.expected - received = tmpSeries.Map(trimXyZPrefix) - getCount++ - setCount++ - received = tmpSeries.Map(trimXyZPrefix) - getCount++ - hitCount++ - for i := 0; i < expected.Len(); i++ { - if strings.Compare(expected.Elem(i).String(), - received.Elem(i).String()) != 0 { - t.Errorf( - "Test:%v\nExpected:\n%v\nReceived:\n%v", - testnum, expected, received, - ) - } - } - default: - } - } - if setCount != testCache.setCount { - t.Errorf("CacheInfo[setCount]:\nExpected:%v\nActual:%v", setCount, testCache.setCount) - } - if getCount != testCache.getCount { - t.Errorf("CacheInfo[getCount]:\nExpected:%v\nActual:%v", getCount, testCache.getCount) - } - if hitCount != testCache.hitCount { - t.Errorf("CacheInfo[hitCount]:\nExpected:%v\nActual:%v", hitCount, testCache.hitCount) - } - -} - -func TestCacheSeries_Compare(t *testing.T) { - table := []struct { - series Series - comparator Comparator - comparando interface{} - expected Series - }{ - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Eq, - "B", - Bools([]bool{false, true, false, true, false, false}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Eq, - []string{"B", "B", "C", "D", "A", "A"}, - Bools([]bool{false, true, true, false, false, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Eq, - "2", - Bools([]bool{false, true, false, false, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Eq, - []int{0, 2, 0, 5, 10}, - Bools([]bool{true, true, false, true, false}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Eq, - "2", - Bools([]bool{false, true, false, false, false}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Eq, - []float64{0.1, 2, 0, 5, 10}, - Bools([]bool{true, true, false, true, false}), - }, - { - Bools([]bool{true, true, false}), - Eq, - "true", - Bools([]bool{true, true, false}), - }, - { - Bools([]bool{true, true, false}), - Eq, - []bool{true, false, false}, - Bools([]bool{true, false, true}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Neq, - "B", - Bools([]bool{true, false, true, false, true, true}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Neq, - []string{"B", "B", "C", "D", "A", "A"}, - Bools([]bool{true, false, false, true, true, true}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Neq, - "2", - Bools([]bool{true, false, true, true, true}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Neq, - []int{0, 2, 0, 5, 10}, - Bools([]bool{false, false, true, false, true}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Neq, - "2", - Bools([]bool{true, false, true, true, true}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Neq, - []float64{0.1, 2, 0, 5, 10}, - Bools([]bool{false, false, true, false, true}), - }, - { - Bools([]bool{true, true, false}), - Neq, - "true", - Bools([]bool{false, false, true}), - }, - { - Bools([]bool{true, true, false}), - Neq, - []bool{true, false, false}, - Bools([]bool{false, true, false}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Greater, - "B", - Bools([]bool{false, false, true, false, true, true}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Greater, - []string{"B", "B", "C", "D", "A", "A"}, - Bools([]bool{false, false, false, false, true, true}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Greater, - "2", - Bools([]bool{false, false, false, true, true}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Greater, - []int{0, 2, 0, 5, 10}, - Bools([]bool{false, false, true, false, false}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Greater, - "2", - Bools([]bool{false, false, false, true, true}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Greater, - []float64{0.1, 2, 0, 5, 10}, - Bools([]bool{false, false, true, false, false}), - }, - { - Bools([]bool{true, true, false}), - Greater, - "true", - Bools([]bool{false, false, false}), - }, - { - Bools([]bool{true, true, false}), - Greater, - []bool{true, false, false}, - Bools([]bool{false, true, false}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - GreaterEq, - "B", - Bools([]bool{false, true, true, true, true, true}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - GreaterEq, - []string{"B", "B", "C", "D", "A", "A"}, - Bools([]bool{false, true, true, false, true, true}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - GreaterEq, - "2", - Bools([]bool{false, true, false, true, true}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - GreaterEq, - []int{0, 2, 0, 5, 10}, - Bools([]bool{true, true, true, true, false}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - GreaterEq, - "2", - Bools([]bool{false, true, false, true, true}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - GreaterEq, - []float64{0.1, 2, 0, 5, 10}, - Bools([]bool{true, true, true, true, false}), - }, - { - Bools([]bool{true, true, false}), - GreaterEq, - "true", - Bools([]bool{true, true, false}), - }, - { - Bools([]bool{true, true, false}), - GreaterEq, - []bool{true, false, false}, - Bools([]bool{true, true, true}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Less, - "B", - Bools([]bool{true, false, false, false, false, false}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - Less, - []string{"B", "B", "C", "D", "A", "A"}, - Bools([]bool{true, false, false, true, false, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Less, - "2", - Bools([]bool{true, false, true, false, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - Less, - []int{0, 2, 0, 5, 10}, - Bools([]bool{false, false, false, false, true}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Less, - "2", - Bools([]bool{true, false, true, false, false}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - Less, - []float64{0.1, 2, 0, 5, 10}, - Bools([]bool{false, false, false, false, true}), - }, - { - Bools([]bool{true, true, false}), - Less, - "true", - Bools([]bool{false, false, true}), - }, - { - Bools([]bool{true, true, false}), - Less, - []bool{true, false, false}, - Bools([]bool{false, false, false}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - LessEq, - "B", - Bools([]bool{true, true, false, true, false, false}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - LessEq, - []string{"B", "B", "C", "D", "A", "A"}, - Bools([]bool{true, true, true, true, false, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - LessEq, - "2", - Bools([]bool{true, true, true, false, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - LessEq, - []int{0, 2, 0, 5, 10}, - Bools([]bool{true, true, false, true, true}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - LessEq, - "2", - Bools([]bool{true, true, true, false, false}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - LessEq, - []float64{0.1, 2, 0, 5, 10}, - Bools([]bool{true, true, false, true, true}), - }, - { - Bools([]bool{true, true, false}), - LessEq, - "true", - Bools([]bool{true, true, true}), - }, - { - Bools([]bool{true, true, false}), - LessEq, - []bool{true, false, false}, - Bools([]bool{true, false, true}), - }, - { - Strings([]string{"A", "B", "C", "B", "D", "BADA"}), - In, - "B", - Bools([]bool{false, true, false, true, false, false}), - }, - { - Strings([]string{"Hello", "world", "this", "is", "a", "test"}), - In, - []string{"cat", "world", "hello", "a"}, - Bools([]bool{false, true, false, false, true, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - In, - "2", - Bools([]bool{false, true, false, false, false}), - }, - { - Ints([]int{0, 2, 1, 5, 9}), - In, - []int{2, 99, 1234, 9}, - Bools([]bool{false, true, false, false, true}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - In, - "2", - Bools([]bool{false, true, false, false, false}), - }, - { - Floats([]float64{0.1, 2, 1, 5, 9}), - In, - []float64{2, 99, 1234, 9}, - Bools([]bool{false, true, false, false, true}), - }, - { - Bools([]bool{true, true, false}), - In, - "true", - Bools([]bool{true, true, false}), - }, - { - Bools([]bool{true, true, false}), - In, - []bool{false, false, false}, - Bools([]bool{false, false, true}), - }, - } - ClearCache() - for testnum, test := range table { - test.series.SetName(fmt.Sprintf("Name-%d", testnum)) - a := test.series.CacheAble() - b := a.Compare(test.comparator, test.comparando) - b = a.Compare(test.comparator, test.comparando) - if err := b.Error(); err != nil { - t.Errorf("Test:%v\nError:%v", testnum, err) - } - expected := test.expected.Records() - received := b.Records() - if !reflect.DeepEqual(expected, received) { - t.Errorf( - "Test:%v\nExpected:\n%v\nReceived:\n%v", - testnum, expected, received, - ) - } - if err := checkTypes(b); err != nil { - t.Errorf( - "Test:%v\nError:%v", - testnum, err, - ) - } - } - - fmt.Printf("getCount:%d, setCount:%d, hitCount:%d \n", testCache.getCount, testCache.setCount, testCache.hitCount) -} - -func TestCacheSeries_Add(t *testing.T) { - tests := []struct { - series Series - addSeries Series - addConst float64 - expected Series }{ { Floats([]float64{1.5, -3.23, -0.33, -0.38, 1.6, 34.}), - Floats([]float64{3, -6.46, -0.67, -0.76, 3.2, 68.}), 1, - Floats([]float64{5.5, -8.69, 0, -0.14, 5.8, 103.}), + Floats([]float64{2.5, -2.23, 0.67, 0.62, 2.6, 35.}), }, { Ints([]int{23, 13, 101, -6, -3}), - Ints([]int{28, 18, 106, -5, 2}), 2, - Ints([]int{53, 33, 209, -9, 1}), + Ints([]int{25, 15, 103, -4, -1}), }, } - setCount := 0 - getCount := 0 - hitCount := 0 - ClearCache() for testnum, test := range tests { - test.series.SetName(fmt.Sprintf("Name-%d", testnum)) - test.addSeries.SetName(fmt.Sprintf("AddName-%d", testnum)) + tmpSeries := test.series.CacheAble() - var received Series expected := test.expected - _ = tmpSeries.Add(test.addSeries).AddConst(test.addConst) - setCount = setCount + 2 - getCount = getCount + 2 + _ = tmpSeries.AddConst(test.addConst) - received = tmpSeries.Add(test.addSeries).AddConst(test.addConst) - getCount = getCount + 2 - hitCount = hitCount + 2 + received := tmpSeries.AddConst(test.addConst) - for i := 0; i < expected.Len(); i++ { - if !compareFloats(expected.Elem(i).Float(), - received.Elem(i).Float(), 6) { - t.Errorf( - "Test:%v\nExpected:\n%v\nReceived:\n%v", - testnum, expected, received, - ) - } + exp := expected.Records() + rev := received.Records() + + if !reflect.DeepEqual(exp, rev) { + t.Errorf( + "Test:%v\nExpected:\n%v\nReceived:\n%v", + testnum, exp, rev, + ) } - } - if setCount != testCache.setCount { - t.Errorf("CacheInfo[setCount]:\nExpected:%v\nActual:%v", setCount, testCache.setCount) - } - if getCount != testCache.getCount { - t.Errorf("CacheInfo[getCount]:\nExpected:%v\nActual:%v", getCount, testCache.getCount) - } - if hitCount != testCache.hitCount { - t.Errorf("CacheInfo[hitCount]:\nExpected:%v\nActual:%v", hitCount, testCache.hitCount) + } } diff --git a/series/series.go b/series/series.go index 4881129..1decfe2 100644 --- a/series/series.go +++ b/series/series.go @@ -1227,7 +1227,7 @@ func (s series) Shift(periods int) Series { func (s series) CumProd() Series { dst := make([]float64, s.Len()) floats.CumProd(dst, s.Float()) - return New(dst, s.Type(), fmt.Sprintf("%s_CumProd", s.name)) + return New(dst, s.Type(), fmt.Sprintf("CumProd(%s)", s.name)) } // Prod returns the product of the elements of the Series. Returns 1 if len(s) = 0. diff --git a/series/series_logic.go b/series/series_logic.go index 7084f3c..2ee91b5 100644 --- a/series/series_logic.go +++ b/series/series_logic.go @@ -1,6 +1,7 @@ package series import ( + "fmt" "log" ) @@ -55,5 +56,6 @@ func (s series) Not() Series { } return ret }) + result.SetName(fmt.Sprintf("Not(%s)", s.Name())) return result } \ No newline at end of file From 4ad05e92572c801a71dbb3738c804bbd91c96b8e Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Tue, 24 May 2022 23:06:03 +0800 Subject: [PATCH 53/60] immutable series --- series/cache.go | 5 --- series/cacheseries.go | 56 ++++++++++++++++++++++-------- series/cacheseries_test.go | 59 -------------------------------- series/immutable.go | 70 ++++++++++++++++++++++++++++++++++++++ series/series.go | 6 +++- 5 files changed, 117 insertions(+), 79 deletions(-) create mode 100644 series/immutable.go diff --git a/series/cache.go b/series/cache.go index 569e96f..a1841b5 100644 --- a/series/cache.go +++ b/series/cache.go @@ -4,8 +4,6 @@ import ( "sync" ) -var CacheFactory func() Cache = nil - //Cache define series cache type Cache interface { Set(key string, value interface{}) @@ -22,9 +20,6 @@ type seriesCache struct { } func newSeriesCache() Cache { - if CacheFactory != nil { - return CacheFactory() - } ch := &seriesCache{ c: map[string]interface{}{}, mu: sync.RWMutex{}, diff --git a/series/cacheseries.go b/series/cacheseries.go index f48ed19..14427ce 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -13,7 +13,7 @@ type cacheAbleSeries struct { func newCacheAbleSeries(s Series) Series { ret := &cacheAbleSeries{ - Series: s, + Series: s.Immutable(), c: newSeriesCache(), } return ret @@ -53,7 +53,11 @@ func (cs cacheAbleSeries) IsNaN() []bool { ret := cs.Series.IsNaN() return ret, nil }) - return ret.([]bool) + + bs := ret.([]bool) + retCopy := make([]bool, len(bs)) + copy(retCopy, bs) + return retCopy } func (cs cacheAbleSeries) IsNotNaN() []bool { @@ -62,7 +66,10 @@ func (cs cacheAbleSeries) IsNotNaN() []bool { ret := cs.Series.IsNotNaN() return ret, nil }) - return ret.([]bool) + bs := ret.([]bool) + retCopy := make([]bool, len(bs)) + copy(retCopy, bs) + return retCopy } func (cs cacheAbleSeries) Float() []float64 { @@ -71,7 +78,10 @@ func (cs cacheAbleSeries) Float() []float64 { ret := cs.Series.Float() return ret, nil }) - return ret.([]float64) + fs := ret.([]float64) + retCopy := make([]float64, len(fs)) + copy(retCopy, fs) + return retCopy } func (cs cacheAbleSeries) Bool() ([]bool, error) { @@ -80,7 +90,13 @@ func (cs cacheAbleSeries) Bool() ([]bool, error) { ret, err := cs.Series.Bool() return ret, err }) - return ret.([]bool), err + if err != nil { + return nil, err + } + bs := ret.([]bool) + retCopy := make([]bool, len(bs)) + copy(retCopy, bs) + return retCopy, nil } func (cs cacheAbleSeries) Int() ([]int, error) { @@ -89,7 +105,13 @@ func (cs cacheAbleSeries) Int() ([]int, error) { ret, err := cs.Series.Int() return ret, err }) - return ret.([]int), err + if err != nil { + return nil, err + } + ints := ret.([]int) + retCopy := make([]int, len(ints)) + copy(retCopy, ints) + return retCopy, nil } func (cs cacheAbleSeries) Records() []string { @@ -98,7 +120,10 @@ func (cs cacheAbleSeries) Records() []string { ret := cs.Series.Records() return ret, nil }) - return ret.([]string) + rs := ret.([]string) + retCopy := make([]string, len(rs)) + copy(retCopy, rs) + return retCopy } func (cs cacheAbleSeries) Order(reverse bool) []int { @@ -107,7 +132,10 @@ func (cs cacheAbleSeries) Order(reverse bool) []int { ret := cs.Series.Order(reverse) return ret, nil }) - return ret.([]int) + ints := ret.([]int) + retCopy := make([]int, len(ints)) + copy(retCopy, ints) + return retCopy } func (cs cacheAbleSeries) StdDev() float64 { @@ -186,7 +214,7 @@ func (cs cacheAbleSeries) CumProd() Series { cacheKey := "CumProd" ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.CumProd() - ret := res.CacheAble() + ret := res.Immutable() return ret, nil }) return ret.(Series) @@ -205,7 +233,7 @@ func (cs cacheAbleSeries) AddConst(c float64) Series { cacheKey := fmt.Sprintf("AddConst(%f)", c) ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.AddConst(c) - ret := res.CacheAble() + ret := res.Immutable() return ret, nil }) return ret.(Series) @@ -215,7 +243,7 @@ func (cs cacheAbleSeries) MulConst(c float64) Series { cacheKey := fmt.Sprintf("MulConst(%f)", c) ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.MulConst(c) - ret := res.CacheAble() + ret := res.Immutable() return ret, nil }) return ret.(Series) @@ -225,7 +253,7 @@ func (cs cacheAbleSeries) DivConst(c float64) Series { cacheKey := fmt.Sprintf("DivConst(%f)", c) ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.DivConst(c) - ret := res.CacheAble() + ret := res.Immutable() return ret, nil }) return ret.(Series) @@ -235,7 +263,7 @@ func (cs cacheAbleSeries) Abs() Series { cacheKey := "Abs" ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Abs() - ret := res.CacheAble() + ret := res.Immutable() return ret, nil }) return ret.(Series) @@ -245,7 +273,7 @@ func (cs cacheAbleSeries) Not() Series { cacheKey := "Not" ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { res := cs.Series.Not() - ret := res.CacheAble() + ret := res.Immutable() return ret, nil }) return ret.(Series) diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go index 1663522..96e4500 100644 --- a/series/cacheseries_test.go +++ b/series/cacheseries_test.go @@ -2,68 +2,9 @@ package series import ( "reflect" - "sync" "testing" ) -type mockCache struct { - setCount int - getCount int - hitCount int - innerCache Cache -} - -func (mc *mockCache) Set(k string, v interface{}) { - mc.innerCache.Set(k, v) - mc.setCount++ -} - -func (mc *mockCache) Get(k string) (interface{}, bool) { - mc.getCount++ - v, ok := mc.innerCache.Get(k) - if ok { - mc.hitCount++ - } - return v, ok -} - -func (mc *mockCache) Clear() { - mc.innerCache.Clear() - mc.setCount = 0 - mc.getCount = 0 - mc.hitCount = 0 -} - -func (mc *mockCache) Delete(keyPrefix string) { - mc.innerCache.Delete(keyPrefix) -} - -func (dc *mockCache) Size() int { - return dc.innerCache.Size() -} - -func (dc *mockCache) Copy() Cache { - nc := &mockCache{ - setCount: dc.setCount, - getCount: dc.getCount, - hitCount: dc.hitCount, - innerCache: dc.innerCache.Copy(), - } - return nc -} - -func TestMain(m *testing.M) { - CacheFactory = func() Cache { - testCache := &mockCache{ - innerCache: &seriesCache{ - c: map[string]interface{}{}, - mu: sync.RWMutex{}, - }, - } - return testCache - } - m.Run() -} func TestCacheSeries_Add(t *testing.T) { tests := []struct { diff --git a/series/immutable.go b/series/immutable.go new file mode 100644 index 0000000..58d8fe1 --- /dev/null +++ b/series/immutable.go @@ -0,0 +1,70 @@ +package series + +var _ Series = (*immutableSeries)(nil) + +type immutableSeries struct { + Series +} + +func newImmutableSeries(s Series) Series { + ret := &immutableSeries{ + Series: s, + } + return ret +} + +func (s immutableSeries) Elem(i int) Element { + ele := &immutableElement{ + Element: s.Series.Elem(i), + } + return ele +} + +func (s *immutableSeries) Immutable() Series { + return s +} + +func (s immutableSeries) Slice(start, end int) Series { + ret := &immutableSeries{ + Series: s.Series.Slice(start, end), + } + return ret +} +func (s immutableSeries) FillNaN(value ElementValue) { + panic("The method[FillNaN] is not supported by immutableElement") +} +func (s immutableSeries) FillNaNForward() { + panic("The method[FillNaNForward] is not supported by immutableElement") +} +func (s immutableSeries) FillNaNBackward() { + panic("The method[FillNaNBackward] is not supported by immutableElement") +} +func (s immutableSeries) Set(indexes Indexes, newvalues Series) Series { + panic("The method[Set] is not supported by immutableElement") +} +func (s immutableSeries) Append(values interface{}) { + panic("The method[Append] is not supported by immutableElement") +} + +type immutableElement struct { + Element +} + +func (e *immutableElement) Set(interface{}) { + panic("The method[Set] is not supported by immutableElement") +} +func (e *immutableElement) SetElement(val Element) { + panic("The method[SetElement] is not supported by immutableElement") +} +func (e *immutableElement) SetBool(val bool) { + panic("The method[SetBool] is not supported by immutableElement") +} +func (e *immutableElement) SetFloat(val float64) { + panic("The method[SetFloat] is not supported by immutableElement") +} +func (e *immutableElement) SetInt(val int) { + panic("The method[SetInt] is not supported by immutableElement") +} +func (e *immutableElement) SetString(val string) { + panic("The method[SetString] is not supported by immutableElement") +} diff --git a/series/series.go b/series/series.go index 1decfe2..823d20d 100644 --- a/series/series.go +++ b/series/series.go @@ -184,8 +184,9 @@ type Series interface { // FillNaNBackward fill NaN values using the next non-NaN value FillNaNBackward() // CacheAble returns a cacheable series and the returned series's calculation will be cached in case of repeate calcution. - // You should make sure that the series will not be modified and has a unique name. CacheAble() Series + // Immutable returns an immutable series and the series can not be modified. + Immutable() Series // Set sets the values on the indexes of a Series and returns the reference // for itself. The original Series is modified. Set(indexes Indexes, newvalues Series) Series @@ -1358,6 +1359,9 @@ func (s series) Rolling(window int, minPeriods int) RollingSeries { func (s series) CacheAble() Series { return newCacheAbleSeries(&s) } +func (s series) Immutable() Series { + return newImmutableSeries(&s) +} //Operation for multiple series calculation func Operation(operate func(index int, eles ...Element) interface{}, seriess ...Series) (Series, error) { From 836a329504250b0e83d1581244c54775ac9856ad Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 25 May 2022 18:04:38 +0800 Subject: [PATCH 54/60] optimize cacheable --- dataframe/self.go | 16 +++++++- go.mod | 2 - go.sum | 4 -- series/benchmarks_test.go | 55 +++++++++++++++++++++++++ series/cache.go | 79 ----------------------------------- series/cacherolling.go | 2 +- series/cacherolling_test.go | 71 -------------------------------- series/cacheseries.go | 82 +++++++++++++++++++++++++++++++++++-- series/cacheseries_test.go | 3 +- series/immutable.go | 22 +++++----- series/immutable_test.go | 65 +++++++++++++++++++++++++++++ series/rolling.go | 9 ++-- series/series.go | 4 +- 13 files changed, 236 insertions(+), 178 deletions(-) create mode 100644 series/immutable_test.go diff --git a/dataframe/self.go b/dataframe/self.go index daa103f..7a959ba 100644 --- a/dataframe/self.go +++ b/dataframe/self.go @@ -19,7 +19,7 @@ func (df *DataFrame) Self() Self { return self } -// AppendColumns Append columns on the DataFrame. +// AppendColumns Append columns on the DataFrame. The param's modification will influence the DataFrame's content after AppendColumns. func (s Self) AppendColumns(cols ...series.Series) error { if s.this.Err != nil || len(cols) == 0 { return nil @@ -46,6 +46,7 @@ func (s Self) AppendColumns(cols ...series.Series) error { } return nil } + // Capply applies the given function to the columns of a DataFrame, will influence the DataFrame's content. func (s Self) Capply(f func(series.Series)) { if s.this.Err != nil { @@ -55,3 +56,16 @@ func (s Self) Capply(f func(series.Series)) { f(s) } } + +// ImmutableCol returns an immutable Series of the DataFrame with the given column name contained in the DataFrame. +func (s Self) ImmutableCol(colname string) series.Series { + if s.this.Err != nil { + return series.Err(s.this.Err) + } + // Check that colname exist on dataframe + idx := findInStringSlice(colname, s.this.Names()) + if idx < 0 { + return series.Err(fmt.Errorf("unknown column name")) + } + return s.this.columns[idx].Immutable() +} \ No newline at end of file diff --git a/go.mod b/go.mod index 3b84a90..13022cf 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,6 @@ module github.com/mqy527/gota go 1.16 require ( - github.com/patrickmn/go-cache v2.1.0+incompatible - github.com/satori/go.uuid v1.2.0 golang.org/x/net v0.0.0-20210423184538-5f58ad60dda6 gonum.org/v1/gonum v0.9.1 ) diff --git a/go.sum b/go.sum index 532c9ae..20a4dc3 100644 --- a/go.sum +++ b/go.sum @@ -15,16 +15,12 @@ github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2C github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= -github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= -github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= -github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= diff --git a/series/benchmarks_test.go b/series/benchmarks_test.go index da2fa1f..ee7db5f 100644 --- a/series/benchmarks_test.go +++ b/series/benchmarks_test.go @@ -2,6 +2,7 @@ package series_test import ( "fmt" + "math" "math/rand" "strconv" "testing" @@ -378,3 +379,57 @@ func BenchmarkSeries_RollingCacheQuantile(b *testing.B) { }) } } + + + +func BenchmarkSeries_Quantile(b *testing.B) { + rand.Seed(100) + table := []struct { + name string + series series.Series + quantile float64 + }{ + { + "[]int(100000)_Int", + series.Ints(generateInts(100000)), + 0.75, + }, + { + "[]int(100000)_Float", + series.Floats(generateInts(100000)), + 0.45, + }, + } + for testnum, test := range table { + s := test.series + var result1, result2 float64 + b.Run(test.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + result1 = s.Quantile(test.quantile) + } + }) + s = s.CacheAble() + b.Run(test.name + "-cacheAbleSeries", func(b *testing.B) { + for i := 0; i < b.N; i++ { + result2 = s.Quantile(test.quantile) + } + }) + + if !compareFloats(result1, result2, 6) { + b.Errorf( + "Test:%v\nresult1:\n%v\nresult2:\n%v", + testnum, result1, result2, + ) + } + } +} + +func compareFloats(lvalue, rvalue float64, digits int) bool { + if math.IsNaN(lvalue) || math.IsNaN(rvalue) { + return math.IsNaN(lvalue) && math.IsNaN(rvalue) + } + d := math.Pow(10.0, float64(digits)) + lv := int(lvalue * d) + rv := int(rvalue * d) + return lv == rv +} \ No newline at end of file diff --git a/series/cache.go b/series/cache.go index a1841b5..05ef78c 100644 --- a/series/cache.go +++ b/series/cache.go @@ -1,81 +1,2 @@ package series -import ( - "sync" -) - -//Cache define series cache -type Cache interface { - Set(key string, value interface{}) - Get(key string) (interface{}, bool) - Clear() - Size() int - Delete(key string) - Copy() Cache -} - -type seriesCache struct { - c map[string]interface{} - mu sync.RWMutex -} - -func newSeriesCache() Cache { - ch := &seriesCache{ - c: map[string]interface{}{}, - mu: sync.RWMutex{}, - } - return ch -} - -func (dc *seriesCache) Set(key string, value interface{}) { - dc.mu.Lock() - dc.c[key] = value - dc.mu.Unlock() -} - -func (dc *seriesCache) Size() int { - dc.mu.RLock() - defer dc.mu.RUnlock() - return len(dc.c) -} - -func (dc *seriesCache) Get(key string) (interface{}, bool) { - dc.mu.RLock() - v, ok := dc.c[key] - dc.mu.RUnlock() - return v, ok -} - -func (dc *seriesCache) Clear() { - dc.mu.Lock() - dc.c = make(map[string]interface{}) - dc.mu.Unlock() -} - -func (dc *seriesCache) Delete(key string) { - dc.mu.Lock() - delete(dc.c, key) - dc.mu.Unlock() -} - -func (dc *seriesCache) Copy() Cache { - nc := &seriesCache{ - c: map[string]interface{}{}, - mu: sync.RWMutex{}, - } - dc.mu.RLock() - defer dc.mu.RUnlock() - for k, v := range dc.c { - switch vt := v.(type) { - case Series: - nc.c[k] = vt.Copy() - case Element: - nc.c[k] = vt.Copy() - case string, float64, int, bool: - nc.c[k] = vt - default: - nc.c[k] = vt - } - } - return nc -} diff --git a/series/cacherolling.go b/series/cacherolling.go index 64cf6c2..c346edf 100644 --- a/series/cacherolling.go +++ b/series/cacherolling.go @@ -18,7 +18,7 @@ func (rc *cacheAbleRollingSeries) cacheOrExecuteRolling(cacheKey string, f func( return nil } res.SetName(cacheKey) - ret := res.CacheAble() + ret := res.Immutable() rc.c.Set(cacheKey, ret) return ret } diff --git a/series/cacherolling_test.go b/series/cacherolling_test.go index 7601c04..7e211aa 100644 --- a/series/cacherolling_test.go +++ b/series/cacherolling_test.go @@ -4,7 +4,6 @@ import ( "fmt" "reflect" "testing" - "unsafe" ) func TestSeries_RollingCache(t *testing.T) { @@ -184,76 +183,6 @@ func TestSeries_RollingCacheMeanByWeights(t *testing.T) { } } -func TestSeries_F(t *testing.T) { - f := func() {} - fmt.Println("f: ", &f) - vf := f - fmt.Println("vfd: ", &vf) - - fmt.Println(*(*int64)(unsafe.Pointer(&f))) - fmt.Println(*(*int64)(unsafe.Pointer(&vf))) - -} - -func TestSeries_RollingCacheApply(t *testing.T) { - tests := []struct { - series Series - window int - minPeriod int - applyExpected Series - applyFunc func(window Series, windowIndex int) interface{} - t Type - }{ - { - Floats([]string{"1.5", "-3.23", "-0.337397", "-0.380079", "1.60979", "34."}), - 3, - 2, - Floats([]string{NaN, "2.5", "2.5", "-2.23", "0.662603", "0.619921"}), - func(window Series, windowIndex int) interface{} { - return window.Float()[0] + 1 - }, - "", - }, - { - Strings([]string{"20210618", "20200909", "20200910", "20200912", "20200911"}), - 3, - 2, - Strings([]string{NaN, "20210618-", "20210618-", "20200909-", "20200910-"}), - func(window Series, windowIndex int) interface{} { - return window.Elem(0).String() + "-" - }, - String, - }, - { - Ints([]string{"23", "13", "101", "-64", "-3"}), - 3, - 1, - Ints([]string{"24", "14", "102", "-63", "-2"}), - func(window Series, windowIndex int) interface{} { - i, _ := window.Elem(-1).Int() - return i + 1 - }, - Int, - }, - } - - for testnum, test := range tests { - var b Series - test.series.SetName(fmt.Sprintf("Name-%d", testnum)) - expected := test.applyExpected.Records() - rs := test.series.CacheAble().Rolling(test.window, test.minPeriod) - b = rs.Apply(test.applyFunc, test.t) - b = rs.Apply(test.applyFunc, test.t) - received := b.Records() - if !reflect.DeepEqual(expected, received) { - t.Errorf( - "Test-Apply:%v\nExpected:\n%v\nReceived:\n%v", - testnum, expected, received, - ) - } - } -} - func TestSeries_RollingRollingCache(t *testing.T) { tests := []struct { diff --git a/series/cacheseries.go b/series/cacheseries.go index 14427ce..bdeec6d 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -13,7 +13,7 @@ type cacheAbleSeries struct { func newCacheAbleSeries(s Series) Series { ret := &cacheAbleSeries{ - Series: s.Immutable(), + Series: s.Copy().Immutable(), c: newSeriesCache(), } return ret @@ -21,7 +21,7 @@ func newCacheAbleSeries(s Series) Series { func (cs cacheAbleSeries) Rolling(window int, minPeriods int) RollingSeries { cr := cacheAbleRollingSeries{ - RollingSeries: NewRollingSeries(window, minPeriods, cs.Series), + RollingSeries: newRollingSeries(window, minPeriods, cs.Series), c: newSeriesCache(), } return cr @@ -53,7 +53,7 @@ func (cs cacheAbleSeries) IsNaN() []bool { ret := cs.Series.IsNaN() return ret, nil }) - + bs := ret.([]bool) retCopy := make([]bool, len(bs)) copy(retCopy, bs) @@ -300,3 +300,79 @@ func (cs cacheAbleSeries) Copy() Series { func (cs *cacheAbleSeries) CacheAble() Series { return cs } + +func (cs *cacheAbleSeries) Str() string { + return cs.Series.Str() + "\n" + cs.c.State() +} + + +//Cache define series cache +type Cache interface { + Set(key string, value interface{}) + Get(key string) (interface{}, bool) + Clear() + Size() int + Delete(key string) + Copy() Cache + State() string +} + +type seriesCache struct { + c map[string]interface{} + setCount int + getCount int + hitCount int +} + +func newSeriesCache() Cache { + ch := &seriesCache{ + c: map[string]interface{}{}, + } + return ch +} + +func (dc *seriesCache) Set(key string, value interface{}) { + dc.setCount++ + dc.c[key] = value +} + +func (dc *seriesCache) Size() int { + return len(dc.c) +} + +func (dc *seriesCache) Get(key string) (interface{}, bool) { + dc.getCount++ + v, ok := dc.c[key] + if ok { + dc.hitCount++ + } + return v, ok +} + +func (dc *seriesCache) Clear() { + dc.c = make(map[string]interface{}) + dc.setCount = 0 + dc.getCount = 0 + dc.hitCount = 0 +} + +func (dc *seriesCache) Delete(key string) { + delete(dc.c, key) +} + +func (dc *seriesCache) Copy() Cache { + nc := &seriesCache{ + c: map[string]interface{}{}, + setCount: dc.setCount, + getCount: dc.getCount, + hitCount: dc.hitCount, + } + for k, v := range dc.c { + nc.c[k] = v + } + return nc +} + +func (dc *seriesCache) State() string { + return fmt.Sprintf("Cache info: size: %d, setCount: %d, getCount: %d, hitCount: %d\n", dc.Size(), dc.setCount, dc.getCount, dc.hitCount) +} diff --git a/series/cacheseries_test.go b/series/cacheseries_test.go index 96e4500..299ddb3 100644 --- a/series/cacheseries_test.go +++ b/series/cacheseries_test.go @@ -1,6 +1,7 @@ package series import ( + "fmt" "reflect" "testing" ) @@ -42,7 +43,7 @@ func TestCacheSeries_Add(t *testing.T) { testnum, exp, rev, ) } - + fmt.Printf("testnum[%d] series state info:\n %s", testnum, tmpSeries.Str()) } } diff --git a/series/immutable.go b/series/immutable.go index 58d8fe1..16cb853 100644 --- a/series/immutable.go +++ b/series/immutable.go @@ -2,6 +2,7 @@ package series var _ Series = (*immutableSeries)(nil) +//immutableSeries is an immutable series and the series can not be modified. type immutableSeries struct { Series } @@ -30,22 +31,23 @@ func (s immutableSeries) Slice(start, end int) Series { } return ret } -func (s immutableSeries) FillNaN(value ElementValue) { - panic("The method[FillNaN] is not supported by immutableElement") +func (s *immutableSeries) FillNaN(value ElementValue) { + panic("The method[FillNaN] is not supported by immutableSeries") } -func (s immutableSeries) FillNaNForward() { - panic("The method[FillNaNForward] is not supported by immutableElement") +func (s *immutableSeries) FillNaNForward() { + panic("The method[FillNaNForward] is not supported by immutableSeries") } -func (s immutableSeries) FillNaNBackward() { - panic("The method[FillNaNBackward] is not supported by immutableElement") +func (s *immutableSeries) FillNaNBackward() { + panic("The method[FillNaNBackward] is not supported by immutableSeries") } -func (s immutableSeries) Set(indexes Indexes, newvalues Series) Series { - panic("The method[Set] is not supported by immutableElement") +func (s *immutableSeries) Set(indexes Indexes, newvalues Series) Series { + panic("The method[Set] is not supported by immutableSeries") } -func (s immutableSeries) Append(values interface{}) { - panic("The method[Append] is not supported by immutableElement") +func (s *immutableSeries) Append(values interface{}) { + panic("The method[Append] is not supported by immutableSeries") } +//immutableElement is an immutable element and the element can not be modified. type immutableElement struct { Element } diff --git a/series/immutable_test.go b/series/immutable_test.go new file mode 100644 index 0000000..ea92324 --- /dev/null +++ b/series/immutable_test.go @@ -0,0 +1,65 @@ +package series + +import ( + "fmt" + "strings" + "testing" +) + +func TestImmutableSeries_ModifyPanic(t *testing.T) { + tests := []struct { + series Series + modifySeries func(Series) + }{ + { + Ints([]string{"2", "1", "3", "NaN", "4", "NaN"}), + func(s Series) { + s.Elem(0).SetString(NaN) + }, + }, + { + Floats([]string{"2", "1", "3", "NaN", "4", "NaN"}), + func(s Series) { + s.FillNaN("1") + }, + }, + { + Strings([]string{"c", "b", "a"}), + func(s Series) { + s.FillNaNForward() + }, + }, + { + Bools([]bool{true, false, false, false, true}), + func(s Series) { + s.FillNaNBackward() + }, + }, + { + Strings([]string{"c", "b", "a"}), + func(s Series) { + s.Set(0, NewDefault("a", String, "", 1)) + }, + }, + { + Bools([]bool{true, false, false, false, true}), + func(s Series) { + s.Append([]bool{true, false}) + }, + }, + } + for testnum, test := range tests { + received := test.series.Immutable() + modifySeries := test.modifySeries + name := fmt.Sprintf("Test-%d", testnum) + t.Run(name, func(t *testing.T) { + defer func() { + err := recover() + if err == nil || !strings.Contains(err.(string), "is not supported by") { + t.Errorf("Test:%v\nError, must panic: %v", testnum, err) + } + }() + modifySeries(received) + }) + } +} diff --git a/series/rolling.go b/series/rolling.go index 4670083..b697072 100644 --- a/series/rolling.go +++ b/series/rolling.go @@ -2,6 +2,7 @@ package series import ( "fmt" + "gonum.org/v1/gonum/floats" ) @@ -59,7 +60,7 @@ func NewRollingWindow(s Series, windowSize int) RollingWindow { startIndex: 0, endIndexExclude: 1, windowSize: windowSize, - s: s.Copy(), + s: s, } } @@ -77,8 +78,8 @@ func (rw *rollingWindow) NextWindow() Series { return window } -//NewRollingSeries establish a rolling Series -func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { +//newRollingSeries establish a rolling Series +func newRollingSeries(window int, minPeriods int, s Series) RollingSeries { if window < 1 { panic("window must >= 1") } @@ -86,7 +87,7 @@ func NewRollingSeries(window int, minPeriods int, s Series) RollingSeries { panic("minPeriods must >= 1 && minPeriods must <= window") } return rollingSeries{ - Series: s, + Series: s.Copy().Immutable(), window: window, minPeriods: minPeriods, } diff --git a/series/series.go b/series/series.go index 823d20d..4c22454 100644 --- a/series/series.go +++ b/series/series.go @@ -183,7 +183,7 @@ type Series interface { FillNaNForward() // FillNaNBackward fill NaN values using the next non-NaN value FillNaNBackward() - // CacheAble returns a cacheable series and the returned series's calculation will be cached in case of repeate calcution. + // CacheAble returns a cacheable series and the returned series's calculation will be cached in case of repeate calculation. CacheAble() Series // Immutable returns an immutable series and the series can not be modified. Immutable() Series @@ -1351,7 +1351,7 @@ func (s series) FillNaNBackward() { } func (s series) Rolling(window int, minPeriods int) RollingSeries { - return NewRollingSeries(window, minPeriods, &s) + return newRollingSeries(window, minPeriods, &s) } // CacheAble returns a cacheable series and the returned series's calculation will be cached in case of repeate calcution. From 5fc68cfed82cac135e5aaa1f78c92be1b26f34e2 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 25 May 2022 21:10:10 +0800 Subject: [PATCH 55/60] delete cache --- series/cache.go | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 series/cache.go diff --git a/series/cache.go b/series/cache.go deleted file mode 100644 index 05ef78c..0000000 --- a/series/cache.go +++ /dev/null @@ -1,2 +0,0 @@ -package series - From b54ea74b13cd00303b21b1bef45f691b9480874c Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Mon, 13 Jun 2022 16:03:26 +0800 Subject: [PATCH 56/60] =?UTF-8?q?delete=20some=20AddConst=E3=80=81MulConst?= =?UTF-8?q?=E3=80=81DivConst=20from=20cacheAbleSeries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series/cacheseries.go | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/series/cacheseries.go b/series/cacheseries.go index bdeec6d..beb3158 100644 --- a/series/cacheseries.go +++ b/series/cacheseries.go @@ -229,36 +229,6 @@ func (cs cacheAbleSeries) Prod() float64 { return ret.(float64) } -func (cs cacheAbleSeries) AddConst(c float64) Series { - cacheKey := fmt.Sprintf("AddConst(%f)", c) - ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.AddConst(c) - ret := res.Immutable() - return ret, nil - }) - return ret.(Series) -} - -func (cs cacheAbleSeries) MulConst(c float64) Series { - cacheKey := fmt.Sprintf("MulConst(%f)", c) - ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.MulConst(c) - ret := res.Immutable() - return ret, nil - }) - return ret.(Series) -} - -func (cs cacheAbleSeries) DivConst(c float64) Series { - cacheKey := fmt.Sprintf("DivConst(%f)", c) - ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { - res := cs.Series.DivConst(c) - ret := res.Immutable() - return ret, nil - }) - return ret.(Series) -} - func (cs cacheAbleSeries) Abs() Series { cacheKey := "Abs" ret, _ := cs.cacheOrExecute(cacheKey, func() (interface{}, error) { From 97b5a4d5cbd8f9e5f416c2bcb10462a6aaae4449 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Thu, 7 Jul 2022 18:13:58 +0800 Subject: [PATCH 57/60] Sum support Bool --- series/series.go | 2 +- series/series_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/series/series.go b/series/series.go index 4c22454..56c95fe 100644 --- a/series/series.go +++ b/series/series.go @@ -1405,7 +1405,7 @@ func Operation(operate func(index int, eles ...Element) interface{}, seriess ... // Sum calculates the sum value of a series func (s series) Sum() float64 { - if s.elements.Len() == 0 || s.Type() == String || s.Type() == Bool { + if s.elements.Len() == 0 || s.Type() == String { return math.NaN() } sFloat := s.Float() diff --git a/series/series_test.go b/series/series_test.go index 0523da3..acc5c09 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -2244,7 +2244,7 @@ func TestSeries_Sum(t *testing.T) { }, { Bools([]bool{true, true, false, true}), - math.NaN(), + 3.0, }, { Floats([]float64{}), From 3962767ec15ebd0bf079ba0562f730f0ecbdcaa8 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sat, 24 Dec 2022 14:50:14 +0800 Subject: [PATCH 58/60] =?UTF-8?q?add=20method:CapplyWithName=E3=80=81Renam?= =?UTF-8?q?e=E3=80=81RemoveCols?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe/self.go | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/dataframe/self.go b/dataframe/self.go index 7a959ba..c8985b2 100644 --- a/dataframe/self.go +++ b/dataframe/self.go @@ -57,6 +57,18 @@ func (s Self) Capply(f func(series.Series)) { } } +// CapplyWithName applies the given function to the column, will influence the DataFrame's content. +func (s Self) CapplyWithName(colname string, f func(series.Series)) { + if s.this.Err != nil { + return + } + idx := findInStringSlice(colname, s.this.Names()) + if idx < 0 { + return + } + f(s.this.columns[idx]) +} + // ImmutableCol returns an immutable Series of the DataFrame with the given column name contained in the DataFrame. func (s Self) ImmutableCol(colname string) series.Series { if s.this.Err != nil { @@ -68,4 +80,35 @@ func (s Self) ImmutableCol(colname string) series.Series { return series.Err(fmt.Errorf("unknown column name")) } return s.this.columns[idx].Immutable() -} \ No newline at end of file +} + +// Rename changes the name of one of the columns of a DataFrame +func (s Self) Rename(newname, oldname string) { + if s.this.Err != nil { + return + } + // Check that colname exist on dataframe + colnames := s.this.Names() + idx := findInStringSlice(oldname, colnames) + if idx == -1 { + return + } + s.this.columns[idx].SetName(newname) +} + +func (s Self) RemoveCols(removedColnames ...string) { + if s.this.Err != nil || len(removedColnames) == 0 { + return + } + var cols []series.Series + // Check that colname exist on dataframe + colnames := s.this.Names() + for i := 0; i < len(colnames); i++ { + idx := findInStringSlice(colnames[i], removedColnames) + if idx == -1 { + cols = append(cols, s.this.columns[i]) + } + } + s.this.columns = cols + s.this.ncols = len(cols) +} From 7a1c43b20b24796b7e80e0c3d46885b040c1c968 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Sun, 25 Dec 2022 11:31:13 +0800 Subject: [PATCH 59/60] =?UTF-8?q?change=20method=20name=EF=BC=9ACapplyWith?= =?UTF-8?q?Name-->CapplyByName?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe/self.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataframe/self.go b/dataframe/self.go index c8985b2..a4e1874 100644 --- a/dataframe/self.go +++ b/dataframe/self.go @@ -57,8 +57,8 @@ func (s Self) Capply(f func(series.Series)) { } } -// CapplyWithName applies the given function to the column, will influence the DataFrame's content. -func (s Self) CapplyWithName(colname string, f func(series.Series)) { +// CapplyByName applies the given function to the column, will influence the DataFrame's content. +func (s Self) CapplyByName(colname string, f func(series.Series)) { if s.this.Err != nil { return } From a3a8aac6719b55fc1e76b987c085828fd4b0d5a2 Mon Sep 17 00:00:00 2001 From: mqyqingkong Date: Wed, 19 Jul 2023 21:29:24 +0800 Subject: [PATCH 60/60] add FloatValuer --- series/type-float.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/series/type-float.go b/series/type-float.go index 8b070f6..2c1616c 100644 --- a/series/type-float.go +++ b/series/type-float.go @@ -26,6 +26,9 @@ func (e *floatElement) Set(value interface{}) { e.SetBool(val) case Element: e.SetElement(val) + case FloatValuer: + e.e = val.Float() + e.nan = math.IsNaN(e.e) default: e.nan = true } @@ -182,3 +185,12 @@ func (e floatElement) GreaterEq(elem Element) bool { } return e.e >= f } + +// FloatValuer is the interface providing the Float method. +// +// Types implementing FloatValuer interface are able to convert +// themselves to a float Value. +type FloatValuer interface { + // Float returns a float64 value. + Float() float64 +}