diff --git a/config/config.go b/config/config.go index 0accebd..4ef6e2b 100644 --- a/config/config.go +++ b/config/config.go @@ -37,6 +37,7 @@ func defaultConfig() *Config { "fc2", "18av", "freejavbt", + "tktube", "avsox", }, Handlers: []string{ diff --git a/searcher/plugin/field_parser.go b/searcher/parser/date_parser.go similarity index 58% rename from searcher/plugin/field_parser.go rename to searcher/parser/date_parser.go index 23ab862..b5e26fe 100644 --- a/searcher/plugin/field_parser.go +++ b/searcher/parser/date_parser.go @@ -1,4 +1,4 @@ -package plugin +package parser import ( "context" @@ -9,17 +9,6 @@ import ( "go.uber.org/zap" ) -func DefaultDurationParser(ctx context.Context) decoder.NumberParseFunc { - return func(v string) int64 { - val, err := utils.ToDuration(v) - if err != nil { - logutil.GetLogger(ctx).Error("decode duration failed", zap.Error(err), zap.String("data", v)) - return 0 - } - return val - } -} - func DefaultReleaseDateParser(ctx context.Context) decoder.NumberParseFunc { return func(v string) int64 { val, err := utils.ToTimestamp(v) diff --git a/searcher/parser/duration_parser.go b/searcher/parser/duration_parser.go new file mode 100644 index 0000000..c4b1a76 --- /dev/null +++ b/searcher/parser/duration_parser.go @@ -0,0 +1,45 @@ +package parser + +import ( + "context" + "math" + "strconv" + "strings" + "yamdc/searcher/decoder" + "yamdc/searcher/utils" + + "github.com/xxxsen/common/logutil" + "go.uber.org/zap" +) + +func DefaultHHMMSSDurationParser(ctx context.Context) decoder.NumberParseFunc { + return func(v string) int64 { + res := strings.Split(v, ":") + if len(res) > 3 { + logutil.GetLogger(ctx).Error("invalid time format", zap.String("data", v)) + return 0 + } + var sec int64 + for i := 0; i < len(res); i++ { + item := strings.TrimSpace(res[len(res)-i-1]) + val, err := strconv.ParseInt(item, 10, 60) + if err != nil { + logutil.GetLogger(ctx).Error("invalid time format", zap.String("data", v)) + return 0 + } + sec += val * int64(math.Pow(60, float64(i))) + } + return sec + } +} + +func DefaultDurationParser(ctx context.Context) decoder.NumberParseFunc { + return func(v string) int64 { + val, err := utils.ToDuration(v) + if err != nil { + logutil.GetLogger(ctx).Error("decode duration failed", zap.Error(err), zap.String("data", v)) + return 0 + } + return val + } +} diff --git a/searcher/parser/duration_parser_test.go b/searcher/parser/duration_parser_test.go new file mode 100644 index 0000000..679812b --- /dev/null +++ b/searcher/parser/duration_parser_test.go @@ -0,0 +1,24 @@ +package parser + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +type testPair struct { + in string + sec int64 +} + +func TestHHMMSS(t *testing.T) { + tests := []testPair{ + {in: "01 :01: 01", sec: 1*3600 + 60 + 1}, + {in: "02: 05", sec: 2*60 + 5}, + } + for _, tst := range tests { + out := DefaultHHMMSSDurationParser(context.Background())(tst.in) + assert.Equal(t, tst.sec, out) + } +} diff --git a/searcher/plugin/18av.go b/searcher/plugin/18av.go index a5eb548..ebcf72c 100644 --- a/searcher/plugin/18av.go +++ b/searcher/plugin/18av.go @@ -7,6 +7,7 @@ import ( "yamdc/model" "yamdc/number" "yamdc/searcher/decoder" + "yamdc/searcher/parser" ) type av18 struct { @@ -82,8 +83,8 @@ func (p *av18) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.AvMeta, meta, err := dec.DecodeHTML(data, decoder.WithCoverParser(p.coverParser), decoder.WithPlotParser(p.plotParser), - decoder.WithDurationParser(DefaultDurationParser(ctx.GetContext())), - decoder.WithReleaseDateParser(DefaultReleaseDateParser(ctx.GetContext())), + decoder.WithDurationParser(parser.DefaultDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), ) if err != nil { return nil, false, err diff --git a/searcher/plugin/avsox.go b/searcher/plugin/avsox.go index 86828d9..9e31411 100644 --- a/searcher/plugin/avsox.go +++ b/searcher/plugin/avsox.go @@ -7,6 +7,7 @@ import ( "yamdc/model" "yamdc/number" "yamdc/searcher/decoder" + "yamdc/searcher/parser" "yamdc/searcher/utils" "github.com/xxxsen/common/logutil" @@ -121,8 +122,8 @@ func (p *avsox) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.AvMeta SampleImageListExpr: "", } meta, err := dec.DecodeHTML(data, - decoder.WithReleaseDateParser(DefaultReleaseDateParser(ctx.GetContext())), - decoder.WithDurationParser(DefaultDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), + decoder.WithDurationParser(parser.DefaultDurationParser(ctx.GetContext())), decoder.WithDefaultStringProcessor(strings.TrimSpace), ) if err != nil { diff --git a/searcher/plugin/constant.go b/searcher/plugin/constant.go index 49a9271..501066d 100644 --- a/searcher/plugin/constant.go +++ b/searcher/plugin/constant.go @@ -11,4 +11,5 @@ const ( SSFreeJavBt = "freejavbt" SSJavDB = "javdb" SS18AV = "18av" + SSTKTube = "tktube" ) diff --git a/searcher/plugin/freejavbt.go b/searcher/plugin/freejavbt.go index ead2340..67603c2 100644 --- a/searcher/plugin/freejavbt.go +++ b/searcher/plugin/freejavbt.go @@ -5,6 +5,7 @@ import ( "yamdc/model" "yamdc/number" "yamdc/searcher/decoder" + "yamdc/searcher/parser" putils "yamdc/searcher/utils" ) @@ -36,8 +37,8 @@ func (p *freejavbt) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.Av SampleImageListExpr: `//div[@class="preview"]/a/img/@data-src`, } res, err := dec.DecodeHTML(data, - decoder.WithDurationParser(DefaultDurationParser(ctx.GetContext())), - decoder.WithReleaseDateParser(DefaultReleaseDateParser(ctx.GetContext())), + decoder.WithDurationParser(parser.DefaultDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), ) if err != nil { return nil, false, err diff --git a/searcher/plugin/jav321.go b/searcher/plugin/jav321.go index 82abd08..b94ee7c 100644 --- a/searcher/plugin/jav321.go +++ b/searcher/plugin/jav321.go @@ -8,6 +8,7 @@ import ( "yamdc/model" "yamdc/number" "yamdc/searcher/decoder" + "yamdc/searcher/parser" putils "yamdc/searcher/utils" ) @@ -70,8 +71,8 @@ func (p *jav321) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.AvMet } rs, err := dec.DecodeHTML(data, decoder.WithDefaultStringProcessor(p.defaultStringProcessor), - decoder.WithReleaseDateParser(DefaultReleaseDateParser(ctx.GetContext())), - decoder.WithDurationParser(DefaultDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), + decoder.WithDurationParser(parser.DefaultDurationParser(ctx.GetContext())), ) if err != nil { return nil, false, err diff --git a/searcher/plugin/javbus.go b/searcher/plugin/javbus.go index daf1f53..ce54098 100644 --- a/searcher/plugin/javbus.go +++ b/searcher/plugin/javbus.go @@ -5,6 +5,7 @@ import ( "yamdc/model" "yamdc/number" "yamdc/searcher/decoder" + "yamdc/searcher/parser" putils "yamdc/searcher/utils" ) @@ -52,8 +53,8 @@ func (p *javbus) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.AvMet SampleImageListExpr: `//div[@id="sample-waterfall"]/a[@class="sample-box"]/@href`, } rs, err := dec.DecodeHTML(data, - decoder.WithReleaseDateParser(DefaultReleaseDateParser(ctx.GetContext())), - decoder.WithDurationParser(DefaultDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), + decoder.WithDurationParser(parser.DefaultDurationParser(ctx.GetContext())), ) if err != nil { return nil, false, err diff --git a/searcher/plugin/javdb.go b/searcher/plugin/javdb.go index b4bf06b..1ed03aa 100644 --- a/searcher/plugin/javdb.go +++ b/searcher/plugin/javdb.go @@ -6,6 +6,7 @@ import ( "yamdc/model" "yamdc/number" "yamdc/searcher/decoder" + "yamdc/searcher/parser" "yamdc/searcher/utils" ) @@ -67,8 +68,8 @@ func (p *javdb) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.AvMeta SampleImageListExpr: `//div[@class="tile-images preview-images"]/a[@class="tile-item"]/@href`, } meta, err := dec.DecodeHTML(data, - decoder.WithReleaseDateParser(DefaultReleaseDateParser(ctx.GetContext())), - decoder.WithDurationParser(DefaultDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), + decoder.WithDurationParser(parser.DefaultDurationParser(ctx.GetContext())), ) if err != nil { return nil, false, err diff --git a/searcher/plugin/javhoo.go b/searcher/plugin/javhoo.go index d9ed03b..febb538 100644 --- a/searcher/plugin/javhoo.go +++ b/searcher/plugin/javhoo.go @@ -6,6 +6,7 @@ import ( "yamdc/model" "yamdc/number" "yamdc/searcher/decoder" + "yamdc/searcher/parser" putils "yamdc/searcher/utils" ) @@ -36,8 +37,8 @@ func (p *javhoo) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.AvMet SampleImageListExpr: `//div[@id="sample-box"]/div/a/@href`, } meta, err := dec.DecodeHTML(data, - decoder.WithReleaseDateParser(DefaultReleaseDateParser(ctx.GetContext())), - decoder.WithDurationParser(DefaultDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), + decoder.WithDurationParser(parser.DefaultDurationParser(ctx.GetContext())), ) if err != nil { return nil, false, err diff --git a/searcher/plugin/tktube.go b/searcher/plugin/tktube.go new file mode 100644 index 0000000..f0651d0 --- /dev/null +++ b/searcher/plugin/tktube.go @@ -0,0 +1,86 @@ +package plugin + +import ( + "fmt" + "net/http" + "strings" + "yamdc/model" + "yamdc/number" + "yamdc/searcher/decoder" + "yamdc/searcher/parser" +) + +type tktube struct { + DefaultPlugin +} + +func (p *tktube) OnPrecheckRequest(ctx *PluginContext, n *number.Number) (bool, error) { + return number.IsFc2(n.GetNumberID()), nil +} + +func (p *tktube) OnMakeHTTPRequest(ctx *PluginContext, n *number.Number) (*http.Request, error) { + nid := strings.ReplaceAll(n.GetNumberID(), "-", "--") + ctx.SetKey("number", n.GetNumberID()) + uri := fmt.Sprintf("https://tktube.com/zh/search/%s/", nid) + return http.NewRequest(http.MethodGet, uri, nil) +} + +func (p *tktube) OnHandleHTTPRequest(ctx *PluginContext, invoker HTTPInvoker, req *http.Request) (*http.Response, error) { + numberId := strings.ToUpper(ctx.GetKeyOrDefault("number", "").(string)) + return HandleXPathTwoStepSearch(ctx, invoker, req, &XPathTwoStepContext{ + Ps: []*XPathPair{ + { + Name: "links", + XPath: `//div[@id="list_videos_videos_list_search_result_items"]/div/a/@href`, + }, + { + Name: "names", + XPath: `//div[@id="list_videos_videos_list_search_result_items"]/div/a/strong[@class="title"]/text()`, + }, + }, + LinkSelector: func(ps []*XPathPair) (string, bool, error) { + links := ps[0].Result + names := ps[1].Result + for i := 0; i < len(links); i++ { + if strings.Contains(strings.ToUpper(names[i]), numberId) { + return links[i], true, nil + } + } + return "", false, nil + }, + ValidStatusCode: []int{http.StatusOK}, + CheckResultCountMatch: true, + LinkPrefix: "", + }) +} + +func (p *tktube) OnDecodeHTTPData(ctx *PluginContext, data []byte) (*model.AvMeta, bool, error) { + dec := decoder.XPathHtmlDecoder{ + TitleExpr: `//div[@class="headline"]/h1/text()`, + PlotExpr: "", + ActorListExpr: `//div[contains(text(), "女優:")]/a[contains(@href, "models")]/text()`, + ReleaseDateExpr: `//div[@class="item"]/span[contains(text(), "加入日期:")]/em/text()`, + DurationExpr: `//div[@class="item"]/span[contains(text(), "時長:")]/em/text()`, + StudioExpr: "", + LabelExpr: "", + DirectorExpr: "", + SeriesExpr: "", + GenreListExpr: `//div[contains(text(), "標籤:")]/a[contains(@href, "tags")]/text()`, + CoverExpr: `//meta[@property="og:image"]/@content`, + PosterExpr: "", + SampleImageListExpr: "", + } + meta, err := dec.DecodeHTML(data, + decoder.WithDurationParser(parser.DefaultHHMMSSDurationParser(ctx.GetContext())), + decoder.WithReleaseDateParser(parser.DefaultReleaseDateParser(ctx.GetContext())), + ) + if err != nil { + return nil, false, err + } + meta.Number = ctx.GetKeyOrDefault("number", "").(string) + return meta, true, nil +} + +func init() { + Register(SSTKTube, PluginToCreator(&tktube{})) +}