Skip to content

Commit

Permalink
code refactor; get page by colly;
Browse files Browse the repository at this point in the history
  • Loading branch information
varg1714 committed May 25, 2024
1 parent a91f0b5 commit 6c1eaab
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 242 deletions.
2 changes: 1 addition & 1 deletion drivers/fc2/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func (d *FC2) MakeDir(ctx context.Context, parentDir model.Obj, dirName string)
var url string
if actorType == 0 {
// 0 演员
url = fmt.Sprintf("https://adult.contents.fc2.com/users/%s/articles?sort=popular&order=desc&deal=", split[1]) + "&page=%d"
url = fmt.Sprintf("https://adult.contents.fc2.com/users/%s/articles?sort=assess&order=desc&deal=", split[1]) + "&page=%d"
} else if actorType == 1 {
// yearly
url = fmt.Sprintf("https://adult.contents.fc2.com/ranking/article/yearly?year=%s", split[1]) + "&page=%d"
Expand Down
219 changes: 59 additions & 160 deletions drivers/fc2/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package fc2
import (
"fmt"
"github.com/alist-org/alist/v3/drivers/base"
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/drivers/virtual_file"
"github.com/alist-org/alist/v3/internal/model"
"gorm.io/gorm/utils"
"github.com/gocolly/colly/v2"
"regexp"
"strings"
"time"
Expand All @@ -14,65 +14,10 @@ import (
var subTitles, _ = regexp.Compile(".*<a href=\"(.*)\" title=\".*</a>.*")
var magnetUrl, _ = regexp.Compile(".*<a href=\"(.*)\" class=\".*\"><i class=\".*\"></i>Magnet</a>.*")

var actorUrlsRegexp, _ = regexp.Compile(".*<a href=\"/article_search.php\\?id=(.*)\"data-counter=\".*\" data-counter-id=\".*?\"title=\"(.*)\"class=\".*\"id=\".*\"false\">.*")
var actorImageRegexp, _ = regexp.Compile(".*<img src=\"(.*?)\">.*")
var rankingUrlsRegexp, _ = regexp.Compile(".*<h3><a href=\"/article_search.php\\?id=(.*?)\">(.*?)</a></h3>.*")
var rankingImageRegexp, _ = regexp.Compile(".*<img src=\"(.*?)\">.*<h3><a href=\"/article_search.php\\?id=.*?\">.*?</a></h3>.*")

// convertToModel zips the parallel films, images and urls slices into
// folder-type ObjThumb entries, one per element of films.
//
// films[i] becomes the entry name and urls[i] its ID. images[i] becomes the
// thumbnail when it exists; images may be shorter than films, in which case
// the remaining entries get an empty thumbnail. urls is indexed without a
// bounds check, so it must hold at least len(films) elements.
func convertToModel(films []string, images []string, urls []string) []model.ObjThumb {
	results := make([]model.ObjThumb, 0, len(films))

	for index, film := range films {
		// Guard with len, not cap: a slice grown by append usually has spare
		// capacity, and indexing past len panics even when index < cap.
		var image string
		if index < len(images) {
			image = images[index]
		}

		results = append(results, model.ObjThumb{
			Object: model.Object{
				Name:     film,
				IsFolder: true,
				ID:       urls[index],
				Size:     622857143, // every entry reports this same hard-coded size
				Modified: time.Now(),
			},
			Thumbnail: model.Thumbnail{Thumbnail: image},
		})
	}
	return results
}

// findPage retrieves the page at url by way of the configured spider server
// and returns the response body as a string.
//
// The request is not sent to url itself: a body naming the target url, the
// HTTP method ("GET") and a fixed set of browser-like headers is POSTed to
// d.Addition.SpiderServer.
// NOTE(review): presumably the spider server performs the described request
// and relays the page content back; confirm against that service.
//
// On a transport error the returned string is empty and the error is passed
// through unchanged.
func (d *FC2) findPage(url string) (string, error) {
	forwardedHeaders := base.Json{
		"user-agent":      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
		"referer":         "https://adult.contents.fc2.com",
		"X-PJAX":          "true",
		"Accept-Language": "zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7,ko;q=0.6,ja;q=0.5",
		"host":            "adult.contents.fc2.com",
	}
	payload := base.Json{
		"url":        url,
		"httpMethod": "GET",
		"headers":    forwardedHeaders,
	}

	resp, err := base.RestyClient.R().SetBody(payload).Post(d.Addition.SpiderServer)
	if err != nil {
		return "", err
	}

	return resp.String(), nil
}
var actorUrlsRegexp, _ = regexp.Compile(".*/article_search.php\\?id=(.*).")

func (d *FC2) findMagnet(url string) (string, error) {

//log.Infof("开始查询:%s", url)

res, err := base.RestyClient.R().
SetBody(base.Json{
"url": url,
Expand All @@ -88,85 +33,18 @@ func (d *FC2) findMagnet(url string) (string, error) {

func (d *FC2) getFilms(dirName string, urlFunc func(index int) string) ([]model.Obj, error) {

results := make([]model.Obj, 0)

films := make([]string, 0)
images := make([]string, 0)
urls := make([]string, 0)
nextPage := false
var err error

films, images, urls, nextPage, err = d.getPageInfo(urlFunc, 1, films, images, urls)
if err != nil {
return results, err
}

existFilms := db.QueryByUrls(dirName, urls)

// not exists
for index := 2; index <= 20 && nextPage && len(existFilms) == 0; index++ {

films, images, urls, nextPage, err = d.getPageInfo(urlFunc, index, films, images, urls)
if err != nil {
return results, err
}

existFilms = db.QueryByUrls(dirName, urls)

}
// exist
for index, url := range urls {
if utils.Contains(existFilms, url) {
if index == 0 {
urls = []string{}
images = []string{}
films = []string{}
} else {
urls = urls[:index]
images = images[:index]
films = films[:index]
}
break
}
}

if len(urls) != 0 {
err = db.CreateFilms("fc2", dirName, convertToModel(films, images, urls))
if err != nil {
return results, nil
}
if strings.HasPrefix(urlFunc(1), "https://adult.contents.fc2.com/users") {
return virtual_file.GetFilmsWitchStorage("fc2", dirName, urlFunc,
func(urlFunc func(index int) string, index int, data []model.ObjThumb) ([]model.ObjThumb, bool, error) {
return d.getPageInfo(urlFunc, index, data)
})
} else {
return virtual_file.GetFilms(dirName, urlFunc,
func(urlFunc func(index int) string, index int, data []model.ObjThumb) ([]model.ObjThumb, bool, error) {
return d.getPageInfo(urlFunc, index, data)
})
}

return d.convertFilm(dirName, db.QueryByActor("fc2", dirName), results), nil

}

// convertFilm appends two entries to results for every film in actor and
// returns the extended slice.
//
// For the film at position i the entries are, in order:
//   - a folder named "<i as 4 digits> <film.Name>" whose ID is film.Url;
//   - a file with the same name plus ".jpg" whose ID is film.Image.
//
// Both entries use dirName as Path, film.Image as thumbnail, the same
// hard-coded size, and the current time as modification time.
func (d *FC2) convertFilm(dirName string, actor []model.Film, results []model.Obj) []model.Obj {
	for i, item := range actor {
		// Zero-padded position prefix, e.g. "0007 Some Title".
		label := fmt.Sprintf("%04d %s", i, item.Name)
		thumb := model.Thumbnail{Thumbnail: item.Image}

		folder := &model.ObjThumb{
			Object: model.Object{
				Name:     label,
				IsFolder: true,
				ID:       item.Url,
				Size:     622857143,
				Modified: time.Now(),
				Path:     dirName,
			},
			Thumbnail: thumb,
		}

		cover := &model.ObjThumb{
			Object: model.Object{
				Name:     label + ".jpg",
				IsFolder: false,
				ID:       item.Image,
				Size:     622857143,
				Modified: time.Now(),
				Path:     dirName,
			},
			Thumbnail: thumb,
		}

		results = append(results, folder, cover)
	}
	return results
}

func (d *FC2) getMagnet(file model.Obj) (string, error) {
Expand All @@ -193,41 +71,62 @@ func (d *FC2) getMagnet(file model.Obj) (string, error) {

}

func (d *FC2) getPageInfo(urlFunc func(index int) string, index int, films []string, images []string, urls []string) ([]string, []string, []string, bool, error) {
func (d *FC2) getPageInfo(urlFunc func(index int) string, index int, data []model.ObjThumb) ([]model.ObjThumb, bool, error) {

pageUrl := urlFunc(index)
preLen := len(data)

var urlsRegexp *regexp.Regexp
var imageRegexp *regexp.Regexp
collector := colly.NewCollector(func(c *colly.Collector) {
c.SetRequestTimeout(time.Second * 10)
_ = c.SetProxy("http://127.0.0.1:7890")
})

tableContainer := ""
filmDetailContainer := ""
filmUrlSelector := ""
filmTitleSelector := ""
filmImageSelector := ""

if strings.HasPrefix(pageUrl, "https://adult.contents.fc2.com/users") {
// user
urlsRegexp = actorUrlsRegexp
imageRegexp = actorImageRegexp
tableContainer = ".seller_user_articlesList"
filmDetailContainer = ".c-cntCard-110-f"
filmUrlSelector = ".c-cntCard-110-f_itemName"
filmTitleSelector = ".c-cntCard-110-f_itemName"
filmImageSelector = ".c-cntCard-110-f_thumb img"
} else {
// ranking
urlsRegexp = rankingUrlsRegexp
imageRegexp = rankingImageRegexp
}

res, err := d.findPage(pageUrl)
if err != nil {
return films, images, urls, false, nil
}

tempUrls := urlsRegexp.FindAllString(res, -1)
imageUrls := imageRegexp.FindAllString(res, -1)
tableContainer = ".c-rankbox-100"
filmDetailContainer = ".c-ranklist-110"
filmUrlSelector = ".c-ranklist-110_tmb a"
filmTitleSelector = ".c-ranklist-110_info a"
filmImageSelector = ".c-ranklist-110_tmb img"
}

collector.OnHTML(tableContainer, func(element *colly.HTMLElement) {
element.ForEach(filmDetailContainer, func(i int, element *colly.HTMLElement) {

href := element.ChildAttr(filmUrlSelector, "href")
title := element.ChildText(filmTitleSelector)
image := "https:" + element.ChildAttr(filmImageSelector, "src")

data = append(data, model.ObjThumb{
Object: model.Object{
Name: title,
IsFolder: true,
ID: actorUrlsRegexp.ReplaceAllString(href, "$1"),
Size: 622857143,
},
Thumbnail: model.Thumbnail{Thumbnail: image},
})
})
})

for _, file := range tempUrls {
films = append(films, urlsRegexp.ReplaceAllString(file, "$2"))
}
for _, imageUrl := range imageUrls {
images = append(images, "https:"+imageRegexp.ReplaceAllString(imageUrl, "$1"))
}
for _, tempUrl := range tempUrls {
urls = append(urls, urlsRegexp.ReplaceAllString(tempUrl, "$1"))
err := collector.Visit(pageUrl)
if err != nil && err.Error() == "Not Found" {
err = nil
}

return films, images, urls, len(tempUrls) != 0, nil
return data, len(data) != preLen, err

}
86 changes: 5 additions & 81 deletions drivers/javdb/util.go
Original file line number Diff line number Diff line change
@@ -1,97 +1,21 @@
package javdb

import (
"github.com/alist-org/alist/v3/internal/db"
"github.com/alist-org/alist/v3/drivers/virtual_file"
"github.com/alist-org/alist/v3/internal/model"
"github.com/gocolly/colly/v2"
"gorm.io/gorm/utils"
"net/http"
"strings"
"time"
)

func (d *Javdb) getFilms(dirName string, urlFunc func(index int) string) ([]model.Obj, error) {

results := make([]model.Obj, 0)
data := make([]model.ObjThumb, 0)

data, nextPage, err := d.getPageInfo(urlFunc, 1, data)
if err != nil {
return results, err
}

var urls []string
for _, item := range data {
urls = append(urls, item.ID)
}

existFilms := db.QueryByUrls(dirName, urls)

// not exists
for index := 2; index <= 20 && nextPage && len(existFilms) == 0; index++ {

data, nextPage, err = d.getPageInfo(urlFunc, index, data)
//films, images, urls, dates, nextPage, err = d.getPageInfo(urlFunc, index, films, images, urls, dates)
if err != nil {
return results, err
}
clear(urls)
for _, item := range data {
urls = append(urls, item.ID)
}

existFilms = db.QueryByUrls(dirName, urls)

}
// exist
for index, item := range data {
if utils.Contains(existFilms, item.ID) {
if index == 0 {
data = []model.ObjThumb{}
} else {
data = data[:index]
}
break
}
}

if len(data) != 0 {
err = db.CreateFilms("javdb", dirName, data)
if err != nil {
return results, nil
}
}

return d.convertFilm(dirName, db.QueryByActor("javdb", dirName), results), nil

}

func (d *Javdb) convertFilm(dirName string, actor []model.Film, results []model.Obj) []model.Obj {
for _, film := range actor {
results = append(results, &model.ObjThumb{
Object: model.Object{
Name: film.Name,
IsFolder: true,
ID: film.Url,
Size: 622857143,
Modified: film.Date,
Path: dirName,
},
Thumbnail: model.Thumbnail{Thumbnail: film.Image},
return virtual_file.GetFilmsWitchStorage("javdb", dirName, urlFunc,
func(urlFunc func(index int) string, index int, data []model.ObjThumb) ([]model.ObjThumb, bool, error) {
return d.getPageInfo(urlFunc, index, data)
})
results = append(results, &model.ObjThumb{
Object: model.Object{
Name: film.Name + ".jpg",
IsFolder: false,
ID: film.Image,
Size: 622857143,
Modified: film.Date,
Path: dirName,
},
Thumbnail: model.Thumbnail{Thumbnail: film.Image},
})
}
return results

}

func (d *Javdb) getMagnet(file model.Obj) (string, error) {
Expand Down
Loading

0 comments on commit 6c1eaab

Please sign in to comment.