// game-crawler/crawler/1337x.go
// 2024-12-29 13:17:04 +08:00
//
// 211 lines · 4.9 KiB · Go

package crawler
import (
"bytes"
"errors"
"fmt"
"regexp"
"strconv"
"strings"
"game-crawler/constant"
"game-crawler/db"
"game-crawler/model"
"game-crawler/utils"
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)
// FormatterFunc normalizes a raw torrent title into a clean game name.
type FormatterFunc func(string) string

// s1337xCrawler crawls 1337x torrent listing and detail pages for a single
// source category, producing model.GameItem records.
type s1337xCrawler struct {
	*BaseLogger
	*BaseError
	source    string        // 1337x category path segment used to build listing URLs (e.g. "xxx-torrents")
	platform  string        // platform label stamped onto every crawled item
	formatter FormatterFunc // cleans item.RawName into item.Name
	logger    *zap.Logger   // also held by BaseLogger; kept here for the HTTP client's sugar logger
}
// New1337xCrawler builds a crawler for the given 1337x source category.
// platform tags every crawled item and formatter cleans raw torrent titles
// into display names.
func New1337xCrawler(source string, platform string, formatter FormatterFunc, logger *zap.Logger) *s1337xCrawler {
	c := &s1337xCrawler{
		BaseLogger: &BaseLogger{logger: logger},
		BaseError:  &BaseError{},
		source:     source,
		platform:   platform,
		formatter:  formatter,
		logger:     logger,
	}
	return c
}
// Crawl fetches one listing page of the configured source and crawls every
// torrent detail page it links to, skipping URLs already recorded in the DB.
// Items that fail to crawl or save are logged and skipped; only successfully
// saved items are returned. Organization failures are logged but non-fatal.
func (c *s1337xCrawler) Crawl(page int) ([]*model.GameItem, error) {
	c.LogCrawlStart(page)
	requestUrl := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, page)
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(requestUrl)
	if err != nil {
		return nil, c.ErrRequest(requestUrl, err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		return nil, c.ErrParseDoc(requestUrl, err)
	}

	// Each listing row carries two anchors under ".name"; the second (Eq(1))
	// is the relative link to the torrent detail page.
	trSelection := doc.Find("tbody>tr")
	var urls []string
	trSelection.Each(func(i int, trNode *goquery.Selection) {
		nameSelection := trNode.Find(".name").First()
		if aNode := nameSelection.Find("a").Eq(1); aNode.Length() > 0 {
			if u, exists := aNode.Attr("href"); exists {
				urls = append(urls, fmt.Sprintf("%s%s", constant.C1337xBaseURL, u))
			}
		}
	})

	var res []*model.GameItem
	for _, u := range urls {
		if db.IsGameCrawledByURL(u) {
			c.LogCrawlSkip(u)
			continue
		}
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.LogCrawlByUrlError(u, err)
			continue
		}
		if err := db.SaveGameItem(item); err != nil {
			c.LogSaveGameItemError(u, err)
			continue
		}
		res = append(res, item)
		// The item is already saved at this point, so a failed organize pass
		// only warrants a warning, not exclusion from the result.
		if err := OrganizeGameItem(item); err != nil {
			c.LogOrganizeGameWarn(item.ID, item.Name, err)
		}
	}
	return res, nil
}
// magnetRegex extracts the first magnet URI embedded anywhere in the detail
// page HTML. Compiled once at package scope instead of on every call.
var magnetRegex = regexp.MustCompile(`magnet:\?[^"]*`)

// CrawlByUrl fetches a single 1337x torrent detail page and builds a
// model.GameItem from it: size and metadata from the detail list, the name
// from the page <title>, and the magnet link scraped from the raw HTML.
// It returns an error if the request, parse, or magnet extraction fails.
func (c *s1337xCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	c.LogCrawlByUrlStart(URL)
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(URL)
	if err != nil {
		return nil, c.ErrRequest(URL, err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		return nil, c.ErrParseDoc(URL, err)
	}

	// Collect the "<strong>key</strong><span>value</span>" pairs from the
	// detail list (e.g. "Total size"). Note: the closure parameter is named
	// li, not item, to avoid shadowing the result variable below.
	info := make(map[string]string)
	doc.Find(".torrent-detail-page ul.list>li").Each(func(i int, li *goquery.Selection) {
		key := strings.TrimSpace(li.Find("strong").Text())
		value := strings.TrimSpace(li.Find("span").Text())
		info[key] = value
	})

	// The magnet link has no stable selector, so scan the raw HTML. The
	// pattern has no capture groups, so FindString is the right call.
	magnet := magnetRegex.FindString(string(resp.Body()))
	if magnet == "" {
		return nil, c.ErrGetGameItemDetail(URL, errors.New("magnet link not found"))
	}

	item := &model.GameItem{
		Url: URL,
	}
	item.Size = info["Total size"]
	// Page title has the form "Download <name> Torrent | 1337x".
	item.RawName = doc.Find("title").Text()
	item.RawName = strings.Replace(item.RawName, "Download ", "", 1)
	item.RawName = strings.TrimSpace(strings.Replace(item.RawName, "Torrent | 1337x", " ", 1))
	item.Name = c.formatter(item.RawName)
	item.Downloads = map[string]string{
		"magnet": magnet,
	}
	item.Author = strings.ReplaceAll(c.source, "-torrents", "")
	item.Platform = c.platform
	return item, nil
}
// CrawlMulti crawls the given listing pages, skipping any page number that
// falls outside [1, totalPageNum]. Per-page crawl failures are logged and
// skipped; items from all successful pages are returned combined.
func (c *s1337xCrawler) CrawlMulti(pages []int) (res []*model.GameItem, err error) {
	total, err := c.GetTotalPageNum()
	if err != nil {
		return nil, c.ErrGetTotalPageNum(err)
	}
	for _, p := range pages {
		if p < 1 || p > total {
			c.LogPageExceedWarn(p)
			continue
		}
		pageItems, crawlErr := c.Crawl(p)
		if crawlErr != nil {
			c.LogCrawlError(p, crawlErr)
			continue
		}
		res = append(res, pageItems...)
	}
	return res, nil
}
// CrawlAll crawls every listing page of the configured source, from page 1
// through the last page reported by the site. Pages that fail to crawl are
// logged and skipped; items from all successful pages are returned combined.
func (c *s1337xCrawler) CrawlAll() (res []*model.GameItem, err error) {
	total, err := c.GetTotalPageNum()
	if err != nil {
		return nil, c.ErrGetTotalPageNum(err)
	}
	for page := 1; page <= total; page++ {
		pageItems, crawlErr := c.Crawl(page)
		if crawlErr != nil {
			c.LogCrawlError(page, crawlErr)
			continue
		}
		res = append(res, pageItems...)
	}
	return res, nil
}
// GetTotalPageNum fetches page 1 of the source's listing and parses the
// final page number out of the ".last" pagination link's href.
func (c *s1337xCrawler) GetTotalPageNum() (int, error) {
	listURL := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, 1)
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(listURL)
	if err != nil {
		return 0, c.ErrRequest(listURL, err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		return 0, c.ErrParseDoc(listURL, err)
	}
	href, exists := doc.Find(".last").Find("a").Attr("href")
	if !exists {
		return 0, errors.New("page num not found")
	}
	// href looks like "/<source>/<N>/": dropping the source segment and all
	// slashes leaves just the page number.
	numStr := strings.ReplaceAll(strings.ReplaceAll(href, c.source, ""), "/", "")
	total, err := strconv.Atoi(numStr)
	if err != nil {
		return 0, c.ErrParseInt(numStr, err)
	}
	return total, nil
}