2024-09-24 06:17:11 -04:00
|
|
|
package crawler
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
2024-11-20 06:09:04 -05:00
|
|
|
"pcgamedb/constant"
|
|
|
|
"pcgamedb/db"
|
|
|
|
"pcgamedb/model"
|
|
|
|
"pcgamedb/utils"
|
2024-11-15 02:02:45 -05:00
|
|
|
|
2024-09-24 06:17:11 -04:00
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
)
|
|
|
|
|
|
|
|
type ChovkaCrawler struct {
|
|
|
|
logger *zap.Logger
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewChovkaCrawler(logger *zap.Logger) *ChovkaCrawler {
|
|
|
|
return &ChovkaCrawler{
|
|
|
|
logger: logger,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-11-14 12:29:19 -05:00
|
|
|
func (c *ChovkaCrawler) Name() string {
|
|
|
|
return "ChovkaCrawler"
|
|
|
|
}
|
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
func (c *ChovkaCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
|
|
|
resp, err := utils.Request().Get(URL)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
item, err := db.GetGameItemByUrl(URL)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
item.Url = URL
|
2024-09-24 06:17:11 -04:00
|
|
|
item.RawName = doc.Find(".inner-entry__title").First().Text()
|
|
|
|
item.Name = ChovkaFormatter(item.RawName)
|
|
|
|
item.Author = "Chovka"
|
|
|
|
item.UpdateFlag = item.RawName
|
2024-12-21 11:37:00 -05:00
|
|
|
item.Platform = "windows"
|
2024-09-24 06:17:11 -04:00
|
|
|
downloadURL := doc.Find(".download-torrent").AttrOr("href", "")
|
|
|
|
if downloadURL == "" {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("failed to find download URL")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err = utils.Request().SetHeader("Referer", URL).Get(downloadURL)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
item.Size = size
|
|
|
|
item.Download = magnet
|
|
|
|
return item, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *ChovkaCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, page))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-11-21 12:30:26 -05:00
|
|
|
var urls []string
|
|
|
|
var updateFlags []string
|
2024-09-24 06:17:11 -04:00
|
|
|
doc.Find(".entry").Each(func(i int, s *goquery.Selection) {
|
|
|
|
u, exist := s.Find(".entry__title.h2 a").Attr("href")
|
|
|
|
if !exist {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
urls = append(urls, u)
|
|
|
|
updateFlags = append(updateFlags, s.Find(".entry__title.h2 a").Text())
|
|
|
|
})
|
2024-11-16 00:48:48 -05:00
|
|
|
var res []*model.GameItem
|
2024-09-24 06:17:11 -04:00
|
|
|
for i, u := range urls {
|
|
|
|
if db.IsChovkaCrawled(updateFlags[i]) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
c.logger.Info("Crawling", zap.String("URL", u))
|
|
|
|
item, err := c.CrawlByUrl(u)
|
|
|
|
if err != nil {
|
|
|
|
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
|
|
|
continue
|
|
|
|
}
|
2024-11-16 00:48:48 -05:00
|
|
|
if err := db.SaveGameItem(item); err != nil {
|
2024-09-24 06:17:11 -04:00
|
|
|
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
res = append(res, item)
|
2024-11-21 12:30:26 -05:00
|
|
|
|
|
|
|
if err := OrganizeGameItem(item); err != nil {
|
2024-09-24 06:17:11 -04:00
|
|
|
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *ChovkaCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
|
|
|
var res []*model.GameItem
|
2024-09-24 06:17:11 -04:00
|
|
|
for _, page := range pages {
|
|
|
|
items, err := c.Crawl(page)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
res = append(res, items...)
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *ChovkaCrawler) CrawlAll() ([]*model.GameItem, error) {
|
2024-09-24 06:17:11 -04:00
|
|
|
totalPageNum, err := c.GetTotalPageNum()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-11-16 00:48:48 -05:00
|
|
|
var res []*model.GameItem
|
2024-09-24 06:17:11 -04:00
|
|
|
for i := 1; i <= totalPageNum; i++ {
|
|
|
|
items, err := c.Crawl(i)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
res = append(res, items...)
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *ChovkaCrawler) GetTotalPageNum() (int, error) {
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, 1))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
pageStr := doc.Find(".pagination>a").Last().Text()
|
|
|
|
totalPageNum, err := strconv.Atoi(pageStr)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
return totalPageNum, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ChovkaFormatter reduces a raw release title to the bare game name.
//
// The title is cut at the first occurrence of each of the markers "| RePack",
// "| GOG" and "| Portable", applied in that order, and the remainder is
// returned with surrounding whitespace removed. Matching is exact: it is
// case-sensitive and requires the single space after the pipe. A title
// containing none of the markers is only trimmed.
func ChovkaFormatter(name string) string {
	markers := [...]string{"| RePack", "| GOG", "| Portable"}
	for _, marker := range markers {
		if cut := strings.Index(name, marker); cut >= 0 {
			name = name[:cut]
		}
	}
	return strings.TrimSpace(name)
}
|