pcgamedb/crawler/freegog.go

177 lines
4.4 KiB
Go
Raw Normal View History

2024-09-24 06:17:11 -04:00
package crawler
import (
"bytes"
"encoding/base64"
"errors"
2024-09-24 06:17:11 -04:00
"html"
"regexp"
"strings"
2024-11-20 06:09:04 -05:00
"pcgamedb/config"
"pcgamedb/constant"
"pcgamedb/db"
"pcgamedb/model"
"pcgamedb/utils"
2024-11-15 02:02:45 -05:00
2024-09-24 06:17:11 -04:00
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)
type FreeGOGCrawler struct {
logger *zap.Logger
session *utils.WAFSession
2024-09-24 06:17:11 -04:00
}
// NewFreeGOGCrawler builds a FreeGOGCrawler that logs through logger.
// The WAF session is left unset; it is created on the first crawl.
func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler {
	crawler := new(FreeGOGCrawler)
	crawler.logger = logger
	return crawler
}
// Name reports the identifier of this crawler, "FreeGOG".
func (c *FreeGOGCrawler) Name() string {
	const crawlerName = "FreeGOG"
	return crawlerName
}
func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
2024-09-24 06:17:11 -04:00
count := 0
var err error
if c.session == nil {
c.session, err = utils.CCSWAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
}
if err != nil {
c.logger.Error("Failed to create session", zap.Error(err))
return nil, err
}
resp, err := utils.FetchWithWAFSession(utils.FetchConfig{
2024-09-24 06:17:11 -04:00
Url: constant.FreeGOGListURL,
}, c.session)
2024-09-24 06:17:11 -04:00
if err != nil {
c.logger.Error("Failed to fetch", zap.Error(err))
return nil, err
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data))
if err != nil {
c.logger.Error("Failed to parse HTML", zap.Error(err))
return nil, err
}
urls := []string{}
updateFlags := []string{} //rawName+link
doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) {
urls = append(urls, s.AttrOr("href", ""))
updateFlags = append(updateFlags, s.Text()+s.AttrOr("href", ""))
})
res := []*model.GameItem{}
2024-09-24 06:17:11 -04:00
for i, u := range urls {
if count == num {
break
}
if db.IsFreeGOGCrawled(updateFlags[i]) {
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
continue
}
item.UpdateFlag = updateFlags[i]
err = db.SaveGameItem(item)
2024-09-24 06:17:11 -04:00
if err != nil {
c.logger.Warn("Failed to save", zap.Error(err))
continue
}
res = append(res, item)
count++
info, err := OrganizeGameItem(item)
2024-09-24 06:17:11 -04:00
if err != nil {
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
continue
}
err = db.SaveGameInfo(info)
if err != nil {
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
continue
}
}
return res, nil
}
func (c *FreeGOGCrawler) CrawlByUrl(url string) (*model.GameItem, error) {
var err error
if c.session == nil {
c.session, err = utils.CCSWAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
}
if err != nil {
return nil, errors.New("Failed to create session")
}
resp, err := utils.FetchWithWAFSession(utils.FetchConfig{
2024-09-24 06:17:11 -04:00
Url: url,
}, c.session)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
item, err := db.GetGameItemByUrl(url)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
item.Url = url
rawTitleRegex := regexp.MustCompile(`(?i)<h1 class="entry-title">(.*?)</h1>`)
rawTitleRegexRes := rawTitleRegex.FindStringSubmatch(string(resp.Data))
rawName := ""
if len(rawTitleRegexRes) > 1 {
rawName = html.UnescapeString(rawTitleRegexRes[1])
item.RawName = strings.Replace(rawName, "", "-", -1)
} else {
return nil, err
}
item.Name = FreeGOGFormatter(item.RawName)
sizeRegex := regexp.MustCompile(`(?i)>Size:\s?(.*?)<`)
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Data))
if len(sizeRegexRes) > 1 {
item.Size = sizeRegexRes[1]
}
magnetRegex := regexp.MustCompile(`<a class="download-btn" href="https://gdl.freegogpcgames.xyz/download-gen\.php\?url=(.*?)"`)
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Data))
if len(magnetRegexRes) > 1 {
magnet, err := base64.StdEncoding.DecodeString(magnetRegexRes[1])
if err != nil {
return nil, err
}
item.Download = string(magnet)
} else {
return nil, errors.New("Failed to find magnet link")
2024-09-24 06:17:11 -04:00
}
item.Author = "FreeGOG"
return item, nil
}
func (c *FreeGOGCrawler) CrawlAll() ([]*model.GameItem, error) {
2024-09-24 06:17:11 -04:00
return c.Crawl(-1)
}
// Patterns used by FreeGOGFormatter, compiled once at package initialisation.
var (
	// freeGOGRegexps lists patterns whose matches are deleted from the raw
	// title. The single entry is greedy: it removes everything from the first
	// "(" to the last ")" on a line.
	freeGOGRegexps = []*regexp.Regexp{
		regexp.MustCompile(`(?i)\(.*\)`),
	}
	// freeGOGVersionRegex matches a version marker such as "v1", "V2.0.3".
	freeGOGVersionRegex = regexp.MustCompile(`(?i)v\d+(\.\d+)*`)
	// freeGOGGOTYRegex matches the ": goty" abbreviation, case-insensitively.
	freeGOGGOTYRegex = regexp.MustCompile(`(?i):\sgoty`)
)

// FreeGOGFormatter derives a clean game name from a raw FreeGOG title.
//
// It applies, in order: removal of parenthesised text, truncation at the first
// version marker, truncation at the first "+", expansion of ": goty" to
// ": Game Of The Year", and trimming of surrounding whitespace.
func FreeGOGFormatter(name string) string {
	for _, re := range freeGOGRegexps {
		name = re.ReplaceAllString(name, "")
	}
	// Everything from the version marker onwards is release metadata.
	if loc := freeGOGVersionRegex.FindStringIndex(name); loc != nil {
		name = name[:loc[0]]
	}
	// A "+" introduces bundled extras (DLC, soundtrack, ...); keep the title only.
	if plus := strings.Index(name, "+"); plus != -1 {
		name = name[:plus]
	}
	name = freeGOGGOTYRegex.ReplaceAllString(name, ": Game Of The Year")
	return strings.TrimSpace(name)
}