2024-09-24 06:17:11 -04:00
|
|
|
|
package crawler
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
|
|
|
|
"encoding/base64"
|
2024-12-02 03:17:01 -05:00
|
|
|
|
"encoding/json"
|
2024-11-19 21:40:33 -05:00
|
|
|
|
"errors"
|
2024-09-24 06:17:11 -04:00
|
|
|
|
"html"
|
2024-12-02 03:17:01 -05:00
|
|
|
|
"net/http"
|
2024-09-24 06:17:11 -04:00
|
|
|
|
"regexp"
|
|
|
|
|
"strings"
|
2024-12-02 03:17:01 -05:00
|
|
|
|
"time"
|
2024-09-24 06:17:11 -04:00
|
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
|
"pcgamedb/cache"
|
2024-11-20 06:09:04 -05:00
|
|
|
|
"pcgamedb/config"
|
|
|
|
|
"pcgamedb/constant"
|
|
|
|
|
"pcgamedb/db"
|
|
|
|
|
"pcgamedb/model"
|
2024-11-15 02:02:45 -05:00
|
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
|
"git.nite07.com/nite/ccs"
|
2024-09-24 06:17:11 -04:00
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
|
"go.uber.org/zap"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type FreeGOGCrawler struct {
|
2024-12-02 03:51:06 -05:00
|
|
|
|
logger *zap.Logger
|
2024-09-24 06:17:11 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler {
|
|
|
|
|
return &FreeGOGCrawler{
|
|
|
|
|
logger: logger,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-12-04 12:36:55 -05:00
|
|
|
|
func (c *FreeGOGCrawler) getSession() (*ccs.Session, error) {
|
2024-12-02 03:51:06 -05:00
|
|
|
|
var session ccs.Session
|
|
|
|
|
var err error
|
|
|
|
|
if val, exist := cache.Get("freegog_waf_session"); exist {
|
|
|
|
|
err := json.Unmarshal([]byte(val), &session)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
2024-12-02 03:17:01 -05:00
|
|
|
|
}
|
2024-12-02 03:51:06 -05:00
|
|
|
|
} else {
|
|
|
|
|
session, err = ccs.WAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-12-04 12:36:55 -05:00
|
|
|
|
jsonBytes, err := json.Marshal(session)
|
|
|
|
|
if err == nil {
|
2024-12-10 08:37:16 -05:00
|
|
|
|
_ = cache.SetWithExpire("freegog_waf_session", jsonBytes, 1*time.Hour)
|
2024-12-04 12:36:55 -05:00
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
|
}
|
2024-12-02 03:51:06 -05:00
|
|
|
|
return &session, nil
|
2024-12-02 03:17:01 -05:00
|
|
|
|
}
|
|
|
|
|
|
2024-11-19 21:40:33 -05:00
|
|
|
|
func (c *FreeGOGCrawler) Name() string {
|
|
|
|
|
return "FreeGOG"
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
|
func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
|
2024-09-24 06:17:11 -04:00
|
|
|
|
count := 0
|
2024-12-04 12:36:55 -05:00
|
|
|
|
session, err := c.getSession()
|
2024-11-19 21:40:33 -05:00
|
|
|
|
if err != nil {
|
|
|
|
|
c.logger.Error("Failed to create session", zap.Error(err))
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-12-03 10:53:10 -05:00
|
|
|
|
resp, err := ccs.RequestWithWAFSession(http.MethodGet, constant.FreeGOGListURL, *session, nil)
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if err != nil {
|
|
|
|
|
c.logger.Error("Failed to fetch", zap.Error(err))
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp.Body)))
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if err != nil {
|
|
|
|
|
c.logger.Error("Failed to parse HTML", zap.Error(err))
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-21 12:30:26 -05:00
|
|
|
|
var urls []string
|
|
|
|
|
var updateFlags []string //rawName+link
|
2024-09-24 06:17:11 -04:00
|
|
|
|
doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) {
|
|
|
|
|
urls = append(urls, s.AttrOr("href", ""))
|
|
|
|
|
updateFlags = append(updateFlags, s.Text()+s.AttrOr("href", ""))
|
|
|
|
|
})
|
|
|
|
|
|
2024-11-21 12:30:26 -05:00
|
|
|
|
var res []*model.GameItem
|
2024-09-24 06:17:11 -04:00
|
|
|
|
for i, u := range urls {
|
|
|
|
|
if count == num {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if db.IsFreeGOGCrawled(updateFlags[i]) {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
c.logger.Info("Crawling", zap.String("URL", u))
|
2024-12-02 03:17:01 -05:00
|
|
|
|
item, err := c.CrawlByUrl(u)
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if err != nil {
|
|
|
|
|
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
item.UpdateFlag = updateFlags[i]
|
2024-11-16 00:48:48 -05:00
|
|
|
|
err = db.SaveGameItem(item)
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if err != nil {
|
|
|
|
|
c.logger.Warn("Failed to save", zap.Error(err))
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
res = append(res, item)
|
|
|
|
|
count++
|
2024-11-21 12:30:26 -05:00
|
|
|
|
if err := OrganizeGameItem(item); err != nil {
|
2024-09-24 06:17:11 -04:00
|
|
|
|
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return res, nil
|
|
|
|
|
}
|
|
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
|
func (c *FreeGOGCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
2024-12-04 12:36:55 -05:00
|
|
|
|
session, err := c.getSession()
|
2024-12-02 03:17:01 -05:00
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-12-03 10:53:10 -05:00
|
|
|
|
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
|
item, err := db.GetGameItemByUrl(URL)
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
|
item.Url = URL
|
2024-09-24 06:17:11 -04:00
|
|
|
|
rawTitleRegex := regexp.MustCompile(`(?i)<h1 class="entry-title">(.*?)</h1>`)
|
2024-12-02 03:17:01 -05:00
|
|
|
|
rawTitleRegexRes := rawTitleRegex.FindStringSubmatch(string(resp.Body))
|
2024-09-24 06:17:11 -04:00
|
|
|
|
rawName := ""
|
|
|
|
|
if len(rawTitleRegexRes) > 1 {
|
|
|
|
|
rawName = html.UnescapeString(rawTitleRegexRes[1])
|
|
|
|
|
item.RawName = strings.Replace(rawName, "–", "-", -1)
|
|
|
|
|
} else {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
item.Name = FreeGOGFormatter(item.RawName)
|
|
|
|
|
sizeRegex := regexp.MustCompile(`(?i)>Size:\s?(.*?)<`)
|
2024-12-02 03:17:01 -05:00
|
|
|
|
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body))
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if len(sizeRegexRes) > 1 {
|
|
|
|
|
item.Size = sizeRegexRes[1]
|
|
|
|
|
}
|
|
|
|
|
magnetRegex := regexp.MustCompile(`<a class="download-btn" href="https://gdl.freegogpcgames.xyz/download-gen\.php\?url=(.*?)"`)
|
2024-12-02 03:17:01 -05:00
|
|
|
|
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body))
|
2024-09-24 06:17:11 -04:00
|
|
|
|
if len(magnetRegexRes) > 1 {
|
|
|
|
|
magnet, err := base64.StdEncoding.DecodeString(magnetRegexRes[1])
|
|
|
|
|
if err != nil {
|
|
|
|
|
return nil, err
|
|
|
|
|
}
|
|
|
|
|
item.Download = string(magnet)
|
|
|
|
|
} else {
|
2024-11-21 12:30:26 -05:00
|
|
|
|
return nil, errors.New("failed to find magnet link")
|
2024-09-24 06:17:11 -04:00
|
|
|
|
}
|
|
|
|
|
item.Author = "FreeGOG"
|
2024-12-21 11:37:00 -05:00
|
|
|
|
item.Platform = "windows"
|
2024-09-24 06:17:11 -04:00
|
|
|
|
return item, nil
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
|
func (c *FreeGOGCrawler) CrawlAll() ([]*model.GameItem, error) {
|
2024-09-24 06:17:11 -04:00
|
|
|
|
return c.Crawl(-1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Patterns used by FreeGOGFormatter, compiled once at package initialisation
// instead of on every call.
var (
	// freeGOGRegexps lists patterns whose matches are deleted from a raw
	// name. The single pattern is greedy: it removes everything from the
	// first "(" to the last ")" on a line.
	freeGOGRegexps = []*regexp.Regexp{
		regexp.MustCompile(`(?i)\(.*\)`),
	}
	// freeGOGVersionRegexp matches a version tag such as "v1" or "v1.2.3".
	freeGOGVersionRegexp = regexp.MustCompile(`(?i)v\d+(\.\d+)*`)
	// freeGOGGotyRegexp matches ": goty" in any letter case.
	freeGOGGotyRegexp = regexp.MustCompile(`(?i):\sgoty`)
)

// FreeGOGFormatter derives a clean game name from a raw FreeGOG title.
//
// It applies these steps in order:
//  1. delete every match of the patterns in freeGOGRegexps;
//  2. cut the name at the first version tag ("v" followed by digits);
//  3. cut the name at the first "+";
//  4. expand ": goty" (any case) to ": Game Of The Year";
//  5. trim surrounding whitespace.
func FreeGOGFormatter(name string) string {
	for _, re := range freeGOGRegexps {
		name = re.ReplaceAllString(name, "")
	}

	// Everything from the version tag onwards is dropped.
	if loc := freeGOGVersionRegexp.FindStringIndex(name); loc != nil {
		name = name[:loc[0]]
	}
	// Everything from the first "+" onwards is dropped.
	if plus := strings.Index(name, "+"); plus != -1 {
		name = name[:plus]
	}

	name = freeGOGGotyRegexp.ReplaceAllString(name, ": Game Of The Year")

	return strings.TrimSpace(name)
}
|