game-crawler/crawler/goggames.go
2024-12-29 13:17:04 +08:00

332 lines
8.0 KiB
Go

package crawler
import (
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"path"
"strings"
"time"
"game-crawler/constant"
"game-crawler/db"
"game-crawler/model"
"game-crawler/utils"
"git.nite07.com/nite/ccs"
"go.uber.org/zap"
)
// GOGGamesCrawler crawls game entries from gog-games.to.
//
// It embeds *BaseLogger and *BaseError, which are declared elsewhere in the
// package; presumably they supply the Log*/Err* helpers that the methods of
// this type call — confirm against their definitions.
type GOGGamesCrawler struct {
*BaseLogger
*BaseError
// cfClearanceUrl is passed to ccs.TurnstileToken when a game's API data is fetched.
cfClearanceUrl string
// logger is attached (as a sugared logger) to every outgoing HTTP request.
logger *zap.Logger
}
// NewGOGGamesCrawler returns a GOGGamesCrawler that uses cfClearanceUrl when
// requesting Turnstile tokens and logger for both its embedded BaseLogger and
// its own request logging.
func NewGOGGamesCrawler(cfClearanceUrl string, logger *zap.Logger) *GOGGamesCrawler {
	crawler := new(GOGGamesCrawler)
	crawler.BaseLogger = &BaseLogger{logger: logger}
	crawler.BaseError = &BaseError{}
	crawler.cfClearanceUrl = cfClearanceUrl
	crawler.logger = logger
	return crawler
}
// Name returns the fixed identifier of this crawler.
func (c *GOGGamesCrawler) Name() string {
	const crawlerName = "GOGGamesCrawler"
	return crawlerName
}
// CrawlByUrl builds the game item for a single gog-games.to game page.
//
// URL must start with "https://www.gog-games.to/game/" and end in a non-empty
// slug; anything else is rejected with ErrInvailValue before any request is
// made. The slug is substituted into constant.GOGGamesGameAPIURL, a Turnstile
// token is obtained for that API URL, and the JSON response is decoded into
// the item's download links and total size.
//
// The returned item is obtained from db.GetGameItemByUrl and populated here;
// it is not saved by this method. An error is returned when the token or the
// HTTP request fails, the response cannot be decoded, the response contains
// no download links, or the database lookup fails.
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	c.LogCrawlByUrlStart(URL)
	if !strings.HasPrefix(URL, "https://www.gog-games.to/game/") {
		return nil, c.ErrInvailValue(URL)
	}

	// The slug is whatever follows the last "/". A URL that ends in "/" has
	// no slug, so reject it instead of querying the API with an empty one.
	_, slug := path.Split(URL)
	if slug == "" {
		return nil, c.ErrInvailValue(URL)
	}
	apiUrl := fmt.Sprintf(constant.GOGGamesGameAPIURL, slug)

	token, err := ccs.TurnstileToken(c.cfClearanceUrl, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
	if err != nil {
		return nil, c.ErrGetSession(err)
	}

	resp, err := utils.Request().SetLogger(c.logger.Sugar()).SetHeader("cf-turnstile-response", token).Get(apiUrl)
	if err != nil {
		// A failed HTTP call is a request error, reported the same way as in
		// Crawl and GetTotalPageNum (not a session/token error).
		return nil, c.ErrRequest(apiUrl, err)
	}

	var data gameResult
	if err := json.Unmarshal(resp.Body(), &data); err != nil {
		return nil, c.ErrUnmarshalJSON(err)
	}

	// Collect every download link keyed as "label(group name)". Groups are
	// visited in the order game, patch, goodie, so on a key collision the
	// later category overwrites the earlier one.
	links := make(map[string]string)
	for _, group := range data.Links.Game {
		for _, link := range group.Links {
			links[fmt.Sprintf("%s(%s)", link.Label, group.Name)] = link.Link
		}
	}
	for _, group := range data.Links.Patch {
		for _, link := range group.Links {
			links[fmt.Sprintf("%s(%s)", link.Label, group.Name)] = link.Link
		}
	}
	for _, group := range data.Links.Goodie {
		for _, link := range group.Links {
			links[fmt.Sprintf("%s(%s)", link.Label, group.Name)] = link.Link
		}
	}
	if len(links) == 0 {
		return nil, c.ErrGetGameItemDetail(URL, errors.New("no download links found"))
	}

	// Total size covers the game files only. A single unparsable size makes
	// the whole total unreliable, so it is reported as "unknown" below.
	size := uint64(0)
	sizeErr := false
	for _, file := range data.Files.Game {
		s, parseErr := utils.SizeToBytes(file.Size)
		if parseErr != nil {
			sizeErr = true
			break
		}
		size += s
	}

	// Look up the item for this URL. NOTE(review): the original comment says
	// this retrieves or creates the item — confirm in db.GetGameItemByUrl.
	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		return nil, c.ErrDBQuery(err)
	}

	item.Name = data.Title
	item.RawName = data.Title
	item.Downloads = links
	item.Url = URL
	if sizeErr {
		item.Size = "unknown"
	} else {
		item.Size = utils.BytesToSize(size)
	}
	item.Author = "GOGGames"
	item.Platform = "windows"
	return item, nil
}
// Crawl fetches one page of the game listing and crawls every released game
// on it that has not been crawled yet.
//
// For each listing entry with a non-empty infohash an update flag is derived
// from the entry's GOG URL and last-update time; entries whose flag is already
// recorded for "GOGGames" are skipped. Entries that fail to crawl or to save
// are logged and skipped. A failure in OrganizeGameItem is only logged as a
// warning: the item is still part of the result.
//
// An error is returned only when the listing request or its JSON decoding
// fails.
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
	c.LogCrawlStart(page)

	listURL := fmt.Sprintf(constant.GOGGamesURL, page)
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(listURL)
	if err != nil {
		return nil, c.ErrRequest(listURL, err)
	}

	var listing searchResult
	if err := json.Unmarshal(resp.Body(), &listing); err != nil {
		return nil, c.ErrUnmarshalJSON(err)
	}

	crawled := []*model.GameItem{}
	for _, entry := range listing.Data {
		// An empty infohash marks a game that has not been released yet.
		if entry.Infohash == "" {
			continue
		}

		pageURL := fmt.Sprintf(constant.GOGGamesPageURL, entry.Slug)
		// The flag encodes link+date, so a new upstream update yields a new flag.
		flag := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", entry.GogURL, entry.LastUpdate)))

		if db.IsGameCrawled(flag, "GOGGames") {
			c.LogCrawlSkip(pageURL)
			continue
		}

		game, crawlErr := c.CrawlByUrl(pageURL)
		if crawlErr != nil {
			c.LogCrawlByUrlError(pageURL, crawlErr)
			continue
		}
		game.UpdateFlag = flag

		if saveErr := db.SaveGameItem(game); saveErr != nil {
			c.LogSaveGameItemError(pageURL, saveErr)
			continue
		}
		crawled = append(crawled, game)

		if organizeErr := OrganizeGameItem(game); organizeErr != nil {
			c.LogOrganizeGameWarn(game.ID, game.Name, organizeErr)
		}
	}
	return crawled, nil
}
// CrawlMulti crawls the given listing pages and returns all items collected
// from them.
//
// Page numbers below 1 or above the total page count are logged and skipped,
// as are pages whose crawl fails. An error is returned only when the total
// page count cannot be determined.
func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	lastPage, err := c.GetTotalPageNum()
	if err != nil {
		return nil, c.ErrGetTotalPageNum(err)
	}

	collected := []*model.GameItem{}
	for _, p := range pages {
		inRange := p >= 1 && p <= lastPage
		if !inRange {
			c.LogPageExceedWarn(p)
			continue
		}

		batch, crawlErr := c.Crawl(p)
		if crawlErr != nil {
			c.LogCrawlError(p, crawlErr)
			continue
		}
		collected = append(collected, batch...)
	}
	return collected, nil
}
// CrawlAll crawls every listing page from 1 through the total page count and
// returns all items collected.
//
// A page whose crawl fails is logged and skipped. An error is returned only
// when the total page count cannot be determined.
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
	lastPage, err := c.GetTotalPageNum()
	if err != nil {
		return nil, c.ErrGetTotalPageNum(err)
	}

	var collected []*model.GameItem
	for page := 1; page <= lastPage; page++ {
		batch, crawlErr := c.Crawl(page)
		if crawlErr != nil {
			c.LogCrawlError(page, crawlErr)
			continue
		}
		collected = append(collected, batch...)
	}
	return collected, nil
}
// GetTotalPageNum requests the first listing page and returns the last-page
// number reported in its metadata.
//
// It returns 0 and an error when the request fails or the response cannot be
// decoded as JSON.
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
	firstPageURL := fmt.Sprintf(constant.GOGGamesURL, 1)

	resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(firstPageURL)
	if err != nil {
		return 0, c.ErrRequest(firstPageURL, err)
	}

	var listing searchResult
	if decodeErr := json.Unmarshal(resp.Body(), &listing); decodeErr != nil {
		return 0, c.ErrUnmarshalJSON(decodeErr)
	}
	return listing.Meta.LastPage, nil
}
// gogGamesSearchItem is a single entry of the listing response's "data" array.
type gogGamesSearchItem struct {
	ID          string    `json:"id"`
	Slug        string    `json:"slug"`
	Title       string    `json:"title"`
	Image       string    `json:"image"`
	Background  string    `json:"background"`
	GogURL      string    `json:"gog_url"`
	IsIndev     bool      `json:"is_indev"`
	IsNew       bool      `json:"is_new"`
	IsUpdated   bool      `json:"is_updated"`
	IsQueued    bool      `json:"is_queued"`
	IsUploading bool      `json:"is_uploading"`
	VotedOn     bool      `json:"voted_on"`
	LastUpdate  time.Time `json:"last_update"`
	Md5Filename string    `json:"md5_filename"`
	Infohash    string    `json:"infohash"`
	IsVotable   bool      `json:"is_votable"`
}

// gogGamesPageLink is a single entry of the listing response's "meta.links"
// array. URL is decoded as any because the field can hold a non-string value.
type gogGamesPageLink struct {
	URL    any    `json:"url"`
	Label  string `json:"label"`
	Active bool   `json:"active"`
}

// searchResult is the decoding target for the paginated game listing response:
// the entries themselves, navigation links and pagination metadata.
type searchResult struct {
	Data  []gogGamesSearchItem `json:"data"`
	Links struct {
		First string `json:"first"`
		Last  string `json:"last"`
		Prev  any    `json:"prev"`
		Next  string `json:"next"`
	} `json:"links"`
	Meta struct {
		CurrentPage int                `json:"current_page"`
		From        int                `json:"from"`
		LastPage    int                `json:"last_page"`
		Links       []gogGamesPageLink `json:"links"`
		Path        string             `json:"path"`
		PerPage     int                `json:"per_page"`
		To          int                `json:"to"`
		Total       int                `json:"total"`
	} `json:"meta"`
}
// gogGamesLink is one labelled download URL inside a link group.
type gogGamesLink struct {
	Label string `json:"label"`
	Link  string `json:"link"`
}

// gogGamesLinkGroup is a named set of download links; the game response holds
// one map of such groups per category (game, patch, goodie).
type gogGamesLinkGroup struct {
	ID    string         `json:"id"`
	Name  string         `json:"name"`
	Links []gogGamesLink `json:"links"`
}

// gogGamesFile describes one file listed in the game response. Size is kept
// as the textual value sent by the API.
type gogGamesFile struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Size string `json:"size"`
}

// gameResult is the decoding target for the single-game API response: the
// game's metadata plus its download link groups and file lists, each split
// into game, patch and goodie categories.
type gameResult struct {
	ID          string    `json:"id"`
	Slug        string    `json:"slug"`
	Title       string    `json:"title"`
	Image       string    `json:"image"`
	Background  string    `json:"background"`
	GogURL      string    `json:"gog_url"`
	IsIndev     bool      `json:"is_indev"`
	IsNew       bool      `json:"is_new"`
	IsUpdated   bool      `json:"is_updated"`
	IsQueued    bool      `json:"is_queued"`
	IsUploading bool      `json:"is_uploading"`
	VotedOn     bool      `json:"voted_on"`
	LastUpdate  time.Time `json:"last_update"`
	Md5Filename string    `json:"md5_filename"`
	Infohash    string    `json:"infohash"`
	Links       struct {
		Goodie map[string]gogGamesLinkGroup `json:"goodie"`
		Game   map[string]gogGamesLinkGroup `json:"game"`
		Patch  map[string]gogGamesLinkGroup `json:"patch"`
	} `json:"links"`
	Files struct {
		Game   []gogGamesFile `json:"game"`
		Goodie []gogGamesFile `json:"goodie"`
		Patch  []gogGamesFile `json:"patch"`
	} `json:"files"`
	IsVotable bool `json:"is_votable"`
}