// Listing metadata: 332 lines, 8.0 KiB, Go.

package crawler
|
|
|
|
import (
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"path"
|
|
"strings"
|
|
"time"
|
|
|
|
"game-crawler/constant"
|
|
"game-crawler/db"
|
|
"game-crawler/model"
|
|
"game-crawler/utils"
|
|
|
|
"git.nite07.com/nite/ccs"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
type GOGGamesCrawler struct {
|
|
*BaseLogger
|
|
*BaseError
|
|
cfClearanceUrl string
|
|
logger *zap.Logger
|
|
}
|
|
|
|
func NewGOGGamesCrawler(cfClearanceUrl string, logger *zap.Logger) *GOGGamesCrawler {
|
|
return &GOGGamesCrawler{
|
|
BaseLogger: &BaseLogger{
|
|
logger: logger,
|
|
},
|
|
BaseError: &BaseError{},
|
|
cfClearanceUrl: cfClearanceUrl,
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
func (c *GOGGamesCrawler) Name() string {
|
|
return "GOGGamesCrawler"
|
|
}
|
|
|
|
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
|
c.LogCrawlByUrlStart(URL)
|
|
if !strings.HasPrefix(URL, "https://www.gog-games.to/game/") {
|
|
return nil, c.ErrInvailValue(URL)
|
|
}
|
|
|
|
_, slug := path.Split(URL)
|
|
apiUrl := fmt.Sprintf(constant.GOGGamesGameAPIURL, slug)
|
|
|
|
token, err := ccs.TurnstileToken(c.cfClearanceUrl, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
|
|
if err != nil {
|
|
return nil, c.ErrGetSession(err)
|
|
}
|
|
|
|
resp, err := utils.Request().SetLogger(c.logger.Sugar()).SetHeader("cf-turnstile-response", token).Get(apiUrl)
|
|
if err != nil {
|
|
return nil, c.ErrGetSession(err)
|
|
}
|
|
|
|
data := gameResult{}
|
|
err = json.Unmarshal(resp.Body(), &data)
|
|
if err != nil {
|
|
return nil, c.ErrUnmarshalJSON(err)
|
|
}
|
|
|
|
name := data.Title
|
|
|
|
links := make(map[string]string, 0)
|
|
for _, v := range data.Links.Game {
|
|
for _, link := range v.Links {
|
|
links[fmt.Sprintf("%s(%s)", link.Label, v.Name)] = link.Link
|
|
}
|
|
}
|
|
for _, v := range data.Links.Patch {
|
|
for _, link := range v.Links {
|
|
links[fmt.Sprintf("%s(%s)", link.Label, v.Name)] = link.Link
|
|
}
|
|
}
|
|
for _, v := range data.Links.Goodie {
|
|
for _, link := range v.Links {
|
|
links[fmt.Sprintf("%s(%s)", link.Label, v.Name)] = link.Link
|
|
}
|
|
}
|
|
|
|
if len(links) == 0 {
|
|
return nil, c.ErrGetGameItemDetail(URL, errors.New("no download links found"))
|
|
}
|
|
|
|
// Calculate total size
|
|
size := uint64(0)
|
|
sizeErr := false
|
|
for _, file := range data.Files.Game {
|
|
s, parseErr := utils.SizeToBytes(file.Size)
|
|
if parseErr != nil {
|
|
sizeErr = true
|
|
break
|
|
}
|
|
size += s
|
|
}
|
|
|
|
// Retrieve or create game item
|
|
item, err := db.GetGameItemByUrl(URL)
|
|
if err != nil {
|
|
return nil, c.ErrDBQuery(err)
|
|
}
|
|
|
|
item.Name = name
|
|
item.RawName = name
|
|
item.Downloads = links
|
|
item.Url = URL
|
|
if sizeErr {
|
|
item.Size = "unknown"
|
|
} else {
|
|
item.Size = utils.BytesToSize(size)
|
|
}
|
|
item.Author = "GOGGames"
|
|
item.Platform = "windows"
|
|
|
|
return item, nil
|
|
}
|
|
|
|
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
|
c.LogCrawlStart(page)
|
|
resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(fmt.Sprintf(constant.GOGGamesURL, page))
|
|
if err != nil {
|
|
return nil, c.ErrRequest(fmt.Sprintf(constant.GOGGamesURL, page), err)
|
|
}
|
|
|
|
data := searchResult{}
|
|
err = json.Unmarshal(resp.Body(), &data)
|
|
if err != nil {
|
|
return nil, c.ErrUnmarshalJSON(err)
|
|
}
|
|
|
|
urls := make([]string, 0)
|
|
var updateFlags []string // link+date
|
|
for _, item := range data.Data {
|
|
if item.Infohash == "" {
|
|
// skip unreleased games
|
|
continue
|
|
}
|
|
urls = append(urls, fmt.Sprintf(constant.GOGGamesPageURL, item.Slug))
|
|
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", item.GogURL, item.LastUpdate))))
|
|
}
|
|
|
|
res := make([]*model.GameItem, 0)
|
|
for i, u := range urls {
|
|
if db.IsGameCrawled(updateFlags[i], "GOGGames") {
|
|
c.LogCrawlSkip(u)
|
|
continue
|
|
}
|
|
|
|
item, err := c.CrawlByUrl(u)
|
|
if err != nil {
|
|
c.LogCrawlByUrlError(u, err)
|
|
continue
|
|
}
|
|
|
|
item.UpdateFlag = updateFlags[i]
|
|
if err := db.SaveGameItem(item); err != nil {
|
|
c.LogSaveGameItemError(u, err)
|
|
continue
|
|
}
|
|
|
|
res = append(res, item)
|
|
if err := OrganizeGameItem(item); err != nil {
|
|
c.LogOrganizeGameWarn(item.ID, item.Name, err)
|
|
continue
|
|
}
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
|
totalPageNum, err := c.GetTotalPageNum()
|
|
if err != nil {
|
|
return nil, c.ErrGetTotalPageNum(err)
|
|
}
|
|
res := make([]*model.GameItem, 0)
|
|
for _, page := range pages {
|
|
if page > totalPageNum || page < 1 {
|
|
c.LogPageExceedWarn(page)
|
|
continue
|
|
}
|
|
items, err := c.Crawl(page)
|
|
if err != nil {
|
|
c.LogCrawlError(page, err)
|
|
continue
|
|
}
|
|
res = append(res, items...)
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
|
|
totalPageNum, err := c.GetTotalPageNum()
|
|
if err != nil {
|
|
return nil, c.ErrGetTotalPageNum(err)
|
|
}
|
|
|
|
var res []*model.GameItem
|
|
for i := 1; i <= totalPageNum; i++ {
|
|
items, err := c.Crawl(i)
|
|
if err != nil {
|
|
c.LogCrawlError(i, err)
|
|
continue
|
|
}
|
|
res = append(res, items...)
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
|
|
resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(fmt.Sprintf(constant.GOGGamesURL, 1))
|
|
if err != nil {
|
|
return 0, c.ErrRequest(fmt.Sprintf(constant.GOGGamesURL, 1), err)
|
|
}
|
|
|
|
data := searchResult{}
|
|
err = json.Unmarshal(resp.Body(), &data)
|
|
if err != nil {
|
|
return 0, c.ErrUnmarshalJSON(err)
|
|
}
|
|
|
|
return data.Meta.LastPage, nil
|
|
}
|
|
|
|
// searchResult is the decoded form of one page of the game listing response.
type searchResult struct {
	// Data holds the games on this page.
	Data []struct {
		ID          string    `json:"id"`
		Slug        string    `json:"slug"`
		Title       string    `json:"title"`
		Image       string    `json:"image"`
		Background  string    `json:"background"`
		GogURL      string    `json:"gog_url"`
		IsIndev     bool      `json:"is_indev"`
		IsNew       bool      `json:"is_new"`
		IsUpdated   bool      `json:"is_updated"`
		IsQueued    bool      `json:"is_queued"`
		IsUploading bool      `json:"is_uploading"`
		VotedOn     bool      `json:"voted_on"`
		LastUpdate  time.Time `json:"last_update"`
		Md5Filename string    `json:"md5_filename"`
		// Infohash is empty for entries the crawler treats as unreleased.
		Infohash  string `json:"infohash"`
		IsVotable bool   `json:"is_votable"`
	} `json:"data"`

	// Links holds the pagination links of the response.
	Links struct {
		First string `json:"first"`
		Last  string `json:"last"`
		Prev  any    `json:"prev"`
		Next  string `json:"next"`
	} `json:"links"`

	// Meta holds the pagination counters; LastPage is the total page count
	// used by the crawler.
	Meta struct {
		CurrentPage int `json:"current_page"`
		From        int `json:"from"`
		LastPage    int `json:"last_page"`
		Links       []struct {
			URL    any    `json:"url"`
			Label  string `json:"label"`
			Active bool   `json:"active"`
		} `json:"links"`
		Path    string `json:"path"`
		PerPage int    `json:"per_page"`
		To      int    `json:"to"`
		Total   int    `json:"total"`
	} `json:"meta"`
}
|
|
|
|
// gogGamesLinkGroup is one named group of download links inside the "links"
// section of a game detail response.
type gogGamesLinkGroup struct {
	ID    string `json:"id"`
	Name  string `json:"name"`
	Links []struct {
		Label string `json:"label"`
		Link  string `json:"link"`
	} `json:"links"`
}

// gogGamesFile is one file entry inside the "files" section of a game detail
// response. Size is kept as the string sent by the API.
type gogGamesFile struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Size string `json:"size"`
}

// gameResult is the decoded form of a single game detail response.
type gameResult struct {
	ID          string    `json:"id"`
	Slug        string    `json:"slug"`
	Title       string    `json:"title"`
	Image       string    `json:"image"`
	Background  string    `json:"background"`
	GogURL      string    `json:"gog_url"`
	IsIndev     bool      `json:"is_indev"`
	IsNew       bool      `json:"is_new"`
	IsUpdated   bool      `json:"is_updated"`
	IsQueued    bool      `json:"is_queued"`
	IsUploading bool      `json:"is_uploading"`
	VotedOn     bool      `json:"voted_on"`
	LastUpdate  time.Time `json:"last_update"`
	Md5Filename string    `json:"md5_filename"`
	Infohash    string    `json:"infohash"`

	// Links holds the download link groups, split by category and keyed by
	// the object keys of the response.
	Links struct {
		Goodie map[string]gogGamesLinkGroup `json:"goodie"`
		Game   map[string]gogGamesLinkGroup `json:"game"`
		Patch  map[string]gogGamesLinkGroup `json:"patch"`
	} `json:"links"`

	// Files lists the individual files, split by the same categories.
	Files struct {
		Game   []gogGamesFile `json:"game"`
		Goodie []gogGamesFile `json:"goodie"`
		Patch  []gogGamesFile `json:"patch"`
	} `json:"files"`

	IsVotable bool `json:"is_votable"`
}
|