pcgamedb/crawler/goggames.go

315 lines
7.7 KiB
Go
Raw Normal View History

2024-09-24 06:17:11 -04:00
package crawler
import (
"encoding/json"
2024-09-24 06:17:11 -04:00
"fmt"
"path"
2024-09-24 06:17:11 -04:00
"strings"
"time"
2024-09-24 06:17:11 -04:00
2024-11-20 06:09:04 -05:00
"pcgamedb/config"
"pcgamedb/constant"
"pcgamedb/db"
"pcgamedb/model"
"pcgamedb/utils"
2024-11-15 02:02:45 -05:00
2024-12-02 03:17:01 -05:00
"git.nite07.com/nite/ccs"
2024-09-24 06:17:11 -04:00
"go.uber.org/zap"
)
// GOGGamesCrawler crawls game entries published on gog-games.to.
type GOGGamesCrawler struct {
	// logger receives the progress and failure messages emitted while crawling.
	logger *zap.Logger
}
// NewGOGGamesCrawler returns a GOGGamesCrawler that logs through logger.
func NewGOGGamesCrawler(logger *zap.Logger) *GOGGamesCrawler {
	crawler := new(GOGGamesCrawler)
	crawler.logger = logger
	return crawler
}
2024-11-14 12:29:19 -05:00
// Name returns the fixed identifier of this crawler.
func (c *GOGGamesCrawler) Name() string {
	const crawlerName = "GOGGamesCrawler"
	return crawlerName
}
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
if !strings.HasPrefix(URL, "https://www.gog-games.to/game/") {
return nil, fmt.Errorf("invalid url")
}
_, slug := path.Split(URL)
apiUrl := fmt.Sprintf(constant.GOGGamesGameAPIURL, slug)
2024-12-02 03:17:01 -05:00
token, err := ccs.TurnstileToken(config.Config.CFClearanceScraper.Url, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
if err != nil {
return nil, err
}
2024-12-02 03:17:01 -05:00
resp, err := utils.Request().SetHeader("cf-turnstile-response", token).Get(apiUrl)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
data := gameResult{}
2024-12-02 03:17:01 -05:00
err = json.Unmarshal(resp.Body(), &data)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
name := data.Title
// find download links
fileHosters := []string{
"gofile",
"fileditch",
"qiwi",
"filesfm",
"pixeldrain",
"1fichier",
2024-09-24 06:17:11 -04:00
}
links := make([]string, 0)
for _, h := range fileHosters {
if value, exist := data.Links.Game[h]; exist {
for _, link := range value.Links {
links = append(links, link.Link)
}
}
if value, exist := data.Links.Patch[h]; exist {
for _, link := range value.Links {
links = append(links, link.Link)
}
}
}
if len(links) == 0 {
return nil, fmt.Errorf("no download link found")
}
size := uint64(0)
for _, file := range data.Files.Game {
s, _ := utils.SizeToBytes(file.Size)
size += s
2024-09-24 06:17:11 -04:00
}
item, err := db.GetGameItemByUrl(URL)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
item.Name = name
item.RawName = name
item.Download = strings.Join(links, ",")
item.Url = URL
item.Size = utils.BytesToSize(size)
2024-09-24 06:17:11 -04:00
item.Author = "GOGGames"
2024-12-21 11:37:00 -05:00
item.Platform = "windows"
2024-09-24 06:17:11 -04:00
return item, nil
}
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
2024-12-02 03:17:01 -05:00
resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, page))
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
data := searchResult{}
2024-12-02 03:17:01 -05:00
err = json.Unmarshal(resp.Body(), &data)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
urls := make([]string, 0)
var updateFlags []string //link+date
for _, item := range data.Data {
urls = append(urls, fmt.Sprintf(constant.GOGGamesPageURL, item.Slug))
updateFlags = append(updateFlags, fmt.Sprintf("%s%s", item.GogURL, item.LastUpdate))
}
res := make([]*model.GameItem, 0)
for i, u := range urls {
if db.IsGameCrawled(updateFlags[i], "GOGGames") {
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
2024-09-24 06:17:11 -04:00
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
continue
}
item.UpdateFlag = updateFlags[i]
if err := db.SaveGameItem(item); err != nil {
2024-09-24 06:17:11 -04:00
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
continue
}
res = append(res, item)
if err := OrganizeGameItem(item); err != nil {
2024-09-24 06:17:11 -04:00
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
continue
}
}
return res, nil
}
func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
res := make([]*model.GameItem, 0)
2024-09-24 06:17:11 -04:00
for _, page := range pages {
items, err := c.Crawl(page)
if err != nil {
return nil, err
}
res = append(res, items...)
}
return res, nil
}
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
2024-09-24 06:17:11 -04:00
totalPageNum, err := c.GetTotalPageNum()
if err != nil {
return nil, err
}
var res []*model.GameItem
2024-09-24 06:17:11 -04:00
for i := 1; i <= totalPageNum; i++ {
items, err := c.Crawl(i)
if err != nil {
return nil, err
}
res = append(res, items...)
}
return res, nil
}
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
2024-12-02 03:17:01 -05:00
resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, 1))
2024-09-24 06:17:11 -04:00
if err != nil {
return 0, err
}
data := searchResult{}
2024-12-02 03:17:01 -05:00
err = json.Unmarshal(resp.Body(), &data)
2024-09-24 06:17:11 -04:00
if err != nil {
return 0, err
}
return data.Meta.LastPage, nil
}
type searchResult struct {
Data []struct {
ID string `json:"id"`
Slug string `json:"slug"`
Title string `json:"title"`
Image string `json:"image"`
Background string `json:"background"`
GogURL string `json:"gog_url"`
IsIndev bool `json:"is_indev"`
IsNew bool `json:"is_new"`
IsUpdated bool `json:"is_updated"`
IsQueued bool `json:"is_queued"`
IsUploading bool `json:"is_uploading"`
VotedOn bool `json:"voted_on"`
LastUpdate time.Time `json:"last_update"`
Md5Filename string `json:"md5_filename"`
Infohash string `json:"infohash"`
IsVotable bool `json:"is_votable"`
} `json:"data"`
Links struct {
First string `json:"first"`
Last string `json:"last"`
Prev any `json:"prev"`
Next string `json:"next"`
} `json:"links"`
Meta struct {
CurrentPage int `json:"current_page"`
From int `json:"from"`
LastPage int `json:"last_page"`
Links []struct {
URL any `json:"url"`
Label string `json:"label"`
Active bool `json:"active"`
} `json:"links"`
Path string `json:"path"`
PerPage int `json:"per_page"`
To int `json:"to"`
Total int `json:"total"`
} `json:"meta"`
}
type gameResult struct {
ID string `json:"id"`
Slug string `json:"slug"`
Title string `json:"title"`
Image string `json:"image"`
Background string `json:"background"`
GogURL string `json:"gog_url"`
IsIndev bool `json:"is_indev"`
IsNew bool `json:"is_new"`
IsUpdated bool `json:"is_updated"`
IsQueued bool `json:"is_queued"`
IsUploading bool `json:"is_uploading"`
VotedOn bool `json:"voted_on"`
LastUpdate time.Time `json:"last_update"`
Md5Filename string `json:"md5_filename"`
Infohash string `json:"infohash"`
Links struct {
Goodie struct {
OneFichier struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"1fichier"`
Vikingfile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"vikingfile"`
Pixeldrain struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"pixeldrain"`
Gofile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"gofile"`
} `json:"goodie"`
Game map[string]struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"game"`
Patch map[string]struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"patch"`
} `json:"links"`
Files struct {
Game []struct {
Name string `json:"name"`
Type string `json:"type"`
Size string `json:"size"`
} `json:"game"`
Goodie []struct {
Name string `json:"name"`
Type string `json:"type"`
Size string `json:"size"`
} `json:"goodie"`
Patch []struct {
Name string `json:"name"`
Type string `json:"type"`
Size string `json:"size"`
} `json:"patch"`
} `json:"files"`
IsVotable bool `json:"is_votable"`
2024-09-24 06:17:11 -04:00
}