pcgamedb/crawler/goggames.go
nite07 b7aadf7f88 add cf-clearance-scraper
fix greegogcrawler
fix goggamescrawler
2024-11-20 10:40:33 +08:00

352 lines
8.9 KiB
Go

package crawler
import (
"encoding/json"
"fmt"
"strings"
"time"
"github.com/nitezs/pcgamedb/config"
"github.com/nitezs/pcgamedb/constant"
"github.com/nitezs/pcgamedb/db"
"github.com/nitezs/pcgamedb/model"
"github.com/nitezs/pcgamedb/utils"
"go.uber.org/zap"
)
// GOGGamesCrawler is the crawler type whose methods fetch game listings and
// per-game details and turn them into model.GameItem values.
type GOGGamesCrawler struct {
// logger receives the progress and warning messages emitted while crawling.
logger *zap.Logger
}
// NewGOGGamesCrawler builds a GOGGamesCrawler that reports through the given
// logger. The logger is stored as-is; no validation is performed.
func NewGOGGamesCrawler(logger *zap.Logger) *GOGGamesCrawler {
	crawler := new(GOGGamesCrawler)
	crawler.logger = logger
	return crawler
}
// Name returns the fixed identifier of this crawler, "GOGGamesCrawler".
func (c *GOGGamesCrawler) Name() string {
	const crawlerName = "GOGGamesCrawler"
	return crawlerName
}
// CrawlByUrl fetches a single game from the API and converts the response
// into a model.GameItem.
//
// URL is the API URL of the game, like
// https://www.gog-games.to/api/v1/games/%s. A token is first requested via
// utils.CCSTurnstileToken using the configured cf-clearance-scraper URL and
// is sent along in the "cf-turnstile-response" request header.
//
// The returned item is obtained from db.GetGameItemByUrl and then overwritten
// with the crawled values: Download is the comma-joined list of Gofile links
// for the game followed by those for its patches, and Size is the formatted
// sum of the game file sizes that could be parsed. The item is not saved
// here.
//
// Token, fetch, decode and lookup errors are returned wrapped with the step
// that failed. A file size that cannot be parsed is logged (when a logger is
// set) and left out of the total instead of aborting the crawl.
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	// Key handed to the token helper; presumably the site's Turnstile site
	// key — confirm against utils.CCSTurnstileToken.
	const turnstileKey = "0x4AAAAAAAfOlgvCKbOdW1zc"

	token, err := utils.CCSTurnstileToken(config.Config.CFClearanceScraper.Url, URL, turnstileKey)
	if err != nil {
		return nil, fmt.Errorf("requesting turnstile token for %q: %w", URL, err)
	}
	resp, err := utils.Fetch(utils.FetchConfig{
		Url: URL,
		Headers: map[string]string{
			"cf-turnstile-response": token,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("fetching %q: %w", URL, err)
	}
	var data gameResult
	if err := json.Unmarshal(resp.Data, &data); err != nil {
		return nil, fmt.Errorf("decoding game response from %q: %w", URL, err)
	}

	// Game links come first, patch links after; ranging over an empty slice
	// is a no-op, so no length guard is needed.
	gameLinks := data.Links.Game.Gofile.Links
	patchLinks := data.Links.Patch.Gofile.Links
	links := make([]string, 0, len(gameLinks)+len(patchLinks))
	for _, l := range gameLinks {
		links = append(links, l.Link)
	}
	for _, l := range patchLinks {
		links = append(links, l.Link)
	}

	var size uint64
	for _, file := range data.Files.Game {
		n, err := utils.SizeToBytes(file.Size)
		if err != nil {
			// Best effort: one unparseable size must not fail the whole
			// game, but it should not vanish silently either.
			if c != nil && c.logger != nil {
				c.logger.Warn("Failed to parse file size",
					zap.Error(err),
					zap.String("URL", URL),
					zap.String("size", file.Size),
				)
			}
			continue
		}
		size += n
	}

	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		return nil, fmt.Errorf("loading game item for %q: %w", URL, err)
	}
	name := data.Title
	item.Name = name
	item.RawName = name
	item.Download = strings.Join(links, ",")
	item.Url = URL
	item.Size = utils.BytesToSize(size)
	item.Author = "GOGGames"
	return item, nil
}
// Crawl fetches one page of the game listing (constant.GOGGamesURL formatted
// with page), then crawls, saves and organizes every listed game that
// db.IsGameCrawled does not already report as crawled.
//
// It returns the items that were crawled and saved during this call. An
// error is returned only when the listing itself cannot be fetched or
// decoded; per-game failures are logged as warnings and that game is skipped.
// An item whose organize or info-save step fails is still part of the result,
// because it is appended right after its own save succeeds.
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
	listing, err := utils.Fetch(utils.FetchConfig{
		Url: fmt.Sprintf(constant.GOGGamesURL, page),
	})
	if err != nil {
		return nil, err
	}
	var parsed searchResult
	if err = json.Unmarshal(listing.Data, &parsed); err != nil {
		return nil, err
	}

	crawled := make([]*model.GameItem, 0)
	for idx := range parsed.Data {
		entry := &parsed.Data[idx]
		gameURL := fmt.Sprintf(constant.GOGGamesPageURL, entry.Slug)
		// Update flag is link+date, so a changed date yields a new flag.
		flag := fmt.Sprintf("%s%s", entry.GogURL, entry.LastUpdate)

		c.logger.Info("Crawling", zap.String("URL", gameURL))
		// NOTE(review): nothing visible here stores flag on the item before
		// it is saved; whether this check can ever match depends on code
		// outside this function — confirm.
		if db.IsGameCrawled(flag, "GOGGames") {
			continue
		}

		game, crawlErr := c.CrawlByUrl(gameURL)
		if crawlErr != nil {
			c.logger.Warn("Failed to crawl", zap.Error(crawlErr), zap.String("URL", gameURL))
			continue
		}
		if saveErr := db.SaveGameItem(game); saveErr != nil {
			c.logger.Warn("Failed to save", zap.Error(saveErr), zap.String("URL", gameURL))
			continue
		}
		crawled = append(crawled, game)

		info, organizeErr := OrganizeGameItem(game)
		if organizeErr != nil {
			c.logger.Warn("Failed to organize", zap.Error(organizeErr), zap.String("URL", gameURL))
			continue
		}
		if saveErr := db.SaveGameInfo(info); saveErr != nil {
			c.logger.Warn("Failed to save", zap.Error(saveErr), zap.String("URL", gameURL))
		}
	}
	return crawled, nil
}
// CrawlMulti crawls the given listing pages in order and returns the
// concatenation of what Crawl returned for each of them.
//
// The first page that fails aborts the run: the error is returned together
// with a nil slice, discarding items gathered from earlier pages. With no
// pages the result is an empty, non-nil slice.
func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	collected := make([]*model.GameItem, 0)
	for idx := 0; idx < len(pages); idx++ {
		pageItems, err := c.Crawl(pages[idx])
		if err != nil {
			return nil, err
		}
		collected = append(collected, pageItems...)
	}
	return collected, nil
}
// CrawlAll crawls every listing page from 1 up to and including the page
// count reported by GetTotalPageNum, and returns all items Crawl produced.
//
// If the page count cannot be determined, or any page fails, the error is
// returned with a nil slice.
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
	lastPage, err := c.GetTotalPageNum()
	if err != nil {
		return nil, err
	}

	var all []*model.GameItem
	page := 1
	for page <= lastPage {
		batch, crawlErr := c.Crawl(page)
		if crawlErr != nil {
			return nil, crawlErr
		}
		all = append(all, batch...)
		page++
	}
	return all, nil
}
// GetTotalPageNum requests the first listing page and returns the
// meta.last_page value found in its JSON body.
//
// A fetch or decode failure yields 0 and the underlying error.
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
	firstPage := utils.FetchConfig{
		Url: fmt.Sprintf(constant.GOGGamesURL, 1),
	}
	listing, err := utils.Fetch(firstPage)
	if err != nil {
		return 0, err
	}

	var parsed searchResult
	if decodeErr := json.Unmarshal(listing.Data, &parsed); decodeErr != nil {
		return 0, decodeErr
	}
	return parsed.Meta.LastPage, nil
}
// searchResult is the decoding target for the JSON body of one page of the
// game listing (the URL built from constant.GOGGamesURL).
//
// Of these fields, the crawler methods visible alongside this type read only
// Data[].Slug, Data[].GogURL, Data[].LastUpdate and Meta.LastPage; the rest
// are decoded but not otherwise used there.
type searchResult struct {
// Data holds one entry per game on the page.
Data []struct {
ID string `json:"id"`
// Slug is substituted into constant.GOGGamesPageURL to build the per-game URL.
Slug string `json:"slug"`
Title string `json:"title"`
Image string `json:"image"`
Background string `json:"background"`
// GogURL and LastUpdate are concatenated to form the update flag.
GogURL string `json:"gog_url"`
IsIndev bool `json:"is_indev"`
IsNew bool `json:"is_new"`
IsUpdated bool `json:"is_updated"`
IsQueued bool `json:"is_queued"`
IsUploading bool `json:"is_uploading"`
VotedOn bool `json:"voted_on"`
LastUpdate time.Time `json:"last_update"`
Md5Filename string `json:"md5_filename"`
Infohash string `json:"infohash"`
IsVotable bool `json:"is_votable"`
} `json:"data"`
// Links mirrors the top-level "links" object of the response.
Links struct {
First string `json:"first"`
Last string `json:"last"`
// Prev is typed any, so it accepts whatever JSON value the API sends.
Prev any `json:"prev"`
Next string `json:"next"`
} `json:"links"`
// Meta mirrors the top-level "meta" object of the response.
Meta struct {
CurrentPage int `json:"current_page"`
From int `json:"from"`
// LastPage is what GetTotalPageNum returns as the total page count.
LastPage int `json:"last_page"`
Links []struct {
// URL is typed any, so it accepts whatever JSON value the API sends.
URL any `json:"url"`
Label string `json:"label"`
Active bool `json:"active"`
} `json:"links"`
Path string `json:"path"`
PerPage int `json:"per_page"`
To int `json:"to"`
Total int `json:"total"`
} `json:"meta"`
}
// gameResult is the decoding target for the JSON body of a single game's
// API response, as fetched by CrawlByUrl.
//
// Links groups download links by category (goodie, game, patch) and, inside
// each category, by host (1fichier, vikingfile, pixeldrain, gofile). Files
// lists files per category. CrawlByUrl reads only Title, the Gofile links of
// the game and patch categories, and the sizes in Files.Game.
type gameResult struct {
ID string `json:"id"`
Slug string `json:"slug"`
// Title becomes both Name and RawName of the crawled item.
Title string `json:"title"`
Image string `json:"image"`
Background string `json:"background"`
GogURL string `json:"gog_url"`
IsIndev bool `json:"is_indev"`
IsNew bool `json:"is_new"`
IsUpdated bool `json:"is_updated"`
IsQueued bool `json:"is_queued"`
IsUploading bool `json:"is_uploading"`
VotedOn bool `json:"voted_on"`
LastUpdate time.Time `json:"last_update"`
Md5Filename string `json:"md5_filename"`
Infohash string `json:"infohash"`
// Links holds the download links; every category has the same four hosts
// and every host the same id/name/links shape.
Links struct {
// Goodie: links under the "goodie" key (not read by CrawlByUrl).
Goodie struct {
OneFichier struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"1fichier"`
Vikingfile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"vikingfile"`
Pixeldrain struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"pixeldrain"`
Gofile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"gofile"`
} `json:"goodie"`
// Game: links under the "game" key; its Gofile links are collected first.
Game struct {
OneFichier struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"1fichier"`
Vikingfile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"vikingfile"`
Pixeldrain struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"pixeldrain"`
Gofile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"gofile"`
} `json:"game"`
// Patch: links under the "patch" key; its Gofile links are appended after the game's.
Patch struct {
OneFichier struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"1fichier"`
Vikingfile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"vikingfile"`
Pixeldrain struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"pixeldrain"`
Gofile struct {
ID string `json:"id"`
Name string `json:"name"`
Links []struct {
Label string `json:"label"`
Link string `json:"link"`
} `json:"links"`
} `json:"gofile"`
} `json:"patch"`
} `json:"links"`
// Files lists the files of each category with their name, type and size.
Files struct {
// Game sizes are strings; CrawlByUrl converts them with
// utils.SizeToBytes and sums them into the item's total size.
Game []struct {
Name string `json:"name"`
Type string `json:"type"`
Size string `json:"size"`
} `json:"game"`
Goodie []struct {
Name string `json:"name"`
Type string `json:"type"`
Size string `json:"size"`
} `json:"goodie"`
Patch []struct {
Name string `json:"name"`
Type string `json:"type"`
Size string `json:"size"`
} `json:"patch"`
} `json:"files"`
IsVotable bool `json:"is_votable"`
}