2024-09-24 06:17:11 -04:00
|
|
|
package crawler
|
|
|
|
|
|
|
|
import (
|
2024-11-18 07:21:08 -05:00
|
|
|
"encoding/json"
|
2024-09-24 06:17:11 -04:00
|
|
|
"fmt"
|
2024-11-21 12:30:26 -05:00
|
|
|
"path"
|
2024-09-24 06:17:11 -04:00
|
|
|
"strings"
|
2024-11-18 07:21:08 -05:00
|
|
|
"time"
|
2024-09-24 06:17:11 -04:00
|
|
|
|
2024-11-20 06:09:04 -05:00
|
|
|
"pcgamedb/config"
|
|
|
|
"pcgamedb/constant"
|
|
|
|
"pcgamedb/db"
|
|
|
|
"pcgamedb/model"
|
|
|
|
"pcgamedb/utils"
|
2024-11-15 02:02:45 -05:00
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
"git.nite07.com/nite/ccs"
|
2024-09-24 06:17:11 -04:00
|
|
|
"go.uber.org/zap"
|
|
|
|
)
|
|
|
|
|
|
|
|
type GOGGamesCrawler struct {
|
|
|
|
logger *zap.Logger
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewGOGGamesCrawler(logger *zap.Logger) *GOGGamesCrawler {
|
|
|
|
return &GOGGamesCrawler{
|
|
|
|
logger: logger,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-11-14 12:29:19 -05:00
|
|
|
func (c *GOGGamesCrawler) Name() string {
|
|
|
|
return "GOGGamesCrawler"
|
|
|
|
}
|
|
|
|
|
2024-11-19 21:40:33 -05:00
|
|
|
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
2024-11-21 12:30:26 -05:00
|
|
|
if !strings.HasPrefix(URL, "https://www.gog-games.to/game/") {
|
|
|
|
return nil, fmt.Errorf("invalid url")
|
|
|
|
}
|
|
|
|
_, slug := path.Split(URL)
|
|
|
|
|
|
|
|
apiUrl := fmt.Sprintf(constant.GOGGamesGameAPIURL, slug)
|
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
token, err := ccs.TurnstileToken(config.Config.CFClearanceScraper.Url, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
|
2024-11-19 21:40:33 -05:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err := utils.Request().SetHeader("cf-turnstile-response", token).Get(apiUrl)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-11-18 07:21:08 -05:00
|
|
|
data := gameResult{}
|
2024-12-02 03:17:01 -05:00
|
|
|
err = json.Unmarshal(resp.Body(), &data)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-11-18 07:21:08 -05:00
|
|
|
|
|
|
|
name := data.Title
|
2024-11-21 12:30:26 -05:00
|
|
|
|
|
|
|
// find download links
|
|
|
|
fileHosters := []string{
|
|
|
|
"gofile",
|
|
|
|
"fileditch",
|
|
|
|
"qiwi",
|
|
|
|
"filesfm",
|
|
|
|
"pixeldrain",
|
|
|
|
"1fichier",
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-11-21 12:30:26 -05:00
|
|
|
links := make([]string, 0)
|
|
|
|
for _, h := range fileHosters {
|
|
|
|
if value, exist := data.Links.Game[h]; exist {
|
|
|
|
for _, link := range value.Links {
|
|
|
|
links = append(links, link.Link)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if value, exist := data.Links.Patch[h]; exist {
|
|
|
|
for _, link := range value.Links {
|
|
|
|
links = append(links, link.Link)
|
|
|
|
}
|
2024-11-18 07:21:08 -05:00
|
|
|
}
|
|
|
|
}
|
2024-11-21 12:30:26 -05:00
|
|
|
|
|
|
|
if len(links) == 0 {
|
|
|
|
return nil, fmt.Errorf("no download link found")
|
|
|
|
}
|
|
|
|
|
2024-11-18 07:21:08 -05:00
|
|
|
size := uint64(0)
|
|
|
|
for _, file := range data.Files.Game {
|
|
|
|
s, _ := utils.SizeToBytes(file.Size)
|
|
|
|
size += s
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-11-18 07:21:08 -05:00
|
|
|
|
2024-11-19 21:40:33 -05:00
|
|
|
item, err := db.GetGameItemByUrl(URL)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
item.Name = name
|
|
|
|
item.RawName = name
|
2024-11-18 07:21:08 -05:00
|
|
|
item.Download = strings.Join(links, ",")
|
2024-11-19 21:40:33 -05:00
|
|
|
item.Url = URL
|
2024-11-18 07:21:08 -05:00
|
|
|
item.Size = utils.BytesToSize(size)
|
2024-09-24 06:17:11 -04:00
|
|
|
item.Author = "GOGGames"
|
2024-12-21 11:37:00 -05:00
|
|
|
item.Platform = "windows"
|
2024-09-24 06:17:11 -04:00
|
|
|
return item, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, page))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-11-18 07:21:08 -05:00
|
|
|
data := searchResult{}
|
2024-12-02 03:17:01 -05:00
|
|
|
err = json.Unmarshal(resp.Body(), &data)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
urls := make([]string, 0)
|
2024-11-21 12:30:26 -05:00
|
|
|
var updateFlags []string //link+date
|
2024-11-18 07:21:08 -05:00
|
|
|
for _, item := range data.Data {
|
|
|
|
urls = append(urls, fmt.Sprintf(constant.GOGGamesPageURL, item.Slug))
|
|
|
|
updateFlags = append(updateFlags, fmt.Sprintf("%s%s", item.GogURL, item.LastUpdate))
|
|
|
|
}
|
2024-11-16 00:48:48 -05:00
|
|
|
res := make([]*model.GameItem, 0)
|
2024-11-18 07:21:08 -05:00
|
|
|
for i, u := range urls {
|
|
|
|
if db.IsGameCrawled(updateFlags[i], "GOGGames") {
|
|
|
|
continue
|
|
|
|
}
|
2024-11-21 12:30:26 -05:00
|
|
|
c.logger.Info("Crawling", zap.String("URL", u))
|
2024-09-24 06:17:11 -04:00
|
|
|
item, err := c.CrawlByUrl(u)
|
|
|
|
if err != nil {
|
|
|
|
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
|
|
|
continue
|
|
|
|
}
|
2024-11-21 12:30:26 -05:00
|
|
|
item.UpdateFlag = updateFlags[i]
|
2024-11-16 00:48:48 -05:00
|
|
|
if err := db.SaveGameItem(item); err != nil {
|
2024-09-24 06:17:11 -04:00
|
|
|
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
res = append(res, item)
|
2024-11-21 12:30:26 -05:00
|
|
|
if err := OrganizeGameItem(item); err != nil {
|
2024-09-24 06:17:11 -04:00
|
|
|
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
|
|
|
res := make([]*model.GameItem, 0)
|
2024-09-24 06:17:11 -04:00
|
|
|
for _, page := range pages {
|
|
|
|
items, err := c.Crawl(page)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
res = append(res, items...)
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
|
2024-09-24 06:17:11 -04:00
|
|
|
totalPageNum, err := c.GetTotalPageNum()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-11-16 00:48:48 -05:00
|
|
|
var res []*model.GameItem
|
2024-09-24 06:17:11 -04:00
|
|
|
for i := 1; i <= totalPageNum; i++ {
|
|
|
|
items, err := c.Crawl(i)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
res = append(res, items...)
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, 1))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
2024-11-18 07:21:08 -05:00
|
|
|
data := searchResult{}
|
2024-12-02 03:17:01 -05:00
|
|
|
err = json.Unmarshal(resp.Body(), &data)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
2024-11-18 07:21:08 -05:00
|
|
|
return data.Meta.LastPage, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// searchResult is the decoding target for the paginated listing response that
// Crawl and GetTotalPageNum fetch from constant.GOGGamesURL.
type searchResult struct {
	// Data holds one entry per game on the requested page.
	Data []struct {
		ID          string    `json:"id"`
		Slug        string    `json:"slug"`
		Title       string    `json:"title"`
		Image       string    `json:"image"`
		Background  string    `json:"background"`
		GogURL      string    `json:"gog_url"`
		IsIndev     bool      `json:"is_indev"`
		IsNew       bool      `json:"is_new"`
		IsUpdated   bool      `json:"is_updated"`
		IsQueued    bool      `json:"is_queued"`
		IsUploading bool      `json:"is_uploading"`
		VotedOn     bool      `json:"voted_on"`
		LastUpdate  time.Time `json:"last_update"`
		Md5Filename string    `json:"md5_filename"`
		Infohash    string    `json:"infohash"`
		IsVotable   bool      `json:"is_votable"`
	} `json:"data"`
	// Links carries the pagination links of the response.
	Links struct {
		First string `json:"first"`
		Last  string `json:"last"`
		Prev  any    `json:"prev"`
		Next  string `json:"next"`
	} `json:"links"`
	// Meta carries the pagination counters; LastPage is the total page count
	// used by GetTotalPageNum.
	Meta struct {
		CurrentPage int `json:"current_page"`
		From        int `json:"from"`
		LastPage    int `json:"last_page"`
		Links       []struct {
			URL    any    `json:"url"`
			Label  string `json:"label"`
			Active bool   `json:"active"`
		} `json:"links"`
		Path    string `json:"path"`
		PerPage int    `json:"per_page"`
		To      int    `json:"to"`
		Total   int    `json:"total"`
	} `json:"meta"`
}
|
|
|
|
|
|
|
|
// gameResult is the decoding target for the per-game API response that
// CrawlByUrl fetches from constant.GOGGamesGameAPIURL.
type gameResult struct {
	ID          string    `json:"id"`
	Slug        string    `json:"slug"`
	Title       string    `json:"title"`
	Image       string    `json:"image"`
	Background  string    `json:"background"`
	GogURL      string    `json:"gog_url"`
	IsIndev     bool      `json:"is_indev"`
	IsNew       bool      `json:"is_new"`
	IsUpdated   bool      `json:"is_updated"`
	IsQueued    bool      `json:"is_queued"`
	IsUploading bool      `json:"is_uploading"`
	VotedOn     bool      `json:"voted_on"`
	LastUpdate  time.Time `json:"last_update"`
	Md5Filename string    `json:"md5_filename"`
	Infohash    string    `json:"infohash"`
	// Links groups the download links by category.
	Links struct {
		// Goodie lists goodie links for a fixed set of hosters.
		Goodie struct {
			OneFichier struct {
				ID    string `json:"id"`
				Name  string `json:"name"`
				Links []struct {
					Label string `json:"label"`
					Link  string `json:"link"`
				} `json:"links"`
			} `json:"1fichier"`
			Vikingfile struct {
				ID    string `json:"id"`
				Name  string `json:"name"`
				Links []struct {
					Label string `json:"label"`
					Link  string `json:"link"`
				} `json:"links"`
			} `json:"vikingfile"`
			Pixeldrain struct {
				ID    string `json:"id"`
				Name  string `json:"name"`
				Links []struct {
					Label string `json:"label"`
					Link  string `json:"link"`
				} `json:"links"`
			} `json:"pixeldrain"`
			Gofile struct {
				ID    string `json:"id"`
				Name  string `json:"name"`
				Links []struct {
					Label string `json:"label"`
					Link  string `json:"link"`
				} `json:"links"`
			} `json:"gofile"`
		} `json:"goodie"`
		// Game maps a hoster key (e.g. "gofile") to that hoster's game links.
		Game map[string]struct {
			ID    string `json:"id"`
			Name  string `json:"name"`
			Links []struct {
				Label string `json:"label"`
				Link  string `json:"link"`
			} `json:"links"`
		} `json:"game"`
		// Patch maps a hoster key to that hoster's patch links.
		Patch map[string]struct {
			ID    string `json:"id"`
			Name  string `json:"name"`
			Links []struct {
				Label string `json:"label"`
				Link  string `json:"link"`
			} `json:"links"`
		} `json:"patch"`
	} `json:"links"`
	// Files lists the files of each category; Size is a textual size that
	// CrawlByUrl converts with utils.SizeToBytes.
	Files struct {
		Game []struct {
			Name string `json:"name"`
			Type string `json:"type"`
			Size string `json:"size"`
		} `json:"game"`
		Goodie []struct {
			Name string `json:"name"`
			Type string `json:"type"`
			Size string `json:"size"`
		} `json:"goodie"`
		Patch []struct {
			Name string `json:"name"`
			Type string `json:"type"`
			Size string `json:"size"`
		} `json:"patch"`
	} `json:"files"`
	IsVotable bool `json:"is_votable"`
}
|