This commit is contained in:
Nite07 2024-12-27 01:24:56 +08:00
parent 2553a35a4a
commit 29dd7fc058
21 changed files with 1519 additions and 625 deletions

View File

@ -3,6 +3,7 @@ package cmd
import (
"game-crawler/crawler"
"game-crawler/log"
"go.uber.org/zap"
"github.com/spf13/cobra"
@ -13,7 +14,7 @@ var supplementCmd = &cobra.Command{
Long: "Supplement platform id to game info",
Short: "Supplement platform id to game info",
Run: func(cmd *cobra.Command, args []string) {
err := crawler.SupplementPlatformIDToGameInfo(log.Logger)
err := crawler.SupplementPlatformIDToGameInfo()
if err != nil {
log.Logger.Error("Error supplementing platform id to game info", zap.Error(err))
}

View File

@ -1,10 +1,8 @@
package config
import (
"bytes"
"encoding/json"
"os"
"os/exec"
"reflect"
"strconv"
"strings"
@ -18,9 +16,14 @@ type config struct {
Redis redis `json:"redis"`
OnlineFix onlinefix `json:"online_fix"`
Twitch twitch `json:"twitch"`
Rutracker rutracker `json:"rutracker"`
Webhooks webhooks `json:"webhooks"`
CFClearanceScraper cfClearanceScraper `json:"cf_clearance_scraper"`
MegaAvaliable bool
}
// rutracker groups the account credentials stored under the "rutracker" key of
// the JSON config. The struct tags name both the JSON key and an environment
// variable for each field.
type rutracker struct {
// User is the account login (JSON key "user", env tag RUTRACKER_USER).
User string `env:"RUTRACKER_USER" json:"user"`
// Password is the account password (JSON key "password", env tag RUTRACKER_PASSWORD).
Password string `env:"RUTRACKER_PASSWORD" json:"password"`
}
type cfClearanceScraper struct {
@ -78,7 +81,6 @@ func init() {
User: "root",
Password: "password",
},
MegaAvaliable: TestMega(),
Server: server{
AutoCrawlCron: "0 */3 * * *",
},
@ -147,11 +149,3 @@ func loadEnvVariables(cfg interface{}) {
}
}
}
// TestMega reports whether running `mega-get --help` completes without error,
// i.e. whether the command can be started and exits successfully.
func TestMega() bool {
	// The help text is captured only so it does not reach the caller's stdout;
	// its content is never inspected.
	var captured bytes.Buffer

	probe := exec.Command("mega-get", "--help")
	probe.Stdout = &captured

	return probe.Run() == nil
}

View File

@ -31,4 +31,8 @@ const (
SteamRIPGameListURL = "https://steamrip.com/games-list-page/"
RepackInfoURL = "https://repack.info/page/%v/"
GnarlyURL = "https://rentry.org/gnarly_repacks"
RutrackerTopicURL = "https://rutracker.org/forum/%s"
RutrackerURL = "https://rutracker.org/forum/index.php"
RutrackerLoginURL = "https://rutracker.org/forum/login.php"
RutrackerAuthorURL = "https://rutracker.org/forum/tracker.php?rid=%s&start=%v"
)

View File

@ -17,16 +17,16 @@ import (
"go.uber.org/zap"
)
// Formatter is a function that maps one string to another.
// NOTE(review): it has the same signature as FormatterFunc below; this page
// appears to show both sides of a rename, so presumably only one of the two
// declarations exists in the real file — confirm.
type Formatter func(string) string
// FormatterFunc is a function that maps one string to another. The 1337x
// crawler stores one in its formatter field.
type FormatterFunc func(string) string
type s1337xCrawler struct {
source string
platform string
formatter Formatter
formatter FormatterFunc
logger *zap.Logger
}
func New1337xCrawler(source string, platform string, formatter Formatter, logger *zap.Logger) *s1337xCrawler {
func New1337xCrawler(source string, platform string, formatter FormatterFunc, logger *zap.Logger) *s1337xCrawler {
return &s1337xCrawler{
source: source,
formatter: formatter,
@ -36,69 +36,101 @@ func New1337xCrawler(source string, platform string, formatter Formatter, logger
}
func (c *s1337xCrawler) Crawl(page int) ([]*model.GameItem, error) {
var doc *goquery.Document
c.logger.Info("Starting Crawl", zap.Int("Page", page), zap.String("Source", c.source))
requestUrl := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, page)
resp, err := utils.Request().Get(requestUrl)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch page", zap.String("URL", requestUrl), zap.Error(err))
return nil, fmt.Errorf("failed to fetch page %d for source %s: %w", page, c.source, err)
}
doc, err = goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
return nil, err
c.logger.Error("Failed to parse HTML document", zap.String("URL", requestUrl), zap.Error(err))
return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
}
trSelection := doc.Find("tbody>tr")
var urls []string
trSelection.Each(func(i int, trNode *goquery.Selection) {
nameSelection := trNode.Find(".name").First()
if aNode := nameSelection.Find("a").Eq(1); aNode.Length() > 0 {
url, _ := aNode.Attr("href")
url, exists := aNode.Attr("href")
if exists {
urls = append(urls, url)
} else {
c.logger.Warn("Failed to find URL in row", zap.Int("RowIndex", i))
}
}
})
var res []*model.GameItem
for _, u := range urls {
u = fmt.Sprintf("%s%s", constant.C1337xBaseURL, u)
if db.IsGameCrawledByURL(u) {
fullURL := fmt.Sprintf("%s%s", constant.C1337xBaseURL, u)
if db.IsGameCrawledByURL(fullURL) {
c.logger.Info("Skipping already crawled URL", zap.String("URL", fullURL))
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
item, err := c.CrawlByUrl(u)
c.logger.Info("Crawling URL", zap.String("URL", fullURL))
item, err := c.CrawlByUrl(fullURL)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to crawl URL", zap.String("URL", fullURL), zap.Error(err))
continue
}
err = db.SaveGameItem(item)
if err != nil {
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to save game item to database", zap.String("URL", fullURL), zap.Error(err))
continue
}
res = append(res, item)
if err := OrganizeGameItem(item); err != nil {
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to organize game item", zap.String("URL", fullURL), zap.Error(err))
continue
}
}
c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
return res, nil
}
func (c *s1337xCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
resp, err := utils.Request().Get(URL)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
}
var item = &model.GameItem{}
item.Url = URL
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
return nil, err
c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
}
item := &model.GameItem{
Url: URL,
}
selection := doc.Find(".torrent-detail-page ul.list>li")
info := make(map[string]string)
selection.Each(func(i int, item *goquery.Selection) {
info[strings.TrimSpace(item.Find("strong").Text())] = strings.TrimSpace(item.Find("span").Text())
key := strings.TrimSpace(item.Find("strong").Text())
value := strings.TrimSpace(item.Find("span").Text())
info[key] = value
c.logger.Debug("Extracted info", zap.String("Key", key), zap.String("Value", value))
})
magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body()))
if len(magnetRegexRes) == 0 {
c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
return nil, fmt.Errorf("failed to find magnet link on URL %s", URL)
}
item.Size = info["Total size"]
item.RawName = doc.Find("title").Text()
item.RawName = strings.Replace(item.RawName, "Download ", "", 1)
@ -107,63 +139,88 @@ func (c *s1337xCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
item.DownloadLinks = []string{magnetRegexRes[0]}
item.Author = strings.Replace(c.source, "-torrents", "", -1)
item.Platform = c.platform
c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
return item, nil
}
// CrawlMulti crawls the given listing pages in order and returns all items
// collected from them.
//
// Pages greater than the total page count are skipped with a warning. A page
// whose crawl fails is logged and skipped; its items are not added. The
// returned error is non-nil only when the total page count cannot be
// determined.
func (c *s1337xCrawler) CrawlMulti(pages []int) (res []*model.GameItem, err error) {
	c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages), zap.String("Source", c.source))

	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		c.logger.Error("Failed to get total page number", zap.Error(err))
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	for _, page := range pages {
		if page > totalPageNum {
			c.logger.Warn("Page exceeds total page number", zap.Int("Page", page), zap.Int("TotalPages", totalPageNum))
			continue
		}

		// Use a distinct name so the named result err is not shadowed.
		items, crawlErr := c.Crawl(page)
		if crawlErr != nil {
			c.logger.Error("Failed to crawl page", zap.Int("Page", page), zap.Error(crawlErr))
			continue
		}
		res = append(res, items...)
	}

	c.logger.Info("Finished CrawlMulti", zap.Int("TotalItems", len(res)))
	return res, nil
}
// CrawlAll crawls every listing page from 1 up to the total page count and
// returns all items collected.
//
// A page whose crawl fails is logged and skipped. The returned error is
// non-nil only when the total page count cannot be determined.
func (c *s1337xCrawler) CrawlAll() (res []*model.GameItem, err error) {
	c.logger.Info("Starting CrawlAll", zap.String("Source", c.source))

	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		c.logger.Error("Failed to get total page number", zap.Error(err))
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	for i := 1; i <= totalPageNum; i++ {
		// Use a distinct name so the named result err is not shadowed.
		items, crawlErr := c.Crawl(i)
		if crawlErr != nil {
			c.logger.Error("Failed to crawl page", zap.Int("Page", i), zap.Error(crawlErr))
			continue
		}
		res = append(res, items...)
	}

	c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
	return res, nil
}
func (c *s1337xCrawler) GetTotalPageNum() (int, error) {
var doc *goquery.Document
c.logger.Info("Fetching total page number", zap.String("Source", c.source))
requestUrl := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, 1)
resp, err := utils.Request().Get(requestUrl)
if err != nil {
return 0, err
c.logger.Error("Failed to fetch first page for total page number", zap.String("URL", requestUrl), zap.Error(err))
return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
}
doc, _ = goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
c.logger.Error("Failed to parse HTML document for total page number", zap.String("URL", requestUrl), zap.Error(err))
return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
}
selection := doc.Find(".last")
pageStr, exist := selection.Find("a").Attr("href")
if !exist {
return 0, errors.New("total page num not found")
pageStr, exists := selection.Find("a").Attr("href")
if !exists {
c.logger.Error("Failed to find total page number in pagination", zap.String("URL", requestUrl))
return 0, errors.New("total page number not found in pagination")
}
pageStr = strings.ReplaceAll(pageStr, c.source, "")
pageStr = strings.ReplaceAll(pageStr, "/", "")
totalPageNum, err := strconv.Atoi(pageStr)
if err != nil {
return 0, err
c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
}
c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
return totalPageNum, nil
}

View File

@ -3,7 +3,6 @@ package crawler
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"strconv"
"strings"
@ -32,142 +31,181 @@ func (c *ChovkaCrawler) Name() string {
}
// CrawlByUrl crawls a single release page and returns the populated game item.
//
// The item record is obtained from db.GetGameItemByUrl and then filled in: the
// raw name comes from the first ".inner-entry__title" element, the formatted
// name from ChovkaFormatter, and the size and magnet link from the .torrent
// file behind the ".download-torrent" link (requested with the page URL as
// Referer). Author and platform are fixed to "Chovka" and "windows".
//
// An error is returned if any fetch, parse, database lookup or torrent
// conversion fails, or if the page has no download link.
func (c *ChovkaCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))

	resp, err := utils.Request().Get(URL)
	if err != nil {
		c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to parse HTML document from URL %s: %w", URL, err)
	}

	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
	}

	item.Url = URL
	item.RawName = doc.Find(".inner-entry__title").First().Text()
	item.Name = ChovkaFormatter(item.RawName)
	item.Author = "Chovka"
	item.Platform = "windows"

	downloadURL := doc.Find(".download-torrent").AttrOr("href", "")
	if downloadURL == "" {
		c.logger.Error("Download URL not found", zap.String("URL", URL))
		return nil, fmt.Errorf("failed to find download URL on page %s", URL)
	}

	resp, err = utils.Request().SetHeader("Referer", URL).Get(downloadURL)
	if err != nil {
		c.logger.Error("Failed to fetch download URL", zap.String("downloadURL", downloadURL), zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch download URL %s for page %s: %w", downloadURL, URL, err)
	}

	magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
	if err != nil {
		c.logger.Error("Failed to convert torrent to magnet", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to convert torrent to magnet for URL %s: %w", URL, err)
	}

	item.Size = size
	item.DownloadLinks = []string{magnet}

	c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
	return item, nil
}
// Crawl fetches one listing page and crawls every entry on it that has not
// been stored yet.
//
// Each ".entry" contributes its title link and an update flag, which is the
// base64 encoding of the link text. Entries whose flag is already known
// (db.IsChovkaCrawled) are skipped. A failure on a single entry is logged and
// does not abort the page; an item is added to the result once saved, even if
// organizing it fails afterwards.
//
// An error is returned only when the listing page cannot be fetched or parsed.
func (c *ChovkaCrawler) Crawl(page int) ([]*model.GameItem, error) {
	c.logger.Info("Starting Crawl", zap.Int("Page", page))

	resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, page))
	if err != nil {
		c.logger.Error("Failed to fetch page", zap.Int("Page", page), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse HTML document", zap.Int("Page", page), zap.Error(err))
		return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
	}

	// urls and updateFlags are parallel slices: updateFlags[i] belongs to urls[i].
	var urls []string
	var updateFlags []string
	doc.Find(".entry").Each(func(i int, s *goquery.Selection) {
		link := s.Find(".entry__title.h2 a")
		u, exist := link.Attr("href")
		if !exist {
			c.logger.Warn("Entry does not contain a valid URL", zap.Int("Index", i))
			return
		}
		urls = append(urls, u)
		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(link.Text())))
	})

	var res []*model.GameItem
	for i, u := range urls {
		if db.IsChovkaCrawled(updateFlags[i]) {
			c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
			continue
		}

		c.logger.Info("Crawling URL", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
			continue
		}

		item.UpdateFlag = updateFlags[i]
		if err := db.SaveGameItem(item); err != nil {
			c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
			continue
		}
		res = append(res, item)

		if err := OrganizeGameItem(item); err != nil {
			c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
			continue
		}
	}

	c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
	return res, nil
}
// CrawlMulti crawls the given listing pages in order and returns all items
// collected. It stops at the first page that fails and returns that error,
// discarding items gathered from earlier pages.
func (c *ChovkaCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages))

	var res []*model.GameItem
	for _, page := range pages {
		items, err := c.Crawl(page)
		if err != nil {
			c.logger.Error("Failed to crawl multiple pages", zap.Int("Page", page), zap.Error(err))
			return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
		}
		res = append(res, items...)
	}

	c.logger.Info("Finished CrawlMulti", zap.Int("TotalPages", len(pages)), zap.Int("TotalItems", len(res)))
	return res, nil
}
// CrawlAll crawls every listing page from 1 up to the total page count and
// returns all items collected. It returns an error if the page count cannot
// be determined or as soon as any page fails; in the latter case items from
// earlier pages are discarded.
func (c *ChovkaCrawler) CrawlAll() ([]*model.GameItem, error) {
	c.logger.Info("Starting CrawlAll")

	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		c.logger.Error("Failed to get total page number", zap.Error(err))
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	var res []*model.GameItem
	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			c.logger.Error("Failed to crawl all pages", zap.Int("Page", i), zap.Error(err))
			return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
		}
		res = append(res, items...)
	}

	c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
	return res, nil
}
// GetTotalPageNum fetches listing page 1 and returns the number shown in the
// last ".pagination>a" element.
//
// An error is returned when the page cannot be fetched or parsed, or when the
// last pagination link's text is not an integer (including when there is no
// such link, which yields an empty string).
func (c *ChovkaCrawler) GetTotalPageNum() (int, error) {
	c.logger.Info("Fetching total page number")

	resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, 1))
	if err != nil {
		c.logger.Error("Failed to fetch first page for total page number", zap.Error(err))
		return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse HTML document for total page number", zap.Error(err))
		return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
	}

	// Trim surrounding whitespace: strconv.Atoi rejects it, and element text
	// taken from HTML may carry it.
	pageStr := strings.TrimSpace(doc.Find(".pagination>a").Last().Text())
	totalPageNum, err := strconv.Atoi(pageStr)
	if err != nil {
		c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
		return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
	}

	c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
	return totalPageNum, nil
}
// ChovkaFormatter turns a raw release title into a plain game name.
//
// The title is cut at the first occurrence of each of the markers "| RePack",
// "| GOG" and "| Portable" (applied in that order, each to the result of the
// previous cut), and surrounding whitespace is trimmed. A title containing
// none of the markers is only trimmed.
func ChovkaFormatter(name string) string {
	cutoffs := []string{"| RePack", "| GOG", "| Portable"}
	for _, cutoff := range cutoffs {
		if idx := strings.Index(name, cutoff); idx != -1 {
			name = name[:idx]
		}
	}
	return strings.TrimSpace(name)
}

View File

@ -1,6 +1,7 @@
package crawler
import (
"game-crawler/config"
"game-crawler/model"
"go.uber.org/zap"
@ -28,13 +29,14 @@ func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
"fitgirl": NewFitGirlCrawler(logger),
"dodi": NewDODICrawler(logger),
"kaoskrew": NewKaOsKrewCrawler(logger),
"freegog": NewFreeGOGCrawler(logger),
"freegog": NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
"xatab": NewXatabCrawler(logger),
"onlinefix": NewOnlineFixCrawler(logger),
"onlinefix": NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
"steamrip": NewSteamRIPCrawler(logger),
"chovka": NewChovkaCrawler(logger),
"goggames": NewGOGGamesCrawler(logger),
"goggames": NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
"johncena141": NewJohncena141Crawler(logger),
"omggods": NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
// "gnarly": NewGnarlyCrawler(logger),
}
return ret

View File

@ -3,7 +3,6 @@ package crawler
import (
"bytes"
"encoding/base64"
"errors"
"fmt"
"regexp"
"strconv"
@ -33,37 +32,51 @@ func (c *FitGirlCrawler) Name() string {
}
func (c *FitGirlCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
resp, err := utils.Request().Get(URL)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
return nil, err
c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
}
titleElem := doc.Find("h3").First().Find("strong")
if titleElem.Length() == 0 {
return nil, errors.New("failed to find title")
c.logger.Error("Failed to find title", zap.String("URL", URL))
return nil, fmt.Errorf("failed to find title on page %s", URL)
}
rawTitle := titleElem.Text()
titleElem.Children().Remove()
title := strings.TrimSpace(titleElem.Text())
sizeRegex := regexp.MustCompile(`Repack Size: <strong>(.*?)</strong>`)
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body()))
if len(sizeRegexRes) == 0 {
return nil, errors.New("failed to find size")
c.logger.Error("Failed to find size", zap.String("URL", URL))
return nil, fmt.Errorf("failed to find size information on page %s", URL)
}
size := sizeRegexRes[1]
magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body()))
if len(magnetRegexRes) == 0 {
return nil, errors.New("failed to find magnet")
c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
return nil, fmt.Errorf("failed to find magnet link on page %s", URL)
}
magnet := magnetRegexRes[0]
item, err := db.GetGameItemByUrl(URL)
if err != nil {
return nil, err
c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
}
item.Name = strings.TrimSpace(title)
item.RawName = rawTitle
item.Url = URL
@ -71,96 +84,130 @@ func (c *FitGirlCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
item.Author = "FitGirl"
item.DownloadLinks = []string{magnet}
item.Platform = "windows"
c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
return item, nil
}
// Crawl fetches one listing page and crawls every article on it that has not
// been stored yet.
//
// Each <article> contributes its ".entry-title>a" link and an update flag,
// which is the base64 encoding of the link concatenated with the <time>
// element's datetime attribute. Articles missing either value are logged and
// ignored. Articles whose flag is already known (db.IsFitgirlCrawled) are
// skipped. A failure on a single article is logged and does not abort the
// page; an item is added to the result once saved, even if organizing it
// fails afterwards.
//
// An error is returned only when the listing page cannot be fetched or parsed.
func (c *FitGirlCrawler) Crawl(page int) ([]*model.GameItem, error) {
	c.logger.Info("Starting Crawl", zap.Int("Page", page))

	resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, page))
	if err != nil {
		c.logger.Error("Failed to fetch page", zap.Int("Page", page), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse HTML document", zap.Int("Page", page), zap.Error(err))
		return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
	}

	// urls and updateFlags are parallel slices: updateFlags[i] belongs to urls[i].
	var urls []string
	var updateFlags []string // link + date (encoded)
	doc.Find("article").Each(func(i int, s *goquery.Selection) {
		u, exist1 := s.Find(".entry-title>a").First().Attr("href")
		d, exist2 := s.Find("time").First().Attr("datetime")
		if !exist1 || !exist2 {
			c.logger.Warn("Failed to extract URL or datetime", zap.Int("Index", i), zap.Bool("HasURL", exist1), zap.Bool("HasDate", exist2))
			return
		}
		urls = append(urls, u)
		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", u, d))))
	})

	var res []*model.GameItem
	for i, u := range urls {
		if db.IsFitgirlCrawled(updateFlags[i]) {
			c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
			continue
		}

		c.logger.Info("Crawling URL", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
			continue
		}

		item.UpdateFlag = updateFlags[i]
		if err := db.SaveGameItem(item); err != nil {
			c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
			continue
		}
		res = append(res, item)

		if err := OrganizeGameItem(item); err != nil {
			c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
			continue
		}
	}

	c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
	return res, nil
}
// CrawlMulti crawls the given listing pages in order and returns all items
// collected. It stops at the first page that fails and returns that error,
// discarding items gathered from earlier pages.
func (c *FitGirlCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages))

	var res []*model.GameItem
	for _, page := range pages {
		items, err := c.Crawl(page)
		if err != nil {
			c.logger.Error("Failed to crawl page", zap.Int("Page", page), zap.Error(err))
			return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
		}
		res = append(res, items...)
	}

	c.logger.Info("Finished CrawlMulti", zap.Int("TotalPages", len(pages)), zap.Int("TotalItems", len(res)))
	return res, nil
}
// CrawlAll crawls every listing page from 1 up to the total page count and
// returns all items collected. It returns an error if the page count cannot
// be determined or as soon as any page fails; in the latter case items from
// earlier pages are discarded.
func (c *FitGirlCrawler) CrawlAll() ([]*model.GameItem, error) {
	c.logger.Info("Starting CrawlAll")

	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		c.logger.Error("Failed to get total page number", zap.Error(err))
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	var res []*model.GameItem
	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			c.logger.Error("Failed to crawl page", zap.Int("Page", i), zap.Error(err))
			return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
		}
		res = append(res, items...)
	}

	c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
	return res, nil
}
// GetTotalPageNum fetches listing page 1 and returns the number shown in the
// element that immediately follows the first ".page-numbers.dots" element.
//
// An error is returned when the page cannot be fetched or parsed, or when
// that element's text is not an integer (including when the element is
// absent, which yields an empty string).
func (c *FitGirlCrawler) GetTotalPageNum() (int, error) {
	c.logger.Info("Fetching total page number")

	resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, 1))
	if err != nil {
		c.logger.Error("Failed to fetch first page for total page number", zap.Error(err))
		return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse HTML document", zap.Error(err))
		return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
	}

	// Trim surrounding whitespace: strconv.Atoi rejects it, and element text
	// taken from HTML may carry it.
	pageStr := strings.TrimSpace(doc.Find(".page-numbers.dots").First().Next().Text())
	totalPageNum, err := strconv.Atoi(pageStr)
	if err != nil {
		c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
		return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
	}

	c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
	return totalPageNum, nil
}

View File

@ -4,7 +4,7 @@ import (
"bytes"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"html"
"net/http"
"regexp"
@ -12,7 +12,6 @@ import (
"time"
"game-crawler/cache"
"game-crawler/config"
"game-crawler/constant"
"game-crawler/db"
"game-crawler/model"
@ -23,31 +22,37 @@ import (
)
// FreeGOGCrawler crawls the FreeGOG game list through a WAF session.
type FreeGOGCrawler struct {
// cfClearanceUrl is the first argument passed to ccs.WAFSession when a new
// session has to be created.
cfClearanceUrl string
logger *zap.Logger
}
func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler {
func NewFreeGOGCrawler(cfClearanceUrl string, logger *zap.Logger) *FreeGOGCrawler {
return &FreeGOGCrawler{
cfClearanceUrl: cfClearanceUrl,
logger: logger,
}
}
func (c *FreeGOGCrawler) getSession() (*ccs.Session, error) {
c.logger.Info("Fetching session for FreeGOGCrawler")
cacheKey := "freegog_waf_session"
var session ccs.Session
var err error
if val, exist := cache.Get("freegog_waf_session"); exist {
if val, exist := cache.Get(cacheKey); exist {
err := json.Unmarshal([]byte(val), &session)
if err != nil {
return nil, err
c.logger.Error("Failed to unmarshal cached session", zap.Error(err))
return nil, fmt.Errorf("failed to unmarshal cached session: %w", err)
}
} else {
session, err = ccs.WAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
var err error
session, err = ccs.WAFSession(c.cfClearanceUrl, constant.FreeGOGListURL)
if err != nil {
return nil, err
c.logger.Error("Failed to create WAF session", zap.Error(err))
return nil, fmt.Errorf("failed to create WAF session: %w", err)
}
jsonBytes, err := json.Marshal(session)
if err == nil {
_ = cache.SetWithExpire("freegog_waf_session", jsonBytes, 1*time.Hour)
_ = cache.SetWithExpire(cacheKey, jsonBytes, 1*time.Hour)
}
}
return &session, nil
@ -58,106 +63,144 @@ func (c *FreeGOGCrawler) Name() string {
}
// Crawl fetches the FreeGOG list page and crawls up to num entries that have
// not been stored yet.
//
// Every ".items-outer li a" link with a non-empty href and non-empty text
// contributes a URL and an update flag (base64 of text + href); other links
// are logged and ignored. Entries whose flag is already known
// (db.IsFreeGOGCrawled) are skipped and do not count towards num. A failure
// on a single entry is logged and does not abort the run; an entry counts
// once it has been saved, even if organizing it fails afterwards.
//
// The stop condition is `count == num`, so a negative num never stops the
// loop early and every entry is considered.
//
// An error is returned only when the session cannot be obtained or the list
// page cannot be fetched or parsed.
func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
	c.logger.Info("Starting Crawl", zap.Int("Num", num))
	count := 0

	session, err := c.getSession()
	if err != nil {
		c.logger.Error("Failed to create session", zap.Error(err))
		return nil, fmt.Errorf("failed to get session: %w", err)
	}

	resp, err := ccs.RequestWithWAFSession(http.MethodGet, constant.FreeGOGListURL, *session, nil)
	if err != nil {
		c.logger.Error("Failed to fetch FreeGOG list page", zap.Error(err))
		return nil, fmt.Errorf("failed to fetch FreeGOG list page: %w", err)
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp.Body)))
	if err != nil {
		c.logger.Error("Failed to parse HTML document", zap.Error(err))
		return nil, fmt.Errorf("failed to parse HTML document: %w", err)
	}

	// urls and updateFlags are parallel slices: updateFlags[i] belongs to urls[i].
	var urls []string
	var updateFlags []string // RawName+Link
	doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) {
		href := s.AttrOr("href", "")
		rawName := s.Text()
		if href == "" || rawName == "" {
			c.logger.Warn("Invalid URL or raw name found in item", zap.Int("Index", i), zap.String("URL", href), zap.String("RawName", rawName))
			return
		}
		urls = append(urls, href)
		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(rawName+href)))
	})

	var res []*model.GameItem
	for i, u := range urls {
		if count == num {
			c.logger.Info("Reached target number of items", zap.Int("Count", count))
			break
		}
		if db.IsFreeGOGCrawled(updateFlags[i]) {
			c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
			continue
		}

		c.logger.Info("Crawling URL", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
			continue
		}

		item.UpdateFlag = updateFlags[i]
		if err := db.SaveGameItem(item); err != nil {
			c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
			continue
		}
		res = append(res, item)
		count++

		if err := OrganizeGameItem(item); err != nil {
			c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
			continue
		}
	}

	c.logger.Info("Finished Crawl", zap.Int("TotalItems", len(res)))
	return res, nil
}
// CrawlByUrl crawls a single FreeGOG game page and returns the populated
// game item.
//
// The item record is obtained from db.GetGameItemByUrl and then filled in
// from the page HTML using regular expressions:
//   - the raw title from <h1 class="entry-title">, HTML-unescaped;
//   - the formatted name via FreeGOGFormatter;
//   - the size from the text following "Size:" (optional; a missing size is
//     only logged);
//   - the magnet link, base64-decoded from the "url" query parameter of the
//     download button.
//
// Author and platform are fixed to "FreeGOG" and "windows".
//
// An error is returned if the session, the page fetch or the database lookup
// fails, if the title or the magnet link is missing, or if the magnet link is
// not valid base64.
func (c *FreeGOGCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))

	session, err := c.getSession()
	if err != nil {
		c.logger.Error("Failed to get session", zap.Error(err))
		return nil, fmt.Errorf("failed to get session: %w", err)
	}

	resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
	if err != nil {
		c.logger.Error("Failed to fetch game page", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch game page %s: %w", URL, err)
	}

	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
	}
	item.Url = URL

	// Convert the body once; every regexp below scans the same text.
	body := string(resp.Body)

	// Extract raw title
	rawTitleRegex := regexp.MustCompile(`(?i)<h1 class="entry-title">(.*?)</h1>`)
	rawTitleRegexRes := rawTitleRegex.FindStringSubmatch(body)
	if len(rawTitleRegexRes) <= 1 {
		c.logger.Error("Failed to find raw title", zap.String("URL", URL))
		return nil, fmt.Errorf("failed to find raw title on page %s", URL)
	}
	rawName := html.UnescapeString(rawTitleRegexRes[1])
	// Normalize the en dash (U+2013) to a plain hyphen. The search string must
	// not be empty: strings.Replace with an empty old string inserts the
	// replacement before and after every rune.
	// NOTE(review): the en dash is inferred from the replacement "-"; confirm
	// against the upstream file.
	item.RawName = strings.ReplaceAll(rawName, "\u2013", "-")
	item.Name = FreeGOGFormatter(item.RawName)

	// Extract size
	sizeRegex := regexp.MustCompile(`(?i)>Size:\s?(.*?)<`)
	sizeRegexRes := sizeRegex.FindStringSubmatch(body)
	if len(sizeRegexRes) > 1 {
		item.Size = sizeRegexRes[1]
	} else {
		c.logger.Warn("Failed to find game size", zap.String("URL", URL))
	}

	// Extract magnet link
	magnetRegex := regexp.MustCompile(`<a class="download-btn" href="https://gdl.freegogpcgames.xyz/download-gen\.php\?url=(.*?)"`)
	magnetRegexRes := magnetRegex.FindStringSubmatch(body)
	if len(magnetRegexRes) <= 1 {
		c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
		return nil, fmt.Errorf("failed to find magnet link on page %s", URL)
	}
	magnet, err := base64.StdEncoding.DecodeString(magnetRegexRes[1])
	if err != nil {
		c.logger.Error("Failed to decode magnet link", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to decode magnet link on page %s: %w", URL, err)
	}
	item.DownloadLinks = []string{string(magnet)}

	item.Author = "FreeGOG"
	item.Platform = "windows"

	c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
	return item, nil
}
// CrawlAll logs the start of a full crawl and delegates to Crawl with -1 as
// the item count.
func (c *FreeGOGCrawler) CrawlAll() ([]*model.GameItem, error) {
	c.logger.Info("Starting CrawlAll")
	items, err := c.Crawl(-1)
	return items, err
}
@ -165,6 +208,7 @@ var freeGOGRegexps = []*regexp.Regexp{
regexp.MustCompile(`(?i)\(.*\)`),
}
// FreeGOGFormatter formats the raw game name into a clean title.
func FreeGOGFormatter(name string) string {
for _, re := range freeGOGRegexps {
name = re.ReplaceAllString(name, "")

View File

@ -16,6 +16,7 @@ import (
"go.mongodb.org/mongo-driver/mongo"
)
// GenerateGameInfo generates game info based on the platform and ID.
func GenerateGameInfo(platform string, id int) (*model.GameInfo, error) {
switch platform {
case "steam":
@ -23,45 +24,54 @@ func GenerateGameInfo(platform string, id int) (*model.GameInfo, error) {
case "igdb":
return GenerateIGDBGameInfo(id)
default:
return nil, errors.New("invalid ID type")
return nil, errors.New("invalid platform type")
}
}
// OrganizeGameItem Organize game item and save game info to database
// OrganizeGameItem organizes the given game item and saves its associated game info to the database.
func OrganizeGameItem(game *model.GameItem) error {
hasOriganized, _ := db.HasGameItemOrganized(game.ID)
if hasOriganized {
hasOrganized := db.HasGameItemOrganized(game.ID)
if hasOrganized {
return nil
}
item, err := OrganizeGameItemWithIGDB(game)
if err == nil {
if err != nil {
return err
}
// Attempt to supplement SteamID if missing
if item.SteamID == 0 {
// get steam id from igdb
steamID, err := GetSteamIDByIGDBID(item.IGDBID)
if err == nil {
item.SteamID = steamID
} else {
return err
}
}
err = db.SaveGameInfo(item)
if err != nil {
// Save the organized game info to the database
if err := db.SaveGameInfo(item); err != nil {
return err
}
return nil
}
return err
}
func AddGameInfoManually(gameID primitive.ObjectID, platform string, plateformID int) (*model.GameInfo, error) {
info, err := GenerateGameInfo(platform, plateformID)
// AddGameInfoManually manually adds a game info entry to the database.
func AddGameInfoManually(gameID primitive.ObjectID, platform string, platformID int) (*model.GameInfo, error) {
info, err := GenerateGameInfo(platform, platformID)
if err != nil {
return nil, err
}
info.GameIDs = append(info.GameIDs, gameID)
info.GameIDs = utils.Unique(info.GameIDs)
return info, db.SaveGameInfo(info)
info.GameIDs = utils.Unique(append(info.GameIDs, gameID))
if err := db.SaveGameInfo(info); err != nil {
return nil, err
}
return info, nil
}
// OrganizeGameItemManually organizes a game item manually based on the platform and platform ID.
func OrganizeGameItemManually(gameID primitive.ObjectID, platform string, platformID int) (*model.GameInfo, error) {
info, err := db.GetGameInfoByPlatformID(platform, platformID)
if err != nil {
@ -74,27 +84,30 @@ func OrganizeGameItemManually(gameID primitive.ObjectID, platform string, platfo
return nil, err
}
}
info.GameIDs = append(info.GameIDs, gameID)
info.GameIDs = utils.Unique(info.GameIDs)
err = db.SaveGameInfo(info)
if err != nil {
return nil, err
}
if platform == "igdb" {
info.GameIDs = utils.Unique(append(info.GameIDs, gameID))
// Supplement missing platform IDs
if platform == "igdb" && info.SteamID == 0 {
steamID, err := GetSteamIDByIGDBID(platformID)
if err == nil {
info.SteamID = steamID
}
}
if platform == "steam" {
if platform == "steam" && info.IGDBID == 0 {
igdbID, err := GetIGDBIDBySteamAppID(platformID)
if err == nil {
info.IGDBID = igdbID
}
}
if err := db.SaveGameInfo(info); err != nil {
return nil, err
}
return info, nil
}
// FormatName formats a raw game name into a clean and consistent format.
func FormatName(name string) string {
name = regexp.MustCompile(`(?i)[\w'-]+\s(Edition|Vision|Collection|Bundle|Pack|Deluxe)`).ReplaceAllString(name, " ")
name = regexp.MustCompile(`(?i)GOTY`).ReplaceAllString(name, "")
@ -108,61 +121,85 @@ func FormatName(name string) string {
return name
}
func SupplementPlatformIDToGameInfo(logger *zap.Logger) error {
// SupplementPlatformIDToGameInfo supplements missing platform IDs (SteamID or IGDBID) for all game info entries.
func SupplementPlatformIDToGameInfo() error {
logger := zap.L()
logger.Info("Starting to supplement missing platform IDs")
infos, err := db.GetAllGameInfos()
if err != nil {
logger.Error("Failed to fetch game infos", zap.Error(err))
return err
}
for _, info := range infos {
changed := false
// Supplement SteamID using IGDBID
if info.IGDBID != 0 && info.SteamID == 0 {
steamID, err := GetSteamIDByIGDBID(info.IGDBID)
time.Sleep(time.Millisecond * 100)
if err != nil {
continue
}
time.Sleep(100 * time.Millisecond)
if err == nil {
info.SteamID = steamID
changed = true
} else {
logger.Warn("Failed to get SteamID from IGDB", zap.Int("IGDBID", info.IGDBID), zap.Error(err))
}
}
// Supplement IGDBID using SteamID
if info.SteamID != 0 && info.IGDBID == 0 {
igdbID, err := GetIGDBIDBySteamAppID(info.SteamID)
time.Sleep(time.Millisecond * 100)
if err != nil {
continue
}
time.Sleep(100 * time.Millisecond)
if err == nil {
info.IGDBID = igdbID
changed = true
}
if changed {
logger.Info("supp", zap.String("name", info.Name), zap.Int("igdb", info.IGDBID), zap.Int("steam", info.SteamID))
_ = db.SaveGameInfo(info)
} else {
logger.Info("skip", zap.String("name", info.Name), zap.Int("igdb", info.IGDBID), zap.Int("steam", info.SteamID))
logger.Warn("Failed to get IGDBID from SteamID", zap.Int("SteamID", info.SteamID), zap.Error(err))
}
}
if changed {
logger.Info("Supplemented platform IDs", zap.String("Name", info.Name), zap.Int("IGDBID", info.IGDBID), zap.Int("SteamID", info.SteamID))
if err := db.SaveGameInfo(info); err != nil {
logger.Error("Failed to save updated game info", zap.String("Name", info.Name), zap.Error(err))
}
} else {
logger.Info("No changes needed", zap.String("Name", info.Name), zap.Int("IGDBID", info.IGDBID), zap.Int("SteamID", info.SteamID))
}
}
return nil
}
// UpdateGameInfo updates outdated game info entries and returns a channel to monitor updates.
func UpdateGameInfo(num int) (chan *model.GameInfo, error) {
logger := zap.L()
logger.Info("Starting to update outdated game info", zap.Int("Num", num))
infos, err := db.GetOutdatedGameInfos(num)
if err != nil {
logger.Error("Failed to fetch outdated game infos", zap.Error(err))
return nil, err
}
updateChan := make(chan *model.GameInfo)
go func() {
defer close(updateChan)
for _, info := range infos {
if info.IGDBID != 0 {
newInfo, err := GenerateIGDBGameInfo(info.IGDBID)
if err != nil {
logger.Warn("Failed to generate IGDB game info", zap.Int("IGDBID", info.IGDBID), zap.Error(err))
continue
}
db.MergeGameInfo(info, newInfo)
err = db.SaveGameInfo(newInfo)
if err != nil {
if err := db.SaveGameInfo(newInfo); err != nil {
logger.Error("Failed to save updated game info", zap.String("Name", newInfo.Name), zap.Error(err))
continue
}
updateChan <- newInfo
logger.Info("Updated game info", zap.String("Name", newInfo.Name), zap.Int("IGDBID", newInfo.IGDBID))
}
}
}()

View File

@ -8,7 +8,6 @@ import (
"strings"
"time"
"game-crawler/config"
"game-crawler/constant"
"game-crawler/db"
"game-crawler/model"
@ -19,11 +18,13 @@ import (
)
// GOGGamesCrawler crawls game entries from gog-games.to.
type GOGGamesCrawler struct {
// cfClearanceUrl is passed to ccs.TurnstileToken when CrawlByUrl requests a
// Turnstile token for the game API.
cfClearanceUrl string
// logger receives the crawler's progress and error messages.
logger *zap.Logger
}
func NewGOGGamesCrawler(logger *zap.Logger) *GOGGamesCrawler {
func NewGOGGamesCrawler(cfClearanceUrl string, logger *zap.Logger) *GOGGamesCrawler {
return &GOGGamesCrawler{
cfClearanceUrl: cfClearanceUrl,
logger: logger,
}
}
@ -34,29 +35,36 @@ func (c *GOGGamesCrawler) Name() string {
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
if !strings.HasPrefix(URL, "https://www.gog-games.to/game/") {
return nil, fmt.Errorf("invalid url")
err := fmt.Errorf("invalid URL: %s", URL)
c.logger.Error("Invalid URL", zap.Error(err))
return nil, err
}
_, slug := path.Split(URL)
_, slug := path.Split(URL)
apiUrl := fmt.Sprintf(constant.GOGGamesGameAPIURL, slug)
token, err := ccs.TurnstileToken(config.Config.CFClearanceScraper.Url, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
token, err := ccs.TurnstileToken(c.cfClearanceUrl, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
if err != nil {
return nil, err
c.logger.Error("Failed to get Turnstile token", zap.Error(err), zap.String("apiUrl", apiUrl))
return nil, fmt.Errorf("failed to get Turnstile token for URL %s: %w", apiUrl, err)
}
resp, err := utils.Request().SetHeader("cf-turnstile-response", token).Get(apiUrl)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch data from API", zap.Error(err), zap.String("apiUrl", apiUrl))
return nil, fmt.Errorf("failed to fetch API data for URL %s: %w", apiUrl, err)
}
data := gameResult{}
err = json.Unmarshal(resp.Body(), &data)
if err != nil {
return nil, err
c.logger.Error("Failed to unmarshal API response", zap.Error(err), zap.String("apiUrl", apiUrl))
return nil, fmt.Errorf("failed to parse API response for URL %s: %w", apiUrl, err)
}
name := data.Title
// find download links
// Find download links
fileHosters := []string{
"gofile",
"fileditch",
@ -80,19 +88,28 @@ func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
}
if len(links) == 0 {
return nil, fmt.Errorf("no download link found")
err := fmt.Errorf("no download links found for URL %s", URL)
c.logger.Warn("No download links found", zap.Error(err))
return nil, err
}
// Calculate total size
size := uint64(0)
for _, file := range data.Files.Game {
s, _ := utils.SizeToBytes(file.Size)
s, parseErr := utils.SizeToBytes(file.Size)
if parseErr != nil {
c.logger.Warn("Failed to parse file size", zap.Error(parseErr), zap.String("fileSize", file.Size))
}
size += s
}
// Retrieve or create game item
item, err := db.GetGameItemByUrl(URL)
if err != nil {
return nil, err
c.logger.Error("Failed to retrieve game item from database", zap.Error(err), zap.String("URL", URL))
return nil, fmt.Errorf("failed to get game item for URL %s: %w", URL, err)
}
item.Name = name
item.RawName = name
item.DownloadLinks = links
@ -100,44 +117,54 @@ func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
item.Size = utils.BytesToSize(size)
item.Author = "GOGGames"
item.Platform = "windows"
return item, nil
}
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, page))
if err != nil {
return nil, err
c.logger.Error("Failed to fetch page", zap.Error(err), zap.Int("page", page))
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
}
data := searchResult{}
err = json.Unmarshal(resp.Body(), &data)
if err != nil {
return nil, err
c.logger.Error("Failed to parse page response", zap.Error(err), zap.Int("page", page))
return nil, fmt.Errorf("failed to parse page %d: %w", page, err)
}
urls := make([]string, 0)
var updateFlags []string //link+date
var updateFlags []string // link+date
for _, item := range data.Data {
urls = append(urls, fmt.Sprintf(constant.GOGGamesPageURL, item.Slug))
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", item.GogURL, item.LastUpdate))))
}
res := make([]*model.GameItem, 0)
for i, u := range urls {
if db.IsGameCrawled(updateFlags[i], "GOGGames") {
c.logger.Info("Game already crawled", zap.String("URL", u))
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
c.logger.Info("Crawling game", zap.String("URL", u))
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to crawl game", zap.Error(err), zap.String("URL", u))
continue
}
item.UpdateFlag = updateFlags[i]
if err := db.SaveGameItem(item); err != nil {
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to save game item to database", zap.Error(err), zap.String("URL", u))
continue
}
res = append(res, item)
if err := OrganizeGameItem(item); err != nil {
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to organize game item", zap.Error(err), zap.String("URL", u))
continue
}
}
@ -149,7 +176,8 @@ func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
for _, page := range pages {
items, err := c.Crawl(page)
if err != nil {
return nil, err
c.logger.Error("Failed to crawl multiple pages", zap.Error(err), zap.Int("page", page))
return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
}
res = append(res, items...)
}
@ -159,13 +187,17 @@ func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
totalPageNum, err := c.GetTotalPageNum()
if err != nil {
return nil, err
c.logger.Error("Failed to get total page number", zap.Error(err))
return nil, fmt.Errorf("failed to get total page number: %w", err)
}
var res []*model.GameItem
for i := 1; i <= totalPageNum; i++ {
c.logger.Info("Crawling page", zap.Int("page", i))
items, err := c.Crawl(i)
if err != nil {
return nil, err
c.logger.Warn("Failed to crawl page", zap.Error(err), zap.Int("page", i))
return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
}
res = append(res, items...)
}
@ -175,13 +207,17 @@ func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
// GetTotalPageNum requests the first listing page and returns the last page
// number reported in the response's Meta.LastPage field.
//
// Request and JSON decoding failures are logged and returned wrapped.
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
	resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, 1))
	if err != nil {
		c.logger.Error("Failed to fetch first page", zap.Error(err))
		return 0, fmt.Errorf("failed to fetch first page: %w", err)
	}

	var data searchResult
	if err := json.Unmarshal(resp.Body(), &data); err != nil {
		c.logger.Error("Failed to parse first page response", zap.Error(err))
		return 0, fmt.Errorf("failed to parse first page response: %w", err)
	}

	return data.Meta.LastPage, nil
}

View File

@ -20,6 +20,7 @@ import (
"github.com/PuerkitoBio/goquery"
"github.com/go-resty/resty/v2"
"go.uber.org/zap"
)
type twitchToken struct {
@ -33,9 +34,13 @@ func (t *twitchToken) getToken() (string, error) {
}
token, expires, err := loginTwitch()
if err != nil {
zap.L().Error("failed to login to Twitch", zap.Error(err))
return "", fmt.Errorf("failed to login twitch: %w", err)
}
_ = cache.SetWithExpire("twitch_token", token, expires)
err = cache.SetWithExpire("twitch_token", token, expires)
if err != nil {
zap.L().Error("failed to set Twitch token in cache", zap.Error(err))
}
return token, nil
}
@ -46,10 +51,13 @@ func loginTwitch() (string, time.Duration, error) {
params.Add("client_secret", config.Config.Twitch.ClientSecret)
params.Add("grant_type", "client_credentials")
baseURL.RawQuery = params.Encode()
resp, err := utils.Request().SetHeader("User-Agent", "").Post(baseURL.String())
if err != nil {
zap.L().Error("failed to make Twitch login request", zap.String("url", baseURL.String()), zap.Error(err))
return "", 0, err
}
data := struct {
AccessToken string `json:"access_token"`
ExpiresIn int64 `json:"expires_in"`
@ -57,6 +65,7 @@ func loginTwitch() (string, time.Duration, error) {
}{}
err = json.Unmarshal(resp.Body(), &data)
if err != nil {
zap.L().Error("failed to parse Twitch login response", zap.String("response", string(resp.Body())), zap.Error(err))
return "", 0, err
}
return data.AccessToken, time.Second * time.Duration(data.ExpiresIn), nil
@ -65,68 +74,82 @@ func loginTwitch() (string, time.Duration, error) {
// igdbRequest POSTs dataBody to URL with the Twitch client ID and a bearer
// token obtained from token.getToken, and returns the raw response.
//
// A failure to obtain the token or to perform the request is logged and
// returned; in both cases the response is nil.
func igdbRequest(URL string, dataBody any) (*resty.Response, error) {
	bearer, tokenErr := token.getToken()
	if tokenErr != nil {
		zap.L().Error("failed to get Twitch token", zap.Error(tokenErr))
		return nil, tokenErr
	}

	headers := map[string]string{
		"Client-ID":     config.Config.Twitch.ClientID,
		"Authorization": "Bearer " + bearer,
		"User-Agent":    "",
		"Content-Type":  "text/plain",
	}

	request := utils.Request().SetBody(dataBody).SetHeaders(headers)
	resp, postErr := request.Post(URL)
	if postErr != nil {
		zap.L().Error("failed to make IGDB request", zap.String("url", URL), zap.Any("dataBody", dataBody), zap.Error(postErr))
		return nil, postErr
	}
	return resp, nil
}
func getIGDBID(name string) (int, error) {
var err error
resp, err := igdbRequest(constant.IGDBSearchURL, fmt.Sprintf(`search "%s"; fields *; limit 50; where game.platforms = [6] | game.platforms=[130] | game.platforms=[384] | game.platforms=[163];`, name))
if err != nil {
zap.L().Error("failed to search IGDB ID", zap.String("name", name), zap.Error(err))
return 0, err
}
if string(resp.Body()) == "[]" {
resp, err = igdbRequest(constant.IGDBSearchURL, fmt.Sprintf(`search "%s"; fields *; limit 50;`, name))
if err != nil {
zap.L().Error("failed to fallback search IGDB ID", zap.String("name", name), zap.Error(err))
return 0, err
}
}
var data model.IGDBSearches
if err = json.Unmarshal(resp.Body(), &data); err != nil {
zap.L().Error("failed to unmarshal IGDB search response", zap.String("response", string(resp.Body())), zap.Error(err))
return 0, fmt.Errorf("failed to unmarshal: %w, %s", err, debug.Stack())
}
if len(data) == 1 {
return GetIGDBAppParent(data[0].Game)
}
maxSimilairty := 0.0
maxSimilairtyIndex := 0
maxSimilarity := 0.0
maxSimilarityIndex := 0
for i, item := range data {
if strings.EqualFold(item.Name, name) {
return item.Game, nil
}
if sim := utils.Similarity(name, item.Name); sim >= 0.8 {
if sim > maxSimilairty {
maxSimilairty = sim
maxSimilairtyIndex = i
if sim > maxSimilarity {
maxSimilarity = sim
maxSimilarityIndex = i
}
}
detail, err := GetIGDBAppDetail(item.Game)
if err != nil {
zap.L().Error("failed to get IGDB app detail", zap.Int("gameID", item.Game), zap.Error(err))
return 0, err
}
for _, alternativeNames := range detail.AlternativeNames {
if sim := utils.Similarity(alternativeNames.Name, name); sim >= 0.8 {
if sim > maxSimilairty {
maxSimilairty = sim
maxSimilairtyIndex = i
for _, altName := range detail.AlternativeNames {
if sim := utils.Similarity(altName.Name, name); sim >= 0.8 {
if sim > maxSimilarity {
maxSimilarity = sim
maxSimilarityIndex = i
}
}
}
}
if maxSimilairty >= 0.8 {
return GetIGDBAppParent(data[maxSimilairtyIndex].Game)
if maxSimilarity >= 0.8 {
return GetIGDBAppParent(data[maxSimilarityIndex].Game)
}
zap.L().Warn("no IGDB ID found", zap.String("name", name))
return 0, fmt.Errorf("IGDB ID not found: %s", name)
}
@ -212,12 +235,14 @@ func GetIGDBAppParent(id int) (int, error) {
if exist {
id, err := strconv.Atoi(val)
if err != nil {
zap.L().Error("failed to parse cached IGDB parent ID", zap.String("cacheKey", key), zap.Error(err))
return 0, err
}
return id, nil
}
detail, err := GetIGDBAppDetail(id)
if err != nil {
zap.L().Error("failed to fetch IGDB app detail for parent", zap.Int("gameID", id), zap.Error(err))
return 0, err
}
hasParent := false
@ -225,6 +250,7 @@ func GetIGDBAppParent(id int) (int, error) {
hasParent = true
detail, err = GetIGDBAppDetail(detail.VersionParent)
if err != nil {
zap.L().Error("failed to fetch IGDB version parent", zap.Int("parentID", detail.VersionParent), zap.Error(err))
return 0, err
}
}
@ -232,39 +258,49 @@ func GetIGDBAppParent(id int) (int, error) {
return detail.ID, nil
}
_ = cache.Set(key, id)
err = cache.Set(key, id)
if err != nil {
zap.L().Error("failed to cache IGDB parent ID", zap.String("cacheKey", key), zap.Error(err))
}
return id, nil
}
// GetIGDBID returns the IGDB ID of the game, try directly IGDB api first, then steam search
// GetIGDBID retrieves the IGDB ID of a game by its name using IGDB API and fallback mechanisms.
func GetIGDBID(name string) (int, error) {
key := fmt.Sprintf("igdb_id:%s", name)
val, exist := cache.Get(key)
if exist {
if val, exist := cache.Get(key); exist {
zap.L().Info("cache hit for IGDB ID", zap.String("name", name), zap.String("cacheKey", key))
return strconv.Atoi(val)
}
name1 := name
name2 := FormatName(name)
names := []string{name1}
if name1 != name2 {
names = append(names, name2)
}
for _, name := range names {
id, err := getIGDBID(name)
// Normalize game name and try multiple variations
normalizedNames := []string{name, FormatName(name)}
for _, n := range normalizedNames {
id, err := getIGDBID(n)
if err == nil {
_ = cache.Set(key, id)
cacheErr := cache.Set(key, id)
if cacheErr != nil {
zap.L().Warn("failed to cache IGDB ID", zap.String("name", n), zap.Error(cacheErr))
}
return id, nil
}
}
for _, name := range names {
id, err := getIGDBIDBySteamSearch(name)
// Fallback to Steam search if IGDB search fails
for _, n := range normalizedNames {
id, err := getIGDBIDBySteamSearch(n)
if err == nil {
_ = cache.Set(key, id)
cacheErr := cache.Set(key, id)
if cacheErr != nil {
zap.L().Warn("failed to cache IGDB ID after Steam search", zap.String("name", n), zap.Error(cacheErr))
}
return id, nil
}
}
return 0, errors.New("IGDB ID not found")
zap.L().Warn("failed to retrieve IGDB ID", zap.String("name", name))
return 0, fmt.Errorf("IGDB ID not found for '%s'", name)
}
func GetIGDBAppDetail(id int) (*model.IGDBGameDetail, error) {
@ -273,267 +309,267 @@ func GetIGDBAppDetail(id int) (*model.IGDBGameDetail, error) {
if exist {
var data model.IGDBGameDetail
if err := json.Unmarshal([]byte(val), &data); err != nil {
zap.L().Error("failed to parse cached IGDB game detail", zap.String("cacheKey", key), zap.Error(err))
return nil, err
}
return &data, nil
}
var err error
resp, err := igdbRequest(constant.IGDBGameURL, fmt.Sprintf(`where id = %v;fields *,alternative_names.*,language_supports.*,screenshots.*,cover.*,involved_companies.*,involved_companies.*,game_engines.*,game_modes.*,genres.*,player_perspectives.*,release_dates.*,videos.*,websites.*,platforms.*,themes.*,collections.*;`, id))
resp, err := igdbRequest(constant.IGDBGameURL, fmt.Sprintf(`where id = %v; fields *,alternative_names.*,language_supports.*,screenshots.*,cover.*,involved_companies.*,game_engines.*,game_modes.*,genres.*,player_perspectives.*,release_dates.*,videos.*,websites.*,platforms.*,themes.*,collections.*;`, id))
if err != nil {
zap.L().Error("failed to fetch IGDB game detail", zap.Int("gameID", id), zap.Error(err))
return nil, err
}
var data model.IGDBGameDetails
if err = json.Unmarshal(resp.Body(), &data); err != nil {
zap.L().Error("failed to unmarshal IGDB game detail response", zap.String("response", string(resp.Body())), zap.Error(err))
return nil, err
}
if len(data) == 0 {
zap.L().Warn("IGDB game not found", zap.Int("gameID", id))
return nil, errors.New("IGDB App not found")
}
if data[0].Name == "" {
return GetIGDBAppDetail(id)
}
jsonBytes, err := json.Marshal(data[0])
if err == nil {
_ = cache.Set(key, string(jsonBytes))
err = cache.Set(key, string(jsonBytes))
if err != nil {
zap.L().Error("failed to cache IGDB game detail", zap.String("cacheKey", key), zap.Error(err))
}
}
return data[0], nil
}
// GetIGDBCompany retrieves the company name from IGDB by its ID.
func GetIGDBCompany(id int) (string, error) {
key := fmt.Sprintf("igdb_companies:%v", id)
val, exist := cache.Get(key)
if exist {
key := fmt.Sprintf("igdb_companies:%d", id)
if val, exist := cache.Get(key); exist {
zap.L().Info("cache hit for IGDB company", zap.Int("companyID", id), zap.String("cacheKey", key))
return val, nil
}
var err error
resp, err := igdbRequest(constant.IGDBCompaniesURL, fmt.Sprintf(`where id=%v; fields *;`, id))
query := fmt.Sprintf(`where id=%d; fields *;`, id)
resp, err := igdbRequest(constant.IGDBCompaniesURL, query)
if err != nil {
return "", err
zap.L().Error("failed to fetch IGDB company", zap.Int("companyID", id), zap.Error(err))
return "", fmt.Errorf("failed to fetch IGDB company for ID %d: %w", id, err)
}
var data model.IGDBCompanies
if err = json.Unmarshal(resp.Body(), &data); err != nil {
return "", err
zap.L().Error("failed to unmarshal IGDB company response", zap.String("response", string(resp.Body())), zap.Error(err))
return "", fmt.Errorf("failed to unmarshal IGDB companies response: %w", err)
}
if len(data) == 0 {
return "", errors.New("not found")
}
if data[0].Name == "" {
return GetIGDBCompany(id)
zap.L().Warn("no company found in IGDB for ID", zap.Int("companyID", id))
return "", errors.New("company not found")
}
_ = cache.Set(key, data[0].Name)
return data[0].Name, nil
companyName := data[0].Name
cacheErr := cache.Set(key, companyName)
if cacheErr != nil {
zap.L().Warn("failed to cache IGDB company name", zap.Int("companyID", id), zap.Error(cacheErr))
}
return companyName, nil
}
// GenerateIGDBGameInfo generates detailed game information based on an IGDB ID.
func GenerateIGDBGameInfo(id int) (*model.GameInfo, error) {
item := &model.GameInfo{}
detail, err := GetIGDBAppDetail(id)
if err != nil {
return nil, err
zap.L().Error("failed to fetch IGDB app detail", zap.Int("igdbID", id), zap.Error(err))
return nil, fmt.Errorf("failed to fetch IGDB app detail for ID %d: %w", id, err)
}
gameInfo := &model.GameInfo{
IGDBID: id,
Name: detail.Name,
Description: detail.Summary,
Cover: strings.Replace(detail.Cover.URL, "t_thumb", "t_original", 1),
}
item.IGDBID = id
item.Name = detail.Name
item.Description = detail.Summary
item.Cover = strings.Replace(detail.Cover.URL, "t_thumb", "t_original", 1)
for _, lang := range detail.LanguageSupports {
if lang.LanguageSupportType == 3 {
l, exist := constant.IGDBLanguages[lang.Language]
if !exist {
continue
if l, exist := constant.IGDBLanguages[lang.Language]; exist {
gameInfo.Languages = append(gameInfo.Languages, l.Name)
}
item.Languages = append(item.Languages, l.Name)
}
}
for _, screenshot := range detail.Screenshots {
item.Screenshots = append(item.Screenshots, strings.Replace(screenshot.URL, "t_thumb", "t_original", 1))
gameInfo.Screenshots = append(gameInfo.Screenshots, strings.Replace(screenshot.URL, "t_thumb", "t_original", 1))
}
for _, alias := range detail.AlternativeNames {
item.Aliases = append(item.Aliases, alias.Name)
gameInfo.Aliases = append(gameInfo.Aliases, alias.Name)
}
for _, company := range detail.InvolvedCompanies {
if company.Developer || company.Publisher {
companyName, err := GetIGDBCompany(company.Company)
if err != nil {
zap.L().Warn("failed to fetch company name", zap.Int("companyID", company.Company), zap.Error(err))
continue
}
if company.Developer {
item.Developers = append(item.Developers, companyName)
gameInfo.Developers = append(gameInfo.Developers, companyName)
}
if company.Publisher {
item.Publishers = append(item.Publishers, companyName)
}
gameInfo.Publishers = append(gameInfo.Publishers, companyName)
}
}
item.GameEngines = make([]string, 0)
for _, engine := range detail.GameEngines {
item.GameEngines = append(item.GameEngines, engine.Name)
}
item.GameModes = make([]string, 0)
for _, mode := range detail.GameModes {
item.GameModes = append(item.GameModes, mode.Name)
gameInfo.GameModes = append(gameInfo.GameModes, mode.Name)
}
item.Genres = make([]string, 0)
for _, genre := range detail.Genres {
item.Genres = append(item.Genres, genre.Name)
gameInfo.Genres = append(gameInfo.Genres, genre.Name)
}
item.Themes = make([]string, 0)
for _, theme := range detail.Themes {
item.Themes = append(item.Themes, theme.Name)
}
item.Platforms = make([]string, 0)
for _, platform := range detail.Platforms {
item.Platforms = append(item.Platforms, platform.Name)
gameInfo.Platforms = append(gameInfo.Platforms, platform.Name)
}
item.PlayerPerspectives = make([]string, 0)
for _, perspective := range detail.PlayerPerspectives {
item.PlayerPerspectives = append(item.PlayerPerspectives, perspective.Name)
}
item.SimilarGames = detail.SimilarGames
item.Videos = make([]string, 0)
for _, video := range detail.Videos {
item.Videos = append(item.Videos, fmt.Sprintf("https://www.youtube.com/watch?v=%s", video.VideoID))
}
item.Websites = make([]string, 0)
for _, website := range detail.Websites {
item.Websites = append(item.Websites, website.URL)
}
item.Collections = make([]model.GameCollection, 0)
for _, collection := range detail.Collections {
item.Collections = append(item.Collections, model.GameCollection{
Games: collection.Games,
Name: collection.Name,
})
}
return item, nil
return gameInfo, nil
}
// OrganizeGameItemWithIGDB links a game item with its corresponding IGDB game information.
//
// The IGDB ID is looked up from game.Name. If the database already holds a
// game info for that ID it is reused; otherwise a new one is generated with
// GenerateIGDBGameInfo. In both cases game.ID is added to the info's GameIDs
// without duplicates. The returned info is not saved by this function.
func OrganizeGameItemWithIGDB(game *model.GameItem) (*model.GameInfo, error) {
	id, err := GetIGDBID(game.Name)
	if err != nil {
		zap.L().Error("failed to get IGDB ID for game", zap.String("gameName", game.Name), zap.Error(err))
		return nil, fmt.Errorf("failed to get IGDB ID for game '%s': %w", game.Name, err)
	}

	// Any lookup error is treated as "no stored info" and falls through to
	// generating a fresh entry.
	info, err := db.GetGameInfoByPlatformID("igdb", id)
	if err != nil {
		info, err = GenerateIGDBGameInfo(id)
		if err != nil {
			zap.L().Error("failed to generate IGDB game info", zap.Int("igdbID", id), zap.Error(err))
			return nil, fmt.Errorf("failed to generate IGDB game info for ID %d: %w", id, err)
		}
	}

	info.GameIDs = utils.Unique(append(info.GameIDs, game.ID))
	return info, nil
}
// GetIGDBIDBySteamAppID retrieves the IGDB ID of a game using its Steam App ID.
func GetIGDBIDBySteamAppID(id int) (int, error) {
key := fmt.Sprintf("igdb_id_by_steam_app_id:%v", id)
val, exist := cache.Get(key)
if exist {
key := fmt.Sprintf("igdb_id_by_steam_app_id:%d", id)
if val, exist := cache.Get(key); exist {
zap.L().Info("cache hit for IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.String("cacheKey", key))
return strconv.Atoi(val)
}
var err error
resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where url = "https://store.steampowered.com/app/%v" | url = "https://store.steampowered.com/app/%v/"*; fields *; limit 500;`, id, id))
query := fmt.Sprintf(`where url = "https://store.steampowered.com/app/%d" | url = "https://store.steampowered.com/app/%d/"; fields game;`, id, id)
resp, err := igdbRequest(constant.IGDBWebsitesURL, query)
if err != nil {
return 0, err
zap.L().Error("failed to fetch IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.Error(err))
return 0, fmt.Errorf("failed to fetch IGDB ID by Steam App ID %d: %w", id, err)
}
var data []struct {
Game int `json:"game"`
}
if err = json.Unmarshal(resp.Body(), &data); err != nil {
return 0, err
}
if len(data) == 0 {
return 0, errors.New("not found")
}
if data[0].Game == 0 {
return GetIGDBIDBySteamAppID(id)
zap.L().Error("failed to unmarshal IGDB response", zap.String("response", string(resp.Body())), zap.Error(err))
return 0, fmt.Errorf("failed to unmarshal IGDB response for Steam App ID %d: %w", id, err)
}
_ = cache.Set(key, strconv.Itoa(data[0].Game))
if len(data) == 0 || data[0].Game == 0 {
zap.L().Warn("no matching IGDB game found for Steam App ID", zap.Int("steamAppID", id))
return 0, errors.New("no matching IGDB game found")
}
return GetIGDBAppParent(data[0].Game)
igdbID := data[0].Game
cacheErr := cache.Set(key, strconv.Itoa(igdbID))
if cacheErr != nil {
zap.L().Warn("failed to cache IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.Error(cacheErr))
}
return GetIGDBAppParent(igdbID)
}
// GetIGDBIDBySteamBundleID retrieves the IGDB ID of a game using its Steam Bundle ID.
func GetIGDBIDBySteamBundleID(id int) (int, error) {
key := fmt.Sprintf("igdb_id_by_steam_bundle_id:%v", id)
val, exist := cache.Get(key)
if exist {
key := fmt.Sprintf("igdb_id_by_steam_bundle_id:%d", id)
if val, exist := cache.Get(key); exist {
zap.L().Info("cache hit for IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.String("cacheKey", key))
return strconv.Atoi(val)
}
var err error
resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where url = "https://store.steampowered.com/bundle/%v" | url = "https://store.steampowered.com/bundle/%v/"*; fields *; limit 500;`, id, id))
query := fmt.Sprintf(`where url = "https://store.steampowered.com/bundle/%d" | url = "https://store.steampowered.com/bundle/%d/"; fields game;`, id, id)
resp, err := igdbRequest(constant.IGDBWebsitesURL, query)
if err != nil {
return 0, err
zap.L().Error("failed to fetch IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.Error(err))
return 0, fmt.Errorf("failed to fetch IGDB ID by Steam Bundle ID %d: %w", id, err)
}
var data []struct {
Game int `json:"game"`
}
if err = json.Unmarshal(resp.Body(), &data); err != nil {
return 0, err
}
if len(data) == 0 {
return 0, errors.New("not found")
}
if data[0].Game == 0 {
return GetIGDBIDBySteamBundleID(id)
zap.L().Error("failed to unmarshal IGDB response", zap.String("response", string(resp.Body())), zap.Error(err))
return 0, fmt.Errorf("failed to unmarshal IGDB response for Steam Bundle ID %d: %w", id, err)
}
_ = cache.Set(key, strconv.Itoa(data[0].Game))
if len(data) == 0 || data[0].Game == 0 {
zap.L().Warn("no matching IGDB game found for Steam Bundle ID", zap.Int("steamBundleID", id))
return 0, errors.New("no matching IGDB game found")
}
return GetIGDBAppParent(data[0].Game)
igdbID := data[0].Game
cacheErr := cache.Set(key, strconv.Itoa(igdbID))
if cacheErr != nil {
zap.L().Warn("failed to cache IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.Error(cacheErr))
}
return GetIGDBAppParent(igdbID)
}
// GetIGDBPopularGameIDs get IGDB popular game IDs
// GetIGDBPopularGameIDs retrieves popular IGDB game IDs based on a given popularity type.
// popularity_type = 1 IGDB Visits: Game page visits on IGDB.com.
// popularity_type = 2 IGDB Want to Play: Additions to IGDB.com users “Want to Play” lists.
// popularity_type = 3 IGDB Playing: Additions to IGDB.com users “Playing” lists.
// popularity_type = 4 IGDB Played: Additions to IGDB.com users “Played” lists.
func GetIGDBPopularGameIDs(popularityType int, offset int, limit int) ([]int, error) {
var err error
resp, err := igdbRequest(constant.IGDBPopularityURL, fmt.Sprintf("fields game_id,value,popularity_type; sort value desc; limit %v; offset %v; where popularity_type = %v;", limit, offset, popularityType))
func GetIGDBPopularGameIDs(popularityType, offset, limit int) ([]int, error) {
query := fmt.Sprintf("fields game_id,value,popularity_type; sort value desc; limit %d; offset %d; where popularity_type = %d;", limit, offset, popularityType)
resp, err := igdbRequest(constant.IGDBPopularityURL, query)
if err != nil {
return nil, err
zap.L().Error("failed to fetch popular IGDB game IDs", zap.Int("popularityType", popularityType), zap.Error(err))
return nil, fmt.Errorf("failed to fetch popular IGDB game IDs for type %d: %w", popularityType, err)
}
type IgdbPopularity struct {
var data []struct {
GameID int `json:"game_id"`
Value float64 `json:"value"`
}
var data []IgdbPopularity
if err = json.Unmarshal(resp.Body(), &data); err != nil {
return nil, err
zap.L().Error("failed to unmarshal IGDB popular games response", zap.String("response", string(resp.Body())), zap.Error(err))
return nil, fmt.Errorf("failed to unmarshal IGDB popular games response: %w", err)
}
ret := make([]int, 0)
gameIDs := make([]int, 0, len(data))
for _, d := range data {
pid, err := GetIGDBAppParent(d.GameID)
parentID, err := GetIGDBAppParent(d.GameID)
if err != nil {
ret = append(ret, d.GameID)
continue
zap.L().Warn("failed to fetch parent IGDB ID for game", zap.Int("gameID", d.GameID), zap.Error(err))
gameIDs = append(gameIDs, d.GameID)
} else {
gameIDs = append(gameIDs, parentID)
}
ret = append(ret, pid)
}
return ret, nil
return gameIDs, nil
}

68
crawler/omg_gods.go Normal file
View File

@ -0,0 +1,68 @@
package crawler
import (
"game-crawler/model"
"regexp"
"strings"
"go.uber.org/zap"
)
// OmgGodsCrawler crawls releases attributed to the "OmgGods" source. It holds
// a RutrackerCrawler by value and forwards every crawling call to it.
type OmgGodsCrawler struct {
	logger  *zap.Logger
	crawler RutrackerCrawler
}

// NewOmgGodsCrawler builds an OmgGodsCrawler whose inner RutrackerCrawler is
// configured with source "OmgGods", platform "switch", rid "8994327", the
// given credentials and clearance URL, and omgGodsFormatter as name formatter.
func NewOmgGodsCrawler(cfClearanceUrl, username, password string, logger *zap.Logger) *OmgGodsCrawler {
	inner := NewRutrackerCrawler(
		"OmgGods",
		"switch",
		"8994327",
		username,
		password,
		cfClearanceUrl,
		omgGodsFormatter,
		logger,
	)
	// The inner crawler is stored as a copy, not as a pointer.
	return &OmgGodsCrawler{
		logger:  logger,
		crawler: *inner,
	}
}

// Name returns the identifier of this crawler.
func (c *OmgGodsCrawler) Name() string {
	return "OmgGodsCrawler"
}

// CrawlByUrl forwards to the inner RutrackerCrawler's CrawlByUrl.
func (c *OmgGodsCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	item, err := c.crawler.CrawlByUrl(URL)
	return item, err
}

// Crawl forwards to the inner RutrackerCrawler's Crawl for the given page.
func (c *OmgGodsCrawler) Crawl(page int) ([]*model.GameItem, error) {
	items, err := c.crawler.Crawl(page)
	return items, err
}

// CrawlAll forwards to the inner RutrackerCrawler's CrawlAll.
func (c *OmgGodsCrawler) CrawlAll() ([]*model.GameItem, error) {
	items, err := c.crawler.CrawlAll()
	return items, err
}

// GetTotalPageNum forwards to the inner RutrackerCrawler's GetTotalPageNum.
func (c *OmgGodsCrawler) GetTotalPageNum() (int, error) {
	total, err := c.crawler.GetTotalPageNum()
	return total, err
}
// omgGodsFormatRegex lists the patterns stripped from a raw title:
// parenthesised groups first, then square-bracketed groups.
var omgGodsFormatRegex = []*regexp.Regexp{
	regexp.MustCompile(`\(.*?\)`),
	regexp.MustCompile(`\[.*?\]`),
}

// omgGodsFormatter derives a clean game name from a raw title. It removes
// every match of omgGodsFormatRegex, keeps only the text before the first
// " + " and then only the text before the first " / ", and finally trims
// surrounding whitespace.
func omgGodsFormatter(name string) string {
	cleaned := name
	for i := range omgGodsFormatRegex {
		cleaned = omgGodsFormatRegex[i].ReplaceAllString(cleaned, "")
	}
	// Separators are applied in this order; each cut keeps the leading part.
	for _, sep := range [...]string{" + ", " / "} {
		if idx := strings.Index(cleaned, sep); idx >= 0 {
			cleaned = cleaned[:idx]
		}
	}
	return strings.TrimSpace(cleaned)
}

View File

@ -8,14 +8,12 @@ import (
"fmt"
"net/http"
"net/url"
"os"
"regexp"
"strconv"
"strings"
"time"
"game-crawler/cache"
"game-crawler/config"
"game-crawler/constant"
"game-crawler/db"
"game-crawler/model"
@ -26,11 +24,15 @@ import (
)
type OnlineFixCrawler struct {
username string
password string
logger *zap.Logger
}
func NewOnlineFixCrawler(logger *zap.Logger) *OnlineFixCrawler {
func NewOnlineFixCrawler(username, password string, logger *zap.Logger) *OnlineFixCrawler {
return &OnlineFixCrawler{
username: username,
password: password,
logger: logger,
}
}
@ -42,206 +44,218 @@ func (c *OnlineFixCrawler) Name() string {
func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) {
cookies, err := c.getCookies()
if err != nil {
return nil, err
c.logger.Error("Failed to get cookies", zap.Error(err))
return nil, fmt.Errorf("failed to get cookies: %w", err)
}
requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page)
resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).SetCookies(cookies).Get(requestURL)
if err != nil {
c.logger.Error("Failed to fetch", zap.Error(err))
return nil, err
c.logger.Error("Failed to fetch page", zap.String("url", requestURL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
}
body := utils.Windows1251ToUTF8(resp.Body())
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
if err != nil {
c.logger.Error("Failed to parse HTML", zap.Error(err))
return nil, err
c.logger.Error("Failed to parse HTML", zap.String("url", requestURL), zap.Error(err))
return nil, fmt.Errorf("failed to parse HTML for page %d: %w", page, err)
}
var urls []string
var updateFlags []string //link+date
var updateFlags []string
doc.Find("article.news").Each(func(i int, s *goquery.Selection) {
urls = append(urls, s.Find(".big-link").First().AttrOr("href", ""))
updateFlags = append(
updateFlags,
base64.StdEncoding.EncodeToString([]byte(s.Find(".big-link").First().AttrOr("href", "")+s.Find("time").Text())),
)
url := s.Find(".big-link").First().AttrOr("href", "")
if url != "" {
urls = append(urls, url)
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(url+s.Find("time").Text())))
}
})
var res []*model.GameItem
for i, u := range urls {
// Skip already crawled links
if db.IsOnlineFixCrawled(updateFlags[i]) {
c.logger.Info("Skipping already crawled URL", zap.String("url", u))
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
c.logger.Info("Crawling URL", zap.String("url", u))
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to crawl URL", zap.String("url", u), zap.Error(err))
continue
}
item.UpdateFlag = updateFlags[i]
err = db.SaveGameItem(item)
if err != nil {
c.logger.Warn("Failed to save", zap.Error(err))
if err := db.SaveGameItem(item); err != nil {
c.logger.Warn("Failed to save game item", zap.String("url", u), zap.Error(err))
continue
}
res = append(res, item)
if err := OrganizeGameItem(item); err != nil {
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to organize game item", zap.String("url", u), zap.Error(err))
continue
}
}
return res, nil
}
func (c *OnlineFixCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
cookies, err := c.getCookies()
if err != nil {
return nil, err
c.logger.Error("Failed to get cookies", zap.Error(err))
return nil, fmt.Errorf("failed to get cookies: %w", err)
}
resp, err := utils.Request().SetHeaders(map[string]string{
"Referer": constant.OnlineFixURL,
}).SetCookies(cookies).Get(URL)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch URL", zap.String("url", URL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
}
body := utils.Windows1251ToUTF8(resp.Body())
titleRegex := regexp.MustCompile(`(?i)<h1.*?>(.*?)</h1>`)
titleRegexRes := titleRegex.FindAllStringSubmatch(string(body), -1)
if len(titleRegexRes) == 0 {
return nil, errors.New("failed to find title")
titleMatch := titleRegex.FindStringSubmatch(string(body))
if len(titleMatch) == 0 {
c.logger.Warn("Failed to find title in HTML", zap.String("url", URL))
return nil, errors.New("failed to find title in HTML")
}
downloadRegex := regexp.MustCompile(`(?i)<a[^>]+\bhref="([^"]+)"[^>]+>(Скачать Torrent|Скачать торрент)</a>`)
downloadRegexRes := downloadRegex.FindAllStringSubmatch(string(body), -1)
if len(downloadRegexRes) == 0 {
downloadMatch := downloadRegex.FindStringSubmatch(string(body))
if len(downloadMatch) == 0 {
c.logger.Warn("Failed to find download button", zap.String("url", URL))
return nil, errors.New("failed to find download button")
}
// Retrieve or create game item
item, err := db.GetGameItemByUrl(URL)
if err != nil {
return nil, err
item = &model.GameItem{}
}
item.RawName = titleRegexRes[0][1]
item.RawName = titleMatch[1]
item.Name = OnlineFixFormatter(item.RawName)
item.Url = URL
item.Author = "OnlineFix"
item.Size = "0"
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1])
// Handle download links
downloadURL := downloadMatch[1]
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadURL)
if err != nil {
c.logger.Error("Failed to fetch download link", zap.String("url", downloadURL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch download link %s: %w", downloadURL, err)
}
body = utils.Windows1251ToUTF8(resp.Body())
if err != nil {
return nil, err
}
if strings.Contains(downloadRegexRes[0][1], "uploads.online-fix.me") {
if strings.Contains(downloadURL, "uploads.online-fix.me") {
// Handle torrent file
magnetRegex := regexp.MustCompile(`(?i)"(.*?).torrent"`)
magnetRegexRes := magnetRegex.FindAllStringSubmatch(string(body), -1)
if len(magnetRegexRes) == 0 {
return nil, errors.New("failed to find magnet")
magnetMatch := magnetRegex.FindStringSubmatch(string(body))
if len(magnetMatch) == 0 {
c.logger.Warn("Failed to find torrent magnet link", zap.String("url", downloadURL))
return nil, errors.New("failed to find torrent magnet link")
}
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1] + strings.Trim(magnetRegexRes[0][0], "\""))
torrentURL := downloadURL + strings.Trim(magnetMatch[0], "\"")
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(torrentURL)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch torrent file", zap.String("url", torrentURL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch torrent file %s: %w", torrentURL, err)
}
magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
if err != nil {
c.logger.Error("Failed to convert torrent to magnet", zap.String("url", torrentURL), zap.Error(err))
return nil, fmt.Errorf("failed to convert torrent to magnet: %w", err)
}
item.DownloadLinks = []string{magnet}
item.Size = size
if err != nil {
return nil, err
}
} else if strings.Contains(downloadRegexRes[0][1], "online-fix.me/ext") {
if strings.Contains(string(body), "mega.nz") {
if !config.Config.MegaAvaliable {
return nil, errors.New("mega is not avaliable")
}
megaRegex := regexp.MustCompile(`(?i)location.href=\\'([^\\']*)\\'`)
megaRegexRes := megaRegex.FindAllStringSubmatch(string(body), -1)
if len(megaRegexRes) == 0 {
return nil, errors.New("failed to find download link")
}
path, files, err := utils.MegaDownload(megaRegexRes[0][1], "torrent")
if err != nil {
return nil, err
}
torrent := ""
for _, file := range files {
if strings.HasSuffix(file, ".torrent") {
torrent = file
break
}
}
dataBytes, err := os.ReadFile(torrent)
if err != nil {
return nil, err
}
magnet, size, err := utils.ConvertTorrentToMagnet(dataBytes)
item.DownloadLinks = []string{magnet}
item.Size = size
if err != nil {
return nil, err
}
_ = os.RemoveAll(path)
} else {
return nil, errors.New("failed to find download link")
}
} else {
return nil, errors.New("failed to find download link")
c.logger.Warn("Unsupported download link format", zap.String("url", downloadURL))
return nil, errors.New("unsupported download link format")
}
item.Platform = "windows"
return item, nil
}
// CrawlMulti crawls the given pages in order and returns all collected items.
// It stops and returns an error as soon as one page fails.
func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	var res []*model.GameItem
	for _, page := range pages {
		items, err := c.Crawl(page)
		if err != nil {
			c.logger.Error("Failed to crawl page", zap.Int("page", page), zap.Error(err))
			return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
		}
		res = append(res, items...)
	}
	return res, nil
}
// CrawlAll crawls every page from 1 up to the number reported by
// GetTotalPageNum. A page that fails is logged and skipped; only a failure
// to determine the page count is returned as an error.
func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) {
	var res []*model.GameItem
	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		c.logger.Error("Failed to get total page number", zap.Error(err))
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			c.logger.Warn("Failed to crawl page", zap.Int("page", i), zap.Error(err))
			continue
		}
		res = append(res, items...)
	}
	return res, nil
}
// Get total page number
func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) {
resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).Get(constant.OnlineFixURL)
if err != nil {
return 0, err
c.logger.Error("Failed to fetch main page", zap.Error(err))
return 0, fmt.Errorf("failed to fetch main page: %w", err)
}
pageRegex := regexp.MustCompile(`(?i)<a href="https://online-fix.me/page/(\d+)/">.*?</a>`)
pageRegexRes := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
if len(pageRegexRes) == 0 {
return 0, err
pageMatches := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
if len(pageMatches) < 2 {
c.logger.Warn("Failed to find total page number in HTML")
return 0, errors.New("failed to find total page number")
}
totalPageNum, err := strconv.Atoi(pageRegexRes[len(pageRegexRes)-2][1])
totalPageNum, err := strconv.Atoi(pageMatches[len(pageMatches)-2][1])
if err != nil {
return 0, err
c.logger.Error("Failed to parse total page number", zap.Error(err))
return 0, fmt.Errorf("failed to parse total page number: %w", err)
}
return totalPageNum, nil
}
type csrf struct {
Field string `json:"field"`
Value string `json:"value"`
}
// Get cookies for authentication
func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
if c.username == "" || c.password == "" {
return nil, errors.New("username or password is empty")
}
val, exists := cache.Get("onlinefix_cookies")
if exists {
var cookies []*http.Cookie
if err := json.Unmarshal([]byte(val), &cookies); err != nil {
return nil, err
c.logger.Warn("Failed to parse cached cookies", zap.Error(err))
return nil, fmt.Errorf("failed to parse cached cookies: %w", err)
}
return cookies, nil
}
@ -251,38 +265,48 @@ func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
"Referer": constant.OnlineFixURL,
}).Get(constant.OnlineFixCSRFURL)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch CSRF token", zap.Error(err))
return nil, fmt.Errorf("failed to fetch CSRF token: %w", err)
}
var csrf csrf
if err = json.Unmarshal(resp.Body(), &csrf); err != nil {
return nil, err
type csrf struct {
Field string `json:"field"`
Value string `json:"value"`
}
var csrfToken csrf
if err = json.Unmarshal(resp.Body(), &csrfToken); err != nil {
c.logger.Error("Failed to parse CSRF token", zap.Error(err))
return nil, fmt.Errorf("failed to parse CSRF token: %w", err)
}
cookies := resp.Cookies()
params := url.Values{}
params.Add("login_name", config.Config.OnlineFix.User)
params.Add("login_password", config.Config.OnlineFix.Password)
params.Add(csrf.Field, csrf.Value)
params.Add("login_name", c.username)
params.Add("login_password", c.password)
params.Add(csrfToken.Field, csrfToken.Value)
params.Add("login", "submit")
resp, err = utils.Request().SetHeaders(map[string]string{
"Origin": constant.OnlineFixURL,
"Content-Type": "application/x-www-form-urlencoded",
"Referer": constant.OnlineFixURL,
}).SetCookies(cookies).SetBody(params.Encode()).Post(constant.OnlineFixURL)
if err != nil {
return nil, err
c.logger.Error("Failed to log in", zap.Error(err))
return nil, fmt.Errorf("failed to log in: %w", err)
}
cookies = resp.Cookies()
jsonBytes, _ := json.Marshal(cookies)
_ = cache.SetWithExpire("onlinefix_cookies", string(jsonBytes), time.Hour)
cookiesJSON, _ := json.Marshal(cookies)
_ = cache.SetWithExpire("onlinefix_cookies", string(cookiesJSON), time.Hour)
return cookies, nil
}
// onlineFixParenRegex matches a parenthesised group; compiled once at package
// scope instead of on every OnlineFixFormatter call.
var onlineFixParenRegex = regexp.MustCompile(`(?i)\(.*?\)`)

// OnlineFixFormatter derives a clean game name from a raw page title: it
// removes every occurrence of "по сети", strips parenthesised groups and
// trims surrounding whitespace.
func OnlineFixFormatter(name string) string {
	name = strings.ReplaceAll(name, "по сети", "")
	return strings.TrimSpace(onlineFixParenRegex.ReplaceAllString(name, ""))
}

298
crawler/rutracker.go Normal file
View File

@ -0,0 +1,298 @@
package crawler
import (
"encoding/base64"
"encoding/json"
"fmt"
"game-crawler/cache"
"game-crawler/constant"
"game-crawler/db"
"game-crawler/model"
"game-crawler/utils"
"net/url"
"strconv"
"strings"
"time"
"unicode"
"git.nite07.com/nite/ccs"
"github.com/Danny-Dasilva/CycleTLS/cycletls"
http "github.com/Danny-Dasilva/fhttp"
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)
// RutrackerCrawler crawls the torrent listing selected by rid on Rutracker
// and turns each topic into a game item.
type RutrackerCrawler struct {
	// source is written to each item's Author and passed to the "already crawled" check.
	// rid fills the listing URL; platform is written to each item's Platform.
	// username and password are the login credentials.
	source, rid, platform, username, password string

	formatter      FormatterFunc // turns a raw topic title into a game name
	logger         *zap.Logger
	cfClearanceUrl string // passed to ccs.WAFSession when a session is created
}

// NewRutrackerCrawler assembles a RutrackerCrawler from its settings. Note
// the argument order: source, platform, rid, then credentials, clearance URL,
// formatter and logger.
func NewRutrackerCrawler(source, platform, rid, username, password, cfClearanceUrl string, formatter FormatterFunc, logger *zap.Logger) *RutrackerCrawler {
	crawler := new(RutrackerCrawler)
	crawler.source = source
	crawler.platform = platform
	crawler.rid = rid
	crawler.username = username
	crawler.password = password
	crawler.cfClearanceUrl = cfClearanceUrl
	crawler.formatter = formatter
	crawler.logger = logger
	return crawler
}
// getSession returns an authenticated session for Rutracker requests.
//
// A session cached under "rutracker_session" is reused when it can be
// decoded. If the cached value is missing or cannot be decoded, a fresh login
// is performed and the new session is cached for 24 hours, so a corrupt cache
// entry does not block crawling until it expires.
//
// An error is returned when the credentials are empty or the login fails.
func (r *RutrackerCrawler) getSession() (*ccs.Session, error) {
	r.logger.Info("Fetching session for RutrackerCrawler")

	if r.username == "" || r.password == "" {
		r.logger.Error("Username or password is empty")
		return nil, fmt.Errorf("username or password is empty")
	}

	const cacheKey = "rutracker_session"
	if val, exist := cache.Get(cacheKey); exist {
		var cached ccs.Session
		err := json.Unmarshal([]byte(val), &cached)
		if err == nil {
			return &cached, nil
		}
		// Fall through to a fresh login rather than failing on a bad entry.
		r.logger.Warn("Failed to unmarshal cached session, logging in again", zap.Error(err))
	}

	session, err := r.login()
	if err != nil {
		return nil, err
	}

	// Caching is best-effort; the session is still usable if it cannot be stored.
	jsonBytes, err := json.Marshal(session)
	if err != nil {
		r.logger.Warn("Failed to marshal session for caching", zap.Error(err))
	} else {
		_ = cache.SetWithExpire(cacheKey, string(jsonBytes), 24*time.Hour)
	}

	return session, nil
}

// login creates a WAF session and submits the login form. It returns the
// session with the login response cookies appended, and fails unless the
// response contains a "bb_session" cookie.
func (r *RutrackerCrawler) login() (*ccs.Session, error) {
	session, err := ccs.WAFSession(r.cfClearanceUrl, constant.RutrackerURL)
	if err != nil {
		r.logger.Error("Failed to create WAF session", zap.Error(err))
		return nil, fmt.Errorf("failed to create WAF session: %w", err)
	}

	params := url.Values{}
	params.Add("login_username", r.username)
	params.Add("login_password", r.password)
	params.Add("login", "Вход")

	resp, err := ccs.RequestWithWAFSession(http.MethodPost, constant.RutrackerLoginURL, session, &cycletls.Options{
		Headers: map[string]string{
			"Content-Type": "application/x-www-form-urlencoded",
		},
		Body:            params.Encode(),
		UserAgent:       "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
		DisableRedirect: true,
	})
	if err != nil {
		r.logger.Error("Failed to login", zap.Error(err))
		return nil, fmt.Errorf("failed to login: %w", err)
	}

	if len(resp.Cookies) == 0 {
		r.logger.Error("Failed to login, no cookies found")
		return nil, fmt.Errorf("failed to login, no cookies found")
	}

	// The presence of bb_session is what marks the login as successful.
	success := false
	loginCookies := make([]ccs.Cookie, 0, len(resp.Cookies))
	for _, cookie := range resp.Cookies {
		if cookie.Name == "bb_session" {
			success = true
		}
		loginCookies = append(loginCookies, ccs.Cookie{
			Name:  cookie.Name,
			Value: cookie.Value,
		})
	}
	if !success {
		r.logger.Error("Failed to login, no bb_session cookie found")
		return nil, fmt.Errorf("failed to login, no bb_session cookie found")
	}

	session.Cookies = append(session.Cookies, loginCookies...)
	return &session, nil
}
// CrawlByUrl crawls a single topic page and returns the populated item.
//
// The raw name is the text of "#topic-title", the download link is the href
// of ".magnet-link", and the size is parsed as a byte count from the title
// attribute of "#tor-size-humn". A missing magnet link is an error. A size
// that is missing or cannot be parsed is recorded as "unknown".
func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	session, err := r.getSession()
	if err != nil {
		return nil, fmt.Errorf("failed to get session: %w", err)
	}

	resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
	if err != nil {
		r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to request URL: %w", err)
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body))
	if err != nil {
		r.logger.Error("Failed to parse HTML", zap.String("url", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to parse HTML: %w", err)
	}

	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		r.logger.Error("Failed to get game item by url", zap.String("url", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to get game item by url: %w", err)
	}

	item.RawName = doc.Find("#topic-title").Text()
	item.Name = r.formatter(item.RawName)
	item.Author = r.source
	item.Platform = r.platform
	item.Url = URL

	magnet := doc.Find(".magnet-link").AttrOr("href", "")
	if magnet == "" {
		r.logger.Error("Failed to find magnet link", zap.String("url", URL))
		return nil, fmt.Errorf("failed to find magnet link")
	}
	item.DownloadLinks = []string{magnet}

	// Default to "unknown" so both failure paths leave the same value.
	item.Size = "unknown"
	sizeStr := doc.Find("#tor-size-humn").AttrOr("title", "")
	if sizeStr == "" {
		r.logger.Warn("Failed to find size", zap.String("url", URL))
	} else if size, err := strconv.ParseUint(sizeStr, 10, 64); err != nil {
		r.logger.Error("Failed to parse size", zap.String("url", URL), zap.Error(err))
	} else {
		item.Size = utils.BytesToSize(size)
	}

	return item, nil
}
// Crawl fetches one listing page for this crawler's rid, crawls every topic
// on it that has not been crawled yet, saves the items and returns them.
// The listing offset is (page-1)*50.
//
// Each saved item carries its update flag (base64 of topic URL plus the text
// of the row's last cell), the same value that is passed to db.IsGameCrawled
// for that row.
//
// An error is returned only when the session or the listing page cannot be
// obtained or parsed. A failure on an individual topic is logged and skipped.
func (r *RutrackerCrawler) Crawl(page int) ([]*model.GameItem, error) {
	r.logger.Info("Crawling Rutracker", zap.Int("page", page), zap.String("rid", r.rid))

	session, err := r.getSession()
	if err != nil {
		r.logger.Error("Failed to get session", zap.Error(err))
		return nil, fmt.Errorf("failed to get session: %w", err)
	}

	URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, (page-1)*50)
	resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
	if err != nil {
		r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to request URL: %w", err)
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body))
	if err != nil {
		r.logger.Error("Failed to parse HTML", zap.Error(err))
		return nil, fmt.Errorf("failed to parse HTML: %w", err)
	}

	// urls and updateFlags are parallel slices.
	var urls []string
	var updateFlags []string
	doc.Find("[id^='trs-tr']").Each(func(i int, s *goquery.Selection) {
		a := s.Find(".t-title")
		datetime := s.Find("td").Last().Text()
		// Named href rather than url so the net/url package is not shadowed.
		href, exists := a.Attr("href")
		if !exists {
			r.logger.Error("Failed to find URL")
			return
		}
		fullURL := fmt.Sprintf(constant.RutrackerTopicURL, href)
		urls = append(urls, fullURL)
		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fullURL+datetime)))
	})

	var res []*model.GameItem
	for i, URL := range urls {
		if db.IsGameCrawled(updateFlags[i], r.source) {
			r.logger.Info("Skipping already crawled URL", zap.String("URL", URL))
			continue
		}

		r.logger.Info("Crawling URL", zap.String("URL", URL))
		item, err := r.CrawlByUrl(URL)
		if err != nil {
			r.logger.Error("Failed to crawl URL", zap.String("URL", URL), zap.Error(err))
			continue
		}

		// Record the flag before saving so the skip check above has a stored value to match.
		item.UpdateFlag = updateFlags[i]

		err = db.SaveGameItem(item)
		if err != nil {
			r.logger.Error("Failed to save game item to database", zap.String("URL", URL), zap.Error(err))
			continue
		}

		res = append(res, item)

		if err := OrganizeGameItem(item); err != nil {
			r.logger.Warn("Failed to organize game item", zap.String("URL", URL), zap.Error(err))
			continue
		}
	}

	r.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
	return res, nil
}
// CrawlAll crawls every page from 1 up to the count reported by
// GetTotalPageNum and returns all collected items. It aborts with an error
// as soon as the page count or any single page cannot be obtained.
func (r *RutrackerCrawler) CrawlAll() (res []*model.GameItem, err error) {
	r.logger.Info("Starting CrawlAll", zap.String("Source", r.source))

	pageCount, err := r.GetTotalPageNum()
	if err != nil {
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	page := 1
	for page <= pageCount {
		pageItems, crawlErr := r.Crawl(page)
		if crawlErr != nil {
			return nil, fmt.Errorf("failed to crawl page %d: %w", page, crawlErr)
		}
		res = append(res, pageItems...)
		page++
	}

	return res, nil
}
// GetTotalPageNum fetches the first listing page for this crawler's rid and
// derives the total page count from its ".pg" elements.
//
// The entries are scanned from the end and the last one consisting only of
// digits is used. This covers both a numeric last entry and a non-numeric
// last entry preceded by the final page number, without indexing past the
// start of the slice. An error is returned when no ".pg" element exists or
// none of them is numeric.
func (r *RutrackerCrawler) GetTotalPageNum() (int, error) {
	session, err := r.getSession()
	if err != nil {
		return 0, fmt.Errorf("failed to get session: %w", err)
	}

	URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, 0)
	resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
	if err != nil {
		r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
		return 0, fmt.Errorf("failed to request URL: %w", err)
	}

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body))
	if err != nil {
		r.logger.Error("Failed to parse HTML", zap.Error(err))
		return 0, fmt.Errorf("failed to parse HTML: %w", err)
	}

	var pg []string
	doc.Find(".pg").Each(func(i int, s *goquery.Selection) {
		pg = append(pg, s.Text())
	})
	if len(pg) == 0 {
		r.logger.Error("Failed to find page number")
		return 0, fmt.Errorf("failed to find page number")
	}

	notDigit := func(c rune) bool { return !unicode.IsDigit(c) }
	for i := len(pg) - 1; i >= 0; i-- {
		text := strings.TrimSpace(pg[i])
		if text == "" || strings.IndexFunc(text, notDigit) != -1 {
			continue
		}
		totalPage, err := strconv.Atoi(text)
		if err != nil {
			r.logger.Error("Failed to parse page number", zap.Error(err))
			return 0, fmt.Errorf("failed to parse page number: %w", err)
		}
		return totalPage, nil
	}

	r.logger.Error("Failed to find page number")
	return 0, fmt.Errorf("failed to find page number")
}

View File

@ -13,15 +13,19 @@ import (
"game-crawler/constant"
"game-crawler/model"
"game-crawler/utils"
"go.uber.org/zap"
)
// GetSteamAppDetail fetches the details of a Steam app by its ID.
func GetSteamAppDetail(id int) (*model.SteamAppDetail, error) {
key := fmt.Sprintf("steam_game:%d", id)
val, exist := cache.Get(key)
if exist {
if val, exist := cache.Get(key); exist {
zap.L().Info("Cache hit for Steam app detail", zap.Int("steamID", id))
var detail model.SteamAppDetail
if err := json.Unmarshal([]byte(val), &detail); err != nil {
return nil, err
zap.L().Warn("Failed to unmarshal cached Steam app detail", zap.Int("steamID", id), zap.Error(err))
return nil, fmt.Errorf("failed to unmarshal cached Steam app detail for ID %d: %w", id, err)
}
return &detail, nil
}
@ -29,93 +33,117 @@ func GetSteamAppDetail(id int) (*model.SteamAppDetail, error) {
baseURL, _ := url.Parse(constant.SteamAppDetailURL)
params := url.Values{}
params.Add("appids", strconv.Itoa(id))
// params.Add("l", "schinese")
baseURL.RawQuery = params.Encode()
resp, err := utils.Request().SetHeaders(map[string]string{
"User-Agent": "",
}).Get(baseURL.String())
if err != nil {
return nil, err
}
var detail map[string]*model.SteamAppDetail
if err = json.Unmarshal(resp.Body(), &detail); err != nil {
return nil, err
}
if _, ok := detail[strconv.Itoa(id)]; !ok {
return nil, fmt.Errorf("steam App not found: %d", id)
}
if detail[strconv.Itoa(id)] == nil {
return nil, fmt.Errorf("steam App not found: %d", id)
zap.L().Error("Failed to fetch Steam app detail", zap.Int("steamID", id), zap.String("url", baseURL.String()), zap.Error(err))
return nil, fmt.Errorf("failed to fetch Steam app detail for ID %d: %w", id, err)
}
jsonBytes, err := json.Marshal(detail[strconv.Itoa(id)])
var detail map[string]*model.SteamAppDetail
if err := json.Unmarshal(resp.Body(), &detail); err != nil {
zap.L().Error("Failed to unmarshal Steam app detail response", zap.Int("steamID", id), zap.String("response", string(resp.Body())), zap.Error(err))
return nil, fmt.Errorf("failed to unmarshal Steam app detail for ID %d: %w", id, err)
}
if appDetail, ok := detail[strconv.Itoa(id)]; !ok || appDetail == nil {
zap.L().Warn("Steam app detail not found", zap.Int("steamID", id))
return nil, fmt.Errorf("steam app not found: %d", id)
} else {
// Cache the result
jsonBytes, err := json.Marshal(appDetail)
if err == nil {
_ = cache.Set(key, string(jsonBytes))
}
return detail[strconv.Itoa(id)], nil
return appDetail, nil
}
}
// GenerateSteamGameInfo generates detailed game information based on a Steam App ID.
//
// It loads the app detail through GetSteamAppDetail and copies name, short
// description, developers, publishers and full-size screenshot paths into a
// new GameInfo. The cover URL is built from the app ID. An error is returned
// when the app detail cannot be loaded.
func GenerateSteamGameInfo(id int) (*model.GameInfo, error) {
	detail, err := GetSteamAppDetail(id)
	if err != nil {
		zap.L().Error("Failed to fetch Steam app detail for game info generation", zap.Int("steamID", id), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch Steam app detail for ID %d: %w", id, err)
	}

	item := &model.GameInfo{
		SteamID:     id,
		Name:        detail.Data.Name,
		Description: detail.Data.ShortDescription,
		Cover:       fmt.Sprintf("https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/%v/library_600x900_2x.jpg", id),
		Developers:  detail.Data.Developers,
		Publishers:  detail.Data.Publishers,
		Screenshots: make([]string, 0, len(detail.Data.Screenshots)),
	}

	for _, screenshot := range detail.Data.Screenshots {
		item.Screenshots = append(item.Screenshots, screenshot.PathFull)
	}

	zap.L().Info("Generated Steam game info", zap.Int("steamID", id), zap.String("name", item.Name))
	return item, nil
}
// GetSteamIDByIGDBID retrieves the Steam App ID associated with a given IGDB ID.
func GetSteamIDByIGDBID(IGDBID int) (int, error) {
key := fmt.Sprintf("steam_game:%d", IGDBID)
val, exist := cache.Get(key)
if exist {
if val, exist := cache.Get(key); exist {
zap.L().Info("Cache hit for Steam ID by IGDB ID", zap.Int("IGDBID", IGDBID))
id, err := strconv.Atoi(val)
if err != nil {
return 0, err
zap.L().Warn("Failed to parse cached Steam ID", zap.Int("IGDBID", IGDBID), zap.Error(err))
return 0, fmt.Errorf("failed to parse cached Steam ID for IGDB ID %d: %w", IGDBID, err)
}
return id, nil
}
var err error
resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where game = %v; fields *; limit 500;`, IGDBID))
query := fmt.Sprintf(`where game = %v; fields *; limit 500;`, IGDBID)
resp, err := igdbRequest(constant.IGDBWebsitesURL, query)
if err != nil {
return 0, err
zap.L().Error("Failed to fetch IGDB websites for Steam ID", zap.Int("IGDBID", IGDBID), zap.Error(err))
return 0, fmt.Errorf("failed to fetch IGDB websites for IGDB ID %d: %w", IGDBID, err)
}
var data []struct {
Game int `json:"game"`
Url string `json:"url"`
}
if err = json.Unmarshal(resp.Body(), &data); err != nil {
return 0, err
if err := json.Unmarshal(resp.Body(), &data); err != nil {
zap.L().Error("Failed to unmarshal IGDB websites response", zap.Int("IGDBID", IGDBID), zap.String("response", string(resp.Body())), zap.Error(err))
return 0, fmt.Errorf("failed to unmarshal IGDB websites response for IGDB ID %d: %w", IGDBID, err)
}
if len(data) == 0 {
return 0, errors.New("not found")
zap.L().Warn("No Steam ID found for IGDB ID", zap.Int("IGDBID", IGDBID))
return 0, errors.New("steam ID not found")
}
for _, v := range data {
if strings.HasPrefix(v.Url, "https://store.steampowered.com/app/") {
regex := regexp.MustCompile(`https://store.steampowered.com/app/(\d+)/?`)
idStr := regex.FindStringSubmatch(v.Url)
if len(idStr) < 2 {
return 0, errors.New("failed parse")
idMatch := regex.FindStringSubmatch(v.Url)
if len(idMatch) < 2 {
zap.L().Warn("Failed to parse Steam ID from URL", zap.String("url", v.Url))
return 0, errors.New("failed to parse Steam ID from URL")
}
steamID, err := strconv.Atoi(idStr[1])
steamID, err := strconv.Atoi(idMatch[1])
if err != nil {
return 0, err
zap.L().Error("Failed to convert Steam ID to integer", zap.String("url", v.Url), zap.Error(err))
return 0, fmt.Errorf("failed to convert Steam ID from URL %s: %w", v.Url, err)
}
// Cache the result
_ = cache.Set(key, strconv.Itoa(steamID))
zap.L().Info("Found Steam ID for IGDB ID", zap.Int("IGDBID", IGDBID), zap.Int("steamID", steamID))
return steamID, nil
}
}
return 0, errors.New("not found")
zap.L().Warn("No valid Steam ID found in IGDB websites data", zap.Int("IGDBID", IGDBID))
return 0, errors.New("steam ID not found")
}

View File

@ -16,73 +16,125 @@ import (
"game-crawler/utils"
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)
// GetSteam250 fetches Steam250 game rankings from the given URL.
func GetSteam250(URL string) ([]*model.GameInfo, error) {
key := "steam250:" + url.QueryEscape(URL)
if val, ok := cache.Get(key); ok {
zap.L().Info("Cache hit for Steam250 rankings", zap.String("url", URL))
var infos []*model.GameInfo
err := json.Unmarshal([]byte(val), &infos)
if err != nil {
return nil, err
if err := json.Unmarshal([]byte(val), &infos); err != nil {
zap.L().Warn("Failed to unmarshal cached Steam250 data", zap.String("url", URL), zap.Error(err))
return nil, fmt.Errorf("failed to unmarshal cached Steam250 data for URL %s: %w", URL, err)
}
return infos, nil
}
zap.L().Info("Fetching Steam250 rankings from URL", zap.String("url", URL))
resp, err := utils.Request().Get(URL)
if err != nil {
return nil, err
zap.L().Error("Failed to fetch Steam250 rankings", zap.String("url", URL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch Steam250 rankings from URL %s: %w", URL, err)
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
return nil, err
zap.L().Error("Failed to parse Steam250 HTML document", zap.String("url", URL), zap.Error(err))
return nil, fmt.Errorf("failed to parse Steam250 HTML document for URL %s: %w", URL, err)
}
var rank []model.Steam250Item
var item model.Steam250Item
steamIDs := make([]int, 0)
doc.Find(".appline").Each(func(i int, s *goquery.Selection) {
// Extract game name
item.Name = s.Find(".title>a").First().Text()
if item.Name == "" {
zap.L().Warn("Game name not found in Steam250 rankings", zap.String("url", URL), zap.Int("index", i))
return
}
// Extract Steam ID
idStr := s.Find(".store").AttrOr("href", "")
idSlice := regexp.MustCompile(`app/(\d+)/`).FindStringSubmatch(idStr)
if len(idSlice) < 2 {
zap.L().Warn("Failed to extract Steam ID from URL", zap.String("url", idStr), zap.Int("index", i))
return
}
item.SteamID, _ = strconv.Atoi(idSlice[1])
rank = append(rank, item)
steamIDs = append(steamIDs, item.SteamID)
})
infos, err := db.GetGameInfosByPlatformIDs("steam", steamIDs)
steamID, err := strconv.Atoi(idSlice[1])
if err != nil {
return nil, err
}
if len(infos) > 10 {
return infos[:10], nil
zap.L().Warn("Failed to convert Steam ID to integer", zap.String("id", idSlice[1]), zap.Error(err))
return
}
item.SteamID = steamID
rank = append(rank, item)
steamIDs = append(steamIDs, steamID)
})
if len(steamIDs) == 0 {
zap.L().Warn("No valid Steam IDs found in Steam250 rankings", zap.String("url", URL))
return nil, fmt.Errorf("no valid Steam IDs found in Steam250 rankings for URL %s", URL)
}
// Fetch game info from the database
zap.L().Info("Fetching game info from database", zap.Ints("steamIDs", steamIDs))
infos, err := db.GetGameInfosByPlatformIDs("steam", steamIDs)
if err != nil {
zap.L().Error("Failed to fetch game info from database", zap.Ints("steamIDs", steamIDs), zap.Error(err))
return nil, fmt.Errorf("failed to fetch game info for Steam IDs %v: %w", steamIDs, err)
}
// Limit the result to 10 entries (if applicable)
if len(infos) > 10 {
infos = infos[:10]
}
// Cache the result
jsonBytes, err := json.Marshal(infos)
if err == nil {
_ = cache.SetWithExpire(key, string(jsonBytes), 12*time.Hour)
cacheErr := cache.SetWithExpire(key, string(jsonBytes), 12*time.Hour)
if cacheErr != nil {
zap.L().Warn("Failed to cache Steam250 rankings", zap.String("url", URL), zap.Error(cacheErr))
}
} else {
zap.L().Warn("Failed to marshal Steam250 rankings for caching", zap.String("url", URL), zap.Error(err))
}
return infos, nil
}
// GetSteam250Top250 returns the game infos for the Steam250 "Top 250" ranking
// by delegating to GetSteam250 with the Top 250 URL.
func GetSteam250Top250() ([]*model.GameInfo, error) {
	rankingURL := constant.Steam250Top250URL
	zap.L().Info("Fetching Steam250 Top 250 games")
	return GetSteam250(rankingURL)
}
// GetSteam250BestOfTheYear retrieves the best games of the current year from Steam250.
//
// The year is taken from the current UTC time and substituted into the
// best-of-the-year URL template before delegating to GetSteam250.
func GetSteam250BestOfTheYear() ([]*model.GameInfo, error) {
	year := time.Now().UTC().Year()
	zap.L().Info("Fetching Steam250 Best of the Year games", zap.Int("year", year))
	return GetSteam250(fmt.Sprintf(constant.Steam250BestOfTheYearURL, year))
}
// GetSteam250WeekTop50 returns the game infos for the Steam250 weekly top 50
// ranking by delegating to GetSteam250 with the week-top-50 URL.
func GetSteam250WeekTop50() ([]*model.GameInfo, error) {
	rankingURL := constant.Steam250WeekTop50URL
	zap.L().Info("Fetching Steam250 Week Top 50 games")
	return GetSteam250(rankingURL)
}
// GetSteam250MonthTop50 returns the game infos for the Steam250 monthly top 50
// ranking by delegating to GetSteam250 with the month-top-50 URL.
func GetSteam250MonthTop50() ([]*model.GameInfo, error) {
	rankingURL := constant.Steam250MonthTop50URL
	zap.L().Info("Fetching Steam250 Month Top 50 games")
	return GetSteam250(rankingURL)
}
// GetSteam250MostPlayed returns the game infos for the Steam250 "most played"
// ranking by delegating to GetSteam250 with the most-played URL.
func GetSteam250MostPlayed() ([]*model.GameInfo, error) {
	rankingURL := constant.Steam250MostPlayedURL
	zap.L().Info("Fetching Steam250 Most Played games")
	return GetSteam250(rankingURL)
}

View File

@ -17,122 +17,178 @@ import (
"go.uber.org/zap"
)
// SteamRIPCrawler defines a crawler for the SteamRIP website.
type SteamRIPCrawler struct {
	// logger receives the progress and failure messages emitted while crawling.
	logger *zap.Logger
}
// NewSteamRIPCrawler builds a SteamRIPCrawler that reports through the given
// logger.
func NewSteamRIPCrawler(logger *zap.Logger) *SteamRIPCrawler {
	crawler := new(SteamRIPCrawler)
	crawler.logger = logger
	return crawler
}
// Name reports the identifier of this crawler.
func (c *SteamRIPCrawler) Name() string {
	const crawlerName = "SteamRIPCrawler"
	return crawlerName
}
// CrawlByUrl crawls a single game page from SteamRIP by URL.
func (c *SteamRIPCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
c.logger.Info("Crawling game details", zap.String("URL", URL))
// Fetch the page content
resp, err := utils.Request().Get(URL)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
return nil, err
c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
}
// Retrieve or create game item
item, err := db.GetGameItemByUrl(URL)
if err != nil {
return nil, err
c.logger.Warn("Game item not found in database, creating a new one", zap.String("URL", URL), zap.Error(err))
item = &model.GameItem{}
}
// Extract game details
item.RawName = strings.TrimSpace(doc.Find(".entry-title").First().Text())
if item.RawName == "" {
c.logger.Warn("Game title not found", zap.String("URL", URL))
return nil, errors.New("game title not found")
}
item.Name = SteamRIPFormatter(item.RawName)
item.Url = URL
item.Author = "SteamRIP"
item.Platform = "windows"
// Extract game size
sizeRegex := regexp.MustCompile(`(?i)<li><strong>Game Size:\s?</strong>(.*?)</li>`)
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body()))
if len(sizeRegexRes) != 0 {
item.Size = strings.TrimSpace(sizeRegexRes[1])
sizeMatch := sizeRegex.FindStringSubmatch(string(resp.Body()))
if len(sizeMatch) > 1 {
item.Size = strings.TrimSpace(sizeMatch[1])
} else {
item.Size = "unknown"
}
megadbRegex := regexp.MustCompile(`(?i)(?:https?:)?(//megadb\.net/[^"]+)`)
megadbRegexRes := megadbRegex.FindStringSubmatch(string(resp.Body()))
links := []string{}
if len(megadbRegexRes) != 0 {
links = append(links, fmt.Sprintf("https:%s", megadbRegexRes[1]))
}
gofileRegex := regexp.MustCompile(`(?i)(?:https?:)?(//gofile\.io/d/[^"]+)`)
gofileRegexRes := gofileRegex.FindStringSubmatch(string(resp.Body()))
if len(gofileRegexRes) != 0 {
links = append(links, fmt.Sprintf("https:%s", gofileRegexRes[1]))
}
filecryptRegex := regexp.MustCompile(`(?i)(?:https?:)?(//filecrypt\.co/Container/[^"]+)`)
filecryptRegexRes := filecryptRegex.FindStringSubmatch(string(resp.Body()))
if len(filecryptRegexRes) != 0 {
links = append(links, fmt.Sprintf("https:%s", filecryptRegexRes[1]))
}
item.DownloadLinks = links
// Extract download links
item.DownloadLinks = c.extractDownloadLinks(string(resp.Body()))
if len(item.DownloadLinks) == 0 {
c.logger.Warn("No download links found", zap.String("URL", URL))
return nil, errors.New("failed to find download link")
}
c.logger.Info("Crawled game details successfully", zap.String("Name", item.Name), zap.String("URL", URL))
return item, nil
}
// extractDownloadLinks scans the raw page HTML for the first MegaDB, Gofile
// and Filecrypt link (in that order) and returns each one found as an
// "https:" URL. The result is nil when the page contains none of them.
func (c *SteamRIPCrawler) extractDownloadLinks(pageContent string) []string {
	// One pattern per supported host; the slice order fixes the result order.
	hostPatterns := []string{
		`(?i)(?:https?:)?(//megadb\.net/[^"]+)`,
		`(?i)(?:https?:)?(//gofile\.io/d/[^"]+)`,
		`(?i)(?:https?:)?(//filecrypt\.co/Container/[^"]+)`,
	}

	var found []string
	for _, pattern := range hostPatterns {
		groups := regexp.MustCompile(pattern).FindStringSubmatch(pageContent)
		if len(groups) < 2 {
			continue
		}
		// The capture starts at "//", so only the scheme has to be prepended.
		found = append(found, "https:"+groups[1])
	}
	return found
}
// Crawl crawls a limited number of games from the SteamRIP game list.
func (c *SteamRIPCrawler) Crawl(num int) ([]*model.GameItem, error) {
count := 0
c.logger.Info("Starting SteamRIP crawl", zap.Int("limit", num))
// Fetch the game list page
resp, err := utils.Request().Get(constant.SteamRIPGameListURL)
if err != nil {
return nil, err
c.logger.Error("Failed to fetch game list", zap.String("URL", constant.SteamRIPGameListURL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch game list: %w", err)
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
return nil, err
c.logger.Error("Failed to parse game list HTML document", zap.String("URL", constant.SteamRIPGameListURL), zap.Error(err))
return nil, fmt.Errorf("failed to parse game list HTML document: %w", err)
}
var items []*model.GameItem
var urls []string
var updateFlags []string // title
var updateFlags []string
// Extract game URLs
doc.Find(".az-list-item>a").Each(func(i int, s *goquery.Selection) {
u, exist := s.Attr("href")
if !exist {
u, exists := s.Attr("href")
if !exists {
return
}
urls = append(urls, fmt.Sprintf("%s%s", constant.SteamRIPBaseURL, u))
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Text())))
})
// Crawl games
count := 0
for i, u := range urls {
if count == num {
if num > 0 && count == num {
break
}
if db.IsSteamRIPCrawled(updateFlags[i]) {
c.logger.Info("Skipping already crawled game", zap.String("URL", u))
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
c.logger.Info("Crawling game", zap.String("URL", u))
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to crawl game", zap.String("URL", u), zap.Error(err))
continue
}
item.UpdateFlag = updateFlags[i]
if err := db.SaveGameItem(item); err != nil {
c.logger.Warn("Failed to save item", zap.Error(err))
c.logger.Warn("Failed to save game item", zap.String("URL", u), zap.Error(err))
continue
}
items = append(items, item)
count++
if err := OrganizeGameItem(item); err != nil {
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
continue
}
}
c.logger.Info("SteamRIP crawl completed", zap.Int("gamesCrawled", len(items)))
return items, nil
}
// CrawlAll runs Crawl over the SteamRIP game list with a limit of -1.
func (c *SteamRIPCrawler) CrawlAll() ([]*model.GameItem, error) {
	const noLimit = -1
	c.logger.Info("Starting full crawl of SteamRIP")
	return c.Crawl(noLimit)
}
// SteamRIPFormatter formats the game name by removing unnecessary text.
func SteamRIPFormatter(name string) string {
name = regexp.MustCompile(`\([^\)]+\)`).ReplaceAllString(name, "")
name = strings.Replace(name, "Free Download", "", -1)

View File

@ -32,145 +32,214 @@ func (c *XatabCrawler) Name() string {
return "XatabCrawler"
}
// Crawl crawls a single page of the Xatab website.
func (c *XatabCrawler) Crawl(page int) ([]*model.GameItem, error) {
requestURL := fmt.Sprintf("%s/page/%v", constant.XatabBaseURL, page)
c.logger.Info("Fetching page", zap.String("URL", requestURL))
// Fetch the page content
resp, err := utils.Request().Get(requestURL)
if err != nil {
c.logger.Error("Failed to fetch", zap.Error(err))
return nil, err
c.logger.Error("Failed to fetch page", zap.String("URL", requestURL), zap.Error(err))
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
if err != nil {
c.logger.Error("Failed to parse HTML", zap.Error(err))
return nil, err
c.logger.Error("Failed to parse HTML", zap.String("URL", requestURL), zap.Error(err))
return nil, fmt.Errorf("failed to parse HTML for page %d: %w", page, err)
}
var urls []string
var updateFlags []string // title
var updateFlags []string
// Extract game URLs and titles
doc.Find(".entry").Each(func(i int, s *goquery.Selection) {
u, exist := s.Find(".entry__title.h2 a").Attr("href")
if !exist {
u, exists := s.Find(".entry__title.h2 a").Attr("href")
if !exists {
return
}
urls = append(urls, u)
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Find(".entry__title.h2 a").Text())))
})
// Process each game URL
var res []*model.GameItem
for i, u := range urls {
if db.IsXatabCrawled(updateFlags[i]) {
c.logger.Info("Skipping already crawled game", zap.String("URL", u))
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
c.logger.Info("Crawling game", zap.String("URL", u))
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to crawl game", zap.String("URL", u), zap.Error(err))
continue
}
item.UpdateFlag = updateFlags[i]
err = db.SaveGameItem(item)
if err != nil {
c.logger.Warn("Failed to save", zap.Error(err))
if err := db.SaveGameItem(item); err != nil {
c.logger.Warn("Failed to save game item", zap.String("URL", u), zap.Error(err))
continue
}
res = append(res, item)
if err := OrganizeGameItem(item); err != nil {
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
continue
}
}
c.logger.Info("Crawled page successfully", zap.Int("gamesCrawled", len(res)), zap.Int("page", page))
return res, nil
}
// CrawlByUrl crawls a single game page from Xatab by URL.
//
// The game page is downloaded and parsed, the stored game item for the URL is
// loaded (a fresh item is used when the lookup fails), and the item is filled
// with the page title and formatted name. The torrent file behind the
// "#download" link is then fetched with the game page as Referer and
// converted to a magnet link, which becomes the item's only download link
// together with the reported size.
//
// An error is returned when a request fails, the HTML cannot be parsed, the
// title or the download URL is missing, or the torrent cannot be converted.
// The item is not saved here.
func (c *XatabCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	c.logger.Info("Crawling game details", zap.String("URL", URL))

	// Fetch the game page.
	resp, err := utils.Request().Get(URL)
	if err != nil {
		c.logger.Error("Failed to fetch game page", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch game page %s: %w", URL, err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse game HTML", zap.String("URL", URL), zap.Error(err))
		return nil, fmt.Errorf("failed to parse game HTML for URL %s: %w", URL, err)
	}

	// Retrieve or create the game item.
	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		c.logger.Warn("Failed to fetch game item from database, creating new", zap.String("URL", URL), zap.Error(err))
		item = &model.GameItem{}
	}

	item.Url = URL
	item.RawName = strings.TrimSpace(doc.Find(".inner-entry__title").First().Text())
	if item.RawName == "" {
		c.logger.Warn("Game title not found", zap.String("URL", URL))
		return nil, errors.New("game title not found")
	}
	item.Name = XatabFormatter(item.RawName)
	item.Author = "Xatab"
	item.Platform = "windows"

	// Extract the download URL.
	downloadURL := doc.Find("#download>a").First().AttrOr("href", "")
	if downloadURL == "" {
		c.logger.Warn("Download URL not found", zap.String("URL", URL))
		return nil, errors.New("failed to find download URL")
	}

	// Fetch the torrent file and convert it to a magnet link.
	resp, err = utils.Request().SetHeaders(map[string]string{"Referer": URL}).Get(downloadURL)
	if err != nil {
		c.logger.Error("Failed to fetch download link", zap.String("URL", downloadURL), zap.Error(err))
		return nil, fmt.Errorf("failed to fetch download link %s: %w", downloadURL, err)
	}
	magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
	if err != nil {
		c.logger.Error("Failed to convert torrent to magnet", zap.String("URL", downloadURL), zap.Error(err))
		return nil, fmt.Errorf("failed to convert torrent to magnet: %w", err)
	}
	item.Size = size
	item.DownloadLinks = []string{magnet}

	c.logger.Info("Crawled game details successfully", zap.String("Name", item.Name), zap.String("URL", URL))
	return item, nil
}
// CrawlMulti crawls multiple pages from Xatab.
//
// Pages beyond the site's current total page count are skipped, and a page
// that fails to crawl is logged and skipped so the remaining pages are still
// processed. An error is returned only when the total page count cannot be
// determined.
func (c *XatabCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	c.logger.Info("Starting multi-page crawl", zap.Ints("pages", pages))

	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		c.logger.Error("Failed to get total page number", zap.Error(err))
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	var res []*model.GameItem
	for _, page := range pages {
		if page > totalPageNum {
			c.logger.Warn("Skipping page out of range", zap.Int("page", page), zap.Int("totalPages", totalPageNum))
			continue
		}
		items, err := c.Crawl(page)
		if err != nil {
			c.logger.Warn("Failed to crawl page", zap.Int("page", page), zap.Error(err))
			continue
		}
		res = append(res, items...)
	}

	c.logger.Info("Multi-page crawl completed", zap.Int("gamesCrawled", len(res)))
	return res, nil
}
// CrawlAll crawls all pages from Xatab.
//
// Pages 1 through the site's total page count are crawled in order. A page
// that fails to crawl is logged and skipped. An error is returned only when
// the total page count cannot be determined.
func (c *XatabCrawler) CrawlAll() ([]*model.GameItem, error) {
	c.logger.Info("Starting full crawl of Xatab")

	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		c.logger.Error("Failed to get total page number", zap.Error(err))
		return nil, fmt.Errorf("failed to get total page number: %w", err)
	}

	var res []*model.GameItem
	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			c.logger.Warn("Failed to crawl page", zap.Int("page", i), zap.Error(err))
			continue
		}
		res = append(res, items...)
	}

	c.logger.Info("Full crawl completed", zap.Int("gamesCrawled", len(res)))
	return res, nil
}
// GetTotalPageNum retrieves the total number of pages from Xatab.
//
// The base page is fetched and the text of the last ".pagination>a" link is
// parsed as the page count. An error is returned when the page cannot be
// fetched or parsed, or when that text is not an integer.
func (c *XatabCrawler) GetTotalPageNum() (int, error) {
	c.logger.Info("Fetching total page number")

	resp, err := utils.Request().Get(constant.XatabBaseURL)
	if err != nil {
		c.logger.Error("Failed to fetch base URL", zap.String("URL", constant.XatabBaseURL), zap.Error(err))
		return 0, fmt.Errorf("failed to fetch base URL: %w", err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse base HTML", zap.String("URL", constant.XatabBaseURL), zap.Error(err))
		return 0, fmt.Errorf("failed to parse base HTML: %w", err)
	}

	pageStr := doc.Find(".pagination>a").Last().Text()
	totalPageNum, err := strconv.Atoi(pageStr)
	if err != nil {
		c.logger.Error("Failed to parse total page number", zap.String("pageStr", pageStr), zap.Error(err))
		return 0, fmt.Errorf("failed to parse total page number: %w", err)
	}

	c.logger.Info("Fetched total page number", zap.Int("totalPages", totalPageNum))
	return totalPageNum, nil
}
// xatabRegexps holds the patterns whose matches XatabFormatter removes from a
// game name.
var xatabRegexps = []*regexp.Regexp{
	// A case-insensitive " PC" suffix at the very end of the name.
	regexp.MustCompile(`(?i)\sPC$`),
}
// XatabFormatter formats the game name by removing unnecessary text.
func XatabFormatter(name string) string {
reg1 := regexp.MustCompile(`(?i)v(er)?\s?(\.)?\d+(\.\d+)*`)
if index := reg1.FindIndex([]byte(name)); index != nil {
@ -189,10 +258,13 @@ func XatabFormatter(name string) string {
name = name[:index]
}
name = strings.TrimSpace(name)
// Remove specific patterns
for _, re := range xatabRegexps {
name = re.ReplaceAllString(name, "")
}
// Handle names separated by "/"
if index := strings.Index(name, "/"); index != -1 {
names := strings.Split(name, "/")
longestLength := 0
@ -208,3 +280,7 @@ func XatabFormatter(name string) string {
return strings.TrimSpace(name)
}
// xatabRegexps holds the patterns whose matches XatabFormatter removes from a
// game name.
var xatabRegexps = []*regexp.Regexp{
	// A case-insensitive " PC" suffix at the very end of the name.
	regexp.MustCompile(`(?i)\sPC$`),
}

View File

@ -396,22 +396,22 @@ func GetGameInfosByPlatformIDs(platform string, ids []int) ([]*model.GameInfo, e
return games, nil
}
func HasGameItemOrganized(id primitive.ObjectID) (bool, []*model.GameInfo) {
func HasGameItemOrganized(id primitive.ObjectID) bool {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
filter := bson.M{"games": id}
var res []*model.GameInfo
cursor, err := GameInfoCollection.Find(ctx, filter)
if err != nil {
return false, nil
return false
}
if err = cursor.All(ctx, &res); err != nil {
return false, nil
return false
}
if len(res) == 0 {
return false, nil
return false
}
return true, res
return true
}
func GetUnorganizedGameItems(num int) ([]*model.GameItem, error) {

View File

@ -12,16 +12,14 @@ import (
)
// Logger is the logger built from the combined core in init.
var Logger *zap.Logger

// ConsoleLogger is the logger built from the console core in init.
var ConsoleLogger *zap.Logger

// FileLogger is the logger built from the file core in init.
var FileLogger *zap.Logger

// TaskLogger is the logger built from the task core in init.
var TaskLogger *zap.Logger
// init builds the package loggers from the cores returned by buildZapCore at
// the configured log level, and installs Logger as zap's global logger so
// that zap.L() calls write through it.
func init() {
	// buildZapCore is called exactly once; every declared logger gets its core.
	fileCore, consoleCore, combinedCore, taskCore := buildZapCore(getZapLogLevel(config.Config.LogLevel))
	FileLogger = zap.New(fileCore, zap.AddCaller())
	ConsoleLogger = zap.New(consoleCore, zap.AddCaller())
	Logger = zap.New(combinedCore, zap.AddCaller())
	TaskLogger = zap.New(taskCore, zap.AddCaller())
	zap.ReplaceGlobals(Logger)
}
func buildZapCore(logLevel zapcore.Level) (fileCore zapcore.Core, consoleCore zapcore.Core, combinedCore zapcore.Core, taskCore zapcore.Core) {

View File

@ -24,7 +24,6 @@ type HealthCheckResponse struct {
GameItem int64 `json:"game_num"`
GameInfo int64 `json:"game_info_num"`
Unorganized int64 `json:"unorganized_game_num"`
MegaAvaliable bool `json:"mega_avaliable"`
}
// HealthCheckHandler performs a health check of the service.
@ -57,6 +56,5 @@ func HealthCheckHandler(c *gin.Context) {
GameItem: downloadCount,
GameInfo: infoCount,
Unorganized: unorganizedCount,
MegaAvaliable: config.Config.MegaAvaliable,
})
}