u
This commit is contained in:
parent
2553a35a4a
commit
29dd7fc058
@ -3,6 +3,7 @@ package cmd
|
||||
import (
|
||||
"game-crawler/crawler"
|
||||
"game-crawler/log"
|
||||
|
||||
"go.uber.org/zap"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
@ -13,7 +14,7 @@ var supplementCmd = &cobra.Command{
|
||||
Long: "Supplement platform id to game info",
|
||||
Short: "Supplement platform id to game info",
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
err := crawler.SupplementPlatformIDToGameInfo(log.Logger)
|
||||
err := crawler.SupplementPlatformIDToGameInfo()
|
||||
if err != nil {
|
||||
log.Logger.Error("Error supplementing platform id to game info", zap.Error(err))
|
||||
}
|
||||
|
@ -1,10 +1,8 @@
|
||||
package config
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"os"
|
||||
"os/exec"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -18,9 +16,14 @@ type config struct {
|
||||
Redis redis `json:"redis"`
|
||||
OnlineFix onlinefix `json:"online_fix"`
|
||||
Twitch twitch `json:"twitch"`
|
||||
Rutracker rutracker `json:"rutracker"`
|
||||
Webhooks webhooks `json:"webhooks"`
|
||||
CFClearanceScraper cfClearanceScraper `json:"cf_clearance_scraper"`
|
||||
MegaAvaliable bool
|
||||
}
|
||||
|
||||
type rutracker struct {
|
||||
User string `env:"RUTRACKER_USER" json:"user"`
|
||||
Password string `env:"RUTRACKER_PASSWORD" json:"password"`
|
||||
}
|
||||
|
||||
type cfClearanceScraper struct {
|
||||
@ -78,7 +81,6 @@ func init() {
|
||||
User: "root",
|
||||
Password: "password",
|
||||
},
|
||||
MegaAvaliable: TestMega(),
|
||||
Server: server{
|
||||
AutoCrawlCron: "0 */3 * * *",
|
||||
},
|
||||
@ -147,11 +149,3 @@ func loadEnvVariables(cfg interface{}) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMega() bool {
|
||||
cmd := exec.Command("mega-get", "--help")
|
||||
var out bytes.Buffer
|
||||
cmd.Stdout = &out
|
||||
err := cmd.Run()
|
||||
return err == nil
|
||||
}
|
||||
|
@ -31,4 +31,8 @@ const (
|
||||
SteamRIPGameListURL = "https://steamrip.com/games-list-page/"
|
||||
RepackInfoURL = "https://repack.info/page/%v/"
|
||||
GnarlyURL = "https://rentry.org/gnarly_repacks"
|
||||
RutrackerTopicURL = "https://rutracker.org/forum/%s"
|
||||
RutrackerURL = "https://rutracker.org/forum/index.php"
|
||||
RutrackerLoginURL = "https://rutracker.org/forum/login.php"
|
||||
RutrackerAuthorURL = "https://rutracker.org/forum/tracker.php?rid=%s&start=%v"
|
||||
)
|
||||
|
135
crawler/1337x.go
135
crawler/1337x.go
@ -17,16 +17,16 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type Formatter func(string) string
|
||||
type FormatterFunc func(string) string
|
||||
|
||||
type s1337xCrawler struct {
|
||||
source string
|
||||
platform string
|
||||
formatter Formatter
|
||||
formatter FormatterFunc
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
func New1337xCrawler(source string, platform string, formatter Formatter, logger *zap.Logger) *s1337xCrawler {
|
||||
func New1337xCrawler(source string, platform string, formatter FormatterFunc, logger *zap.Logger) *s1337xCrawler {
|
||||
return &s1337xCrawler{
|
||||
source: source,
|
||||
formatter: formatter,
|
||||
@ -36,69 +36,101 @@ func New1337xCrawler(source string, platform string, formatter Formatter, logger
|
||||
}
|
||||
|
||||
func (c *s1337xCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
var doc *goquery.Document
|
||||
c.logger.Info("Starting Crawl", zap.Int("Page", page), zap.String("Source", c.source))
|
||||
requestUrl := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, page)
|
||||
resp, err := utils.Request().Get(requestUrl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch page", zap.String("URL", requestUrl), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch page %d for source %s: %w", page, c.source, err)
|
||||
}
|
||||
doc, err = goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.String("URL", requestUrl), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
|
||||
}
|
||||
|
||||
trSelection := doc.Find("tbody>tr")
|
||||
var urls []string
|
||||
trSelection.Each(func(i int, trNode *goquery.Selection) {
|
||||
nameSelection := trNode.Find(".name").First()
|
||||
if aNode := nameSelection.Find("a").Eq(1); aNode.Length() > 0 {
|
||||
url, _ := aNode.Attr("href")
|
||||
urls = append(urls, url)
|
||||
url, exists := aNode.Attr("href")
|
||||
if exists {
|
||||
urls = append(urls, url)
|
||||
} else {
|
||||
c.logger.Warn("Failed to find URL in row", zap.Int("RowIndex", i))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
var res []*model.GameItem
|
||||
for _, u := range urls {
|
||||
u = fmt.Sprintf("%s%s", constant.C1337xBaseURL, u)
|
||||
if db.IsGameCrawledByURL(u) {
|
||||
fullURL := fmt.Sprintf("%s%s", constant.C1337xBaseURL, u)
|
||||
if db.IsGameCrawledByURL(fullURL) {
|
||||
c.logger.Info("Skipping already crawled URL", zap.String("URL", fullURL))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
|
||||
c.logger.Info("Crawling URL", zap.String("URL", fullURL))
|
||||
item, err := c.CrawlByUrl(fullURL)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl URL", zap.String("URL", fullURL), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
err = db.SaveGameItem(item)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to save game item to database", zap.String("URL", fullURL), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, item)
|
||||
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.String("URL", fullURL), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *s1337xCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
|
||||
resp, err := utils.Request().Get(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
|
||||
}
|
||||
var item = &model.GameItem{}
|
||||
item.Url = URL
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item := &model.GameItem{
|
||||
Url: URL,
|
||||
}
|
||||
|
||||
selection := doc.Find(".torrent-detail-page ul.list>li")
|
||||
info := make(map[string]string)
|
||||
selection.Each(func(i int, item *goquery.Selection) {
|
||||
info[strings.TrimSpace(item.Find("strong").Text())] = strings.TrimSpace(item.Find("span").Text())
|
||||
key := strings.TrimSpace(item.Find("strong").Text())
|
||||
value := strings.TrimSpace(item.Find("span").Text())
|
||||
info[key] = value
|
||||
c.logger.Debug("Extracted info", zap.String("Key", key), zap.String("Value", value))
|
||||
})
|
||||
|
||||
magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
|
||||
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body()))
|
||||
if len(magnetRegexRes) == 0 {
|
||||
c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to find magnet link on URL %s", URL)
|
||||
}
|
||||
|
||||
item.Size = info["Total size"]
|
||||
item.RawName = doc.Find("title").Text()
|
||||
item.RawName = strings.Replace(item.RawName, "Download ", "", 1)
|
||||
@ -107,63 +139,88 @@ func (c *s1337xCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
item.DownloadLinks = []string{magnetRegexRes[0]}
|
||||
item.Author = strings.Replace(c.source, "-torrents", "", -1)
|
||||
item.Platform = c.platform
|
||||
|
||||
c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (c *s1337xCrawler) CrawlMulti(pages []int) (res []*model.GameItem, err error) {
|
||||
var items []*model.GameItem
|
||||
c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages), zap.String("Source", c.source))
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
|
||||
for _, page := range pages {
|
||||
if page > totalPageNum {
|
||||
c.logger.Warn("Page exceeds total page number", zap.Int("Page", page), zap.Int("TotalPages", totalPageNum))
|
||||
continue
|
||||
}
|
||||
items, err = c.Crawl(page)
|
||||
res = append(res, items...)
|
||||
|
||||
items, err := c.Crawl(page)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl page", zap.Int("Page", page), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
|
||||
c.logger.Info("Finished CrawlMulti", zap.Int("TotalItems", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *s1337xCrawler) CrawlAll() (res []*model.GameItem, err error) {
|
||||
c.logger.Info("Starting CrawlAll", zap.String("Source", c.source))
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
var items []*model.GameItem
|
||||
|
||||
for i := 1; i <= totalPageNum; i++ {
|
||||
items, err = c.Crawl(i)
|
||||
res = append(res, items...)
|
||||
items, err := c.Crawl(i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl page", zap.Int("Page", i), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
|
||||
c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *s1337xCrawler) GetTotalPageNum() (int, error) {
|
||||
var doc *goquery.Document
|
||||
|
||||
c.logger.Info("Fetching total page number", zap.String("Source", c.source))
|
||||
requestUrl := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, 1)
|
||||
resp, err := utils.Request().Get(requestUrl)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to fetch first page for total page number", zap.String("URL", requestUrl), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
|
||||
}
|
||||
doc, _ = goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to parse HTML document for total page number", zap.String("URL", requestUrl), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
|
||||
}
|
||||
|
||||
selection := doc.Find(".last")
|
||||
pageStr, exist := selection.Find("a").Attr("href")
|
||||
if !exist {
|
||||
return 0, errors.New("total page num not found")
|
||||
pageStr, exists := selection.Find("a").Attr("href")
|
||||
if !exists {
|
||||
c.logger.Error("Failed to find total page number in pagination", zap.String("URL", requestUrl))
|
||||
return 0, errors.New("total page number not found in pagination")
|
||||
}
|
||||
|
||||
pageStr = strings.ReplaceAll(pageStr, c.source, "")
|
||||
pageStr = strings.ReplaceAll(pageStr, "/", "")
|
||||
totalPageNum, err := strconv.Atoi(pageStr)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
|
||||
}
|
||||
|
||||
c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
|
||||
return totalPageNum, nil
|
||||
}
|
||||
|
@ -3,7 +3,6 @@ package crawler
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -32,142 +31,181 @@ func (c *ChovkaCrawler) Name() string {
|
||||
}
|
||||
|
||||
func (c *ChovkaCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
|
||||
resp, err := utils.Request().Get(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document from URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item.Url = URL
|
||||
item.RawName = doc.Find(".inner-entry__title").First().Text()
|
||||
item.Name = ChovkaFormatter(item.RawName)
|
||||
item.Author = "Chovka"
|
||||
item.Platform = "windows"
|
||||
|
||||
downloadURL := doc.Find(".download-torrent").AttrOr("href", "")
|
||||
if downloadURL == "" {
|
||||
return nil, errors.New("failed to find download URL")
|
||||
c.logger.Error("Download URL not found", zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to find download URL on page %s", URL)
|
||||
}
|
||||
|
||||
resp, err = utils.Request().SetHeader("Referer", URL).Get(downloadURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch download URL", zap.String("downloadURL", downloadURL), zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch download URL %s for page %s: %w", downloadURL, URL, err)
|
||||
}
|
||||
|
||||
magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to convert torrent to magnet", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to convert torrent to magnet for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item.Size = size
|
||||
item.DownloadLinks = []string{magnet}
|
||||
c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (c *ChovkaCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting Crawl", zap.Int("Page", page))
|
||||
resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, page))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch page", zap.Int("Page", page), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.Int("Page", page), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
|
||||
}
|
||||
|
||||
var urls []string
|
||||
var updateFlags []string
|
||||
doc.Find(".entry").Each(func(i int, s *goquery.Selection) {
|
||||
u, exist := s.Find(".entry__title.h2 a").Attr("href")
|
||||
if !exist {
|
||||
c.logger.Warn("Entry does not contain a valid URL", zap.Int("Index", i))
|
||||
return
|
||||
}
|
||||
urls = append(urls, u)
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Find(".entry__title.h2 a").Text())))
|
||||
})
|
||||
|
||||
var res []*model.GameItem
|
||||
for i, u := range urls {
|
||||
if db.IsChovkaCrawled(updateFlags[i]) {
|
||||
c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
|
||||
c.logger.Info("Crawling URL", zap.String("URL", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
item.UpdateFlag = updateFlags[i]
|
||||
if err := db.SaveGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, item)
|
||||
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *ChovkaCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages))
|
||||
var res []*model.GameItem
|
||||
for _, page := range pages {
|
||||
items, err := c.Crawl(page)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl multiple pages", zap.Int("Page", page), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
c.logger.Info("Finished CrawlMulti", zap.Int("TotalPages", len(pages)), zap.Int("TotalItems", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *ChovkaCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlAll")
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
|
||||
var res []*model.GameItem
|
||||
for i := 1; i <= totalPageNum; i++ {
|
||||
items, err := c.Crawl(i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl all pages", zap.Int("Page", i), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
|
||||
c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *ChovkaCrawler) GetTotalPageNum() (int, error) {
|
||||
c.logger.Info("Fetching total page number")
|
||||
resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, 1))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to fetch first page for total page number", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to parse HTML document for total page number", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
|
||||
}
|
||||
|
||||
pageStr := doc.Find(".pagination>a").Last().Text()
|
||||
totalPageNum, err := strconv.Atoi(pageStr)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
|
||||
}
|
||||
|
||||
c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
|
||||
return totalPageNum, nil
|
||||
}
|
||||
|
||||
func ChovkaFormatter(name string) string {
|
||||
idx := strings.Index(name, "| RePack")
|
||||
if idx != -1 {
|
||||
name = name[:idx]
|
||||
}
|
||||
idx = strings.Index(name, "| GOG")
|
||||
if idx != -1 {
|
||||
name = name[:idx]
|
||||
}
|
||||
idx = strings.Index(name, "| Portable")
|
||||
if idx != -1 {
|
||||
name = name[:idx]
|
||||
cutoffs := []string{"| RePack", "| GOG", "| Portable"}
|
||||
for _, cutoff := range cutoffs {
|
||||
if idx := strings.Index(name, cutoff); idx != -1 {
|
||||
name = name[:idx]
|
||||
}
|
||||
}
|
||||
return strings.TrimSpace(name)
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"game-crawler/config"
|
||||
"game-crawler/model"
|
||||
|
||||
"go.uber.org/zap"
|
||||
@ -28,13 +29,14 @@ func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
|
||||
"fitgirl": NewFitGirlCrawler(logger),
|
||||
"dodi": NewDODICrawler(logger),
|
||||
"kaoskrew": NewKaOsKrewCrawler(logger),
|
||||
"freegog": NewFreeGOGCrawler(logger),
|
||||
"freegog": NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
|
||||
"xatab": NewXatabCrawler(logger),
|
||||
"onlinefix": NewOnlineFixCrawler(logger),
|
||||
"onlinefix": NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
|
||||
"steamrip": NewSteamRIPCrawler(logger),
|
||||
"chovka": NewChovkaCrawler(logger),
|
||||
"goggames": NewGOGGamesCrawler(logger),
|
||||
"goggames": NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
|
||||
"johncena141": NewJohncena141Crawler(logger),
|
||||
"omggods": NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
|
||||
// "gnarly": NewGnarlyCrawler(logger),
|
||||
}
|
||||
return ret
|
||||
|
@ -3,7 +3,6 @@ package crawler
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
@ -33,37 +32,51 @@ func (c *FitGirlCrawler) Name() string {
|
||||
}
|
||||
|
||||
func (c *FitGirlCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
|
||||
resp, err := utils.Request().Get(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
titleElem := doc.Find("h3").First().Find("strong")
|
||||
if titleElem.Length() == 0 {
|
||||
return nil, errors.New("failed to find title")
|
||||
c.logger.Error("Failed to find title", zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to find title on page %s", URL)
|
||||
}
|
||||
|
||||
rawTitle := titleElem.Text()
|
||||
titleElem.Children().Remove()
|
||||
title := strings.TrimSpace(titleElem.Text())
|
||||
|
||||
sizeRegex := regexp.MustCompile(`Repack Size: <strong>(.*?)</strong>`)
|
||||
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body()))
|
||||
if len(sizeRegexRes) == 0 {
|
||||
return nil, errors.New("failed to find size")
|
||||
c.logger.Error("Failed to find size", zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to find size information on page %s", URL)
|
||||
}
|
||||
size := sizeRegexRes[1]
|
||||
|
||||
magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
|
||||
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body()))
|
||||
if len(magnetRegexRes) == 0 {
|
||||
return nil, errors.New("failed to find magnet")
|
||||
c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to find magnet link on page %s", URL)
|
||||
}
|
||||
magnet := magnetRegexRes[0]
|
||||
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item.Name = strings.TrimSpace(title)
|
||||
item.RawName = rawTitle
|
||||
item.Url = URL
|
||||
@ -71,96 +84,130 @@ func (c *FitGirlCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
item.Author = "FitGirl"
|
||||
item.DownloadLinks = []string{magnet}
|
||||
item.Platform = "windows"
|
||||
|
||||
c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (c *FitGirlCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting Crawl", zap.Int("Page", page))
|
||||
resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, page))
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to fetch", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch page", zap.Int("Page", page), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to parse HTML", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.Int("Page", page), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
|
||||
}
|
||||
|
||||
var urls []string
|
||||
var updateFlags []string //link+date
|
||||
var updateFlags []string // link + date (encoded)
|
||||
doc.Find("article").Each(func(i int, s *goquery.Selection) {
|
||||
u, exist1 := s.Find(".entry-title>a").First().Attr("href")
|
||||
d, exist2 := s.Find("time").First().Attr("datetime")
|
||||
if exist1 && exist2 {
|
||||
urls = append(urls, u)
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", u, d))))
|
||||
} else {
|
||||
c.logger.Warn("Failed to extract URL or datetime", zap.Int("Index", i), zap.Bool("HasURL", exist1), zap.Bool("HasDate", exist2))
|
||||
}
|
||||
})
|
||||
|
||||
var res []*model.GameItem
|
||||
for i, u := range urls {
|
||||
if db.IsFitgirlCrawled(updateFlags[i]) {
|
||||
c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
|
||||
c.logger.Info("Crawling URL", zap.String("URL", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
item.UpdateFlag = updateFlags[i]
|
||||
err = db.SaveGameItem(item)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to save", zap.Error(err))
|
||||
c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, item)
|
||||
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *FitGirlCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages))
|
||||
var res []*model.GameItem
|
||||
for _, page := range pages {
|
||||
items, err := c.Crawl(page)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl page", zap.Int("Page", page), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
|
||||
c.logger.Info("Finished CrawlMulti", zap.Int("TotalPages", len(pages)), zap.Int("TotalItems", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *FitGirlCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
var res []*model.GameItem
|
||||
c.logger.Info("Starting CrawlAll")
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
|
||||
var res []*model.GameItem
|
||||
for i := 1; i <= totalPageNum; i++ {
|
||||
items, err := c.Crawl(i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl page", zap.Int("Page", i), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
|
||||
c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *FitGirlCrawler) GetTotalPageNum() (int, error) {
|
||||
c.logger.Info("Fetching total page number")
|
||||
resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, 1))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to fetch first page for total page number", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
|
||||
}
|
||||
page, err := strconv.Atoi(doc.Find(".page-numbers.dots").First().Next().Text())
|
||||
|
||||
pageStr := doc.Find(".page-numbers.dots").First().Next().Text()
|
||||
totalPageNum, err := strconv.Atoi(pageStr)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
|
||||
}
|
||||
return page, nil
|
||||
|
||||
c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
|
||||
return totalPageNum, nil
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"html"
|
||||
"net/http"
|
||||
"regexp"
|
||||
@ -12,7 +12,6 @@ import (
|
||||
"time"
|
||||
|
||||
"game-crawler/cache"
|
||||
"game-crawler/config"
|
||||
"game-crawler/constant"
|
||||
"game-crawler/db"
|
||||
"game-crawler/model"
|
||||
@ -23,31 +22,37 @@ import (
|
||||
)
|
||||
|
||||
type FreeGOGCrawler struct {
|
||||
logger *zap.Logger
|
||||
cfClearanceUrl string
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler {
|
||||
func NewFreeGOGCrawler(cfClearanceUrl string, logger *zap.Logger) *FreeGOGCrawler {
|
||||
return &FreeGOGCrawler{
|
||||
logger: logger,
|
||||
cfClearanceUrl: cfClearanceUrl,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (c *FreeGOGCrawler) getSession() (*ccs.Session, error) {
|
||||
c.logger.Info("Fetching session for FreeGOGCrawler")
|
||||
cacheKey := "freegog_waf_session"
|
||||
var session ccs.Session
|
||||
var err error
|
||||
if val, exist := cache.Get("freegog_waf_session"); exist {
|
||||
if val, exist := cache.Get(cacheKey); exist {
|
||||
err := json.Unmarshal([]byte(val), &session)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to unmarshal cached session", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to unmarshal cached session: %w", err)
|
||||
}
|
||||
} else {
|
||||
session, err = ccs.WAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
|
||||
var err error
|
||||
session, err = ccs.WAFSession(c.cfClearanceUrl, constant.FreeGOGListURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to create WAF session", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to create WAF session: %w", err)
|
||||
}
|
||||
jsonBytes, err := json.Marshal(session)
|
||||
if err == nil {
|
||||
_ = cache.SetWithExpire("freegog_waf_session", jsonBytes, 1*time.Hour)
|
||||
_ = cache.SetWithExpire(cacheKey, jsonBytes, 1*time.Hour)
|
||||
}
|
||||
}
|
||||
return &session, nil
|
||||
@ -58,106 +63,144 @@ func (c *FreeGOGCrawler) Name() string {
|
||||
}
|
||||
|
||||
func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting Crawl", zap.Int("Num", num))
|
||||
count := 0
|
||||
session, err := c.getSession()
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to create session", zap.Error(err))
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("failed to get session: %w", err)
|
||||
}
|
||||
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, constant.FreeGOGListURL, *session, nil)
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to fetch", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch FreeGOG list page", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch FreeGOG list page: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp.Body)))
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to parse HTML", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document: %w", err)
|
||||
}
|
||||
|
||||
var urls []string
|
||||
var updateFlags []string //rawName+link
|
||||
var updateFlags []string // RawName+Link
|
||||
doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) {
|
||||
urls = append(urls, s.AttrOr("href", ""))
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Text()+s.AttrOr("href", ""))))
|
||||
url := s.AttrOr("href", "")
|
||||
rawName := s.Text()
|
||||
if url != "" && rawName != "" {
|
||||
urls = append(urls, url)
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(rawName+url)))
|
||||
} else {
|
||||
c.logger.Warn("Invalid URL or raw name found in item", zap.Int("Index", i), zap.String("URL", url), zap.String("RawName", rawName))
|
||||
}
|
||||
})
|
||||
|
||||
var res []*model.GameItem
|
||||
for i, u := range urls {
|
||||
if count == num {
|
||||
c.logger.Info("Reached target number of items", zap.Int("Count", count))
|
||||
break
|
||||
}
|
||||
if db.IsFreeGOGCrawled(updateFlags[i]) {
|
||||
c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
|
||||
c.logger.Info("Crawling URL", zap.String("URL", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
item.UpdateFlag = updateFlags[i]
|
||||
err = db.SaveGameItem(item)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to save", zap.Error(err))
|
||||
c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, item)
|
||||
count++
|
||||
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
c.logger.Info("Finished Crawl", zap.Int("TotalItems", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *FreeGOGCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
|
||||
session, err := c.getSession()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get session", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get session: %w", err)
|
||||
}
|
||||
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch game page", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch game page %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item.Url = URL
|
||||
|
||||
// Extract raw title
|
||||
rawTitleRegex := regexp.MustCompile(`(?i)<h1 class="entry-title">(.*?)</h1>`)
|
||||
rawTitleRegexRes := rawTitleRegex.FindStringSubmatch(string(resp.Body))
|
||||
rawName := ""
|
||||
if len(rawTitleRegexRes) > 1 {
|
||||
rawName = html.UnescapeString(rawTitleRegexRes[1])
|
||||
rawName := html.UnescapeString(rawTitleRegexRes[1])
|
||||
item.RawName = strings.Replace(rawName, "–", "-", -1)
|
||||
} else {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to find raw title", zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to find raw title on page %s", URL)
|
||||
}
|
||||
|
||||
item.Name = FreeGOGFormatter(item.RawName)
|
||||
|
||||
// Extract size
|
||||
sizeRegex := regexp.MustCompile(`(?i)>Size:\s?(.*?)<`)
|
||||
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body))
|
||||
if len(sizeRegexRes) > 1 {
|
||||
item.Size = sizeRegexRes[1]
|
||||
} else {
|
||||
c.logger.Warn("Failed to find game size", zap.String("URL", URL))
|
||||
}
|
||||
|
||||
// Extract magnet link
|
||||
magnetRegex := regexp.MustCompile(`<a class="download-btn" href="https://gdl.freegogpcgames.xyz/download-gen\.php\?url=(.*?)"`)
|
||||
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body))
|
||||
if len(magnetRegexRes) > 1 {
|
||||
magnet, err := base64.StdEncoding.DecodeString(magnetRegexRes[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to decode magnet link", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to decode magnet link on page %s: %w", URL, err)
|
||||
}
|
||||
item.DownloadLinks = []string{string(magnet)}
|
||||
} else {
|
||||
return nil, errors.New("failed to find magnet link")
|
||||
c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to find magnet link on page %s", URL)
|
||||
}
|
||||
|
||||
item.Author = "FreeGOG"
|
||||
item.Platform = "windows"
|
||||
|
||||
c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (c *FreeGOGCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting CrawlAll")
|
||||
return c.Crawl(-1)
|
||||
}
|
||||
|
||||
@ -165,6 +208,7 @@ var freeGOGRegexps = []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?i)\(.*\)`),
|
||||
}
|
||||
|
||||
// FreeGOGFormatter formats the raw game name into a clean title.
|
||||
func FreeGOGFormatter(name string) string {
|
||||
for _, re := range freeGOGRegexps {
|
||||
name = re.ReplaceAllString(name, "")
|
||||
|
127
crawler/game.go
127
crawler/game.go
@ -16,6 +16,7 @@ import (
|
||||
"go.mongodb.org/mongo-driver/mongo"
|
||||
)
|
||||
|
||||
// GenerateGameInfo generates game info based on the platform and ID.
|
||||
func GenerateGameInfo(platform string, id int) (*model.GameInfo, error) {
|
||||
switch platform {
|
||||
case "steam":
|
||||
@ -23,45 +24,54 @@ func GenerateGameInfo(platform string, id int) (*model.GameInfo, error) {
|
||||
case "igdb":
|
||||
return GenerateIGDBGameInfo(id)
|
||||
default:
|
||||
return nil, errors.New("invalid ID type")
|
||||
return nil, errors.New("invalid platform type")
|
||||
}
|
||||
}
|
||||
|
||||
// OrganizeGameItem Organize game item and save game info to database
|
||||
// OrganizeGameItem organizes the given game item and saves its associated game info to the database.
|
||||
func OrganizeGameItem(game *model.GameItem) error {
|
||||
hasOriganized, _ := db.HasGameItemOrganized(game.ID)
|
||||
if hasOriganized {
|
||||
hasOrganized := db.HasGameItemOrganized(game.ID)
|
||||
if hasOrganized {
|
||||
return nil
|
||||
}
|
||||
|
||||
item, err := OrganizeGameItemWithIGDB(game)
|
||||
if err == nil {
|
||||
if item.SteamID == 0 {
|
||||
// get steam id from igdb
|
||||
steamID, err := GetSteamIDByIGDBID(item.IGDBID)
|
||||
if err == nil {
|
||||
item.SteamID = steamID
|
||||
}
|
||||
}
|
||||
err = db.SaveGameInfo(item)
|
||||
if err != nil {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Attempt to supplement SteamID if missing
|
||||
if item.SteamID == 0 {
|
||||
steamID, err := GetSteamIDByIGDBID(item.IGDBID)
|
||||
if err == nil {
|
||||
item.SteamID = steamID
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
|
||||
// Save the organized game info to the database
|
||||
if err := db.SaveGameInfo(item); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func AddGameInfoManually(gameID primitive.ObjectID, platform string, plateformID int) (*model.GameInfo, error) {
|
||||
info, err := GenerateGameInfo(platform, plateformID)
|
||||
// AddGameInfoManually manually adds a game info entry to the database.
|
||||
func AddGameInfoManually(gameID primitive.ObjectID, platform string, platformID int) (*model.GameInfo, error) {
|
||||
info, err := GenerateGameInfo(platform, platformID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
info.GameIDs = append(info.GameIDs, gameID)
|
||||
info.GameIDs = utils.Unique(info.GameIDs)
|
||||
return info, db.SaveGameInfo(info)
|
||||
|
||||
info.GameIDs = utils.Unique(append(info.GameIDs, gameID))
|
||||
if err := db.SaveGameInfo(info); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// OrganizeGameItemManually organizes a game item manually based on the platform and platform ID.
|
||||
func OrganizeGameItemManually(gameID primitive.ObjectID, platform string, platformID int) (*model.GameInfo, error) {
|
||||
info, err := db.GetGameInfoByPlatformID(platform, platformID)
|
||||
if err != nil {
|
||||
@ -74,27 +84,30 @@ func OrganizeGameItemManually(gameID primitive.ObjectID, platform string, platfo
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
info.GameIDs = append(info.GameIDs, gameID)
|
||||
info.GameIDs = utils.Unique(info.GameIDs)
|
||||
err = db.SaveGameInfo(info)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if platform == "igdb" {
|
||||
|
||||
info.GameIDs = utils.Unique(append(info.GameIDs, gameID))
|
||||
|
||||
// Supplement missing platform IDs
|
||||
if platform == "igdb" && info.SteamID == 0 {
|
||||
steamID, err := GetSteamIDByIGDBID(platformID)
|
||||
if err == nil {
|
||||
info.SteamID = steamID
|
||||
}
|
||||
}
|
||||
if platform == "steam" {
|
||||
if platform == "steam" && info.IGDBID == 0 {
|
||||
igdbID, err := GetIGDBIDBySteamAppID(platformID)
|
||||
if err == nil {
|
||||
info.IGDBID = igdbID
|
||||
}
|
||||
}
|
||||
|
||||
if err := db.SaveGameInfo(info); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// FormatName formats a raw game name into a clean and consistent format.
|
||||
func FormatName(name string) string {
|
||||
name = regexp.MustCompile(`(?i)[\w’'-]+\s(Edition|Vision|Collection|Bundle|Pack|Deluxe)`).ReplaceAllString(name, " ")
|
||||
name = regexp.MustCompile(`(?i)GOTY`).ReplaceAllString(name, "")
|
||||
@ -108,61 +121,85 @@ func FormatName(name string) string {
|
||||
return name
|
||||
}
|
||||
|
||||
func SupplementPlatformIDToGameInfo(logger *zap.Logger) error {
|
||||
// SupplementPlatformIDToGameInfo supplements missing platform IDs (SteamID or IGDBID) for all game info entries.
|
||||
func SupplementPlatformIDToGameInfo() error {
|
||||
logger := zap.L()
|
||||
logger.Info("Starting to supplement missing platform IDs")
|
||||
infos, err := db.GetAllGameInfos()
|
||||
if err != nil {
|
||||
logger.Error("Failed to fetch game infos", zap.Error(err))
|
||||
return err
|
||||
}
|
||||
|
||||
for _, info := range infos {
|
||||
changed := false
|
||||
|
||||
// Supplement SteamID using IGDBID
|
||||
if info.IGDBID != 0 && info.SteamID == 0 {
|
||||
steamID, err := GetSteamIDByIGDBID(info.IGDBID)
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
if err != nil {
|
||||
continue
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
if err == nil {
|
||||
info.SteamID = steamID
|
||||
changed = true
|
||||
} else {
|
||||
logger.Warn("Failed to get SteamID from IGDB", zap.Int("IGDBID", info.IGDBID), zap.Error(err))
|
||||
}
|
||||
info.SteamID = steamID
|
||||
changed = true
|
||||
}
|
||||
|
||||
// Supplement IGDBID using SteamID
|
||||
if info.SteamID != 0 && info.IGDBID == 0 {
|
||||
igdbID, err := GetIGDBIDBySteamAppID(info.SteamID)
|
||||
time.Sleep(time.Millisecond * 100)
|
||||
if err != nil {
|
||||
continue
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
if err == nil {
|
||||
info.IGDBID = igdbID
|
||||
changed = true
|
||||
} else {
|
||||
logger.Warn("Failed to get IGDBID from SteamID", zap.Int("SteamID", info.SteamID), zap.Error(err))
|
||||
}
|
||||
info.IGDBID = igdbID
|
||||
changed = true
|
||||
}
|
||||
|
||||
if changed {
|
||||
logger.Info("supp", zap.String("name", info.Name), zap.Int("igdb", info.IGDBID), zap.Int("steam", info.SteamID))
|
||||
_ = db.SaveGameInfo(info)
|
||||
logger.Info("Supplemented platform IDs", zap.String("Name", info.Name), zap.Int("IGDBID", info.IGDBID), zap.Int("SteamID", info.SteamID))
|
||||
if err := db.SaveGameInfo(info); err != nil {
|
||||
logger.Error("Failed to save updated game info", zap.String("Name", info.Name), zap.Error(err))
|
||||
}
|
||||
} else {
|
||||
logger.Info("skip", zap.String("name", info.Name), zap.Int("igdb", info.IGDBID), zap.Int("steam", info.SteamID))
|
||||
logger.Info("No changes needed", zap.String("Name", info.Name), zap.Int("IGDBID", info.IGDBID), zap.Int("SteamID", info.SteamID))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateGameInfo updates outdated game info entries and returns a channel to monitor updates.
|
||||
func UpdateGameInfo(num int) (chan *model.GameInfo, error) {
|
||||
logger := zap.L()
|
||||
logger.Info("Starting to update outdated game info", zap.Int("Num", num))
|
||||
infos, err := db.GetOutdatedGameInfos(num)
|
||||
if err != nil {
|
||||
logger.Error("Failed to fetch outdated game infos", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
updateChan := make(chan *model.GameInfo)
|
||||
|
||||
go func() {
|
||||
defer close(updateChan)
|
||||
for _, info := range infos {
|
||||
if info.IGDBID != 0 {
|
||||
newInfo, err := GenerateIGDBGameInfo(info.IGDBID)
|
||||
if err != nil {
|
||||
logger.Warn("Failed to generate IGDB game info", zap.Int("IGDBID", info.IGDBID), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
db.MergeGameInfo(info, newInfo)
|
||||
err = db.SaveGameInfo(newInfo)
|
||||
if err != nil {
|
||||
if err := db.SaveGameInfo(newInfo); err != nil {
|
||||
logger.Error("Failed to save updated game info", zap.String("Name", newInfo.Name), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
updateChan <- newInfo
|
||||
logger.Info("Updated game info", zap.String("Name", newInfo.Name), zap.Int("IGDBID", newInfo.IGDBID))
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
@ -8,7 +8,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"game-crawler/config"
|
||||
"game-crawler/constant"
|
||||
"game-crawler/db"
|
||||
"game-crawler/model"
|
||||
@ -19,12 +18,14 @@ import (
|
||||
)
|
||||
|
||||
type GOGGamesCrawler struct {
|
||||
logger *zap.Logger
|
||||
cfClearanceUrl string
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
func NewGOGGamesCrawler(logger *zap.Logger) *GOGGamesCrawler {
|
||||
func NewGOGGamesCrawler(cfClearanceUrl string, logger *zap.Logger) *GOGGamesCrawler {
|
||||
return &GOGGamesCrawler{
|
||||
logger: logger,
|
||||
cfClearanceUrl: cfClearanceUrl,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
@ -34,29 +35,36 @@ func (c *GOGGamesCrawler) Name() string {
|
||||
|
||||
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
if !strings.HasPrefix(URL, "https://www.gog-games.to/game/") {
|
||||
return nil, fmt.Errorf("invalid url")
|
||||
err := fmt.Errorf("invalid URL: %s", URL)
|
||||
c.logger.Error("Invalid URL", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
_, slug := path.Split(URL)
|
||||
|
||||
_, slug := path.Split(URL)
|
||||
apiUrl := fmt.Sprintf(constant.GOGGamesGameAPIURL, slug)
|
||||
|
||||
token, err := ccs.TurnstileToken(config.Config.CFClearanceScraper.Url, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
|
||||
token, err := ccs.TurnstileToken(c.cfClearanceUrl, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get Turnstile token", zap.Error(err), zap.String("apiUrl", apiUrl))
|
||||
return nil, fmt.Errorf("failed to get Turnstile token for URL %s: %w", apiUrl, err)
|
||||
}
|
||||
|
||||
resp, err := utils.Request().SetHeader("cf-turnstile-response", token).Get(apiUrl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch data from API", zap.Error(err), zap.String("apiUrl", apiUrl))
|
||||
return nil, fmt.Errorf("failed to fetch API data for URL %s: %w", apiUrl, err)
|
||||
}
|
||||
|
||||
data := gameResult{}
|
||||
err = json.Unmarshal(resp.Body(), &data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to unmarshal API response", zap.Error(err), zap.String("apiUrl", apiUrl))
|
||||
return nil, fmt.Errorf("failed to parse API response for URL %s: %w", apiUrl, err)
|
||||
}
|
||||
|
||||
name := data.Title
|
||||
|
||||
// find download links
|
||||
// Find download links
|
||||
fileHosters := []string{
|
||||
"gofile",
|
||||
"fileditch",
|
||||
@ -80,19 +88,28 @@ func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
}
|
||||
|
||||
if len(links) == 0 {
|
||||
return nil, fmt.Errorf("no download link found")
|
||||
err := fmt.Errorf("no download links found for URL %s", URL)
|
||||
c.logger.Warn("No download links found", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Calculate total size
|
||||
size := uint64(0)
|
||||
for _, file := range data.Files.Game {
|
||||
s, _ := utils.SizeToBytes(file.Size)
|
||||
s, parseErr := utils.SizeToBytes(file.Size)
|
||||
if parseErr != nil {
|
||||
c.logger.Warn("Failed to parse file size", zap.Error(parseErr), zap.String("fileSize", file.Size))
|
||||
}
|
||||
size += s
|
||||
}
|
||||
|
||||
// Retrieve or create game item
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to retrieve game item from database", zap.Error(err), zap.String("URL", URL))
|
||||
return nil, fmt.Errorf("failed to get game item for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
item.Name = name
|
||||
item.RawName = name
|
||||
item.DownloadLinks = links
|
||||
@ -100,44 +117,54 @@ func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
item.Size = utils.BytesToSize(size)
|
||||
item.Author = "GOGGames"
|
||||
item.Platform = "windows"
|
||||
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, page))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch page", zap.Error(err), zap.Int("page", page))
|
||||
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
|
||||
}
|
||||
|
||||
data := searchResult{}
|
||||
err = json.Unmarshal(resp.Body(), &data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse page response", zap.Error(err), zap.Int("page", page))
|
||||
return nil, fmt.Errorf("failed to parse page %d: %w", page, err)
|
||||
}
|
||||
|
||||
urls := make([]string, 0)
|
||||
var updateFlags []string //link+date
|
||||
var updateFlags []string // link+date
|
||||
for _, item := range data.Data {
|
||||
urls = append(urls, fmt.Sprintf(constant.GOGGamesPageURL, item.Slug))
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", item.GogURL, item.LastUpdate))))
|
||||
}
|
||||
|
||||
res := make([]*model.GameItem, 0)
|
||||
for i, u := range urls {
|
||||
if db.IsGameCrawled(updateFlags[i], "GOGGames") {
|
||||
c.logger.Info("Game already crawled", zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
|
||||
c.logger.Info("Crawling game", zap.String("URL", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl game", zap.Error(err), zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
|
||||
item.UpdateFlag = updateFlags[i]
|
||||
if err := db.SaveGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to save game item to database", zap.Error(err), zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, item)
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.Error(err), zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
}
|
||||
@ -149,7 +176,8 @@ func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
||||
for _, page := range pages {
|
||||
items, err := c.Crawl(page)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl multiple pages", zap.Error(err), zap.Int("page", page))
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
@ -159,13 +187,17 @@ func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
||||
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
|
||||
var res []*model.GameItem
|
||||
for i := 1; i <= totalPageNum; i++ {
|
||||
c.logger.Info("Crawling page", zap.Int("page", i))
|
||||
items, err := c.Crawl(i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Warn("Failed to crawl page", zap.Error(err), zap.Int("page", i))
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
@ -175,13 +207,17 @@ func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
|
||||
resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, 1))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to fetch first page", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch first page: %w", err)
|
||||
}
|
||||
|
||||
data := searchResult{}
|
||||
err = json.Unmarshal(resp.Body(), &data)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to parse first page response", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse first page response: %w", err)
|
||||
}
|
||||
|
||||
return data.Meta.LastPage, nil
|
||||
}
|
||||
|
||||
|
372
crawler/igdb.go
372
crawler/igdb.go
@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/go-resty/resty/v2"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type twitchToken struct {
|
||||
@ -33,9 +34,13 @@ func (t *twitchToken) getToken() (string, error) {
|
||||
}
|
||||
token, expires, err := loginTwitch()
|
||||
if err != nil {
|
||||
zap.L().Error("failed to login to Twitch", zap.Error(err))
|
||||
return "", fmt.Errorf("failed to login twitch: %w", err)
|
||||
}
|
||||
_ = cache.SetWithExpire("twitch_token", token, expires)
|
||||
err = cache.SetWithExpire("twitch_token", token, expires)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to set Twitch token in cache", zap.Error(err))
|
||||
}
|
||||
return token, nil
|
||||
}
|
||||
|
||||
@ -46,10 +51,13 @@ func loginTwitch() (string, time.Duration, error) {
|
||||
params.Add("client_secret", config.Config.Twitch.ClientSecret)
|
||||
params.Add("grant_type", "client_credentials")
|
||||
baseURL.RawQuery = params.Encode()
|
||||
|
||||
resp, err := utils.Request().SetHeader("User-Agent", "").Post(baseURL.String())
|
||||
if err != nil {
|
||||
zap.L().Error("failed to make Twitch login request", zap.String("url", baseURL.String()), zap.Error(err))
|
||||
return "", 0, err
|
||||
}
|
||||
|
||||
data := struct {
|
||||
AccessToken string `json:"access_token"`
|
||||
ExpiresIn int64 `json:"expires_in"`
|
||||
@ -57,6 +65,7 @@ func loginTwitch() (string, time.Duration, error) {
|
||||
}{}
|
||||
err = json.Unmarshal(resp.Body(), &data)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to parse Twitch login response", zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return "", 0, err
|
||||
}
|
||||
return data.AccessToken, time.Second * time.Duration(data.ExpiresIn), nil
|
||||
@ -65,68 +74,82 @@ func loginTwitch() (string, time.Duration, error) {
|
||||
func igdbRequest(URL string, dataBody any) (*resty.Response, error) {
|
||||
t, err := token.getToken()
|
||||
if err != nil {
|
||||
zap.L().Error("failed to get Twitch token", zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp, err := utils.Request().SetBody(dataBody).SetHeaders(map[string]string{
|
||||
"Client-ID": config.Config.Twitch.ClientID,
|
||||
"Authorization": "Bearer " + t,
|
||||
"User-Agent": "",
|
||||
"Content-Type": "text/plain",
|
||||
}).Post(URL)
|
||||
|
||||
if err != nil {
|
||||
zap.L().Error("failed to make IGDB request", zap.String("url", URL), zap.Any("dataBody", dataBody), zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func getIGDBID(name string) (int, error) {
|
||||
var err error
|
||||
resp, err := igdbRequest(constant.IGDBSearchURL, fmt.Sprintf(`search "%s"; fields *; limit 50; where game.platforms = [6] | game.platforms=[130] | game.platforms=[384] | game.platforms=[163];`, name))
|
||||
if err != nil {
|
||||
zap.L().Error("failed to search IGDB ID", zap.String("name", name), zap.Error(err))
|
||||
return 0, err
|
||||
}
|
||||
|
||||
if string(resp.Body()) == "[]" {
|
||||
resp, err = igdbRequest(constant.IGDBSearchURL, fmt.Sprintf(`search "%s"; fields *; limit 50;`, name))
|
||||
if err != nil {
|
||||
zap.L().Error("failed to fallback search IGDB ID", zap.String("name", name), zap.Error(err))
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
var data model.IGDBSearches
|
||||
if err = json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
zap.L().Error("failed to unmarshal IGDB search response", zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to unmarshal: %w, %s", err, debug.Stack())
|
||||
}
|
||||
|
||||
if len(data) == 1 {
|
||||
return GetIGDBAppParent(data[0].Game)
|
||||
}
|
||||
maxSimilairty := 0.0
|
||||
maxSimilairtyIndex := 0
|
||||
|
||||
maxSimilarity := 0.0
|
||||
maxSimilarityIndex := 0
|
||||
for i, item := range data {
|
||||
if strings.EqualFold(item.Name, name) {
|
||||
return item.Game, nil
|
||||
}
|
||||
if sim := utils.Similarity(name, item.Name); sim >= 0.8 {
|
||||
if sim > maxSimilairty {
|
||||
maxSimilairty = sim
|
||||
maxSimilairtyIndex = i
|
||||
if sim > maxSimilarity {
|
||||
maxSimilarity = sim
|
||||
maxSimilarityIndex = i
|
||||
}
|
||||
}
|
||||
|
||||
detail, err := GetIGDBAppDetail(item.Game)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to get IGDB app detail", zap.Int("gameID", item.Game), zap.Error(err))
|
||||
return 0, err
|
||||
}
|
||||
for _, alternativeNames := range detail.AlternativeNames {
|
||||
if sim := utils.Similarity(alternativeNames.Name, name); sim >= 0.8 {
|
||||
if sim > maxSimilairty {
|
||||
maxSimilairty = sim
|
||||
maxSimilairtyIndex = i
|
||||
for _, altName := range detail.AlternativeNames {
|
||||
if sim := utils.Similarity(altName.Name, name); sim >= 0.8 {
|
||||
if sim > maxSimilarity {
|
||||
maxSimilarity = sim
|
||||
maxSimilarityIndex = i
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if maxSimilairty >= 0.8 {
|
||||
return GetIGDBAppParent(data[maxSimilairtyIndex].Game)
|
||||
|
||||
if maxSimilarity >= 0.8 {
|
||||
return GetIGDBAppParent(data[maxSimilarityIndex].Game)
|
||||
}
|
||||
|
||||
zap.L().Warn("no IGDB ID found", zap.String("name", name))
|
||||
return 0, fmt.Errorf("IGDB ID not found: %s", name)
|
||||
}
|
||||
|
||||
@ -212,12 +235,14 @@ func GetIGDBAppParent(id int) (int, error) {
|
||||
if exist {
|
||||
id, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to parse cached IGDB parent ID", zap.String("cacheKey", key), zap.Error(err))
|
||||
return 0, err
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
detail, err := GetIGDBAppDetail(id)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to fetch IGDB app detail for parent", zap.Int("gameID", id), zap.Error(err))
|
||||
return 0, err
|
||||
}
|
||||
hasParent := false
|
||||
@ -225,6 +250,7 @@ func GetIGDBAppParent(id int) (int, error) {
|
||||
hasParent = true
|
||||
detail, err = GetIGDBAppDetail(detail.VersionParent)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to fetch IGDB version parent", zap.Int("parentID", detail.VersionParent), zap.Error(err))
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
@ -232,39 +258,49 @@ func GetIGDBAppParent(id int) (int, error) {
|
||||
return detail.ID, nil
|
||||
}
|
||||
|
||||
_ = cache.Set(key, id)
|
||||
err = cache.Set(key, id)
|
||||
if err != nil {
|
||||
zap.L().Error("failed to cache IGDB parent ID", zap.String("cacheKey", key), zap.Error(err))
|
||||
}
|
||||
|
||||
return id, nil
|
||||
}
|
||||
|
||||
// GetIGDBID returns the IGDB ID of the game, try directly IGDB api first, then steam search
|
||||
// GetIGDBID retrieves the IGDB ID of a game by its name using IGDB API and fallback mechanisms.
|
||||
func GetIGDBID(name string) (int, error) {
|
||||
key := fmt.Sprintf("igdb_id:%s", name)
|
||||
val, exist := cache.Get(key)
|
||||
if exist {
|
||||
if val, exist := cache.Get(key); exist {
|
||||
zap.L().Info("cache hit for IGDB ID", zap.String("name", name), zap.String("cacheKey", key))
|
||||
return strconv.Atoi(val)
|
||||
}
|
||||
name1 := name
|
||||
name2 := FormatName(name)
|
||||
names := []string{name1}
|
||||
if name1 != name2 {
|
||||
names = append(names, name2)
|
||||
}
|
||||
for _, name := range names {
|
||||
id, err := getIGDBID(name)
|
||||
|
||||
// Normalize game name and try multiple variations
|
||||
normalizedNames := []string{name, FormatName(name)}
|
||||
for _, n := range normalizedNames {
|
||||
id, err := getIGDBID(n)
|
||||
if err == nil {
|
||||
_ = cache.Set(key, id)
|
||||
cacheErr := cache.Set(key, id)
|
||||
if cacheErr != nil {
|
||||
zap.L().Warn("failed to cache IGDB ID", zap.String("name", n), zap.Error(cacheErr))
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
for _, name := range names {
|
||||
id, err := getIGDBIDBySteamSearch(name)
|
||||
|
||||
// Fallback to Steam search if IGDB search fails
|
||||
for _, n := range normalizedNames {
|
||||
id, err := getIGDBIDBySteamSearch(n)
|
||||
if err == nil {
|
||||
_ = cache.Set(key, id)
|
||||
cacheErr := cache.Set(key, id)
|
||||
if cacheErr != nil {
|
||||
zap.L().Warn("failed to cache IGDB ID after Steam search", zap.String("name", n), zap.Error(cacheErr))
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
}
|
||||
return 0, errors.New("IGDB ID not found")
|
||||
|
||||
zap.L().Warn("failed to retrieve IGDB ID", zap.String("name", name))
|
||||
return 0, fmt.Errorf("IGDB ID not found for '%s'", name)
|
||||
}
|
||||
|
||||
func GetIGDBAppDetail(id int) (*model.IGDBGameDetail, error) {
|
||||
@ -273,267 +309,267 @@ func GetIGDBAppDetail(id int) (*model.IGDBGameDetail, error) {
|
||||
if exist {
|
||||
var data model.IGDBGameDetail
|
||||
if err := json.Unmarshal([]byte(val), &data); err != nil {
|
||||
zap.L().Error("failed to parse cached IGDB game detail", zap.String("cacheKey", key), zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
return &data, nil
|
||||
}
|
||||
var err error
|
||||
resp, err := igdbRequest(constant.IGDBGameURL, fmt.Sprintf(`where id = %v;fields *,alternative_names.*,language_supports.*,screenshots.*,cover.*,involved_companies.*,involved_companies.*,game_engines.*,game_modes.*,genres.*,player_perspectives.*,release_dates.*,videos.*,websites.*,platforms.*,themes.*,collections.*;`, id))
|
||||
|
||||
resp, err := igdbRequest(constant.IGDBGameURL, fmt.Sprintf(`where id = %v; fields *,alternative_names.*,language_supports.*,screenshots.*,cover.*,involved_companies.*,game_engines.*,game_modes.*,genres.*,player_perspectives.*,release_dates.*,videos.*,websites.*,platforms.*,themes.*,collections.*;`, id))
|
||||
if err != nil {
|
||||
zap.L().Error("failed to fetch IGDB game detail", zap.Int("gameID", id), zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var data model.IGDBGameDetails
|
||||
if err = json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
zap.L().Error("failed to unmarshal IGDB game detail response", zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(data) == 0 {
|
||||
zap.L().Warn("IGDB game not found", zap.Int("gameID", id))
|
||||
return nil, errors.New("IGDB App not found")
|
||||
}
|
||||
|
||||
if data[0].Name == "" {
|
||||
return GetIGDBAppDetail(id)
|
||||
}
|
||||
|
||||
jsonBytes, err := json.Marshal(data[0])
|
||||
if err == nil {
|
||||
_ = cache.Set(key, string(jsonBytes))
|
||||
err = cache.Set(key, string(jsonBytes))
|
||||
if err != nil {
|
||||
zap.L().Error("failed to cache IGDB game detail", zap.String("cacheKey", key), zap.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
return data[0], nil
|
||||
}
|
||||
|
||||
// GetIGDBCompany retrieves the company name from IGDB by its ID.
|
||||
func GetIGDBCompany(id int) (string, error) {
|
||||
key := fmt.Sprintf("igdb_companies:%v", id)
|
||||
val, exist := cache.Get(key)
|
||||
if exist {
|
||||
key := fmt.Sprintf("igdb_companies:%d", id)
|
||||
if val, exist := cache.Get(key); exist {
|
||||
zap.L().Info("cache hit for IGDB company", zap.Int("companyID", id), zap.String("cacheKey", key))
|
||||
return val, nil
|
||||
}
|
||||
var err error
|
||||
resp, err := igdbRequest(constant.IGDBCompaniesURL, fmt.Sprintf(`where id=%v; fields *;`, id))
|
||||
|
||||
query := fmt.Sprintf(`where id=%d; fields *;`, id)
|
||||
resp, err := igdbRequest(constant.IGDBCompaniesURL, query)
|
||||
if err != nil {
|
||||
return "", err
|
||||
zap.L().Error("failed to fetch IGDB company", zap.Int("companyID", id), zap.Error(err))
|
||||
return "", fmt.Errorf("failed to fetch IGDB company for ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
var data model.IGDBCompanies
|
||||
if err = json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
return "", err
|
||||
zap.L().Error("failed to unmarshal IGDB company response", zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return "", fmt.Errorf("failed to unmarshal IGDB companies response: %w", err)
|
||||
}
|
||||
|
||||
if len(data) == 0 {
|
||||
return "", errors.New("not found")
|
||||
}
|
||||
if data[0].Name == "" {
|
||||
return GetIGDBCompany(id)
|
||||
zap.L().Warn("no company found in IGDB for ID", zap.Int("companyID", id))
|
||||
return "", errors.New("company not found")
|
||||
}
|
||||
|
||||
_ = cache.Set(key, data[0].Name)
|
||||
|
||||
return data[0].Name, nil
|
||||
companyName := data[0].Name
|
||||
cacheErr := cache.Set(key, companyName)
|
||||
if cacheErr != nil {
|
||||
zap.L().Warn("failed to cache IGDB company name", zap.Int("companyID", id), zap.Error(cacheErr))
|
||||
}
|
||||
return companyName, nil
|
||||
}
|
||||
|
||||
// GenerateIGDBGameInfo generates detailed game information based on an IGDB ID.
|
||||
func GenerateIGDBGameInfo(id int) (*model.GameInfo, error) {
|
||||
item := &model.GameInfo{}
|
||||
detail, err := GetIGDBAppDetail(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("failed to fetch IGDB app detail", zap.Int("igdbID", id), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch IGDB app detail for ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
gameInfo := &model.GameInfo{
|
||||
IGDBID: id,
|
||||
Name: detail.Name,
|
||||
Description: detail.Summary,
|
||||
Cover: strings.Replace(detail.Cover.URL, "t_thumb", "t_original", 1),
|
||||
}
|
||||
item.IGDBID = id
|
||||
item.Name = detail.Name
|
||||
item.Description = detail.Summary
|
||||
item.Cover = strings.Replace(detail.Cover.URL, "t_thumb", "t_original", 1)
|
||||
|
||||
for _, lang := range detail.LanguageSupports {
|
||||
if lang.LanguageSupportType == 3 {
|
||||
l, exist := constant.IGDBLanguages[lang.Language]
|
||||
if !exist {
|
||||
continue
|
||||
if l, exist := constant.IGDBLanguages[lang.Language]; exist {
|
||||
gameInfo.Languages = append(gameInfo.Languages, l.Name)
|
||||
}
|
||||
item.Languages = append(item.Languages, l.Name)
|
||||
}
|
||||
}
|
||||
|
||||
for _, screenshot := range detail.Screenshots {
|
||||
item.Screenshots = append(item.Screenshots, strings.Replace(screenshot.URL, "t_thumb", "t_original", 1))
|
||||
gameInfo.Screenshots = append(gameInfo.Screenshots, strings.Replace(screenshot.URL, "t_thumb", "t_original", 1))
|
||||
}
|
||||
|
||||
for _, alias := range detail.AlternativeNames {
|
||||
item.Aliases = append(item.Aliases, alias.Name)
|
||||
gameInfo.Aliases = append(gameInfo.Aliases, alias.Name)
|
||||
}
|
||||
|
||||
for _, company := range detail.InvolvedCompanies {
|
||||
if company.Developer || company.Publisher {
|
||||
companyName, err := GetIGDBCompany(company.Company)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if company.Developer {
|
||||
item.Developers = append(item.Developers, companyName)
|
||||
}
|
||||
if company.Publisher {
|
||||
item.Publishers = append(item.Publishers, companyName)
|
||||
}
|
||||
companyName, err := GetIGDBCompany(company.Company)
|
||||
if err != nil {
|
||||
zap.L().Warn("failed to fetch company name", zap.Int("companyID", company.Company), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
if company.Developer {
|
||||
gameInfo.Developers = append(gameInfo.Developers, companyName)
|
||||
}
|
||||
if company.Publisher {
|
||||
gameInfo.Publishers = append(gameInfo.Publishers, companyName)
|
||||
}
|
||||
}
|
||||
|
||||
item.GameEngines = make([]string, 0)
|
||||
for _, engine := range detail.GameEngines {
|
||||
item.GameEngines = append(item.GameEngines, engine.Name)
|
||||
}
|
||||
|
||||
item.GameModes = make([]string, 0)
|
||||
for _, mode := range detail.GameModes {
|
||||
item.GameModes = append(item.GameModes, mode.Name)
|
||||
gameInfo.GameModes = append(gameInfo.GameModes, mode.Name)
|
||||
}
|
||||
|
||||
item.Genres = make([]string, 0)
|
||||
for _, genre := range detail.Genres {
|
||||
item.Genres = append(item.Genres, genre.Name)
|
||||
gameInfo.Genres = append(gameInfo.Genres, genre.Name)
|
||||
}
|
||||
|
||||
item.Themes = make([]string, 0)
|
||||
for _, theme := range detail.Themes {
|
||||
item.Themes = append(item.Themes, theme.Name)
|
||||
}
|
||||
|
||||
item.Platforms = make([]string, 0)
|
||||
for _, platform := range detail.Platforms {
|
||||
item.Platforms = append(item.Platforms, platform.Name)
|
||||
gameInfo.Platforms = append(gameInfo.Platforms, platform.Name)
|
||||
}
|
||||
|
||||
item.PlayerPerspectives = make([]string, 0)
|
||||
for _, perspective := range detail.PlayerPerspectives {
|
||||
item.PlayerPerspectives = append(item.PlayerPerspectives, perspective.Name)
|
||||
}
|
||||
|
||||
item.SimilarGames = detail.SimilarGames
|
||||
|
||||
item.Videos = make([]string, 0)
|
||||
for _, video := range detail.Videos {
|
||||
item.Videos = append(item.Videos, fmt.Sprintf("https://www.youtube.com/watch?v=%s", video.VideoID))
|
||||
}
|
||||
|
||||
item.Websites = make([]string, 0)
|
||||
for _, website := range detail.Websites {
|
||||
item.Websites = append(item.Websites, website.URL)
|
||||
}
|
||||
|
||||
item.Collections = make([]model.GameCollection, 0)
|
||||
|
||||
for _, collection := range detail.Collections {
|
||||
item.Collections = append(item.Collections, model.GameCollection{
|
||||
Games: collection.Games,
|
||||
Name: collection.Name,
|
||||
})
|
||||
}
|
||||
|
||||
return item, nil
|
||||
return gameInfo, nil
|
||||
}
|
||||
|
||||
// OrganizeGameItemWithIGDB Will add GameItem.ID to the newly added GameInfo.GameIDs
|
||||
// OrganizeGameItemWithIGDB links a game item with its corresponding IGDB game information.
|
||||
func OrganizeGameItemWithIGDB(game *model.GameItem) (*model.GameInfo, error) {
|
||||
id, err := GetIGDBID(game.Name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("failed to get IGDB ID for game", zap.String("gameName", game.Name), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get IGDB ID for game '%s': %w", game.Name, err)
|
||||
}
|
||||
d, err := db.GetGameInfoByPlatformID("igdb", id)
|
||||
|
||||
info, err := db.GetGameInfoByPlatformID("igdb", id)
|
||||
if err == nil {
|
||||
d.GameIDs = append(d.GameIDs, game.ID)
|
||||
d.GameIDs = utils.Unique(d.GameIDs)
|
||||
return d, nil
|
||||
info.GameIDs = utils.Unique(append(info.GameIDs, game.ID))
|
||||
return info, nil
|
||||
}
|
||||
info, err := GenerateGameInfo("igdb", id)
|
||||
|
||||
info, err = GenerateIGDBGameInfo(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("failed to generate IGDB game info", zap.Int("igdbID", id), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to generate IGDB game info for ID %d: %w", id, err)
|
||||
}
|
||||
info.GameIDs = append(info.GameIDs, game.ID)
|
||||
info.GameIDs = utils.Unique(info.GameIDs)
|
||||
|
||||
info.GameIDs = utils.Unique(append(info.GameIDs, game.ID))
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// GetIGDBIDBySteamAppID retrieves the IGDB ID of a game using its Steam App ID.
|
||||
func GetIGDBIDBySteamAppID(id int) (int, error) {
|
||||
key := fmt.Sprintf("igdb_id_by_steam_app_id:%v", id)
|
||||
val, exist := cache.Get(key)
|
||||
if exist {
|
||||
key := fmt.Sprintf("igdb_id_by_steam_app_id:%d", id)
|
||||
if val, exist := cache.Get(key); exist {
|
||||
zap.L().Info("cache hit for IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.String("cacheKey", key))
|
||||
return strconv.Atoi(val)
|
||||
}
|
||||
var err error
|
||||
resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where url = "https://store.steampowered.com/app/%v" | url = "https://store.steampowered.com/app/%v/"*; fields *; limit 500;`, id, id))
|
||||
|
||||
query := fmt.Sprintf(`where url = "https://store.steampowered.com/app/%d" | url = "https://store.steampowered.com/app/%d/"; fields game;`, id, id)
|
||||
resp, err := igdbRequest(constant.IGDBWebsitesURL, query)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
zap.L().Error("failed to fetch IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch IGDB ID by Steam App ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
var data []struct {
|
||||
Game int `json:"game"`
|
||||
}
|
||||
if err = json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(data) == 0 {
|
||||
return 0, errors.New("not found")
|
||||
}
|
||||
if data[0].Game == 0 {
|
||||
return GetIGDBIDBySteamAppID(id)
|
||||
zap.L().Error("failed to unmarshal IGDB response", zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to unmarshal IGDB response for Steam App ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
_ = cache.Set(key, strconv.Itoa(data[0].Game))
|
||||
if len(data) == 0 || data[0].Game == 0 {
|
||||
zap.L().Warn("no matching IGDB game found for Steam App ID", zap.Int("steamAppID", id))
|
||||
return 0, errors.New("no matching IGDB game found")
|
||||
}
|
||||
|
||||
return GetIGDBAppParent(data[0].Game)
|
||||
igdbID := data[0].Game
|
||||
cacheErr := cache.Set(key, strconv.Itoa(igdbID))
|
||||
if cacheErr != nil {
|
||||
zap.L().Warn("failed to cache IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.Error(cacheErr))
|
||||
}
|
||||
|
||||
return GetIGDBAppParent(igdbID)
|
||||
}
|
||||
|
||||
// GetIGDBIDBySteamBundleID retrieves the IGDB ID of a game using its Steam Bundle ID.
|
||||
func GetIGDBIDBySteamBundleID(id int) (int, error) {
|
||||
key := fmt.Sprintf("igdb_id_by_steam_bundle_id:%v", id)
|
||||
val, exist := cache.Get(key)
|
||||
if exist {
|
||||
key := fmt.Sprintf("igdb_id_by_steam_bundle_id:%d", id)
|
||||
if val, exist := cache.Get(key); exist {
|
||||
zap.L().Info("cache hit for IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.String("cacheKey", key))
|
||||
return strconv.Atoi(val)
|
||||
}
|
||||
|
||||
var err error
|
||||
resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where url = "https://store.steampowered.com/bundle/%v" | url = "https://store.steampowered.com/bundle/%v/"*; fields *; limit 500;`, id, id))
|
||||
query := fmt.Sprintf(`where url = "https://store.steampowered.com/bundle/%d" | url = "https://store.steampowered.com/bundle/%d/"; fields game;`, id, id)
|
||||
resp, err := igdbRequest(constant.IGDBWebsitesURL, query)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
zap.L().Error("failed to fetch IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch IGDB ID by Steam Bundle ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
var data []struct {
|
||||
Game int `json:"game"`
|
||||
}
|
||||
if err = json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
if len(data) == 0 {
|
||||
return 0, errors.New("not found")
|
||||
}
|
||||
if data[0].Game == 0 {
|
||||
return GetIGDBIDBySteamBundleID(id)
|
||||
zap.L().Error("failed to unmarshal IGDB response", zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to unmarshal IGDB response for Steam Bundle ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
_ = cache.Set(key, strconv.Itoa(data[0].Game))
|
||||
if len(data) == 0 || data[0].Game == 0 {
|
||||
zap.L().Warn("no matching IGDB game found for Steam Bundle ID", zap.Int("steamBundleID", id))
|
||||
return 0, errors.New("no matching IGDB game found")
|
||||
}
|
||||
|
||||
return GetIGDBAppParent(data[0].Game)
|
||||
igdbID := data[0].Game
|
||||
cacheErr := cache.Set(key, strconv.Itoa(igdbID))
|
||||
if cacheErr != nil {
|
||||
zap.L().Warn("failed to cache IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.Error(cacheErr))
|
||||
}
|
||||
|
||||
return GetIGDBAppParent(igdbID)
|
||||
}
|
||||
|
||||
// GetIGDBPopularGameIDs get IGDB popular game IDs
|
||||
// GetIGDBPopularGameIDs retrieves popular IGDB game IDs based on a given popularity type.
|
||||
// popularity_type = 1 IGDB Visits: Game page visits on IGDB.com.
|
||||
// popularity_type = 2 IGDB Want to Play: Additions to IGDB.com users’ “Want to Play” lists.
|
||||
// popularity_type = 3 IGDB Playing: Additions to IGDB.com users’ “Playing” lists.
|
||||
// popularity_type = 4 IGDB Played: Additions to IGDB.com users’ “Played” lists.
|
||||
func GetIGDBPopularGameIDs(popularityType int, offset int, limit int) ([]int, error) {
|
||||
var err error
|
||||
resp, err := igdbRequest(constant.IGDBPopularityURL, fmt.Sprintf("fields game_id,value,popularity_type; sort value desc; limit %v; offset %v; where popularity_type = %v;", limit, offset, popularityType))
|
||||
func GetIGDBPopularGameIDs(popularityType, offset, limit int) ([]int, error) {
|
||||
query := fmt.Sprintf("fields game_id,value,popularity_type; sort value desc; limit %d; offset %d; where popularity_type = %d;", limit, offset, popularityType)
|
||||
resp, err := igdbRequest(constant.IGDBPopularityURL, query)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("failed to fetch popular IGDB game IDs", zap.Int("popularityType", popularityType), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch popular IGDB game IDs for type %d: %w", popularityType, err)
|
||||
}
|
||||
type IgdbPopularity struct {
|
||||
|
||||
var data []struct {
|
||||
GameID int `json:"game_id"`
|
||||
Value float64 `json:"value"`
|
||||
}
|
||||
var data []IgdbPopularity
|
||||
if err = json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("failed to unmarshal IGDB popular games response", zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to unmarshal IGDB popular games response: %w", err)
|
||||
}
|
||||
ret := make([]int, 0)
|
||||
|
||||
gameIDs := make([]int, 0, len(data))
|
||||
for _, d := range data {
|
||||
pid, err := GetIGDBAppParent(d.GameID)
|
||||
parentID, err := GetIGDBAppParent(d.GameID)
|
||||
if err != nil {
|
||||
ret = append(ret, d.GameID)
|
||||
continue
|
||||
zap.L().Warn("failed to fetch parent IGDB ID for game", zap.Int("gameID", d.GameID), zap.Error(err))
|
||||
gameIDs = append(gameIDs, d.GameID)
|
||||
} else {
|
||||
gameIDs = append(gameIDs, parentID)
|
||||
}
|
||||
ret = append(ret, pid)
|
||||
}
|
||||
return ret, nil
|
||||
return gameIDs, nil
|
||||
}
|
||||
|
68
crawler/omg_gods.go
Normal file
68
crawler/omg_gods.go
Normal file
@ -0,0 +1,68 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"game-crawler/model"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type OmgGodsCrawler struct {
|
||||
logger *zap.Logger
|
||||
crawler RutrackerCrawler
|
||||
}
|
||||
|
||||
func NewOmgGodsCrawler(cfClearanceUrl, username, password string, logger *zap.Logger) *OmgGodsCrawler {
|
||||
return &OmgGodsCrawler{
|
||||
logger: logger,
|
||||
crawler: *NewRutrackerCrawler(
|
||||
"OmgGods",
|
||||
"switch",
|
||||
"8994327",
|
||||
username,
|
||||
password,
|
||||
cfClearanceUrl,
|
||||
omgGodsFormatter,
|
||||
logger,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *OmgGodsCrawler) Name() string {
|
||||
return "OmgGodsCrawler"
|
||||
}
|
||||
|
||||
func (c *OmgGodsCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
return c.crawler.CrawlByUrl(URL)
|
||||
}
|
||||
|
||||
func (c *OmgGodsCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
return c.crawler.Crawl(page)
|
||||
}
|
||||
|
||||
func (c *OmgGodsCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
return c.crawler.CrawlAll()
|
||||
}
|
||||
|
||||
func (c *OmgGodsCrawler) GetTotalPageNum() (int, error) {
|
||||
return c.crawler.GetTotalPageNum()
|
||||
}
|
||||
|
||||
// omgGodsFormatRegex lists the patterns stripped from a raw title, applied in
// order: first every "(...)" group, then every "[...]" group. Both patterns
// are non-greedy, so each bracket pair is removed on its own.
var omgGodsFormatRegex = []*regexp.Regexp{
	regexp.MustCompile(`\(.*?\)`),
	regexp.MustCompile(`\[.*?\]`),
}

// omgGodsFormatter reduces a raw release title to a bare game name.
//
// It removes all parenthesised and square-bracketed groups, keeps only the
// text before the first " + " separator, then only the text before the first
// " / " separator, and finally trims surrounding whitespace. Whitespace left
// in the middle of the name by a removed group is not collapsed.
func omgGodsFormatter(name string) string {
	cleaned := name
	for _, pattern := range omgGodsFormatRegex {
		cleaned = pattern.ReplaceAllString(cleaned, "")
	}

	// Truncate at each separator in turn; order matters because the " / "
	// cut is applied to what remains after the " + " cut.
	for _, sep := range []string{" + ", " / "} {
		if at := strings.Index(cleaned, sep); at >= 0 {
			cleaned = cleaned[:at]
		}
	}

	return strings.TrimSpace(cleaned)
}
|
@ -8,14 +8,12 @@ import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"game-crawler/cache"
|
||||
"game-crawler/config"
|
||||
"game-crawler/constant"
|
||||
"game-crawler/db"
|
||||
"game-crawler/model"
|
||||
@ -26,12 +24,16 @@ import (
|
||||
)
|
||||
|
||||
type OnlineFixCrawler struct {
|
||||
logger *zap.Logger
|
||||
username string
|
||||
password string
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
func NewOnlineFixCrawler(logger *zap.Logger) *OnlineFixCrawler {
|
||||
func NewOnlineFixCrawler(username, password string, logger *zap.Logger) *OnlineFixCrawler {
|
||||
return &OnlineFixCrawler{
|
||||
logger: logger,
|
||||
username: username,
|
||||
password: password,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
@ -42,206 +44,218 @@ func (c *OnlineFixCrawler) Name() string {
|
||||
func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
cookies, err := c.getCookies()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get cookies", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
|
||||
requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page)
|
||||
resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).SetCookies(cookies).Get(requestURL)
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to fetch", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch page", zap.String("url", requestURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
|
||||
}
|
||||
|
||||
body := utils.Windows1251ToUTF8(resp.Body())
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to parse HTML", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML", zap.String("url", requestURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML for page %d: %w", page, err)
|
||||
}
|
||||
|
||||
var urls []string
|
||||
var updateFlags []string //link+date
|
||||
var updateFlags []string
|
||||
doc.Find("article.news").Each(func(i int, s *goquery.Selection) {
|
||||
urls = append(urls, s.Find(".big-link").First().AttrOr("href", ""))
|
||||
updateFlags = append(
|
||||
updateFlags,
|
||||
base64.StdEncoding.EncodeToString([]byte(s.Find(".big-link").First().AttrOr("href", "")+s.Find("time").Text())),
|
||||
)
|
||||
url := s.Find(".big-link").First().AttrOr("href", "")
|
||||
if url != "" {
|
||||
urls = append(urls, url)
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(url+s.Find("time").Text())))
|
||||
}
|
||||
})
|
||||
|
||||
var res []*model.GameItem
|
||||
for i, u := range urls {
|
||||
// Skip already crawled links
|
||||
if db.IsOnlineFixCrawled(updateFlags[i]) {
|
||||
c.logger.Info("Skipping already crawled URL", zap.String("url", u))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
|
||||
c.logger.Info("Crawling URL", zap.String("url", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl URL", zap.String("url", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
item.UpdateFlag = updateFlags[i]
|
||||
err = db.SaveGameItem(item)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to save", zap.Error(err))
|
||||
if err := db.SaveGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to save game item", zap.String("url", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
res = append(res, item)
|
||||
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.String("url", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (c *OnlineFixCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
cookies, err := c.getCookies()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get cookies", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get cookies: %w", err)
|
||||
}
|
||||
|
||||
resp, err := utils.Request().SetHeaders(map[string]string{
|
||||
"Referer": constant.OnlineFixURL,
|
||||
}).SetCookies(cookies).Get(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch URL", zap.String("url", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
body := utils.Windows1251ToUTF8(resp.Body())
|
||||
titleRegex := regexp.MustCompile(`(?i)<h1.*?>(.*?)</h1>`)
|
||||
titleRegexRes := titleRegex.FindAllStringSubmatch(string(body), -1)
|
||||
if len(titleRegexRes) == 0 {
|
||||
return nil, errors.New("failed to find title")
|
||||
titleMatch := titleRegex.FindStringSubmatch(string(body))
|
||||
if len(titleMatch) == 0 {
|
||||
c.logger.Warn("Failed to find title in HTML", zap.String("url", URL))
|
||||
return nil, errors.New("failed to find title in HTML")
|
||||
}
|
||||
|
||||
downloadRegex := regexp.MustCompile(`(?i)<a[^>]+\bhref="([^"]+)"[^>]+>(Скачать Torrent|Скачать торрент)</a>`)
|
||||
downloadRegexRes := downloadRegex.FindAllStringSubmatch(string(body), -1)
|
||||
if len(downloadRegexRes) == 0 {
|
||||
downloadMatch := downloadRegex.FindStringSubmatch(string(body))
|
||||
if len(downloadMatch) == 0 {
|
||||
c.logger.Warn("Failed to find download button", zap.String("url", URL))
|
||||
return nil, errors.New("failed to find download button")
|
||||
}
|
||||
|
||||
// Retrieve or create game item
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
item = &model.GameItem{}
|
||||
}
|
||||
item.RawName = titleRegexRes[0][1]
|
||||
|
||||
item.RawName = titleMatch[1]
|
||||
item.Name = OnlineFixFormatter(item.RawName)
|
||||
item.Url = URL
|
||||
item.Author = "OnlineFix"
|
||||
item.Size = "0"
|
||||
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1])
|
||||
body = utils.Windows1251ToUTF8(resp.Body())
|
||||
|
||||
// Handle download links
|
||||
downloadURL := downloadMatch[1]
|
||||
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch download link", zap.String("url", downloadURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch download link %s: %w", downloadURL, err)
|
||||
}
|
||||
if strings.Contains(downloadRegexRes[0][1], "uploads.online-fix.me") {
|
||||
|
||||
body = utils.Windows1251ToUTF8(resp.Body())
|
||||
if strings.Contains(downloadURL, "uploads.online-fix.me") {
|
||||
// Handle torrent file
|
||||
magnetRegex := regexp.MustCompile(`(?i)"(.*?).torrent"`)
|
||||
magnetRegexRes := magnetRegex.FindAllStringSubmatch(string(body), -1)
|
||||
if len(magnetRegexRes) == 0 {
|
||||
return nil, errors.New("failed to find magnet")
|
||||
magnetMatch := magnetRegex.FindStringSubmatch(string(body))
|
||||
if len(magnetMatch) == 0 {
|
||||
c.logger.Warn("Failed to find torrent magnet link", zap.String("url", downloadURL))
|
||||
return nil, errors.New("failed to find torrent magnet link")
|
||||
}
|
||||
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1] + strings.Trim(magnetRegexRes[0][0], "\""))
|
||||
|
||||
torrentURL := downloadURL + strings.Trim(magnetMatch[0], "\"")
|
||||
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(torrentURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch torrent file", zap.String("url", torrentURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch torrent file %s: %w", torrentURL, err)
|
||||
}
|
||||
|
||||
magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to convert torrent to magnet", zap.String("url", torrentURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to convert torrent to magnet: %w", err)
|
||||
}
|
||||
|
||||
item.DownloadLinks = []string{magnet}
|
||||
item.Size = size
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
} else if strings.Contains(downloadRegexRes[0][1], "online-fix.me/ext") {
|
||||
if strings.Contains(string(body), "mega.nz") {
|
||||
if !config.Config.MegaAvaliable {
|
||||
return nil, errors.New("mega is not avaliable")
|
||||
}
|
||||
megaRegex := regexp.MustCompile(`(?i)location.href=\\'([^\\']*)\\'`)
|
||||
megaRegexRes := megaRegex.FindAllStringSubmatch(string(body), -1)
|
||||
if len(megaRegexRes) == 0 {
|
||||
return nil, errors.New("failed to find download link")
|
||||
}
|
||||
path, files, err := utils.MegaDownload(megaRegexRes[0][1], "torrent")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
torrent := ""
|
||||
for _, file := range files {
|
||||
if strings.HasSuffix(file, ".torrent") {
|
||||
torrent = file
|
||||
break
|
||||
}
|
||||
}
|
||||
dataBytes, err := os.ReadFile(torrent)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
magnet, size, err := utils.ConvertTorrentToMagnet(dataBytes)
|
||||
item.DownloadLinks = []string{magnet}
|
||||
item.Size = size
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
_ = os.RemoveAll(path)
|
||||
} else {
|
||||
return nil, errors.New("failed to find download link")
|
||||
}
|
||||
} else {
|
||||
return nil, errors.New("failed to find download link")
|
||||
c.logger.Warn("Unsupported download link format", zap.String("url", downloadURL))
|
||||
return nil, errors.New("unsupported download link format")
|
||||
}
|
||||
|
||||
item.Platform = "windows"
|
||||
return item, nil
|
||||
}
|
||||
|
||||
// Crawl multiple pages
|
||||
func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
||||
var res []*model.GameItem
|
||||
for _, page := range pages {
|
||||
items, err := c.Crawl(page)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to crawl page", zap.Int("page", page), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Crawl all pages
|
||||
func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
var res []*model.GameItem
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
|
||||
for i := 1; i <= totalPageNum; i++ {
|
||||
items, err := c.Crawl(i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Warn("Failed to crawl page", zap.Int("page", i), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Get total page number
|
||||
func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) {
|
||||
resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).Get(constant.OnlineFixURL)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to fetch main page", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch main page: %w", err)
|
||||
}
|
||||
|
||||
pageRegex := regexp.MustCompile(`(?i)<a href="https://online-fix.me/page/(\d+)/">.*?</a>`)
|
||||
pageRegexRes := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
|
||||
if len(pageRegexRes) == 0 {
|
||||
return 0, err
|
||||
pageMatches := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
|
||||
if len(pageMatches) < 2 {
|
||||
c.logger.Warn("Failed to find total page number in HTML")
|
||||
return 0, errors.New("failed to find total page number")
|
||||
}
|
||||
totalPageNum, err := strconv.Atoi(pageRegexRes[len(pageRegexRes)-2][1])
|
||||
|
||||
totalPageNum, err := strconv.Atoi(pageMatches[len(pageMatches)-2][1])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to parse total page number", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse total page number: %w", err)
|
||||
}
|
||||
|
||||
return totalPageNum, nil
|
||||
}
|
||||
|
||||
type csrf struct {
|
||||
Field string `json:"field"`
|
||||
Value string `json:"value"`
|
||||
}
|
||||
|
||||
// Get cookies for authentication
|
||||
func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
|
||||
if c.username == "" || c.password == "" {
|
||||
return nil, errors.New("username or password is empty")
|
||||
}
|
||||
val, exists := cache.Get("onlinefix_cookies")
|
||||
if exists {
|
||||
var cookies []*http.Cookie
|
||||
if err := json.Unmarshal([]byte(val), &cookies); err != nil {
|
||||
return nil, err
|
||||
c.logger.Warn("Failed to parse cached cookies", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse cached cookies: %w", err)
|
||||
}
|
||||
return cookies, nil
|
||||
}
|
||||
@ -251,38 +265,48 @@ func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
|
||||
"Referer": constant.OnlineFixURL,
|
||||
}).Get(constant.OnlineFixCSRFURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch CSRF token", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch CSRF token: %w", err)
|
||||
}
|
||||
var csrf csrf
|
||||
if err = json.Unmarshal(resp.Body(), &csrf); err != nil {
|
||||
return nil, err
|
||||
|
||||
type csrf struct {
|
||||
Field string `json:"field"`
|
||||
Value string `json:"value"`
|
||||
}
|
||||
|
||||
var csrfToken csrf
|
||||
if err = json.Unmarshal(resp.Body(), &csrfToken); err != nil {
|
||||
c.logger.Error("Failed to parse CSRF token", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse CSRF token: %w", err)
|
||||
}
|
||||
|
||||
cookies := resp.Cookies()
|
||||
|
||||
params := url.Values{}
|
||||
params.Add("login_name", config.Config.OnlineFix.User)
|
||||
params.Add("login_password", config.Config.OnlineFix.Password)
|
||||
params.Add(csrf.Field, csrf.Value)
|
||||
params.Add("login_name", c.username)
|
||||
params.Add("login_password", c.password)
|
||||
params.Add(csrfToken.Field, csrfToken.Value)
|
||||
params.Add("login", "submit")
|
||||
|
||||
resp, err = utils.Request().SetHeaders(map[string]string{
|
||||
"Origin": constant.OnlineFixURL,
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
"Referer": constant.OnlineFixURL,
|
||||
}).SetCookies(cookies).SetBody(params.Encode()).Post(constant.OnlineFixURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to log in", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to log in: %w", err)
|
||||
}
|
||||
|
||||
cookies = resp.Cookies()
|
||||
jsonBytes, _ := json.Marshal(cookies)
|
||||
_ = cache.SetWithExpire("onlinefix_cookies", string(jsonBytes), time.Hour)
|
||||
cookiesJSON, _ := json.Marshal(cookies)
|
||||
_ = cache.SetWithExpire("onlinefix_cookies", string(cookiesJSON), time.Hour)
|
||||
|
||||
return cookies, nil
|
||||
}
|
||||
|
||||
// Format game name
|
||||
func OnlineFixFormatter(name string) string {
|
||||
name = strings.Replace(name, "по сети", "", -1)
|
||||
reg1 := regexp.MustCompile(`(?i)\(.*?\)`)
|
||||
name = reg1.ReplaceAllString(name, "")
|
||||
return strings.TrimSpace(name)
|
||||
name = strings.ReplaceAll(name, "по сети", "")
|
||||
reg := regexp.MustCompile(`(?i)\(.*?\)`)
|
||||
return strings.TrimSpace(reg.ReplaceAllString(name, ""))
|
||||
}
|
||||
|
298
crawler/rutracker.go
Normal file
298
crawler/rutracker.go
Normal file
@ -0,0 +1,298 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"game-crawler/cache"
|
||||
"game-crawler/constant"
|
||||
"game-crawler/db"
|
||||
"game-crawler/model"
|
||||
"game-crawler/utils"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
"git.nite07.com/nite/ccs"
|
||||
"github.com/Danny-Dasilva/CycleTLS/cycletls"
|
||||
http "github.com/Danny-Dasilva/fhttp"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type RutrackerCrawler struct {
|
||||
source string
|
||||
rid string
|
||||
platform string
|
||||
username string
|
||||
password string
|
||||
formatter FormatterFunc
|
||||
logger *zap.Logger
|
||||
cfClearanceUrl string
|
||||
}
|
||||
|
||||
func NewRutrackerCrawler(source, platform, rid, username, password, cfClearanceUrl string, formatter FormatterFunc, logger *zap.Logger) *RutrackerCrawler {
|
||||
return &RutrackerCrawler{
|
||||
source: source,
|
||||
rid: rid,
|
||||
formatter: formatter,
|
||||
logger: logger,
|
||||
platform: platform,
|
||||
username: username,
|
||||
password: password,
|
||||
cfClearanceUrl: cfClearanceUrl,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *RutrackerCrawler) getSession() (*ccs.Session, error) {
|
||||
r.logger.Info("Fetching session for RutrackerCrawler")
|
||||
|
||||
if r.username == "" || r.password == "" {
|
||||
r.logger.Error("Username or password is empty")
|
||||
return nil, fmt.Errorf("username or password is empty")
|
||||
}
|
||||
|
||||
cacheKey := "rutracker_session"
|
||||
var session ccs.Session
|
||||
if val, exist := cache.Get(cacheKey); exist {
|
||||
err := json.Unmarshal([]byte(val), &session)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to unmarshal cached session", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to unmarshal cached session: %w", err)
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
session, err = ccs.WAFSession(r.cfClearanceUrl, constant.RutrackerURL)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to create WAF session", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to create WAF session: %w", err)
|
||||
}
|
||||
|
||||
// login
|
||||
params := url.Values{}
|
||||
params.Add("login_username", r.username)
|
||||
params.Add("login_password", r.password)
|
||||
params.Add("login", "Вход")
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodPost, constant.RutrackerLoginURL, session, &cycletls.Options{
|
||||
Headers: map[string]string{
|
||||
"Content-Type": "application/x-www-form-urlencoded",
|
||||
},
|
||||
Body: params.Encode(),
|
||||
UserAgent: "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
|
||||
DisableRedirect: true,
|
||||
})
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to login", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to login: %w", err)
|
||||
}
|
||||
|
||||
if len(resp.Cookies) == 0 {
|
||||
r.logger.Error("Failed to login, no cookies found")
|
||||
return nil, fmt.Errorf("failed to login, no cookies found")
|
||||
}
|
||||
|
||||
success := false
|
||||
loginCookies := make([]ccs.Cookie, 0)
|
||||
for _, cookie := range resp.Cookies {
|
||||
if cookie.Name == "bb_session" {
|
||||
success = true
|
||||
}
|
||||
loginCookies = append(loginCookies, ccs.Cookie{
|
||||
Name: cookie.Name,
|
||||
Value: cookie.Value,
|
||||
})
|
||||
}
|
||||
|
||||
if !success {
|
||||
r.logger.Error("Failed to login, no bb_session cookie found")
|
||||
return nil, fmt.Errorf("failed to login, no bb_session cookie found")
|
||||
}
|
||||
|
||||
session.Cookies = append(session.Cookies, loginCookies...)
|
||||
|
||||
jsonBytes, err := json.Marshal(session)
|
||||
if err == nil {
|
||||
_ = cache.SetWithExpire(cacheKey, jsonBytes, 24*time.Hour)
|
||||
}
|
||||
}
|
||||
return &session, nil
|
||||
}
|
||||
|
||||
func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
session, err := r.getSession()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get session: %w", err)
|
||||
}
|
||||
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to request URL", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to request URL: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body))
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to parse HTML", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML: %w", err)
|
||||
}
|
||||
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to get game item by url", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get game item by url: %w", err)
|
||||
}
|
||||
|
||||
item.RawName = doc.Find("#topic-title").Text()
|
||||
item.Name = r.formatter(item.RawName)
|
||||
item.Author = r.source
|
||||
item.Platform = r.platform
|
||||
item.Url = URL
|
||||
|
||||
magnet := doc.Find(".magnet-link").AttrOr("href", "")
|
||||
if magnet == "" {
|
||||
r.logger.Error("Failed to find magnet link")
|
||||
return nil, fmt.Errorf("failed to find magnet link")
|
||||
}
|
||||
item.DownloadLinks = []string{magnet}
|
||||
|
||||
sizeStr := doc.Find("#tor-size-humn").AttrOr("title", "")
|
||||
if sizeStr == "" {
|
||||
r.logger.Warn("Failed to find size")
|
||||
item.Size = "unknown"
|
||||
} else {
|
||||
size, err := strconv.ParseUint(sizeStr, 10, 64)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to parse size", zap.Error(err))
|
||||
} else {
|
||||
item.Size = utils.BytesToSize(size)
|
||||
}
|
||||
}
|
||||
return item, nil
|
||||
}
|
||||
|
||||
func (r *RutrackerCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
r.logger.Info("Crawling Rutracker", zap.Int("page", page), zap.String("rid", r.rid))
|
||||
session, err := r.getSession()
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to get session", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get session: %w", err)
|
||||
}
|
||||
|
||||
URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, (page-1)*50)
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to request URL: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body))
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to parse HTML", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML: %w", err)
|
||||
}
|
||||
var urls []string
|
||||
var updateFlags []string
|
||||
doc.Find("[id^='trs-tr']").Each(func(i int, s *goquery.Selection) {
|
||||
a := s.Find(".t-title")
|
||||
datetime := s.Find("td").Last().Text()
|
||||
url, exists := a.Attr("href")
|
||||
if !exists {
|
||||
r.logger.Error("Failed to find URL")
|
||||
return
|
||||
}
|
||||
fullURL := fmt.Sprintf(constant.RutrackerTopicURL, url)
|
||||
urls = append(urls, fullURL)
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fullURL+datetime)))
|
||||
})
|
||||
|
||||
var res []*model.GameItem
|
||||
for i, URL := range urls {
|
||||
if db.IsGameCrawled(updateFlags[i], r.source) {
|
||||
r.logger.Info("Skipping already crawled URL", zap.String("URL", URL))
|
||||
continue
|
||||
}
|
||||
r.logger.Info("Crawling URL", zap.String("URL", URL))
|
||||
item, err := r.CrawlByUrl(URL)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to crawl URL", zap.String("URL", URL), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
err = db.SaveGameItem(item)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to save game item to database", zap.String("URL", URL), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
res = append(res, item)
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
r.logger.Warn("Failed to organize game item", zap.String("URL", URL), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (r *RutrackerCrawler) CrawlAll() (res []*model.GameItem, err error) {
|
||||
r.logger.Info("Starting CrawlAll", zap.String("Source", r.source))
|
||||
totalPage, err := r.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
for i := 1; i <= totalPage; i++ {
|
||||
items, err := r.Crawl(i)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
|
||||
}
|
||||
res = append(res, items...)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func (r *RutrackerCrawler) GetTotalPageNum() (int, error) {
|
||||
session, err := r.getSession()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to get session: %w", err)
|
||||
}
|
||||
|
||||
URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, 0)
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to request URL: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body))
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to parse HTML", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse HTML: %w", err)
|
||||
}
|
||||
|
||||
var pg []string
|
||||
doc.Find(".pg").Each(func(i int, s *goquery.Selection) {
|
||||
pg = append(pg, s.Text())
|
||||
})
|
||||
|
||||
if len(pg) == 0 {
|
||||
r.logger.Error("Failed to find page number")
|
||||
return 0, fmt.Errorf("failed to find page number")
|
||||
}
|
||||
|
||||
totalPage := 0
|
||||
for _, c := range pg[len(pg)-1] {
|
||||
if unicode.IsDigit(c) {
|
||||
totalPage, err = strconv.Atoi(pg[len(pg)-1])
|
||||
break
|
||||
} else {
|
||||
totalPage, err = strconv.Atoi(pg[len(pg)-2])
|
||||
break
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to parse page number", zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse page number: %w", err)
|
||||
}
|
||||
return totalPage, nil
|
||||
}
|
116
crawler/steam.go
116
crawler/steam.go
@ -13,15 +13,19 @@ import (
|
||||
"game-crawler/constant"
|
||||
"game-crawler/model"
|
||||
"game-crawler/utils"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// GetSteamAppDetail fetches the details of a Steam app by its ID.
|
||||
func GetSteamAppDetail(id int) (*model.SteamAppDetail, error) {
|
||||
key := fmt.Sprintf("steam_game:%d", id)
|
||||
val, exist := cache.Get(key)
|
||||
if exist {
|
||||
if val, exist := cache.Get(key); exist {
|
||||
zap.L().Info("Cache hit for Steam app detail", zap.Int("steamID", id))
|
||||
var detail model.SteamAppDetail
|
||||
if err := json.Unmarshal([]byte(val), &detail); err != nil {
|
||||
return nil, err
|
||||
zap.L().Warn("Failed to unmarshal cached Steam app detail", zap.Int("steamID", id), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to unmarshal cached Steam app detail for ID %d: %w", id, err)
|
||||
}
|
||||
return &detail, nil
|
||||
}
|
||||
@ -29,93 +33,117 @@ func GetSteamAppDetail(id int) (*model.SteamAppDetail, error) {
|
||||
baseURL, _ := url.Parse(constant.SteamAppDetailURL)
|
||||
params := url.Values{}
|
||||
params.Add("appids", strconv.Itoa(id))
|
||||
// params.Add("l", "schinese")
|
||||
baseURL.RawQuery = params.Encode()
|
||||
|
||||
resp, err := utils.Request().SetHeaders(map[string]string{
|
||||
"User-Agent": "",
|
||||
}).Get(baseURL.String())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("Failed to fetch Steam app detail", zap.Int("steamID", id), zap.String("url", baseURL.String()), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch Steam app detail for ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
var detail map[string]*model.SteamAppDetail
|
||||
if err = json.Unmarshal(resp.Body(), &detail); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if _, ok := detail[strconv.Itoa(id)]; !ok {
|
||||
return nil, fmt.Errorf("steam App not found: %d", id)
|
||||
}
|
||||
if detail[strconv.Itoa(id)] == nil {
|
||||
return nil, fmt.Errorf("steam App not found: %d", id)
|
||||
if err := json.Unmarshal(resp.Body(), &detail); err != nil {
|
||||
zap.L().Error("Failed to unmarshal Steam app detail response", zap.Int("steamID", id), zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to unmarshal Steam app detail for ID %d: %w", id, err)
|
||||
}
|
||||
|
||||
jsonBytes, err := json.Marshal(detail[strconv.Itoa(id)])
|
||||
if err == nil {
|
||||
_ = cache.Set(key, string(jsonBytes))
|
||||
if appDetail, ok := detail[strconv.Itoa(id)]; !ok || appDetail == nil {
|
||||
zap.L().Warn("Steam app detail not found", zap.Int("steamID", id))
|
||||
return nil, fmt.Errorf("steam app not found: %d", id)
|
||||
} else {
|
||||
// Cache the result
|
||||
jsonBytes, err := json.Marshal(appDetail)
|
||||
if err == nil {
|
||||
_ = cache.Set(key, string(jsonBytes))
|
||||
}
|
||||
return appDetail, nil
|
||||
}
|
||||
|
||||
return detail[strconv.Itoa(id)], nil
|
||||
}
|
||||
|
||||
// GenerateSteamGameInfo generates detailed game information based on a Steam App ID.
|
||||
func GenerateSteamGameInfo(id int) (*model.GameInfo, error) {
|
||||
item := &model.GameInfo{}
|
||||
detail, err := GetSteamAppDetail(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("Failed to fetch Steam app detail for game info generation", zap.Int("steamID", id), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch Steam app detail for ID %d: %w", id, err)
|
||||
}
|
||||
item.SteamID = id
|
||||
item.Name = detail.Data.Name
|
||||
item.Description = detail.Data.ShortDescription
|
||||
item.Cover = fmt.Sprintf("https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/%v/library_600x900_2x.jpg", id)
|
||||
item.Developers = detail.Data.Developers
|
||||
item.Publishers = detail.Data.Publishers
|
||||
var screenshots []string
|
||||
|
||||
item := &model.GameInfo{
|
||||
SteamID: id,
|
||||
Name: detail.Data.Name,
|
||||
Description: detail.Data.ShortDescription,
|
||||
Cover: fmt.Sprintf("https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/%v/library_600x900_2x.jpg", id),
|
||||
Developers: detail.Data.Developers,
|
||||
Publishers: detail.Data.Publishers,
|
||||
Screenshots: make([]string, 0, len(detail.Data.Screenshots)),
|
||||
}
|
||||
|
||||
for _, screenshot := range detail.Data.Screenshots {
|
||||
screenshots = append(screenshots, screenshot.PathFull)
|
||||
item.Screenshots = append(item.Screenshots, screenshot.PathFull)
|
||||
}
|
||||
item.Screenshots = screenshots
|
||||
|
||||
zap.L().Info("Generated Steam game info", zap.Int("steamID", id), zap.String("name", item.Name))
|
||||
return item, nil
|
||||
}
|
||||
|
||||
// GetSteamIDByIGDBID retrieves the Steam App ID associated with a given IGDB ID.
|
||||
func GetSteamIDByIGDBID(IGDBID int) (int, error) {
|
||||
key := fmt.Sprintf("steam_game:%d", IGDBID)
|
||||
val, exist := cache.Get(key)
|
||||
if exist {
|
||||
if val, exist := cache.Get(key); exist {
|
||||
zap.L().Info("Cache hit for Steam ID by IGDB ID", zap.Int("IGDBID", IGDBID))
|
||||
id, err := strconv.Atoi(val)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
zap.L().Warn("Failed to parse cached Steam ID", zap.Int("IGDBID", IGDBID), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse cached Steam ID for IGDB ID %d: %w", IGDBID, err)
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
|
||||
var err error
|
||||
resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where game = %v; fields *; limit 500;`, IGDBID))
|
||||
query := fmt.Sprintf(`where game = %v; fields *; limit 500;`, IGDBID)
|
||||
resp, err := igdbRequest(constant.IGDBWebsitesURL, query)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
zap.L().Error("Failed to fetch IGDB websites for Steam ID", zap.Int("IGDBID", IGDBID), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch IGDB websites for IGDB ID %d: %w", IGDBID, err)
|
||||
}
|
||||
|
||||
var data []struct {
|
||||
Game int `json:"game"`
|
||||
Url string `json:"url"`
|
||||
}
|
||||
if err = json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
return 0, err
|
||||
if err := json.Unmarshal(resp.Body(), &data); err != nil {
|
||||
zap.L().Error("Failed to unmarshal IGDB websites response", zap.Int("IGDBID", IGDBID), zap.String("response", string(resp.Body())), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to unmarshal IGDB websites response for IGDB ID %d: %w", IGDBID, err)
|
||||
}
|
||||
|
||||
if len(data) == 0 {
|
||||
return 0, errors.New("not found")
|
||||
zap.L().Warn("No Steam ID found for IGDB ID", zap.Int("IGDBID", IGDBID))
|
||||
return 0, errors.New("steam ID not found")
|
||||
}
|
||||
|
||||
for _, v := range data {
|
||||
if strings.HasPrefix(v.Url, "https://store.steampowered.com/app/") {
|
||||
regex := regexp.MustCompile(`https://store.steampowered.com/app/(\d+)/?`)
|
||||
idStr := regex.FindStringSubmatch(v.Url)
|
||||
if len(idStr) < 2 {
|
||||
return 0, errors.New("failed parse")
|
||||
idMatch := regex.FindStringSubmatch(v.Url)
|
||||
if len(idMatch) < 2 {
|
||||
zap.L().Warn("Failed to parse Steam ID from URL", zap.String("url", v.Url))
|
||||
return 0, errors.New("failed to parse Steam ID from URL")
|
||||
}
|
||||
steamID, err := strconv.Atoi(idStr[1])
|
||||
|
||||
steamID, err := strconv.Atoi(idMatch[1])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
zap.L().Error("Failed to convert Steam ID to integer", zap.String("url", v.Url), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to convert Steam ID from URL %s: %w", v.Url, err)
|
||||
}
|
||||
|
||||
// Cache the result
|
||||
_ = cache.Set(key, strconv.Itoa(steamID))
|
||||
zap.L().Info("Found Steam ID for IGDB ID", zap.Int("IGDBID", IGDBID), zap.Int("steamID", steamID))
|
||||
return steamID, nil
|
||||
}
|
||||
}
|
||||
return 0, errors.New("not found")
|
||||
|
||||
zap.L().Warn("No valid Steam ID found in IGDB websites data", zap.Int("IGDBID", IGDBID))
|
||||
return 0, errors.New("steam ID not found")
|
||||
}
|
||||
|
@ -16,73 +16,125 @@ import (
|
||||
"game-crawler/utils"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// GetSteam250 fetches Steam250 game rankings from the given URL.
|
||||
func GetSteam250(URL string) ([]*model.GameInfo, error) {
|
||||
key := "steam250:" + url.QueryEscape(URL)
|
||||
if val, ok := cache.Get(key); ok {
|
||||
zap.L().Info("Cache hit for Steam250 rankings", zap.String("url", URL))
|
||||
var infos []*model.GameInfo
|
||||
err := json.Unmarshal([]byte(val), &infos)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
if err := json.Unmarshal([]byte(val), &infos); err != nil {
|
||||
zap.L().Warn("Failed to unmarshal cached Steam250 data", zap.String("url", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to unmarshal cached Steam250 data for URL %s: %w", URL, err)
|
||||
}
|
||||
return infos, nil
|
||||
}
|
||||
|
||||
zap.L().Info("Fetching Steam250 rankings from URL", zap.String("url", URL))
|
||||
resp, err := utils.Request().Get(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("Failed to fetch Steam250 rankings", zap.String("url", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch Steam250 rankings from URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
zap.L().Error("Failed to parse Steam250 HTML document", zap.String("url", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse Steam250 HTML document for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
var rank []model.Steam250Item
|
||||
var item model.Steam250Item
|
||||
steamIDs := make([]int, 0)
|
||||
|
||||
doc.Find(".appline").Each(func(i int, s *goquery.Selection) {
|
||||
// Extract game name
|
||||
item.Name = s.Find(".title>a").First().Text()
|
||||
if item.Name == "" {
|
||||
zap.L().Warn("Game name not found in Steam250 rankings", zap.String("url", URL), zap.Int("index", i))
|
||||
return
|
||||
}
|
||||
|
||||
// Extract Steam ID
|
||||
idStr := s.Find(".store").AttrOr("href", "")
|
||||
idSlice := regexp.MustCompile(`app/(\d+)/`).FindStringSubmatch(idStr)
|
||||
if len(idSlice) < 2 {
|
||||
zap.L().Warn("Failed to extract Steam ID from URL", zap.String("url", idStr), zap.Int("index", i))
|
||||
return
|
||||
}
|
||||
item.SteamID, _ = strconv.Atoi(idSlice[1])
|
||||
|
||||
steamID, err := strconv.Atoi(idSlice[1])
|
||||
if err != nil {
|
||||
zap.L().Warn("Failed to convert Steam ID to integer", zap.String("id", idSlice[1]), zap.Error(err))
|
||||
return
|
||||
}
|
||||
|
||||
item.SteamID = steamID
|
||||
rank = append(rank, item)
|
||||
steamIDs = append(steamIDs, item.SteamID)
|
||||
steamIDs = append(steamIDs, steamID)
|
||||
})
|
||||
infos, err := db.GetGameInfosByPlatformIDs("steam", steamIDs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(infos) > 10 {
|
||||
return infos[:10], nil
|
||||
|
||||
if len(steamIDs) == 0 {
|
||||
zap.L().Warn("No valid Steam IDs found in Steam250 rankings", zap.String("url", URL))
|
||||
return nil, fmt.Errorf("no valid Steam IDs found in Steam250 rankings for URL %s", URL)
|
||||
}
|
||||
|
||||
// Fetch game info from the database
|
||||
zap.L().Info("Fetching game info from database", zap.Ints("steamIDs", steamIDs))
|
||||
infos, err := db.GetGameInfosByPlatformIDs("steam", steamIDs)
|
||||
if err != nil {
|
||||
zap.L().Error("Failed to fetch game info from database", zap.Ints("steamIDs", steamIDs), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch game info for Steam IDs %v: %w", steamIDs, err)
|
||||
}
|
||||
|
||||
// Limit the result to 10 entries (if applicable)
|
||||
if len(infos) > 10 {
|
||||
infos = infos[:10]
|
||||
}
|
||||
|
||||
// Cache the result
|
||||
jsonBytes, err := json.Marshal(infos)
|
||||
if err == nil {
|
||||
_ = cache.SetWithExpire(key, string(jsonBytes), 12*time.Hour)
|
||||
cacheErr := cache.SetWithExpire(key, string(jsonBytes), 12*time.Hour)
|
||||
if cacheErr != nil {
|
||||
zap.L().Warn("Failed to cache Steam250 rankings", zap.String("url", URL), zap.Error(cacheErr))
|
||||
}
|
||||
} else {
|
||||
zap.L().Warn("Failed to marshal Steam250 rankings for caching", zap.String("url", URL), zap.Error(err))
|
||||
}
|
||||
|
||||
return infos, nil
|
||||
}
|
||||
|
||||
// GetSteam250Top250 retrieves the top 250 games from Steam250.
|
||||
func GetSteam250Top250() ([]*model.GameInfo, error) {
|
||||
zap.L().Info("Fetching Steam250 Top 250 games")
|
||||
return GetSteam250(constant.Steam250Top250URL)
|
||||
}
|
||||
|
||||
// GetSteam250BestOfTheYear retrieves the best games of the current year from Steam250.
|
||||
func GetSteam250BestOfTheYear() ([]*model.GameInfo, error) {
|
||||
return GetSteam250(fmt.Sprintf(constant.Steam250BestOfTheYearURL, time.Now().UTC().Year()))
|
||||
year := time.Now().UTC().Year()
|
||||
zap.L().Info("Fetching Steam250 Best of the Year games", zap.Int("year", year))
|
||||
return GetSteam250(fmt.Sprintf(constant.Steam250BestOfTheYearURL, year))
|
||||
}
|
||||
|
||||
// GetSteam250WeekTop50 retrieves the top 50 games of the week from Steam250.
|
||||
func GetSteam250WeekTop50() ([]*model.GameInfo, error) {
|
||||
zap.L().Info("Fetching Steam250 Week Top 50 games")
|
||||
return GetSteam250(constant.Steam250WeekTop50URL)
|
||||
}
|
||||
|
||||
// GetSteam250MonthTop50 retrieves the top 50 games of the month from Steam250.
|
||||
func GetSteam250MonthTop50() ([]*model.GameInfo, error) {
|
||||
zap.L().Info("Fetching Steam250 Month Top 50 games")
|
||||
return GetSteam250(constant.Steam250MonthTop50URL)
|
||||
}
|
||||
|
||||
// GetSteam250MostPlayed retrieves the most played games from Steam250.
|
||||
func GetSteam250MostPlayed() ([]*model.GameInfo, error) {
|
||||
zap.L().Info("Fetching Steam250 Most Played games")
|
||||
return GetSteam250(constant.Steam250MostPlayedURL)
|
||||
}
|
||||
|
@ -17,122 +17,178 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// SteamRIPCrawler defines a crawler for the SteamRIP website.
|
||||
type SteamRIPCrawler struct {
|
||||
logger *zap.Logger
|
||||
}
|
||||
|
||||
// NewSteamRIPCrawler creates a new instance of SteamRIPCrawler.
|
||||
func NewSteamRIPCrawler(logger *zap.Logger) *SteamRIPCrawler {
|
||||
return &SteamRIPCrawler{
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the name of the crawler.
|
||||
func (c *SteamRIPCrawler) Name() string {
|
||||
return "SteamRIPCrawler"
|
||||
}
|
||||
|
||||
// CrawlByUrl crawls a single game page from SteamRIP by URL.
|
||||
func (c *SteamRIPCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
c.logger.Info("Crawling game details", zap.String("URL", URL))
|
||||
|
||||
// Fetch the page content
|
||||
resp, err := utils.Request().Get(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
// Retrieve or create game item
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Warn("Game item not found in database, creating a new one", zap.String("URL", URL), zap.Error(err))
|
||||
item = &model.GameItem{}
|
||||
}
|
||||
|
||||
// Extract game details
|
||||
item.RawName = strings.TrimSpace(doc.Find(".entry-title").First().Text())
|
||||
if item.RawName == "" {
|
||||
c.logger.Warn("Game title not found", zap.String("URL", URL))
|
||||
return nil, errors.New("game title not found")
|
||||
}
|
||||
item.Name = SteamRIPFormatter(item.RawName)
|
||||
item.Url = URL
|
||||
item.Author = "SteamRIP"
|
||||
item.Platform = "windows"
|
||||
|
||||
// Extract game size
|
||||
sizeRegex := regexp.MustCompile(`(?i)<li><strong>Game Size:\s?</strong>(.*?)</li>`)
|
||||
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body()))
|
||||
if len(sizeRegexRes) != 0 {
|
||||
item.Size = strings.TrimSpace(sizeRegexRes[1])
|
||||
sizeMatch := sizeRegex.FindStringSubmatch(string(resp.Body()))
|
||||
if len(sizeMatch) > 1 {
|
||||
item.Size = strings.TrimSpace(sizeMatch[1])
|
||||
} else {
|
||||
item.Size = "unknown"
|
||||
}
|
||||
megadbRegex := regexp.MustCompile(`(?i)(?:https?:)?(//megadb\.net/[^"]+)`)
|
||||
megadbRegexRes := megadbRegex.FindStringSubmatch(string(resp.Body()))
|
||||
links := []string{}
|
||||
if len(megadbRegexRes) != 0 {
|
||||
links = append(links, fmt.Sprintf("https:%s", megadbRegexRes[1]))
|
||||
}
|
||||
gofileRegex := regexp.MustCompile(`(?i)(?:https?:)?(//gofile\.io/d/[^"]+)`)
|
||||
gofileRegexRes := gofileRegex.FindStringSubmatch(string(resp.Body()))
|
||||
if len(gofileRegexRes) != 0 {
|
||||
links = append(links, fmt.Sprintf("https:%s", gofileRegexRes[1]))
|
||||
}
|
||||
filecryptRegex := regexp.MustCompile(`(?i)(?:https?:)?(//filecrypt\.co/Container/[^"]+)`)
|
||||
filecryptRegexRes := filecryptRegex.FindStringSubmatch(string(resp.Body()))
|
||||
if len(filecryptRegexRes) != 0 {
|
||||
links = append(links, fmt.Sprintf("https:%s", filecryptRegexRes[1]))
|
||||
}
|
||||
item.DownloadLinks = links
|
||||
|
||||
// Extract download links
|
||||
item.DownloadLinks = c.extractDownloadLinks(string(resp.Body()))
|
||||
if len(item.DownloadLinks) == 0 {
|
||||
c.logger.Warn("No download links found", zap.String("URL", URL))
|
||||
return nil, errors.New("failed to find download link")
|
||||
}
|
||||
|
||||
c.logger.Info("Crawled game details successfully", zap.String("Name", item.Name), zap.String("URL", URL))
|
||||
return item, nil
|
||||
}
|
||||
|
||||
// extractDownloadLinks extracts download links from the game page HTML.
|
||||
func (c *SteamRIPCrawler) extractDownloadLinks(pageContent string) []string {
|
||||
var links []string
|
||||
|
||||
// Match MegaDB links
|
||||
megadbRegex := regexp.MustCompile(`(?i)(?:https?:)?(//megadb\.net/[^"]+)`)
|
||||
if matches := megadbRegex.FindStringSubmatch(pageContent); len(matches) > 1 {
|
||||
links = append(links, fmt.Sprintf("https:%s", matches[1]))
|
||||
}
|
||||
|
||||
// Match Gofile links
|
||||
gofileRegex := regexp.MustCompile(`(?i)(?:https?:)?(//gofile\.io/d/[^"]+)`)
|
||||
if matches := gofileRegex.FindStringSubmatch(pageContent); len(matches) > 1 {
|
||||
links = append(links, fmt.Sprintf("https:%s", matches[1]))
|
||||
}
|
||||
|
||||
// Match Filecrypt links
|
||||
filecryptRegex := regexp.MustCompile(`(?i)(?:https?:)?(//filecrypt\.co/Container/[^"]+)`)
|
||||
if matches := filecryptRegex.FindStringSubmatch(pageContent); len(matches) > 1 {
|
||||
links = append(links, fmt.Sprintf("https:%s", matches[1]))
|
||||
}
|
||||
|
||||
return links
|
||||
}
|
||||
|
||||
// Crawl crawls a limited number of games from the SteamRIP game list.
|
||||
func (c *SteamRIPCrawler) Crawl(num int) ([]*model.GameItem, error) {
|
||||
count := 0
|
||||
c.logger.Info("Starting SteamRIP crawl", zap.Int("limit", num))
|
||||
|
||||
// Fetch the game list page
|
||||
resp, err := utils.Request().Get(constant.SteamRIPGameListURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch game list", zap.String("URL", constant.SteamRIPGameListURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch game list: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse game list HTML document", zap.String("URL", constant.SteamRIPGameListURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse game list HTML document: %w", err)
|
||||
}
|
||||
|
||||
var items []*model.GameItem
|
||||
var urls []string
|
||||
var updateFlags []string // title
|
||||
var updateFlags []string
|
||||
|
||||
// Extract game URLs
|
||||
doc.Find(".az-list-item>a").Each(func(i int, s *goquery.Selection) {
|
||||
u, exist := s.Attr("href")
|
||||
if !exist {
|
||||
u, exists := s.Attr("href")
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
urls = append(urls, fmt.Sprintf("%s%s", constant.SteamRIPBaseURL, u))
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Text())))
|
||||
})
|
||||
|
||||
// Crawl games
|
||||
count := 0
|
||||
for i, u := range urls {
|
||||
if count == num {
|
||||
if num > 0 && count == num {
|
||||
break
|
||||
}
|
||||
if db.IsSteamRIPCrawled(updateFlags[i]) {
|
||||
c.logger.Info("Skipping already crawled game", zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
|
||||
c.logger.Info("Crawling game", zap.String("URL", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl game", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
item.UpdateFlag = updateFlags[i]
|
||||
if err := db.SaveGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to save item", zap.Error(err))
|
||||
c.logger.Warn("Failed to save game item", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
items = append(items, item)
|
||||
count++
|
||||
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
c.logger.Info("SteamRIP crawl completed", zap.Int("gamesCrawled", len(items)))
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// CrawlAll crawls all games from the SteamRIP game list.
|
||||
func (c *SteamRIPCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting full crawl of SteamRIP")
|
||||
return c.Crawl(-1)
|
||||
}
|
||||
|
||||
// SteamRIPFormatter formats the game name by removing unnecessary text.
|
||||
func SteamRIPFormatter(name string) string {
|
||||
name = regexp.MustCompile(`\([^\)]+\)`).ReplaceAllString(name, "")
|
||||
name = strings.Replace(name, "Free Download", "", -1)
|
||||
|
136
crawler/xatab.go
136
crawler/xatab.go
@ -32,145 +32,214 @@ func (c *XatabCrawler) Name() string {
|
||||
return "XatabCrawler"
|
||||
}
|
||||
|
||||
// Crawl crawls a single page of the Xatab website.
|
||||
func (c *XatabCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
requestURL := fmt.Sprintf("%s/page/%v", constant.XatabBaseURL, page)
|
||||
c.logger.Info("Fetching page", zap.String("URL", requestURL))
|
||||
|
||||
// Fetch the page content
|
||||
resp, err := utils.Request().Get(requestURL)
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to fetch", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch page", zap.String("URL", requestURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to parse HTML", zap.Error(err))
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse HTML", zap.String("URL", requestURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse HTML for page %d: %w", page, err)
|
||||
}
|
||||
|
||||
var urls []string
|
||||
var updateFlags []string // title
|
||||
var updateFlags []string
|
||||
|
||||
// Extract game URLs and titles
|
||||
doc.Find(".entry").Each(func(i int, s *goquery.Selection) {
|
||||
u, exist := s.Find(".entry__title.h2 a").Attr("href")
|
||||
if !exist {
|
||||
u, exists := s.Find(".entry__title.h2 a").Attr("href")
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
urls = append(urls, u)
|
||||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Find(".entry__title.h2 a").Text())))
|
||||
})
|
||||
|
||||
// Process each game URL
|
||||
var res []*model.GameItem
|
||||
for i, u := range urls {
|
||||
if db.IsXatabCrawled(updateFlags[i]) {
|
||||
c.logger.Info("Skipping already crawled game", zap.String("URL", u))
|
||||
continue
|
||||
}
|
||||
c.logger.Info("Crawling", zap.String("URL", u))
|
||||
|
||||
c.logger.Info("Crawling game", zap.String("URL", u))
|
||||
item, err := c.CrawlByUrl(u)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to crawl game", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
item.UpdateFlag = updateFlags[i]
|
||||
err = db.SaveGameItem(item)
|
||||
if err != nil {
|
||||
c.logger.Warn("Failed to save", zap.Error(err))
|
||||
if err := db.SaveGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to save game item", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, item)
|
||||
|
||||
if err := OrganizeGameItem(item); err != nil {
|
||||
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
|
||||
c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
c.logger.Info("Crawled page successfully", zap.Int("gamesCrawled", len(res)), zap.Int("page", page))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// CrawlByUrl crawls a single game page from Xatab by URL.
|
||||
func (c *XatabCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
c.logger.Info("Crawling game details", zap.String("URL", URL))
|
||||
|
||||
// Fetch the game page
|
||||
resp, err := utils.Request().Get(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch game page", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch game page %s: %w", URL, err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to parse game HTML", zap.String("URL", URL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to parse game HTML for URL %s: %w", URL, err)
|
||||
}
|
||||
|
||||
// Retrieve or create game item
|
||||
item, err := db.GetGameItemByUrl(URL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Warn("Failed to fetch game item from database, creating new", zap.String("URL", URL), zap.Error(err))
|
||||
item = &model.GameItem{}
|
||||
}
|
||||
|
||||
item.Url = URL
|
||||
item.RawName = doc.Find(".inner-entry__title").First().Text()
|
||||
item.RawName = strings.TrimSpace(doc.Find(".inner-entry__title").First().Text())
|
||||
if item.RawName == "" {
|
||||
c.logger.Warn("Game title not found", zap.String("URL", URL))
|
||||
return nil, errors.New("game title not found")
|
||||
}
|
||||
item.Name = XatabFormatter(item.RawName)
|
||||
item.Author = "Xatab"
|
||||
item.Platform = "windows"
|
||||
|
||||
// Extract download URL
|
||||
downloadURL := doc.Find("#download>a").First().AttrOr("href", "")
|
||||
if downloadURL == "" {
|
||||
c.logger.Warn("Download URL not found", zap.String("URL", URL))
|
||||
return nil, errors.New("failed to find download URL")
|
||||
}
|
||||
|
||||
// Fetch torrent file and convert to magnet link
|
||||
resp, err = utils.Request().SetHeaders(map[string]string{"Referer": URL}).Get(downloadURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to fetch download link", zap.String("URL", downloadURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to fetch download link %s: %w", downloadURL, err)
|
||||
}
|
||||
|
||||
magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to convert torrent to magnet", zap.String("URL", downloadURL), zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to convert torrent to magnet: %w", err)
|
||||
}
|
||||
|
||||
item.Size = size
|
||||
item.DownloadLinks = []string{magnet}
|
||||
|
||||
c.logger.Info("Crawled game details successfully", zap.String("Name", item.Name), zap.String("URL", URL))
|
||||
return item, nil
|
||||
}
|
||||
|
||||
// CrawlMulti crawls multiple pages from Xatab.
|
||||
func (c *XatabCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting multi-page crawl", zap.Ints("pages", pages))
|
||||
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
|
||||
var res []*model.GameItem
|
||||
for _, page := range pages {
|
||||
if page > totalPageNum {
|
||||
c.logger.Warn("Skipping page out of range", zap.Int("page", page), zap.Int("totalPages", totalPageNum))
|
||||
continue
|
||||
}
|
||||
|
||||
items, err := c.Crawl(page)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Warn("Failed to crawl page", zap.Int("page", page), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, items...)
|
||||
}
|
||||
|
||||
c.logger.Info("Multi-page crawl completed", zap.Int("gamesCrawled", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// CrawlAll crawls all pages from Xatab.
|
||||
func (c *XatabCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
c.logger.Info("Starting full crawl of Xatab")
|
||||
|
||||
totalPageNum, err := c.GetTotalPageNum()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Error("Failed to get total page number", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get total page number: %w", err)
|
||||
}
|
||||
|
||||
var res []*model.GameItem
|
||||
for i := 1; i <= totalPageNum; i++ {
|
||||
items, err := c.Crawl(i)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
c.logger.Warn("Failed to crawl page", zap.Int("page", i), zap.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
res = append(res, items...)
|
||||
}
|
||||
|
||||
c.logger.Info("Full crawl completed", zap.Int("gamesCrawled", len(res)))
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// GetTotalPageNum retrieves the total number of pages from Xatab.
|
||||
func (c *XatabCrawler) GetTotalPageNum() (int, error) {
|
||||
c.logger.Info("Fetching total page number")
|
||||
|
||||
resp, err := utils.Request().Get(constant.XatabBaseURL)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to fetch base URL", zap.String("URL", constant.XatabBaseURL), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to fetch base URL: %w", err)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to parse base HTML", zap.String("URL", constant.XatabBaseURL), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse base HTML: %w", err)
|
||||
}
|
||||
|
||||
pageStr := doc.Find(".pagination>a").Last().Text()
|
||||
totalPageNum, err := strconv.Atoi(pageStr)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
c.logger.Error("Failed to parse total page number", zap.String("pageStr", pageStr), zap.Error(err))
|
||||
return 0, fmt.Errorf("failed to parse total page number: %w", err)
|
||||
}
|
||||
|
||||
c.logger.Info("Fetched total page number", zap.Int("totalPages", totalPageNum))
|
||||
return totalPageNum, nil
|
||||
}
|
||||
|
||||
var xatabRegexps = []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?i)\sPC$`),
|
||||
}
|
||||
|
||||
// XatabFormatter formats the game name by removing unnecessary text.
|
||||
func XatabFormatter(name string) string {
|
||||
reg1 := regexp.MustCompile(`(?i)v(er)?\s?(\.)?\d+(\.\d+)*`)
|
||||
if index := reg1.FindIndex([]byte(name)); index != nil {
|
||||
@ -189,10 +258,13 @@ func XatabFormatter(name string) string {
|
||||
name = name[:index]
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
|
||||
// Remove specific patterns
|
||||
for _, re := range xatabRegexps {
|
||||
name = re.ReplaceAllString(name, "")
|
||||
}
|
||||
|
||||
// Handle names separated by "/"
|
||||
if index := strings.Index(name, "/"); index != -1 {
|
||||
names := strings.Split(name, "/")
|
||||
longestLength := 0
|
||||
@ -208,3 +280,7 @@ func XatabFormatter(name string) string {
|
||||
|
||||
return strings.TrimSpace(name)
|
||||
}
|
||||
|
||||
// xatabRegexps lists the patterns XatabFormatter deletes from a game name.
// The only entry matches a trailing " PC" (case-insensitive).
var xatabRegexps = []*regexp.Regexp{
	regexp.MustCompile(`(?i)\sPC$`),
}
|
||||
|
10
db/game.go
10
db/game.go
@ -396,22 +396,22 @@ func GetGameInfosByPlatformIDs(platform string, ids []int) ([]*model.GameInfo, e
|
||||
return games, nil
|
||||
}
|
||||
|
||||
func HasGameItemOrganized(id primitive.ObjectID) (bool, []*model.GameInfo) {
|
||||
func HasGameItemOrganized(id primitive.ObjectID) bool {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
filter := bson.M{"games": id}
|
||||
var res []*model.GameInfo
|
||||
cursor, err := GameInfoCollection.Find(ctx, filter)
|
||||
if err != nil {
|
||||
return false, nil
|
||||
return false
|
||||
}
|
||||
if err = cursor.All(ctx, &res); err != nil {
|
||||
return false, nil
|
||||
return false
|
||||
}
|
||||
if len(res) == 0 {
|
||||
return false, nil
|
||||
return false
|
||||
}
|
||||
return true, res
|
||||
return true
|
||||
}
|
||||
|
||||
func GetUnorganizedGameItems(num int) ([]*model.GameItem, error) {
|
||||
|
@ -12,16 +12,14 @@ import (
|
||||
)
|
||||
|
||||
var Logger *zap.Logger
|
||||
var ConsoleLogger *zap.Logger
|
||||
var FileLogger *zap.Logger
|
||||
var TaskLogger *zap.Logger
|
||||
|
||||
func init() {
|
||||
fileCore, consoleCore, combinedCore, taskCore := buildZapCore(getZapLogLevel(config.Config.LogLevel))
|
||||
FileLogger = zap.New(fileCore, zap.AddCaller())
|
||||
ConsoleLogger = zap.New(consoleCore, zap.AddCaller())
|
||||
_, _, combinedCore, taskCore := buildZapCore(getZapLogLevel(config.Config.LogLevel))
|
||||
Logger = zap.New(combinedCore, zap.AddCaller())
|
||||
TaskLogger = zap.New(taskCore, zap.AddCaller())
|
||||
|
||||
zap.ReplaceGlobals(Logger)
|
||||
}
|
||||
|
||||
func buildZapCore(logLevel zapcore.Level) (fileCore zapcore.Core, consoleCore zapcore.Core, combinedCore zapcore.Core, taskCore zapcore.Core) {
|
||||
|
@ -24,7 +24,6 @@ type HealthCheckResponse struct {
|
||||
GameItem int64 `json:"game_num"`
|
||||
GameInfo int64 `json:"game_info_num"`
|
||||
Unorganized int64 `json:"unorganized_game_num"`
|
||||
MegaAvaliable bool `json:"mega_avaliable"`
|
||||
}
|
||||
|
||||
// HealthCheckHandler performs a health check of the service.
|
||||
@ -57,6 +56,5 @@ func HealthCheckHandler(c *gin.Context) {
|
||||
GameItem: downloadCount,
|
||||
GameInfo: infoCount,
|
||||
Unorganized: unorganizedCount,
|
||||
MegaAvaliable: config.Config.MegaAvaliable,
|
||||
})
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user