diff --git a/cmd/supplement.go b/cmd/supplement.go
index 54e804d..5d6e99c 100644
--- a/cmd/supplement.go
+++ b/cmd/supplement.go
@@ -3,6 +3,7 @@ package cmd
 
 import (
 	"game-crawler/crawler"
 	"game-crawler/log"
+	"go.uber.org/zap"
 
 	"github.com/spf13/cobra"
@@ -13,7 +14,7 @@ var supplementCmd = &cobra.Command{
 	Long:  "Supplement platform id to game info",
 	Short: "Supplement platform id to game info",
 	Run: func(cmd *cobra.Command, args []string) {
-		err := crawler.SupplementPlatformIDToGameInfo(log.Logger)
+		err := crawler.SupplementPlatformIDToGameInfo()
 		if err != nil {
 			log.Logger.Error("Error supplementing platform id to game info", zap.Error(err))
 		}
diff --git a/config/config.go b/config/config.go
index de1687a..c1f2c7d 100644
--- a/config/config.go
+++ b/config/config.go
@@ -1,10 +1,8 @@
 package config
 
 import (
-	"bytes"
 	"encoding/json"
 	"os"
-	"os/exec"
 	"reflect"
 	"strconv"
 	"strings"
@@ -18,9 +16,14 @@ type config struct {
 	Redis              redis              `json:"redis"`
 	OnlineFix          onlinefix          `json:"online_fix"`
 	Twitch             twitch             `json:"twitch"`
+	Rutracker          rutracker          `json:"rutracker"`
 	Webhooks           webhooks           `json:"webhooks"`
 	CFClearanceScraper cfClearanceScraper `json:"cf_clearance_scraper"`
-	MegaAvaliable      bool
+}
+
+type rutracker struct {
+	User     string `env:"RUTRACKER_USER" json:"user"`
+	Password string `env:"RUTRACKER_PASSWORD" json:"password"`
 }
 
 type cfClearanceScraper struct {
@@ -78,7 +81,6 @@ func init() {
 			User:     "root",
 			Password: "password",
 		},
-		MegaAvaliable: TestMega(),
 		Server: server{
 			AutoCrawlCron: "0 */3 * * *",
 		},
@@ -147,11 +149,3 @@ func loadEnvVariables(cfg interface{}) {
 		}
 	}
 }
-
-func TestMega() bool {
-	cmd := exec.Command("mega-get", "--help")
-	var out bytes.Buffer
-	cmd.Stdout = &out
-	err := cmd.Run()
-	return err == nil
-}
diff --git a/constant/url.go b/constant/url.go
index 4a6cc2d..13e0aad 100644
--- a/constant/url.go
+++ b/constant/url.go
@@ -31,4 +31,8 @@ const (
 	SteamRIPGameListURL = "https://steamrip.com/games-list-page/"
 	RepackInfoURL       = "https://repack.info/page/%v/"
 	GnarlyURL           = "https://rentry.org/gnarly_repacks"
+	RutrackerTopicURL   = "https://rutracker.org/forum/%s"
+	RutrackerURL        = "https://rutracker.org/forum/index.php"
+	RutrackerLoginURL   = "https://rutracker.org/forum/login.php"
+	RutrackerAuthorURL  = "https://rutracker.org/forum/tracker.php?rid=%s&start=%v"
 )
diff --git a/crawler/1337x.go b/crawler/1337x.go
index a9f338c..c6f0fda 100644
--- a/crawler/1337x.go
+++ b/crawler/1337x.go
@@ -17,16 +17,16 @@ import (
 	"go.uber.org/zap"
 )
 
-type Formatter func(string) string
+type FormatterFunc func(string) string
 
 type s1337xCrawler struct {
 	source    string
 	platform  string
-	formatter Formatter
+	formatter FormatterFunc
 	logger    *zap.Logger
 }
 
-func New1337xCrawler(source string, platform string, formatter Formatter, logger *zap.Logger) *s1337xCrawler {
+func New1337xCrawler(source string, platform string, formatter FormatterFunc, logger *zap.Logger) *s1337xCrawler {
 	return &s1337xCrawler{
 		source:    source,
 		formatter: formatter,
@@ -36,69 +36,101 @@ func New1337xCrawler(source string, platform string, formatter FormatterFunc, logger
 }
 
 func (c *s1337xCrawler) Crawl(page int) ([]*model.GameItem, error) {
-	var doc *goquery.Document
+	c.logger.Info("Starting Crawl", zap.Int("Page", page), zap.String("Source", c.source))
 	requestUrl := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, page)
 	resp, err := utils.Request().Get(requestUrl)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch page", zap.String("URL", requestUrl), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch page %d for source %s: %w", page, c.source, err)
 	}
-	doc, err = goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
+
+	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to parse HTML document", zap.String("URL", requestUrl), zap.Error(err))
+		return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
 	}
+
 	trSelection := doc.Find("tbody>tr")
 	var urls []string
 	trSelection.Each(func(i int, trNode *goquery.Selection) {
 		nameSelection := trNode.Find(".name").First()
 		if aNode := nameSelection.Find("a").Eq(1); aNode.Length() > 0 {
-			url, _ := aNode.Attr("href")
-			urls = append(urls, url)
+			url, exists := aNode.Attr("href")
+			if exists {
+				urls = append(urls, url)
+			} else {
+				c.logger.Warn("Failed to find URL in row", zap.Int("RowIndex", i))
+			}
 		}
 	})
+
 	var res []*model.GameItem
 	for _, u := range urls {
-		u = fmt.Sprintf("%s%s", constant.C1337xBaseURL, u)
-		if db.IsGameCrawledByURL(u) {
+		fullURL := fmt.Sprintf("%s%s", constant.C1337xBaseURL, u)
+		if db.IsGameCrawledByURL(fullURL) {
+			c.logger.Info("Skipping already crawled URL", zap.String("URL", fullURL))
 			continue
 		}
-		c.logger.Info("Crawling", zap.String("URL", u))
-		item, err := c.CrawlByUrl(u)
+
+		c.logger.Info("Crawling URL", zap.String("URL", fullURL))
+		item, err := c.CrawlByUrl(fullURL)
 		if err != nil {
-			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to crawl URL", zap.String("URL", fullURL), zap.Error(err))
 			continue
 		}
+
 		err = db.SaveGameItem(item)
 		if err != nil {
-			c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to save game item to database", zap.String("URL", fullURL), zap.Error(err))
 			continue
 		}
+
 		res = append(res, item)
+
 		if err := OrganizeGameItem(item); err != nil {
-			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to organize game item", zap.String("URL", fullURL), zap.Error(err))
 			continue
 		}
 	}
+
+	c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
 	return res, nil
 }
 
 func (c *s1337xCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
+	c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
 	resp, err := utils.Request().Get(URL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
 	}
-	var item = &model.GameItem{}
-	item.Url = URL
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
 	}
+
+	item := &model.GameItem{
+		Url: URL,
+	}
+
 	selection := doc.Find(".torrent-detail-page ul.list>li")
 	info := make(map[string]string)
 	selection.Each(func(i int, item *goquery.Selection) {
-		info[strings.TrimSpace(item.Find("strong").Text())] = strings.TrimSpace(item.Find("span").Text())
+		key := strings.TrimSpace(item.Find("strong").Text())
+		value := strings.TrimSpace(item.Find("span").Text())
+		info[key] = value
+		c.logger.Debug("Extracted info", zap.String("Key", key), zap.String("Value", value))
 	})
+
 	magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
 	magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body()))
+	if len(magnetRegexRes) == 0 {
+		c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to find magnet link on URL %s", URL)
+	}
+
 	item.Size = info["Total size"]
 	item.RawName = doc.Find("title").Text()
 	item.RawName = strings.Replace(item.RawName, "Download ", "", 1)
@@ -107,63 +139,88 @@ func (c *s1337xCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
 	item.DownloadLinks = []string{magnetRegexRes[0]}
 	item.Author = strings.Replace(c.source, "-torrents", "", -1)
 	item.Platform = c.platform
+
+	c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
 	return item, nil
 }
 
 func (c *s1337xCrawler) CrawlMulti(pages []int) (res []*model.GameItem, err error) {
-	var items []*model.GameItem
+	c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages), zap.String("Source", c.source))
 	totalPageNum, err := c.GetTotalPageNum()
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to get total page number", zap.Error(err))
+		return nil, fmt.Errorf("failed to get total page number: %w", err)
 	}
+
 	for _, page := range pages {
 		if page > totalPageNum {
+			c.logger.Warn("Page exceeds total page number", zap.Int("Page", page), zap.Int("TotalPages", totalPageNum))
 			continue
 		}
-		items, err = c.Crawl(page)
-		res = append(res, items...)
+
+		items, err := c.Crawl(page)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to crawl page", zap.Int("Page", page), zap.Error(err))
+			continue
 		}
+		res = append(res, items...)
 	}
+
+	c.logger.Info("Finished CrawlMulti", zap.Int("TotalItems", len(res)))
 	return res, nil
 }
 
 func (c *s1337xCrawler) CrawlAll() (res []*model.GameItem, err error) {
+	c.logger.Info("Starting CrawlAll", zap.String("Source", c.source))
 	totalPageNum, err := c.GetTotalPageNum()
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to get total page number", zap.Error(err))
+		return nil, fmt.Errorf("failed to get total page number: %w", err)
 	}
-	var items []*model.GameItem
+
 	for i := 1; i <= totalPageNum; i++ {
-		items, err = c.Crawl(i)
-		res = append(res, items...)
+		items, err := c.Crawl(i)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to crawl page", zap.Int("Page", i), zap.Error(err))
+			continue
 		}
+		res = append(res, items...)
 	}
+
+	c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
 	return res, nil
 }
 
 func (c *s1337xCrawler) GetTotalPageNum() (int, error) {
-	var doc *goquery.Document
-
+	c.logger.Info("Fetching total page number", zap.String("Source", c.source))
 	requestUrl := fmt.Sprintf("%s/%s/%d/", constant.C1337xBaseURL, c.source, 1)
 	resp, err := utils.Request().Get(requestUrl)
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to fetch first page for total page number", zap.String("URL", requestUrl), zap.Error(err))
+		return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
 	}
-	doc, _ = goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
+
+	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
+	if err != nil {
+		c.logger.Error("Failed to parse HTML document for total page number", zap.String("URL", requestUrl), zap.Error(err))
+		return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
+	}
+
 	selection := doc.Find(".last")
-	pageStr, exist := selection.Find("a").Attr("href")
-	if !exist {
-		return 0, errors.New("total page num not found")
+	pageStr, exists := selection.Find("a").Attr("href")
+	if !exists {
+		c.logger.Error("Failed to find total page number in pagination", zap.String("URL", requestUrl))
+		return 0, errors.New("total page number not found in pagination")
 	}
+
 	pageStr = strings.ReplaceAll(pageStr, c.source, "")
 	pageStr = strings.ReplaceAll(pageStr, "/", "")
 	totalPageNum, err := strconv.Atoi(pageStr)
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
+		return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
 	}
+
+	c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
 	return totalPageNum, nil
 }
diff --git a/crawler/chovka.go b/crawler/chovka.go
index 1960567..e93ea7f 100644
--- a/crawler/chovka.go
+++ b/crawler/chovka.go
@@ -3,7 +3,6 @@ package crawler
 import (
 	"bytes"
 	"encoding/base64"
-	"errors"
 	"fmt"
 	"strconv"
 	"strings"
@@ -32,142 +31,181 @@ func (c *ChovkaCrawler) Name() string {
 }
 
 func (c *ChovkaCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
+	c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
 	resp, err := utils.Request().Get(URL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
 	}
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to parse HTML document from URL %s: %w", URL, err)
 	}
+
 	item, err := db.GetGameItemByUrl(URL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
 	}
+
 	item.Url = URL
 	item.RawName = doc.Find(".inner-entry__title").First().Text()
 	item.Name = ChovkaFormatter(item.RawName)
 	item.Author = "Chovka"
 	item.Platform = "windows"
+
 	downloadURL := doc.Find(".download-torrent").AttrOr("href", "")
 	if downloadURL == "" {
-		return nil, errors.New("failed to find download URL")
+		c.logger.Error("Download URL not found", zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to find download URL on page %s", URL)
 	}
+
 	resp, err = utils.Request().SetHeader("Referer", URL).Get(downloadURL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch download URL", zap.String("downloadURL", downloadURL), zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch download URL %s for page %s: %w", downloadURL, URL, err)
 	}
+
 	magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to convert torrent to magnet", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to convert torrent to magnet for URL %s: %w", URL, err)
 	}
+
 	item.Size = size
 	item.DownloadLinks = []string{magnet}
+	c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
 	return item, nil
 }
 
 func (c *ChovkaCrawler) Crawl(page int) ([]*model.GameItem, error) {
+	c.logger.Info("Starting Crawl", zap.Int("Page", page))
 	resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, page))
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch page", zap.Int("Page", page), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
 	}
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to parse HTML document", zap.Int("Page", page), zap.Error(err))
+		return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
 	}
+
 	var urls []string
 	var updateFlags []string
 	doc.Find(".entry").Each(func(i int, s *goquery.Selection) {
 		u, exist := s.Find(".entry__title.h2 a").Attr("href")
 		if !exist {
+			c.logger.Warn("Entry does not contain a valid URL", zap.Int("Index", i))
 			return
 		}
 		urls = append(urls, u)
 		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Find(".entry__title.h2 a").Text())))
 	})
+
 	var res []*model.GameItem
 	for i, u := range urls {
 		if db.IsChovkaCrawled(updateFlags[i]) {
+			c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
 			continue
 		}
-		c.logger.Info("Crawling", zap.String("URL", u))
+
+		c.logger.Info("Crawling URL", zap.String("URL", u))
 		item, err := c.CrawlByUrl(u)
 		if err != nil {
-			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
 			continue
 		}
+
 		item.UpdateFlag = updateFlags[i]
 		if err := db.SaveGameItem(item); err != nil {
-			c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
 			continue
 		}
+
 		res = append(res, item)
 		if err := OrganizeGameItem(item); err != nil {
-			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
 			continue
 		}
 	}
+
+	c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
 	return res, nil
 }
 
 func (c *ChovkaCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
+	c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages))
 	var res []*model.GameItem
 	for _, page := range pages {
 		items, err := c.Crawl(page)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to crawl multiple pages", zap.Int("Page", page), zap.Error(err))
+			return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
 		}
 		res = append(res, items...)
 	}
+	c.logger.Info("Finished CrawlMulti", zap.Int("TotalPages", len(pages)), zap.Int("TotalItems", len(res)))
 	return res, nil
 }
 
 func (c *ChovkaCrawler) CrawlAll() ([]*model.GameItem, error) {
+	c.logger.Info("Starting CrawlAll")
 	totalPageNum, err := c.GetTotalPageNum()
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to get total page number", zap.Error(err))
+		return nil, fmt.Errorf("failed to get total page number: %w", err)
 	}
+
 	var res []*model.GameItem
 	for i := 1; i <= totalPageNum; i++ {
 		items, err := c.Crawl(i)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to crawl all pages", zap.Int("Page", i), zap.Error(err))
+			return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
 		}
 		res = append(res, items...)
 	}
+
+	c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
 	return res, nil
 }
 
 func (c *ChovkaCrawler) GetTotalPageNum() (int, error) {
+	c.logger.Info("Fetching total page number")
 	resp, err := utils.Request().Get(fmt.Sprintf(constant.RepackInfoURL, 1))
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to fetch first page for total page number", zap.Error(err))
+		return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
 	}
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to parse HTML document for total page number", zap.Error(err))
+		return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
 	}
+
 	pageStr := doc.Find(".pagination>a").Last().Text()
 	totalPageNum, err := strconv.Atoi(pageStr)
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
+		return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
 	}
+
+	c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
 	return totalPageNum, nil
 }
 
 func ChovkaFormatter(name string) string {
-	idx := strings.Index(name, "| RePack")
-	if idx != -1 {
-		name = name[:idx]
-	}
-	idx = strings.Index(name, "| GOG")
-	if idx != -1 {
-		name = name[:idx]
-	}
-	idx = strings.Index(name, "| Portable")
-	if idx != -1 {
-		name = name[:idx]
+	cutoffs := []string{"| RePack", "| GOG", "| Portable"}
+	for _, cutoff := range cutoffs {
+		if idx := strings.Index(name, cutoff); idx != -1 {
+			name = name[:idx]
+		}
 	}
 	return strings.TrimSpace(name)
 }
diff --git a/crawler/crawler.go b/crawler/crawler.go
index f6d8846..7d3aa6b 100644
--- a/crawler/crawler.go
+++ b/crawler/crawler.go
@@ -1,6 +1,7 @@
 package crawler
 
 import (
+	"game-crawler/config"
 	"game-crawler/model"
 
 	"go.uber.org/zap"
@@ -28,13 +29,14 @@ func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
 		"fitgirl":     NewFitGirlCrawler(logger),
 		"dodi":        NewDODICrawler(logger),
 		"kaoskrew":    NewKaOsKrewCrawler(logger),
-		"freegog":     NewFreeGOGCrawler(logger),
+		"freegog":     NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
 		"xatab":       NewXatabCrawler(logger),
-		"onlinefix":   NewOnlineFixCrawler(logger),
+		"onlinefix":   NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
 		"steamrip":    NewSteamRIPCrawler(logger),
 		"chovka":      NewChovkaCrawler(logger),
-		"goggames":    NewGOGGamesCrawler(logger),
+		"goggames":    NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
 		"johncena141": NewJohncena141Crawler(logger),
+		"omggods":     NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
 		// "gnarly":      NewGnarlyCrawler(logger),
 	}
 	return ret
diff --git a/crawler/fitgirl.go b/crawler/fitgirl.go
index 58ed39e..2698020 100644
--- a/crawler/fitgirl.go
+++ b/crawler/fitgirl.go
@@ -3,7 +3,6 @@ package crawler
 import (
 	"bytes"
 	"encoding/base64"
-	"errors"
 	"fmt"
 	"regexp"
 	"strconv"
@@ -33,37 +32,51 @@ func (c *FitGirlCrawler) Name() string {
 }
 
 func (c *FitGirlCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
+	c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
 	resp, err := utils.Request().Get(URL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err)
 	}
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err)
 	}
+
 	titleElem := doc.Find("h3").First().Find("strong")
 	if titleElem.Length() == 0 {
-		return nil, errors.New("failed to find title")
+		c.logger.Error("Failed to find title", zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to find title on page %s", URL)
 	}
+
 	rawTitle := titleElem.Text()
 	titleElem.Children().Remove()
 	title := strings.TrimSpace(titleElem.Text())
+
 	sizeRegex := regexp.MustCompile(`Repack Size: <strong>(.*?)</strong>`)
 	sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body()))
 	if len(sizeRegexRes) == 0 {
-		return nil, errors.New("failed to find size")
+		c.logger.Error("Failed to find size", zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to find size information on page %s", URL)
 	}
 	size := sizeRegexRes[1]
+
 	magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
 	magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body()))
 	if len(magnetRegexRes) == 0 {
-		return nil, errors.New("failed to find magnet")
+		c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to find magnet link on page %s", URL)
 	}
 	magnet := magnetRegexRes[0]
+
 	item, err := db.GetGameItemByUrl(URL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
 	}
+
 	item.Name = strings.TrimSpace(title)
 	item.RawName = rawTitle
 	item.Url = URL
@@ -71,96 +84,130 @@ func (c *FitGirlCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
 	item.Author = "FitGirl"
 	item.DownloadLinks = []string{magnet}
 	item.Platform = "windows"
+
+	c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
 	return item, nil
 }
 
 func (c *FitGirlCrawler) Crawl(page int) ([]*model.GameItem, error) {
+	c.logger.Info("Starting Crawl", zap.Int("Page", page))
 	resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, page))
 	if err != nil {
-		c.logger.Error("Failed to fetch", zap.Error(err))
-		return nil, err
+		c.logger.Error("Failed to fetch page", zap.Int("Page", page), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
 	}
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		c.logger.Error("Failed to parse HTML", zap.Error(err))
-		return nil, err
+		c.logger.Error("Failed to parse HTML document", zap.Int("Page", page), zap.Error(err))
+		return nil, fmt.Errorf("failed to parse HTML document for page %d: %w", page, err)
 	}
+
 	var urls []string
-	var updateFlags []string //link+date
+	var updateFlags []string // link + date (encoded)
 	doc.Find("article").Each(func(i int, s *goquery.Selection) {
 		u, exist1 := s.Find(".entry-title>a").First().Attr("href")
 		d, exist2 := s.Find("time").First().Attr("datetime")
 		if exist1 && exist2 {
 			urls = append(urls, u)
 			updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", u, d))))
+		} else {
+			c.logger.Warn("Failed to extract URL or datetime", zap.Int("Index", i), zap.Bool("HasURL", exist1), zap.Bool("HasDate", exist2))
 		}
 	})
+
 	var res []*model.GameItem
 	for i, u := range urls {
 		if db.IsFitgirlCrawled(updateFlags[i]) {
+			c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
 			continue
 		}
-		c.logger.Info("Crawling", zap.String("URL", u))
+
+		c.logger.Info("Crawling URL", zap.String("URL", u))
 		item, err := c.CrawlByUrl(u)
 		if err != nil {
-			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
 			continue
 		}
+
 		item.UpdateFlag = updateFlags[i]
 		err = db.SaveGameItem(item)
 		if err != nil {
-			c.logger.Warn("Failed to save", zap.Error(err))
+			c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
 			continue
 		}
+
 		res = append(res, item)
+
 		if err := OrganizeGameItem(item); err != nil {
-			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
 			continue
 		}
 	}
+
+	c.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res)))
 	return res, nil
 }
 
 func (c *FitGirlCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
+	c.logger.Info("Starting CrawlMulti", zap.Ints("Pages", pages))
 	var res []*model.GameItem
 	for _, page := range pages {
		items, err := c.Crawl(page)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to crawl page", zap.Int("Page", page), zap.Error(err))
+			return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
 		}
 		res = append(res, items...)
 	}
+
+	c.logger.Info("Finished CrawlMulti", zap.Int("TotalPages", len(pages)), zap.Int("TotalItems", len(res)))
 	return res, nil
 }
 
 func (c *FitGirlCrawler) CrawlAll() ([]*model.GameItem, error) {
-	var res []*model.GameItem
+	c.logger.Info("Starting CrawlAll")
 	totalPageNum, err := c.GetTotalPageNum()
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to get total page number", zap.Error(err))
+		return nil, fmt.Errorf("failed to get total page number: %w", err)
 	}
+
+	var res []*model.GameItem
 	for i := 1; i <= totalPageNum; i++ {
 		items, err := c.Crawl(i)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to crawl page", zap.Int("Page", i), zap.Error(err))
+			return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
 		}
 		res = append(res, items...)
 	}
+
+	c.logger.Info("Finished CrawlAll", zap.Int("TotalPages", totalPageNum), zap.Int("TotalItems", len(res)))
 	return res, nil
 }
 
 func (c *FitGirlCrawler) GetTotalPageNum() (int, error) {
+	c.logger.Info("Fetching total page number")
 	resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, 1))
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to fetch first page for total page number", zap.Error(err))
+		return 0, fmt.Errorf("failed to fetch first page for total page number: %w", err)
 	}
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to parse HTML document", zap.Error(err))
+		return 0, fmt.Errorf("failed to parse HTML document for total page number: %w", err)
 	}
-	page, err := strconv.Atoi(doc.Find(".page-numbers.dots").First().Next().Text())
+
+	pageStr := doc.Find(".page-numbers.dots").First().Next().Text()
+	totalPageNum, err := strconv.Atoi(pageStr)
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to convert total page number to integer", zap.String("PageString", pageStr), zap.Error(err))
+		return 0, fmt.Errorf("failed to convert total page number '%s' to integer: %w", pageStr, err)
 	}
-	return page, nil
+
+	c.logger.Info("Successfully fetched total page number", zap.Int("TotalPages", totalPageNum))
+	return totalPageNum, nil
 }
diff --git a/crawler/freegog.go b/crawler/freegog.go
index 29dd583..c0162e9 100644
--- a/crawler/freegog.go
+++ b/crawler/freegog.go
@@ -4,7 +4,7 @@ import (
 	"bytes"
 	"encoding/base64"
 	"encoding/json"
-	"errors"
+	"fmt"
 	"html"
 	"net/http"
 	"regexp"
@@ -12,7 +12,6 @@ import (
 	"time"
 
 	"game-crawler/cache"
-	"game-crawler/config"
 	"game-crawler/constant"
 	"game-crawler/db"
 	"game-crawler/model"
@@ -23,31 +22,37 @@ import (
 )
 
 type FreeGOGCrawler struct {
-	logger *zap.Logger
+	cfClearanceUrl string
+	logger         *zap.Logger
 }
 
-func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler {
+func NewFreeGOGCrawler(cfClearanceUrl string, logger *zap.Logger) *FreeGOGCrawler {
 	return &FreeGOGCrawler{
-		logger: logger,
+		cfClearanceUrl: cfClearanceUrl,
+		logger:         logger,
 	}
 }
 
 func (c *FreeGOGCrawler) getSession() (*ccs.Session, error) {
+	c.logger.Info("Fetching session for FreeGOGCrawler")
+	cacheKey := "freegog_waf_session"
 	var session ccs.Session
-	var err error
-	if val, exist := cache.Get("freegog_waf_session"); exist {
+	if val, exist := cache.Get(cacheKey); exist {
 		err := json.Unmarshal([]byte(val), &session)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to unmarshal cached session", zap.Error(err))
+			return nil, fmt.Errorf("failed to unmarshal cached session: %w", err)
 		}
 	} else {
-		session, err = ccs.WAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
+		var err error
+		session, err = ccs.WAFSession(c.cfClearanceUrl, constant.FreeGOGListURL)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to create WAF session", zap.Error(err))
+			return nil, fmt.Errorf("failed to create WAF session: %w", err)
 		}
 		jsonBytes, err := json.Marshal(session)
 		if err == nil {
-			_ = cache.SetWithExpire("freegog_waf_session", jsonBytes, 1*time.Hour)
+			_ = cache.SetWithExpire(cacheKey, jsonBytes, 1*time.Hour)
 		}
 	}
 	return &session, nil
@@ -58,106 +63,144 @@ func (c *FreeGOGCrawler) Name() string {
 }
 
 func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
+	c.logger.Info("Starting Crawl", zap.Int("Num", num))
 	count := 0
 	session, err := c.getSession()
 	if err != nil {
-		c.logger.Error("Failed to create session", zap.Error(err))
-		return nil, err
+		return nil, fmt.Errorf("failed to get session: %w", err)
 	}
+
 	resp, err := ccs.RequestWithWAFSession(http.MethodGet, constant.FreeGOGListURL, *session, nil)
 	if err != nil {
-		c.logger.Error("Failed to fetch", zap.Error(err))
-		return nil, err
+		c.logger.Error("Failed to fetch FreeGOG list page", zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch FreeGOG list page: %w", err)
 	}
+
 	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp.Body)))
 	if err != nil {
-		c.logger.Error("Failed to parse HTML", zap.Error(err))
-		return nil, err
+		c.logger.Error("Failed to parse HTML document", zap.Error(err))
+		return nil, fmt.Errorf("failed to parse HTML document: %w", err)
 	}
 
 	var urls []string
-	var updateFlags []string //rawName+link
+	var updateFlags []string // RawName+Link
 	doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) {
-		urls = append(urls, s.AttrOr("href", ""))
-		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Text()+s.AttrOr("href", ""))))
+		url := s.AttrOr("href", "")
+		rawName := s.Text()
+		if url != "" && rawName != "" {
+			urls = append(urls, url)
+			updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(rawName+url)))
+		} else {
+			c.logger.Warn("Invalid URL or raw name found in item", zap.Int("Index", i), zap.String("URL", url), zap.String("RawName", rawName))
+		}
 	})
 
 	var res []*model.GameItem
 	for i, u := range urls {
 		if count == num {
+			c.logger.Info("Reached target number of items", zap.Int("Count", count))
 			break
 		}
 		if db.IsFreeGOGCrawled(updateFlags[i]) {
+			c.logger.Info("Skipping already crawled URL", zap.String("URL", u))
 			continue
 		}
-		c.logger.Info("Crawling", zap.String("URL", u))
+
+		c.logger.Info("Crawling URL", zap.String("URL", u))
 		item, err := c.CrawlByUrl(u)
 		if err != nil {
-			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to crawl URL", zap.String("URL", u), zap.Error(err))
 			continue
 		}
+
 		item.UpdateFlag = updateFlags[i]
 		err = db.SaveGameItem(item)
 		if err != nil {
-			c.logger.Warn("Failed to save", zap.Error(err))
+			c.logger.Warn("Failed to save game item to database", zap.String("URL", u), zap.Error(err))
 			continue
 		}
+
 		res = append(res, item)
 		count++
+
 		if err := OrganizeGameItem(item); err != nil {
-			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err))
 			continue
 		}
 	}
+
+	c.logger.Info("Finished Crawl", zap.Int("TotalItems", len(res)))
 	return res, nil
 }
 
 func (c *FreeGOGCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
+	c.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
 	session, err := c.getSession()
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to get session", zap.Error(err))
+		return nil, fmt.Errorf("failed to get session: %w", err)
 	}
+
 	resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch game page", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to fetch game page %s: %w", URL, err)
 	}
+
 	item, err := db.GetGameItemByUrl(URL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to retrieve game item from database", zap.String("URL", URL), zap.Error(err))
+		return nil, fmt.Errorf("failed to retrieve game item from database for URL %s: %w", URL, err)
 	}
+
 	item.Url = URL
+
+	// Extract raw title
 	rawTitleRegex := regexp.MustCompile(`(?i)<h1 class="entry-title">(.*?)</h1>`)
 	rawTitleRegexRes := rawTitleRegex.FindStringSubmatch(string(resp.Body))
-	rawName := ""
 	if len(rawTitleRegexRes) > 1 {
-		rawName = html.UnescapeString(rawTitleRegexRes[1])
+		rawName := html.UnescapeString(rawTitleRegexRes[1])
 		item.RawName = strings.Replace(rawName, "–", "-", -1)
 	} else {
-		return nil, err
+		c.logger.Error("Failed to find raw title", zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to find raw title on page %s", URL)
 	}
+
 	item.Name = FreeGOGFormatter(item.RawName)
+
+	// Extract size
 	sizeRegex := regexp.MustCompile(`(?i)>Size:\s?(.*?)<`)
 	sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body))
 	if len(sizeRegexRes) > 1 {
 		item.Size = sizeRegexRes[1]
+	} else {
+		c.logger.Warn("Failed to find game size", zap.String("URL", URL))
 	}
+
+	// Extract magnet link
 	magnetRegex := regexp.MustCompile(`<a class="download-btn" href="(.*?)"`)
 	magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body))
 	if len(magnetRegexRes) > 1 {
 		magnet, err := base64.StdEncoding.DecodeString(magnetRegexRes[1])
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to decode magnet link", zap.String("URL", URL), zap.Error(err))
+			return nil, fmt.Errorf("failed to decode magnet link on page %s: %w", URL, err)
 		}
 		item.DownloadLinks = []string{string(magnet)}
 	} else {
-		return nil, errors.New("failed to find magnet link")
+		c.logger.Error("Failed to find magnet link", zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to find magnet link on page %s", URL)
 	}
+
 	item.Author = "FreeGOG"
 	item.Platform = "windows"
+
+	c.logger.Info("Successfully crawled URL", zap.String("URL", URL))
 	return item, nil
 }
 
 func (c *FreeGOGCrawler) CrawlAll() ([]*model.GameItem, error) {
+	c.logger.Info("Starting CrawlAll")
 	return c.Crawl(-1)
 }
 
@@ -165,6 +208,7 @@ var freeGOGRegexps = []*regexp.Regexp{
 	regexp.MustCompile(`(?i)\(.*\)`),
 }
 
+// FreeGOGFormatter formats the raw game name into a clean title.
 func FreeGOGFormatter(name string) string {
 	for _, re := range freeGOGRegexps {
 		name = re.ReplaceAllString(name, "")
diff --git a/crawler/game.go b/crawler/game.go
index f44c41c..a429a64 100644
--- a/crawler/game.go
+++ b/crawler/game.go
@@ -16,6 +16,7 @@ import (
 	"go.mongodb.org/mongo-driver/mongo"
 )
 
+// GenerateGameInfo generates game info based on the platform and ID.
 func GenerateGameInfo(platform string, id int) (*model.GameInfo, error) {
 	switch platform {
 	case "steam":
@@ -23,45 +24,54 @@
 	case "igdb":
 		return GenerateIGDBGameInfo(id)
 	default:
-		return nil, errors.New("invalid ID type")
+		return nil, errors.New("invalid platform type")
 	}
 }
 
-// OrganizeGameItem Organize game item and save game info to database
+// OrganizeGameItem organizes the given game item and saves its associated game info to the database.
 func OrganizeGameItem(game *model.GameItem) error {
-	hasOriganized, _ := db.HasGameItemOrganized(game.ID)
-	if hasOriganized {
+	hasOrganized := db.HasGameItemOrganized(game.ID)
+	if hasOrganized {
 		return nil
 	}
 	item, err := OrganizeGameItemWithIGDB(game)
-	if err == nil {
-		if item.SteamID == 0 {
-			// get steam id from igdb
-			steamID, err := GetSteamIDByIGDBID(item.IGDBID)
-			if err == nil {
-				item.SteamID = steamID
-			}
-		}
-		err = db.SaveGameInfo(item)
-		if err != nil {
+	if err != nil {
+		return err
+	}
+
+	// Attempt to supplement SteamID if missing
+	if item.SteamID == 0 {
+		steamID, err := GetSteamIDByIGDBID(item.IGDBID)
+		if err == nil {
+			item.SteamID = steamID
+		} else {
 			return err
 		}
-		return nil
 	}
-	return err
+
+	// Save the organized game info to the database
+	if err := db.SaveGameInfo(item); err != nil {
+		return err
+	}
+	return nil
 }
 
-func AddGameInfoManually(gameID primitive.ObjectID, platform string, plateformID int) (*model.GameInfo, error) {
-	info, err := GenerateGameInfo(platform, plateformID)
+// AddGameInfoManually manually adds a game info entry to the database.
+func AddGameInfoManually(gameID primitive.ObjectID, platform string, platformID int) (*model.GameInfo, error) {
+	info, err := GenerateGameInfo(platform, platformID)
 	if err != nil {
 		return nil, err
 	}
-	info.GameIDs = append(info.GameIDs, gameID)
-	info.GameIDs = utils.Unique(info.GameIDs)
-	return info, db.SaveGameInfo(info)
+
+	info.GameIDs = utils.Unique(append(info.GameIDs, gameID))
+	if err := db.SaveGameInfo(info); err != nil {
+		return nil, err
+	}
+	return info, nil
 }
 
+// OrganizeGameItemManually organizes a game item manually based on the platform and platform ID.
 func OrganizeGameItemManually(gameID primitive.ObjectID, platform string, platformID int) (*model.GameInfo, error) {
 	info, err := db.GetGameInfoByPlatformID(platform, platformID)
 	if err != nil {
@@ -74,27 +84,30 @@ func OrganizeGameItemManually(gameID primitive.ObjectID, platform string, platfo
 			return nil, err
 		}
 	}
-	info.GameIDs = append(info.GameIDs, gameID)
-	info.GameIDs = utils.Unique(info.GameIDs)
-	err = db.SaveGameInfo(info)
-	if err != nil {
-		return nil, err
-	}
-	if platform == "igdb" {
+
+	info.GameIDs = utils.Unique(append(info.GameIDs, gameID))
+
+	// Supplement missing platform IDs
+	if platform == "igdb" && info.SteamID == 0 {
 		steamID, err := GetSteamIDByIGDBID(platformID)
 		if err == nil {
 			info.SteamID = steamID
 		}
 	}
-	if platform == "steam" {
+	if platform == "steam" && info.IGDBID == 0 {
 		igdbID, err := GetIGDBIDBySteamAppID(platformID)
 		if err == nil {
 			info.IGDBID = igdbID
 		}
 	}
+
+	if err := db.SaveGameInfo(info); err != nil {
+		return nil, err
+	}
 	return info, nil
 }
 
+// FormatName formats a raw game name into a clean and consistent format.
 func FormatName(name string) string {
 	name = regexp.MustCompile(`(?i)[\w’'-]+\s(Edition|Vision|Collection|Bundle|Pack|Deluxe)`).ReplaceAllString(name, " ")
 	name = regexp.MustCompile(`(?i)GOTY`).ReplaceAllString(name, "")
@@ -108,61 +121,85 @@ func FormatName(name string) string {
 	return name
 }
 
-func SupplementPlatformIDToGameInfo(logger *zap.Logger) error {
+// SupplementPlatformIDToGameInfo supplements missing platform IDs (SteamID or IGDBID) for all game info entries.
+func SupplementPlatformIDToGameInfo() error {
+	logger := zap.L()
+	logger.Info("Starting to supplement missing platform IDs")
 	infos, err := db.GetAllGameInfos()
 	if err != nil {
+		logger.Error("Failed to fetch game infos", zap.Error(err))
 		return err
 	}
+
 	for _, info := range infos {
 		changed := false
+
+		// Supplement SteamID using IGDBID
 		if info.IGDBID != 0 && info.SteamID == 0 {
 			steamID, err := GetSteamIDByIGDBID(info.IGDBID)
-			time.Sleep(time.Millisecond * 100)
-			if err != nil {
-				continue
+			time.Sleep(100 * time.Millisecond)
+			if err == nil {
+				info.SteamID = steamID
+				changed = true
+			} else {
+				logger.Warn("Failed to get SteamID from IGDB", zap.Int("IGDBID", info.IGDBID), zap.Error(err))
 			}
-			info.SteamID = steamID
-			changed = true
 		}
+
+		// Supplement IGDBID using SteamID
 		if info.SteamID != 0 && info.IGDBID == 0 {
 			igdbID, err := GetIGDBIDBySteamAppID(info.SteamID)
-			time.Sleep(time.Millisecond * 100)
-			if err != nil {
-				continue
+			time.Sleep(100 * time.Millisecond)
+			if err == nil {
+				info.IGDBID = igdbID
+				changed = true
+			} else {
+				logger.Warn("Failed to get IGDBID from SteamID", zap.Int("SteamID", info.SteamID), zap.Error(err))
 			}
-			info.IGDBID = igdbID
-			changed = true
 		}
+
 		if changed {
-			logger.Info("supp", zap.String("name", info.Name), zap.Int("igdb", info.IGDBID), zap.Int("steam", info.SteamID))
-			_ = db.SaveGameInfo(info)
+			logger.Info("Supplemented platform IDs", zap.String("Name", info.Name), zap.Int("IGDBID", info.IGDBID), zap.Int("SteamID", info.SteamID))
+			if err := db.SaveGameInfo(info); err != nil {
+				logger.Error("Failed to save updated game info", zap.String("Name", info.Name), zap.Error(err))
+			}
 		} else {
-			logger.Info("skip", zap.String("name", info.Name), zap.Int("igdb", info.IGDBID), zap.Int("steam", info.SteamID))
+			logger.Info("No changes needed", zap.String("Name", info.Name), zap.Int("IGDBID", info.IGDBID), zap.Int("SteamID", info.SteamID))
 		}
 	}
 	return nil
 }
 
+// UpdateGameInfo updates outdated game info entries and returns a channel to monitor updates.
 func UpdateGameInfo(num int) (chan *model.GameInfo, error) {
+	logger := zap.L()
+	logger.Info("Starting to update outdated game info", zap.Int("Num", num))
 	infos, err := db.GetOutdatedGameInfos(num)
 	if err != nil {
+		logger.Error("Failed to fetch outdated game infos", zap.Error(err))
 		return nil, err
 	}
+
 	updateChan := make(chan *model.GameInfo)
 
 	go func() {
+		defer close(updateChan)
 		for _, info := range infos {
 			if info.IGDBID != 0 {
 				newInfo, err := GenerateIGDBGameInfo(info.IGDBID)
 				if err != nil {
+					logger.Warn("Failed to generate IGDB game info", zap.Int("IGDBID", info.IGDBID), zap.Error(err))
 					continue
 				}
+
 				db.MergeGameInfo(info, newInfo)
-				err = db.SaveGameInfo(newInfo)
-				if err != nil {
+				if err := db.SaveGameInfo(newInfo); err != nil {
+					logger.Error("Failed to save updated game info", zap.String("Name", newInfo.Name), zap.Error(err))
 					continue
 				}
+
 				updateChan <- newInfo
+				logger.Info("Updated game info", zap.String("Name", newInfo.Name), zap.Int("IGDBID", newInfo.IGDBID))
 			}
 		}
 	}()
diff --git a/crawler/goggames.go b/crawler/goggames.go
index e73b81d..19f5161 100644
--- a/crawler/goggames.go
+++ b/crawler/goggames.go
@@ -8,7 +8,6 @@ import (
 	"strings"
 	"time"
 
-	"game-crawler/config"
 	"game-crawler/constant"
 	"game-crawler/db"
 	"game-crawler/model"
@@ -19,12 +18,14 @@ import (
 )
 
 type GOGGamesCrawler struct {
-	logger *zap.Logger
+	cfClearanceUrl string
+	logger         *zap.Logger
 }
 
-func NewGOGGamesCrawler(logger *zap.Logger) *GOGGamesCrawler {
+func NewGOGGamesCrawler(cfClearanceUrl string, logger *zap.Logger) *GOGGamesCrawler {
 	return &GOGGamesCrawler{
-		logger: logger,
+		cfClearanceUrl: cfClearanceUrl,
+		logger:         logger,
 	}
 }
 
@@ -34,29 +35,36 @@ func (c *GOGGamesCrawler) Name() string {
 
 func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
 	if !strings.HasPrefix(URL, "https://www.gog-games.to/game/") {
-		return nil, fmt.Errorf("invalid url")
+		err := fmt.Errorf("invalid URL: %s", URL)
+		c.logger.Error("Invalid URL", zap.Error(err))
+		return nil, err
 	}
-	_, slug := path.Split(URL)
 
+	_, slug := path.Split(URL)
 	apiUrl := fmt.Sprintf(constant.GOGGamesGameAPIURL, slug)
 
-	token, err := ccs.TurnstileToken(config.Config.CFClearanceScraper.Url, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
+	token, err := ccs.TurnstileToken(c.cfClearanceUrl, apiUrl, "0x4AAAAAAAfOlgvCKbOdW1zc")
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to get Turnstile token", zap.Error(err), zap.String("apiUrl", apiUrl))
+		return nil, fmt.Errorf("failed to get Turnstile token for URL %s: %w", apiUrl, err)
 	}
+
 	resp, err := utils.Request().SetHeader("cf-turnstile-response", token).Get(apiUrl)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch data from API", zap.Error(err), zap.String("apiUrl", apiUrl))
+		return nil, fmt.Errorf("failed to fetch API data for URL %s: %w", apiUrl, err)
 	}
+
 	data := gameResult{}
 	err = json.Unmarshal(resp.Body(), &data)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to unmarshal API response", zap.Error(err), zap.String("apiUrl", apiUrl))
+		return nil, fmt.Errorf("failed to parse API response for URL %s: %w", apiUrl, err)
 	}
 
 	name := data.Title
 
-	// find download links
+	// Find download links
 	fileHosters := []string{
 		"gofile",
 		"fileditch",
@@ -80,19 +88,28 @@ func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
 	}
 
 	if len(links) == 0 {
-		return nil, fmt.Errorf("no download link found")
+		err := fmt.Errorf("no download links found for URL %s", URL)
+		c.logger.Warn("No download links found", zap.Error(err))
+		return nil, err
 	}
 
+	// Calculate total size
 	size := uint64(0)
 	for _, file := range data.Files.Game {
-		s, _ := utils.SizeToBytes(file.Size)
+		s, parseErr := utils.SizeToBytes(file.Size)
+		if parseErr != nil {
+			c.logger.Warn("Failed to parse file size", zap.Error(parseErr), zap.String("fileSize", file.Size))
+		}
 		size += s
 	}
 
+	// Retrieve or create game item
 	item, err := db.GetGameItemByUrl(URL)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to retrieve game item from database", zap.Error(err), zap.String("URL", URL))
+		return nil, fmt.Errorf("failed to get game item for URL %s: %w", URL, err)
 	}
+
 	item.Name = name
 	item.RawName = name
 	item.DownloadLinks = links
@@ -100,44 +117,54 @@ func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
 	item.Size = utils.BytesToSize(size)
 	item.Author = "GOGGames"
 	item.Platform = "windows"
+
 	return item, nil
 }
 
 func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
 	resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, page))
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to fetch page", zap.Error(err), zap.Int("page", page))
+		return nil, fmt.Errorf("failed to fetch page %d: %w", page, err)
 	}
+
 	data := searchResult{}
 	err = json.Unmarshal(resp.Body(), &data)
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to parse page response", zap.Error(err), zap.Int("page", page))
+		return nil, fmt.Errorf("failed to parse page %d: %w", page, err)
 	}
+
 	urls := make([]string, 0)
-	var updateFlags []string //link+date
+	var updateFlags []string // link+date
 	for _, item := range data.Data {
 		urls = append(urls, fmt.Sprintf(constant.GOGGamesPageURL, item.Slug))
 		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%s%s", item.GogURL, item.LastUpdate))))
 	}
+
 	res := make([]*model.GameItem, 0)
 	for i, u := range urls {
 		if db.IsGameCrawled(updateFlags[i], "GOGGames") {
+			c.logger.Info("Game already crawled", zap.String("URL", u))
 			continue
 		}
-		c.logger.Info("Crawling", zap.String("URL", u))
+
+		c.logger.Info("Crawling game", zap.String("URL", u))
 		item, err := c.CrawlByUrl(u)
 		if err != nil {
-			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to crawl game", zap.Error(err), zap.String("URL", u))
 			continue
 		}
+
 		item.UpdateFlag = updateFlags[i]
 		if err := db.SaveGameItem(item); err != nil {
-			c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to save game item to database", zap.Error(err), zap.String("URL", u))
 			continue
 		}
+
 		res = append(res, item)
 		if err := OrganizeGameItem(item); err != nil {
-			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
+			c.logger.Warn("Failed to organize game item", zap.Error(err), zap.String("URL", u))
 			continue
 		}
 	}
@@ -149,7 +176,8 @@ func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
 	for _, page := range pages {
 		items, err := c.Crawl(page)
 		if err != nil {
-			return nil, err
+			c.logger.Error("Failed to crawl multiple pages", zap.Error(err), zap.Int("page", page))
+			return nil, fmt.Errorf("failed to crawl page %d: %w", page, err)
 		}
 		res = append(res, items...)
 	}
@@ -159,13 +187,17 @@ func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
 func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
 	totalPageNum, err := c.GetTotalPageNum()
 	if err != nil {
-		return nil, err
+		c.logger.Error("Failed to get total page number", zap.Error(err))
+		return nil, fmt.Errorf("failed to get total page number: %w", err)
 	}
+
 	var res []*model.GameItem
 	for i := 1; i <= totalPageNum; i++ {
+		c.logger.Info("Crawling page", zap.Int("page", i))
 		items, err := c.Crawl(i)
 		if err != nil {
-			return nil, err
+			c.logger.Warn("Failed to crawl page", zap.Error(err), zap.Int("page", i))
+			return nil, fmt.Errorf("failed to crawl page %d: %w", i, err)
 		}
 		res = append(res, items...)
 	}
@@ -175,13 +207,17 @@ func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
 func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
 	resp, err := utils.Request().Get(fmt.Sprintf(constant.GOGGamesURL, 1))
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to fetch first page", zap.Error(err))
+		return 0, fmt.Errorf("failed to fetch first page: %w", err)
 	}
+
 	data := searchResult{}
 	err = json.Unmarshal(resp.Body(), &data)
 	if err != nil {
-		return 0, err
+		c.logger.Error("Failed to parse first page response", zap.Error(err))
+		return 0, fmt.Errorf("failed to parse first page response: %w", err)
 	}
+
 	return data.Meta.LastPage, nil
 }
diff --git a/crawler/igdb.go b/crawler/igdb.go
index cc0710f..a6c0344 100644
--- a/crawler/igdb.go
+++ b/crawler/igdb.go
@@ -20,6 +20,7 @@ import (
 
 	"github.com/PuerkitoBio/goquery"
 	"github.com/go-resty/resty/v2"
+	"go.uber.org/zap"
 )
 
 type twitchToken struct {
@@ -33,9 +34,13 @@ func (t *twitchToken) getToken() (string, error) {
 	}
 	token, expires, err := loginTwitch()
 	if err != nil {
+		zap.L().Error("failed to login to Twitch", zap.Error(err))
 		return "", fmt.Errorf("failed to login twitch: %w", err)
 	}
-	_ = cache.SetWithExpire("twitch_token", token, expires)
+	err = cache.SetWithExpire("twitch_token", token, expires)
+	if err != nil {
+		zap.L().Error("failed to set Twitch token in cache", zap.Error(err))
+	}
 	return token, nil
 }
 
@@ -46,10 +51,13 @@ func loginTwitch() (string, time.Duration, error) {
 	params.Add("client_secret", config.Config.Twitch.ClientSecret)
 	params.Add("grant_type", "client_credentials")
 	baseURL.RawQuery = params.Encode()
+
 	resp, err := utils.Request().SetHeader("User-Agent", "").Post(baseURL.String())
 	if err != nil {
+		zap.L().Error("failed to make Twitch login request", zap.String("url", baseURL.String()), zap.Error(err))
 		return "", 0, err
 	}
+
 	data := struct {
 		AccessToken string `json:"access_token"`
 		ExpiresIn   int64  `json:"expires_in"`
@@ -57,6 +65,7 @@ func loginTwitch() (string, time.Duration, error) {
 	}{}
 	err = json.Unmarshal(resp.Body(), &data)
 	if err != nil {
+		zap.L().Error("failed to parse Twitch login response", zap.String("response", string(resp.Body())), zap.Error(err))
 		return "", 0, err
 	}
 	return data.AccessToken, time.Second * time.Duration(data.ExpiresIn), nil
@@ -65,68 +74,82 @@ func loginTwitch() (string, time.Duration, error) {
 func igdbRequest(URL string, dataBody any) (*resty.Response, error) {
 	t, err := token.getToken()
 	if err != nil {
+		zap.L().Error("failed to get Twitch token", zap.Error(err))
 		return nil, err
 	}
+
 	resp, err := utils.Request().SetBody(dataBody).SetHeaders(map[string]string{
 		"Client-ID":     config.Config.Twitch.ClientID,
 		"Authorization": "Bearer " + t,
 		"User-Agent":    "",
 		"Content-Type":  "text/plain",
 	}).Post(URL)
+
 	if err != nil {
+		zap.L().Error("failed to make IGDB request", zap.String("url", URL), zap.Any("dataBody", dataBody), zap.Error(err))
 		return nil, err
 	}
 	return resp, nil
 }
 
 func getIGDBID(name string) (int, error) {
-	var err error
 	resp, err := igdbRequest(constant.IGDBSearchURL, fmt.Sprintf(`search "%s"; fields *; limit 50; where game.platforms = [6] | game.platforms=[130] | game.platforms=[384] | game.platforms=[163];`, name))
 	if err != nil {
+		zap.L().Error("failed to search IGDB ID", zap.String("name", name), zap.Error(err))
 		return 0, err
 	}
+
 	if string(resp.Body()) == "[]" {
 		resp, err = igdbRequest(constant.IGDBSearchURL, fmt.Sprintf(`search "%s"; fields *; limit 50;`, name))
 		if err != nil {
+			zap.L().Error("failed to fallback search IGDB ID", zap.String("name", name), zap.Error(err))
 			return 0, err
 		}
 	}
 	var data model.IGDBSearches
 	if err = json.Unmarshal(resp.Body(), &data); err != nil {
+		zap.L().Error("failed to unmarshal IGDB search response", zap.String("response", string(resp.Body())), zap.Error(err))
 		return 0, fmt.Errorf("failed to unmarshal: %w, %s", err, debug.Stack())
 	}
+
 	if len(data) == 1 {
 		return GetIGDBAppParent(data[0].Game)
 	}
-	maxSimilairty := 0.0
-	maxSimilairtyIndex := 0
+
+	maxSimilarity := 0.0
+	maxSimilarityIndex := 0
 	for i, item := range data {
 		if strings.EqualFold(item.Name, name) {
 			return item.Game, nil
 		}
 		if sim := utils.Similarity(name, item.Name); sim >= 0.8 {
-			if sim > maxSimilairty {
-				maxSimilairty = sim
-				maxSimilairtyIndex = i
+			if sim > maxSimilarity {
+				maxSimilarity = sim
+				maxSimilarityIndex = i
 			}
 		}
+
 		detail, err := GetIGDBAppDetail(item.Game)
 		if err != nil {
+			zap.L().Error("failed to get IGDB app detail", zap.Int("gameID", item.Game), zap.Error(err))
 			return 0, err
 		}
-		for _, alternativeNames := range detail.AlternativeNames {
-			if sim := utils.Similarity(alternativeNames.Name, name); sim >= 0.8 {
-				if sim > maxSimilairty {
-					maxSimilairty = sim
-					maxSimilairtyIndex = i
+		for _, altName := range detail.AlternativeNames {
+			if sim := utils.Similarity(altName.Name, name); sim >= 0.8 {
+				if sim > maxSimilarity {
+					maxSimilarity = sim
+					maxSimilarityIndex = i
 				}
 			}
 		}
 	}
-	if maxSimilairty >= 0.8 {
-		return GetIGDBAppParent(data[maxSimilairtyIndex].Game)
+
+	if maxSimilarity >= 0.8 {
+		return GetIGDBAppParent(data[maxSimilarityIndex].Game)
 	}
+
+	zap.L().Warn("no IGDB ID found", zap.String("name", name))
 	return 0, fmt.Errorf("IGDB ID not found: %s", name)
 }
 
@@ -212,12 +235,14 @@ func GetIGDBAppParent(id int) (int, error) {
 	if exist {
 		id, err := strconv.Atoi(val)
 		if err != nil {
+			zap.L().Error("failed to parse cached IGDB parent ID", zap.String("cacheKey", key), zap.Error(err))
 			return 0, err
 		}
 		return id, nil
 	}
 	detail, err := GetIGDBAppDetail(id)
 	if err != nil {
+		zap.L().Error("failed to fetch IGDB app detail for parent", zap.Int("gameID", id), zap.Error(err))
 		return 0, err
 	}
 	hasParent := false
@@ -225,6 +250,7 @@ func GetIGDBAppParent(id int) (int, error) {
 		hasParent = true
 		detail, err = GetIGDBAppDetail(detail.VersionParent)
 		if err != nil {
+			zap.L().Error("failed to fetch IGDB version parent", zap.Int("parentID", detail.VersionParent), zap.Error(err))
 			return 0, err
 		}
 	}
@@ -232,39 +258,49 @@ func GetIGDBAppParent(id int) (int, error) {
 		return detail.ID, nil
 	}
 
-	_ = cache.Set(key, id)
+	err = cache.Set(key, id)
+	if err != nil {
+		zap.L().Error("failed to cache IGDB parent ID", zap.String("cacheKey", key), zap.Error(err))
+	}
 	return id, nil
 }
 
-// GetIGDBID returns the IGDB ID of the game, try directly IGDB api first, then steam search
+// GetIGDBID retrieves the IGDB ID of a game by its name using IGDB API and fallback mechanisms.
 func GetIGDBID(name string) (int, error) {
 	key := fmt.Sprintf("igdb_id:%s", name)
-	val, exist := cache.Get(key)
-	if exist {
+	if val, exist := cache.Get(key); exist {
+		zap.L().Info("cache hit for IGDB ID", zap.String("name", name), zap.String("cacheKey", key))
 		return strconv.Atoi(val)
 	}
-	name1 := name
-	name2 := FormatName(name)
-	names := []string{name1}
-	if name1 != name2 {
-		names = append(names, name2)
-	}
-	for _, name := range names {
-		id, err := getIGDBID(name)
+
+	// Normalize game name and try multiple variations
+	normalizedNames := []string{name, FormatName(name)}
+	for _, n := range normalizedNames {
+		id, err := getIGDBID(n)
 		if err == nil {
-			_ = cache.Set(key, id)
+			cacheErr := cache.Set(key, id)
+			if cacheErr != nil {
+				zap.L().Warn("failed to cache IGDB ID", zap.String("name", n), zap.Error(cacheErr))
+			}
 			return id, nil
 		}
 	}
-	for _, name := range names {
-		id, err := getIGDBIDBySteamSearch(name)
+
+	// Fallback to Steam search if IGDB search fails
+	for _, n := range normalizedNames {
+		id, err := getIGDBIDBySteamSearch(n)
 		if err == nil {
-			_ = cache.Set(key, id)
+			cacheErr := cache.Set(key, id)
+			if cacheErr != nil {
+				zap.L().Warn("failed to cache IGDB ID after Steam search", zap.String("name", n), zap.Error(cacheErr))
+			}
 			return id, nil
 		}
 	}
-	return 0, errors.New("IGDB ID not found")
+
+	zap.L().Warn("failed to retrieve IGDB ID", zap.String("name", name))
+	return 0, fmt.Errorf("IGDB ID not found for '%s'", name)
 }
 
 func GetIGDBAppDetail(id int) (*model.IGDBGameDetail, error) {
@@ -273,267 +309,267 @@ func GetIGDBAppDetail(id int) (*model.IGDBGameDetail, error) {
 	if exist {
 		var data model.IGDBGameDetail
 		if err := json.Unmarshal([]byte(val), &data); err != nil {
+			zap.L().Error("failed to parse cached IGDB game detail", zap.String("cacheKey", key), zap.Error(err))
 			return nil, err
 		}
 		return &data, nil
 	}
-	var err error
-	resp, err := igdbRequest(constant.IGDBGameURL, fmt.Sprintf(`where id = %v;fields *,alternative_names.*,language_supports.*,screenshots.*,cover.*,involved_companies.*,involved_companies.*,game_engines.*,game_modes.*,genres.*,player_perspectives.*,release_dates.*,videos.*,websites.*,platforms.*,themes.*,collections.*;`, id))
+	resp, err := igdbRequest(constant.IGDBGameURL, fmt.Sprintf(`where id = %v; fields *,alternative_names.*,language_supports.*,screenshots.*,cover.*,involved_companies.*,game_engines.*,game_modes.*,genres.*,player_perspectives.*,release_dates.*,videos.*,websites.*,platforms.*,themes.*,collections.*;`, id))
 	if err != nil {
+		zap.L().Error("failed to fetch IGDB game detail", zap.Int("gameID", id), zap.Error(err))
 		return nil, err
 	}
+
 	var data model.IGDBGameDetails
 	if err = json.Unmarshal(resp.Body(), &data); err != nil {
+		zap.L().Error("failed to unmarshal IGDB game detail response", zap.String("response", string(resp.Body())), zap.Error(err))
 		return nil, err
 	}
+
 	if len(data) == 0 {
+		zap.L().Warn("IGDB game not found", zap.Int("gameID", id))
 		return nil, errors.New("IGDB App not found")
 	}
+
 	if data[0].Name == "" {
 		return GetIGDBAppDetail(id)
 	}
 	jsonBytes, err := json.Marshal(data[0])
 	if err == nil {
-		_ = cache.Set(key, string(jsonBytes))
+		err = cache.Set(key, string(jsonBytes))
+		if err != nil {
+			zap.L().Error("failed to cache IGDB game detail", zap.String("cacheKey", key), zap.Error(err))
+		}
 	}
 	return data[0], nil
 }
 
+// GetIGDBCompany retrieves the company name from IGDB by its ID.
func GetIGDBCompany(id int) (string, error) { - key := fmt.Sprintf("igdb_companies:%v", id) - val, exist := cache.Get(key) - if exist { + key := fmt.Sprintf("igdb_companies:%d", id) + if val, exist := cache.Get(key); exist { + zap.L().Info("cache hit for IGDB company", zap.Int("companyID", id), zap.String("cacheKey", key)) return val, nil } - var err error - resp, err := igdbRequest(constant.IGDBCompaniesURL, fmt.Sprintf(`where id=%v; fields *;`, id)) + + query := fmt.Sprintf(`where id=%d; fields *;`, id) + resp, err := igdbRequest(constant.IGDBCompaniesURL, query) if err != nil { - return "", err + zap.L().Error("failed to fetch IGDB company", zap.Int("companyID", id), zap.Error(err)) + return "", fmt.Errorf("failed to fetch IGDB company for ID %d: %w", id, err) } + var data model.IGDBCompanies if err = json.Unmarshal(resp.Body(), &data); err != nil { - return "", err + zap.L().Error("failed to unmarshal IGDB company response", zap.String("response", string(resp.Body())), zap.Error(err)) + return "", fmt.Errorf("failed to unmarshal IGDB companies response: %w", err) } + if len(data) == 0 { - return "", errors.New("not found") - } - if data[0].Name == "" { - return GetIGDBCompany(id) + zap.L().Warn("no company found in IGDB for ID", zap.Int("companyID", id)) + return "", errors.New("company not found") } - _ = cache.Set(key, data[0].Name) - - return data[0].Name, nil + companyName := data[0].Name + cacheErr := cache.Set(key, companyName) + if cacheErr != nil { + zap.L().Warn("failed to cache IGDB company name", zap.Int("companyID", id), zap.Error(cacheErr)) + } + return companyName, nil } +// GenerateIGDBGameInfo generates detailed game information based on an IGDB ID. func GenerateIGDBGameInfo(id int) (*model.GameInfo, error) { - item := &model.GameInfo{} detail, err := GetIGDBAppDetail(id) if err != nil { - return nil, err + zap.L().Error("failed to fetch IGDB app detail", zap.Int("igdbID", id), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch IGDB app detail for ID %d: %w", id, err) + } + + gameInfo := &model.GameInfo{ + IGDBID: id, + Name: detail.Name, + Description: detail.Summary, + Cover: strings.Replace(detail.Cover.URL, "t_thumb", "t_original", 1), } - item.IGDBID = id - item.Name = detail.Name - item.Description = detail.Summary - item.Cover = strings.Replace(detail.Cover.URL, "t_thumb", "t_original", 1) for _, lang := range detail.LanguageSupports { if lang.LanguageSupportType == 3 { - l, exist := constant.IGDBLanguages[lang.Language] - if !exist { - continue + if l, exist := constant.IGDBLanguages[lang.Language]; exist { + gameInfo.Languages = append(gameInfo.Languages, l.Name) } - item.Languages = append(item.Languages, l.Name) } } for _, screenshot := range detail.Screenshots { - item.Screenshots = append(item.Screenshots, strings.Replace(screenshot.URL, "t_thumb", "t_original", 1)) + gameInfo.Screenshots = append(gameInfo.Screenshots, strings.Replace(screenshot.URL, "t_thumb", "t_original", 1)) } for _, alias := range detail.AlternativeNames { - item.Aliases = append(item.Aliases, alias.Name) + gameInfo.Aliases = append(gameInfo.Aliases, alias.Name) } for _, company := range detail.InvolvedCompanies { - if company.Developer || company.Publisher { - companyName, err := GetIGDBCompany(company.Company) - if err != nil { - continue - } - if company.Developer { - item.Developers = append(item.Developers, companyName) - } - if company.Publisher { - item.Publishers = append(item.Publishers, companyName) - } + companyName, err := GetIGDBCompany(company.Company) + if err != 
nil { + zap.L().Warn("failed to fetch company name", zap.Int("companyID", company.Company), zap.Error(err)) + continue + } + if company.Developer { + gameInfo.Developers = append(gameInfo.Developers, companyName) + } + if company.Publisher { + gameInfo.Publishers = append(gameInfo.Publishers, companyName) } } - item.GameEngines = make([]string, 0) - for _, engine := range detail.GameEngines { - item.GameEngines = append(item.GameEngines, engine.Name) - } - - item.GameModes = make([]string, 0) for _, mode := range detail.GameModes { - item.GameModes = append(item.GameModes, mode.Name) + gameInfo.GameModes = append(gameInfo.GameModes, mode.Name) } - item.Genres = make([]string, 0) for _, genre := range detail.Genres { - item.Genres = append(item.Genres, genre.Name) + gameInfo.Genres = append(gameInfo.Genres, genre.Name) } - item.Themes = make([]string, 0) - for _, theme := range detail.Themes { - item.Themes = append(item.Themes, theme.Name) - } - - item.Platforms = make([]string, 0) for _, platform := range detail.Platforms { - item.Platforms = append(item.Platforms, platform.Name) + gameInfo.Platforms = append(gameInfo.Platforms, platform.Name) } - item.PlayerPerspectives = make([]string, 0) - for _, perspective := range detail.PlayerPerspectives { - item.PlayerPerspectives = append(item.PlayerPerspectives, perspective.Name) - } - - item.SimilarGames = detail.SimilarGames - - item.Videos = make([]string, 0) - for _, video := range detail.Videos { - item.Videos = append(item.Videos, fmt.Sprintf("https://www.youtube.com/watch?v=%s", video.VideoID)) - } - - item.Websites = make([]string, 0) - for _, website := range detail.Websites { - item.Websites = append(item.Websites, website.URL) - } - - item.Collections = make([]model.GameCollection, 0) - - for _, collection := range detail.Collections { - item.Collections = append(item.Collections, model.GameCollection{ - Games: collection.Games, - Name: collection.Name, - }) - } - - return item, nil + return gameInfo, nil } -// OrganizeGameItemWithIGDB Will add GameItem.ID to the newly added GameInfo.GameIDs +// OrganizeGameItemWithIGDB links a game item with its corresponding IGDB game information. func OrganizeGameItemWithIGDB(game *model.GameItem) (*model.GameInfo, error) { id, err := GetIGDBID(game.Name) if err != nil { - return nil, err + zap.L().Error("failed to get IGDB ID for game", zap.String("gameName", game.Name), zap.Error(err)) + return nil, fmt.Errorf("failed to get IGDB ID for game '%s': %w", game.Name, err) } - d, err := db.GetGameInfoByPlatformID("igdb", id) + + info, err := db.GetGameInfoByPlatformID("igdb", id) if err == nil { - d.GameIDs = append(d.GameIDs, game.ID) - d.GameIDs = utils.Unique(d.GameIDs) - return d, nil + info.GameIDs = utils.Unique(append(info.GameIDs, game.ID)) + return info, nil } - info, err := GenerateGameInfo("igdb", id) + + info, err = GenerateIGDBGameInfo(id) if err != nil { - return nil, err + zap.L().Error("failed to generate IGDB game info", zap.Int("igdbID", id), zap.Error(err)) + return nil, fmt.Errorf("failed to generate IGDB game info for ID %d: %w", id, err) } - info.GameIDs = append(info.GameIDs, game.ID) - info.GameIDs = utils.Unique(info.GameIDs) + + info.GameIDs = utils.Unique(append(info.GameIDs, game.ID)) return info, nil } +// GetIGDBIDBySteamAppID retrieves the IGDB ID of a game using its Steam App ID. 
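OrganizeGameItemWithIGDB relies on utils.Unique to deduplicate GameIDs after the append. That helper is not shown in this diff; a plausible generic sketch of what it could look like:

package main

import "fmt"

// Unique returns s with duplicates removed, preserving first-seen order.
// A sketch of a utils.Unique for comparable element types (assumption:
// the real helper has this shape).
func Unique[T comparable](s []T) []T {
	seen := make(map[T]struct{}, len(s))
	out := make([]T, 0, len(s))
	for _, v := range s {
		if _, ok := seen[v]; ok {
			continue
		}
		seen[v] = struct{}{}
		out = append(out, v)
	}
	return out
}

func main() {
	ids := []string{"a", "b", "a", "c"}
	fmt.Println(Unique(append(ids, "b"))) // [a b c]
}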
func GetIGDBIDBySteamAppID(id int) (int, error) { - key := fmt.Sprintf("igdb_id_by_steam_app_id:%v", id) - val, exist := cache.Get(key) - if exist { + key := fmt.Sprintf("igdb_id_by_steam_app_id:%d", id) + if val, exist := cache.Get(key); exist { + zap.L().Info("cache hit for IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.String("cacheKey", key)) return strconv.Atoi(val) } - var err error - resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where url = "https://store.steampowered.com/app/%v" | url = "https://store.steampowered.com/app/%v/"*; fields *; limit 500;`, id, id)) + + query := fmt.Sprintf(`where url = "https://store.steampowered.com/app/%d" | url = "https://store.steampowered.com/app/%d/"; fields game;`, id, id) + resp, err := igdbRequest(constant.IGDBWebsitesURL, query) if err != nil { - return 0, err + zap.L().Error("failed to fetch IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.Error(err)) + return 0, fmt.Errorf("failed to fetch IGDB ID by Steam App ID %d: %w", id, err) } + var data []struct { Game int `json:"game"` } if err = json.Unmarshal(resp.Body(), &data); err != nil { - return 0, err - } - if len(data) == 0 { - return 0, errors.New("not found") - } - if data[0].Game == 0 { - return GetIGDBIDBySteamAppID(id) + zap.L().Error("failed to unmarshal IGDB response", zap.String("response", string(resp.Body())), zap.Error(err)) + return 0, fmt.Errorf("failed to unmarshal IGDB response for Steam App ID %d: %w", id, err) } - _ = cache.Set(key, strconv.Itoa(data[0].Game)) + if len(data) == 0 || data[0].Game == 0 { + zap.L().Warn("no matching IGDB game found for Steam App ID", zap.Int("steamAppID", id)) + return 0, errors.New("no matching IGDB game found") + } - return GetIGDBAppParent(data[0].Game) + igdbID := data[0].Game + cacheErr := cache.Set(key, strconv.Itoa(igdbID)) + if cacheErr != nil { + zap.L().Warn("failed to cache IGDB ID by Steam App ID", zap.Int("steamAppID", id), zap.Error(cacheErr)) + } + + return GetIGDBAppParent(igdbID) } +// GetIGDBIDBySteamBundleID retrieves the IGDB ID of a game using its Steam Bundle ID. 
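The Steam lookups here resolve an ID by querying IGDB's websites endpoint for both spellings of a store URL (with and without trailing slash) and reading back only the game field. A self-contained sketch of the query construction and the minimal response shape it decodes, using an arbitrary example app ID and a canned response body:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	id := 367520 // example Steam App ID
	// Apicalypse query as built in GetIGDBIDBySteamAppID
	query := fmt.Sprintf(
		`where url = "https://store.steampowered.com/app/%d" | url = "https://store.steampowered.com/app/%d/"; fields game;`,
		id, id)
	fmt.Println(query)

	// the endpoint answers with a list of website records; only `game` matters here
	body := []byte(`[{"game": 26286}]`) // canned response for illustration
	var data []struct {
		Game int `json:"game"`
	}
	if err := json.Unmarshal(body, &data); err == nil && len(data) > 0 {
		fmt.Println("IGDB ID:", data[0].Game)
	}
}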
func GetIGDBIDBySteamBundleID(id int) (int, error) { - key := fmt.Sprintf("igdb_id_by_steam_bundle_id:%v", id) - val, exist := cache.Get(key) - if exist { + key := fmt.Sprintf("igdb_id_by_steam_bundle_id:%d", id) + if val, exist := cache.Get(key); exist { + zap.L().Info("cache hit for IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.String("cacheKey", key)) return strconv.Atoi(val) } - var err error - resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where url = "https://store.steampowered.com/bundle/%v" | url = "https://store.steampowered.com/bundle/%v/"*; fields *; limit 500;`, id, id)) + query := fmt.Sprintf(`where url = "https://store.steampowered.com/bundle/%d" | url = "https://store.steampowered.com/bundle/%d/"; fields game;`, id, id) + resp, err := igdbRequest(constant.IGDBWebsitesURL, query) if err != nil { - return 0, err + zap.L().Error("failed to fetch IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.Error(err)) + return 0, fmt.Errorf("failed to fetch IGDB ID by Steam Bundle ID %d: %w", id, err) } + var data []struct { Game int `json:"game"` } if err = json.Unmarshal(resp.Body(), &data); err != nil { - return 0, err - } - if len(data) == 0 { - return 0, errors.New("not found") - } - if data[0].Game == 0 { - return GetIGDBIDBySteamBundleID(id) + zap.L().Error("failed to unmarshal IGDB response", zap.String("response", string(resp.Body())), zap.Error(err)) + return 0, fmt.Errorf("failed to unmarshal IGDB response for Steam Bundle ID %d: %w", id, err) } - _ = cache.Set(key, strconv.Itoa(data[0].Game)) + if len(data) == 0 || data[0].Game == 0 { + zap.L().Warn("no matching IGDB game found for Steam Bundle ID", zap.Int("steamBundleID", id)) + return 0, errors.New("no matching IGDB game found") + } - return GetIGDBAppParent(data[0].Game) + igdbID := data[0].Game + cacheErr := cache.Set(key, strconv.Itoa(igdbID)) + if cacheErr != nil { + zap.L().Warn("failed to cache IGDB ID by Steam Bundle ID", zap.Int("steamBundleID", id), zap.Error(cacheErr)) + } + + return GetIGDBAppParent(igdbID) } -// GetIGDBPopularGameIDs get IGDB popular game IDs +// GetIGDBPopularGameIDs retrieves popular IGDB game IDs based on a given popularity type. // popularity_type = 1 IGDB Visits: Game page visits on IGDB.com. // popularity_type = 2 IGDB Want to Play: Additions to IGDB.com users’ “Want to Play” lists. // popularity_type = 3 IGDB Playing: Additions to IGDB.com users’ “Playing” lists. // popularity_type = 4 IGDB Played: Additions to IGDB.com users’ “Played” lists. 
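A usage sketch of the paging contract implied by the signature below, assuming offset and limit walk the ranking in fixed-size chunks; the constant names are invented for readability and are not part of this diff:

package main

import "fmt"

// invented names for the numeric popularity types documented above
const (
	igdbVisits     = 1
	igdbWantToPlay = 2
	igdbPlaying    = 3
	igdbPlayed     = 4
)

func main() {
	// first two pages of 50 "Want to Play" games, mirroring
	// GetIGDBPopularGameIDs(popularityType, offset, limit)
	for page := 0; page < 2; page++ {
		offset, limit := page*50, 50
		fmt.Printf("GetIGDBPopularGameIDs(%d, %d, %d)\n", igdbWantToPlay, offset, limit)
	}
}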
-func GetIGDBPopularGameIDs(popularityType int, offset int, limit int) ([]int, error) { - var err error - resp, err := igdbRequest(constant.IGDBPopularityURL, fmt.Sprintf("fields game_id,value,popularity_type; sort value desc; limit %v; offset %v; where popularity_type = %v;", limit, offset, popularityType)) +func GetIGDBPopularGameIDs(popularityType, offset, limit int) ([]int, error) { + query := fmt.Sprintf("fields game_id,value,popularity_type; sort value desc; limit %d; offset %d; where popularity_type = %d;", limit, offset, popularityType) + resp, err := igdbRequest(constant.IGDBPopularityURL, query) if err != nil { - return nil, err + zap.L().Error("failed to fetch popular IGDB game IDs", zap.Int("popularityType", popularityType), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch popular IGDB game IDs for type %d: %w", popularityType, err) } - type IgdbPopularity struct { + + var data []struct { GameID int `json:"game_id"` Value float64 `json:"value"` } - var data []IgdbPopularity if err = json.Unmarshal(resp.Body(), &data); err != nil { - return nil, err + zap.L().Error("failed to unmarshal IGDB popular games response", zap.String("response", string(resp.Body())), zap.Error(err)) + return nil, fmt.Errorf("failed to unmarshal IGDB popular games response: %w", err) } - ret := make([]int, 0) + + gameIDs := make([]int, 0, len(data)) for _, d := range data { - pid, err := GetIGDBAppParent(d.GameID) + parentID, err := GetIGDBAppParent(d.GameID) if err != nil { - ret = append(ret, d.GameID) - continue + zap.L().Warn("failed to fetch parent IGDB ID for game", zap.Int("gameID", d.GameID), zap.Error(err)) + gameIDs = append(gameIDs, d.GameID) + } else { + gameIDs = append(gameIDs, parentID) } - ret = append(ret, pid) } - return ret, nil + return gameIDs, nil } diff --git a/crawler/omg_gods.go b/crawler/omg_gods.go new file mode 100644 index 0000000..f0e629e --- /dev/null +++ b/crawler/omg_gods.go @@ -0,0 +1,68 @@ +package crawler + +import ( + "game-crawler/model" + "regexp" + "strings" + + "go.uber.org/zap" +) + +type OmgGodsCrawler struct { + logger *zap.Logger + crawler RutrackerCrawler +} + +func NewOmgGodsCrawler(cfClearanceUrl, username, password string, logger *zap.Logger) *OmgGodsCrawler { + return &OmgGodsCrawler{ + logger: logger, + crawler: *NewRutrackerCrawler( + "OmgGods", + "switch", + "8994327", + username, + password, + cfClearanceUrl, + omgGodsFormatter, + logger, + ), + } +} + +func (c *OmgGodsCrawler) Name() string { + return "OmgGodsCrawler" +} + +func (c *OmgGodsCrawler) CrawlByUrl(URL string) (*model.GameItem, error) { + return c.crawler.CrawlByUrl(URL) +} + +func (c *OmgGodsCrawler) Crawl(page int) ([]*model.GameItem, error) { + return c.crawler.Crawl(page) +} + +func (c *OmgGodsCrawler) CrawlAll() ([]*model.GameItem, error) { + return c.crawler.CrawlAll() +} + +func (c *OmgGodsCrawler) GetTotalPageNum() (int, error) { + return c.crawler.GetTotalPageNum() +} + +var omgGodsFormatRegex = []*regexp.Regexp{ + regexp.MustCompile(`\(.*?\)`), + regexp.MustCompile(`\[.*?\]`), +} + +func omgGodsFormatter(name string) string { + for _, regex := range omgGodsFormatRegex { + name = regex.ReplaceAllString(name, "") + } + if strings.Contains(name, " + ") { + name = strings.Split(name, " + ")[0] + } + if strings.Contains(name, " / ") { + name = strings.Split(name, " / ")[0] + } + return strings.TrimSpace(name) +} diff --git a/crawler/onlinefix.go b/crawler/onlinefix.go index c95f0f4..bc5cfec 100644 --- a/crawler/onlinefix.go +++ b/crawler/onlinefix.go @@ -8,14 +8,12 @@ import 
( "fmt" "net/http" "net/url" - "os" "regexp" "strconv" "strings" "time" "game-crawler/cache" - "game-crawler/config" "game-crawler/constant" "game-crawler/db" "game-crawler/model" @@ -26,12 +24,16 @@ import ( ) type OnlineFixCrawler struct { - logger *zap.Logger + username string + password string + logger *zap.Logger } -func NewOnlineFixCrawler(logger *zap.Logger) *OnlineFixCrawler { +func NewOnlineFixCrawler(username, password string, logger *zap.Logger) *OnlineFixCrawler { return &OnlineFixCrawler{ - logger: logger, + username: username, + password: password, + logger: logger, } } @@ -42,206 +44,218 @@ func (c *OnlineFixCrawler) Name() string { func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) { cookies, err := c.getCookies() if err != nil { - return nil, err + c.logger.Error("Failed to get cookies", zap.Error(err)) + return nil, fmt.Errorf("failed to get cookies: %w", err) } + requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page) resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).SetCookies(cookies).Get(requestURL) if err != nil { - c.logger.Error("Failed to fetch", zap.Error(err)) - return nil, err + c.logger.Error("Failed to fetch page", zap.String("url", requestURL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch page %d: %w", page, err) } + body := utils.Windows1251ToUTF8(resp.Body()) doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body)) if err != nil { - c.logger.Error("Failed to parse HTML", zap.Error(err)) - return nil, err + c.logger.Error("Failed to parse HTML", zap.String("url", requestURL), zap.Error(err)) + return nil, fmt.Errorf("failed to parse HTML for page %d: %w", page, err) } + var urls []string - var updateFlags []string //link+date + var updateFlags []string doc.Find("article.news").Each(func(i int, s *goquery.Selection) { - urls = append(urls, s.Find(".big-link").First().AttrOr("href", "")) - updateFlags = append( - updateFlags, - base64.StdEncoding.EncodeToString([]byte(s.Find(".big-link").First().AttrOr("href", "")+s.Find("time").Text())), - ) + url := s.Find(".big-link").First().AttrOr("href", "") + if url != "" { + urls = append(urls, url) + updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(url+s.Find("time").Text()))) + } }) var res []*model.GameItem for i, u := range urls { + // Skip already crawled links if db.IsOnlineFixCrawled(updateFlags[i]) { + c.logger.Info("Skipping already crawled URL", zap.String("url", u)) continue } - c.logger.Info("Crawling", zap.String("URL", u)) + + c.logger.Info("Crawling URL", zap.String("url", u)) item, err := c.CrawlByUrl(u) if err != nil { - c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u)) + c.logger.Warn("Failed to crawl URL", zap.String("url", u), zap.Error(err)) continue } + item.UpdateFlag = updateFlags[i] - err = db.SaveGameItem(item) - if err != nil { - c.logger.Warn("Failed to save", zap.Error(err)) + if err := db.SaveGameItem(item); err != nil { + c.logger.Warn("Failed to save game item", zap.String("url", u), zap.Error(err)) continue } res = append(res, item) + if err := OrganizeGameItem(item); err != nil { - c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u)) + c.logger.Warn("Failed to organize game item", zap.String("url", u), zap.Error(err)) continue } } + return res, nil } func (c *OnlineFixCrawler) CrawlByUrl(URL string) (*model.GameItem, error) { cookies, err := c.getCookies() if err != nil { - return nil, err + c.logger.Error("Failed to get cookies", 
zap.Error(err)) + return nil, fmt.Errorf("failed to get cookies: %w", err) } + resp, err := utils.Request().SetHeaders(map[string]string{ "Referer": constant.OnlineFixURL, }).SetCookies(cookies).Get(URL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch URL", zap.String("url", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err) } + body := utils.Windows1251ToUTF8(resp.Body()) titleRegex := regexp.MustCompile(`(?i)<h1[^>]*>(.*?)</h1>`) - titleRegexRes := titleRegex.FindAllStringSubmatch(string(body), -1) - if len(titleRegexRes) == 0 { - return nil, errors.New("failed to find title") + titleMatch := titleRegex.FindStringSubmatch(string(body)) + if len(titleMatch) == 0 { + c.logger.Warn("Failed to find title in HTML", zap.String("url", URL)) + return nil, errors.New("failed to find title in HTML") } + downloadRegex := regexp.MustCompile(`(?i)<a[^>]+\bhref="([^"]+)"[^>]+>(Скачать Torrent|Скачать торрент)`) - downloadRegexRes := downloadRegex.FindAllStringSubmatch(string(body), -1) - if len(downloadRegexRes) == 0 { + downloadMatch := downloadRegex.FindStringSubmatch(string(body)) + if len(downloadMatch) == 0 { + c.logger.Warn("Failed to find download button", zap.String("url", URL)) return nil, errors.New("failed to find download button") } + + // Retrieve or create game item item, err := db.GetGameItemByUrl(URL) if err != nil { - return nil, err + item = &model.GameItem{} } - item.RawName = titleRegexRes[0][1] + + item.RawName = titleMatch[1] item.Name = OnlineFixFormatter(item.RawName) item.Url = URL item.Author = "OnlineFix" item.Size = "0" - resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1]) - body = utils.Windows1251ToUTF8(resp.Body()) + + // Handle download links + downloadURL := downloadMatch[1] + resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadURL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch download link", zap.String("url", downloadURL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch download link %s: %w", downloadURL, err) } - if strings.Contains(downloadRegexRes[0][1], "uploads.online-fix.me") { + + body = utils.Windows1251ToUTF8(resp.Body()) + if strings.Contains(downloadURL, "uploads.online-fix.me") { + // Handle torrent file magnetRegex := regexp.MustCompile(`(?i)"(.*?).torrent"`) - magnetRegexRes := magnetRegex.FindAllStringSubmatch(string(body), -1) - if len(magnetRegexRes) == 0 { - return nil, errors.New("failed to find magnet") + magnetMatch := magnetRegex.FindStringSubmatch(string(body)) + if len(magnetMatch) == 0 { + c.logger.Warn("Failed to find torrent magnet link", zap.String("url", downloadURL)) + return nil, errors.New("failed to find torrent magnet link") } - resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1] + strings.Trim(magnetRegexRes[0][0], "\"")) + + torrentURL := downloadURL + strings.Trim(magnetMatch[0], "\"") + resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(torrentURL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch torrent file", zap.String("url", torrentURL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch torrent file %s: %w", torrentURL, err) } + magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body()) + if err != nil { + c.logger.Error("Failed to convert torrent to magnet", zap.String("url", torrentURL), zap.Error(err)) + return nil, fmt.Errorf("failed to convert torrent to magnet:
%w", err) + } + item.DownloadLinks = []string{magnet} item.Size = size - if err != nil { - return nil, err - } - } else if strings.Contains(downloadRegexRes[0][1], "online-fix.me/ext") { - if strings.Contains(string(body), "mega.nz") { - if !config.Config.MegaAvaliable { - return nil, errors.New("mega is not avaliable") - } - megaRegex := regexp.MustCompile(`(?i)location.href=\\'([^\\']*)\\'`) - megaRegexRes := megaRegex.FindAllStringSubmatch(string(body), -1) - if len(megaRegexRes) == 0 { - return nil, errors.New("failed to find download link") - } - path, files, err := utils.MegaDownload(megaRegexRes[0][1], "torrent") - if err != nil { - return nil, err - } - torrent := "" - for _, file := range files { - if strings.HasSuffix(file, ".torrent") { - torrent = file - break - } - } - dataBytes, err := os.ReadFile(torrent) - if err != nil { - return nil, err - } - magnet, size, err := utils.ConvertTorrentToMagnet(dataBytes) - item.DownloadLinks = []string{magnet} - item.Size = size - if err != nil { - return nil, err - } - _ = os.RemoveAll(path) - } else { - return nil, errors.New("failed to find download link") - } } else { - return nil, errors.New("failed to find download link") + c.logger.Warn("Unsupported download link format", zap.String("url", downloadURL)) + return nil, errors.New("unsupported download link format") } + item.Platform = "windows" return item, nil } +// Crawl multiple pages func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) { var res []*model.GameItem for _, page := range pages { items, err := c.Crawl(page) if err != nil { - return nil, err + c.logger.Error("Failed to crawl page", zap.Int("page", page), zap.Error(err)) + return nil, fmt.Errorf("failed to crawl page %d: %w", page, err) } res = append(res, items...) } return res, nil } +// Crawl all pages func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) { var res []*model.GameItem totalPageNum, err := c.GetTotalPageNum() if err != nil { - return nil, err + c.logger.Error("Failed to get total page number", zap.Error(err)) + return nil, fmt.Errorf("failed to get total page number: %w", err) } + for i := 1; i <= totalPageNum; i++ { items, err := c.Crawl(i) if err != nil { - return nil, err + c.logger.Warn("Failed to crawl page", zap.Int("page", i), zap.Error(err)) + continue } res = append(res, items...) 
} return res, nil } +// GetTotalPageNum returns the number of listing pages on online-fix.me. func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) { resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).Get(constant.OnlineFixURL) if err != nil { - return 0, err + c.logger.Error("Failed to fetch main page", zap.Error(err)) + return 0, fmt.Errorf("failed to fetch main page: %w", err) } + pageRegex := regexp.MustCompile(`(?i)<a href="[^"]*/page/\d+/">(\d+)</a>`) - pageRegexRes := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1) - if len(pageRegexRes) == 0 { - return 0, err + pageMatches := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1) + if len(pageMatches) < 2 { + c.logger.Warn("Failed to find total page number in HTML") + return 0, errors.New("failed to find total page number") } - totalPageNum, err := strconv.Atoi(pageRegexRes[len(pageRegexRes)-2][1]) + + totalPageNum, err := strconv.Atoi(pageMatches[len(pageMatches)-2][1]) if err != nil { - return 0, err + c.logger.Error("Failed to parse total page number", zap.Error(err)) + return 0, fmt.Errorf("failed to parse total page number: %w", err) } + return totalPageNum, nil } -type csrf struct { - Field string `json:"field"` - Value string `json:"value"` -} - +// getCookies returns authenticated session cookies, logging in and caching them on first use. func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) { + if c.username == "" || c.password == "" { + return nil, errors.New("username or password is empty") + } val, exists := cache.Get("onlinefix_cookies") if exists { var cookies []*http.Cookie if err := json.Unmarshal([]byte(val), &cookies); err != nil { - return nil, err + c.logger.Warn("Failed to parse cached cookies", zap.Error(err)) + return nil, fmt.Errorf("failed to parse cached cookies: %w", err) } return cookies, nil } @@ -251,38 +265,48 @@ func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) { "Referer": constant.OnlineFixURL, }).Get(constant.OnlineFixCSRFURL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch CSRF token", zap.Error(err)) + return nil, fmt.Errorf("failed to fetch CSRF token: %w", err) } - var csrf csrf - if err = json.Unmarshal(resp.Body(), &csrf); err != nil { - return nil, err + + type csrf struct { + Field string `json:"field"` + Value string `json:"value"` + } + + var csrfToken csrf + if err = json.Unmarshal(resp.Body(), &csrfToken); err != nil { + c.logger.Error("Failed to parse CSRF token", zap.Error(err)) + return nil, fmt.Errorf("failed to parse CSRF token: %w", err) } cookies := resp.Cookies() params := url.Values{} - params.Add("login_name", config.Config.OnlineFix.User) - params.Add("login_password", config.Config.OnlineFix.Password) - params.Add(csrf.Field, csrf.Value) + params.Add("login_name", c.username) + params.Add("login_password", c.password) + params.Add(csrfToken.Field, csrfToken.Value) params.Add("login", "submit") + resp, err = utils.Request().SetHeaders(map[string]string{ "Origin": constant.OnlineFixURL, "Content-Type": "application/x-www-form-urlencoded", "Referer": constant.OnlineFixURL, }).SetCookies(cookies).SetBody(params.Encode()).Post(constant.OnlineFixURL) if err != nil { - return nil, err + c.logger.Error("Failed to log in", zap.Error(err)) + return nil, fmt.Errorf("failed to log in: %w", err) } + cookies = resp.Cookies() - jsonBytes, _ := json.Marshal(cookies) - _ = cache.SetWithExpire("onlinefix_cookies", string(jsonBytes), time.Hour) + cookiesJSON, _ := json.Marshal(cookies) + _ = cache.SetWithExpire("onlinefix_cookies", string(cookiesJSON), time.Hour) return cookies, nil } +// OnlineFixFormatter strips the "по сети" marker and parenthesised tags from a raw title. func OnlineFixFormatter(name string) string
{ - name = strings.Replace(name, "по сети", "", -1) - reg1 := regexp.MustCompile(`(?i)\(.*?\)`) - name = reg1.ReplaceAllString(name, "") - return strings.TrimSpace(name) + name = strings.ReplaceAll(name, "по сети", "") + reg := regexp.MustCompile(`(?i)\(.*?\)`) + return strings.TrimSpace(reg.ReplaceAllString(name, "")) } diff --git a/crawler/rutracker.go b/crawler/rutracker.go new file mode 100644 index 0000000..24c3164 --- /dev/null +++ b/crawler/rutracker.go @@ -0,0 +1,298 @@ +package crawler + +import ( + "encoding/base64" + "encoding/json" + "fmt" + "game-crawler/cache" + "game-crawler/constant" + "game-crawler/db" + "game-crawler/model" + "game-crawler/utils" + "net/url" + "strconv" + "strings" + "time" + "unicode" + + "git.nite07.com/nite/ccs" + "github.com/Danny-Dasilva/CycleTLS/cycletls" + http "github.com/Danny-Dasilva/fhttp" + "github.com/PuerkitoBio/goquery" + "go.uber.org/zap" +) + +type RutrackerCrawler struct { + source string + rid string + platform string + username string + password string + formatter FormatterFunc + logger *zap.Logger + cfClearanceUrl string +} + +func NewRutrackerCrawler(source, platform, rid, username, password, cfClearanceUrl string, formatter FormatterFunc, logger *zap.Logger) *RutrackerCrawler { + return &RutrackerCrawler{ + source: source, + rid: rid, + formatter: formatter, + logger: logger, + platform: platform, + username: username, + password: password, + cfClearanceUrl: cfClearanceUrl, + } +} + +func (r *RutrackerCrawler) getSession() (*ccs.Session, error) { + r.logger.Info("Fetching session for RutrackerCrawler") + + if r.username == "" || r.password == "" { + r.logger.Error("Username or password is empty") + return nil, fmt.Errorf("username or password is empty") + } + + cacheKey := "rutracker_session" + var session ccs.Session + if val, exist := cache.Get(cacheKey); exist { + err := json.Unmarshal([]byte(val), &session) + if err != nil { + r.logger.Error("Failed to unmarshal cached session", zap.Error(err)) + return nil, fmt.Errorf("failed to unmarshal cached session: %w", err) + } + } else { + var err error + session, err = ccs.WAFSession(r.cfClearanceUrl, constant.RutrackerURL) + if err != nil { + r.logger.Error("Failed to create WAF session", zap.Error(err)) + return nil, fmt.Errorf("failed to create WAF session: %w", err) + } + + // login + params := url.Values{} + params.Add("login_username", r.username) + params.Add("login_password", r.password) + params.Add("login", "Вход") + resp, err := ccs.RequestWithWAFSession(http.MethodPost, constant.RutrackerLoginURL, session, &cycletls.Options{ + Headers: map[string]string{ + "Content-Type": "application/x-www-form-urlencoded", + }, + Body: params.Encode(), + UserAgent: "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0", + DisableRedirect: true, + }) + if err != nil { + r.logger.Error("Failed to login", zap.Error(err)) + return nil, fmt.Errorf("failed to login: %w", err) + } + + if len(resp.Cookies) == 0 { + r.logger.Error("Failed to login, no cookies found") + return nil, fmt.Errorf("failed to login, no cookies found") + } + + success := false + loginCookies := make([]ccs.Cookie, 0) + for _, cookie := range resp.Cookies { + if cookie.Name == "bb_session" { + success = true + } + loginCookies = append(loginCookies, ccs.Cookie{ + Name: cookie.Name, + Value: cookie.Value, + }) + } + + if !success { + r.logger.Error("Failed to login, no bb_session cookie found") + return nil, fmt.Errorf("failed to login, no bb_session cookie found") + } + + session.Cookies = 
append(session.Cookies, loginCookies...) + + jsonBytes, err := json.Marshal(session) + if err == nil { + _ = cache.SetWithExpire(cacheKey, string(jsonBytes), 24*time.Hour) + } + } + return &session, nil +} + +func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) { + session, err := r.getSession() + if err != nil { + return nil, fmt.Errorf("failed to get session: %w", err) + } + + resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil) + if err != nil { + r.logger.Error("Failed to request URL", zap.Error(err)) + return nil, fmt.Errorf("failed to request URL: %w", err) + } + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body)) + if err != nil { + r.logger.Error("Failed to parse HTML", zap.Error(err)) + return nil, fmt.Errorf("failed to parse HTML: %w", err) + } + + item, err := db.GetGameItemByUrl(URL) + if err != nil { + r.logger.Warn("Game item not found in database, creating a new one", zap.String("url", URL), zap.Error(err)) + item = &model.GameItem{} + } + + item.RawName = doc.Find("#topic-title").Text() + item.Name = r.formatter(item.RawName) + item.Author = r.source + item.Platform = r.platform + item.Url = URL + + magnet := doc.Find(".magnet-link").AttrOr("href", "") + if magnet == "" { + r.logger.Error("Failed to find magnet link") + return nil, fmt.Errorf("failed to find magnet link") + } + item.DownloadLinks = []string{magnet} + + sizeStr := doc.Find("#tor-size-humn").AttrOr("title", "") + if sizeStr == "" { + r.logger.Warn("Failed to find size") + item.Size = "unknown" + } else { + size, err := strconv.ParseUint(sizeStr, 10, 64) + if err != nil { + r.logger.Error("Failed to parse size", zap.Error(err)) + } else { + item.Size = utils.BytesToSize(size) + } + } + return item, nil +} + +func (r *RutrackerCrawler) Crawl(page int) ([]*model.GameItem, error) { + r.logger.Info("Crawling Rutracker", zap.Int("page", page), zap.String("rid", r.rid)) + session, err := r.getSession() + if err != nil { + r.logger.Error("Failed to get session", zap.Error(err)) + return nil, fmt.Errorf("failed to get session: %w", err) + } + + URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, (page-1)*50) + resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil) + if err != nil { + r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to request URL: %w", err) + } + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body)) + if err != nil { + r.logger.Error("Failed to parse HTML", zap.Error(err)) + return nil, fmt.Errorf("failed to parse HTML: %w", err) + } + var urls []string + var updateFlags []string + doc.Find("[id^='trs-tr']").Each(func(i int, s *goquery.Selection) { + a := s.Find(".t-title") + datetime := s.Find("td").Last().Text() + url, exists := a.Attr("href") + if !exists { + r.logger.Error("Failed to find URL") + return + } + fullURL := fmt.Sprintf(constant.RutrackerTopicURL, url) + urls = append(urls, fullURL) + updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(fullURL+datetime))) + }) + + var res []*model.GameItem + for i, URL := range urls { + if db.IsGameCrawled(updateFlags[i], r.source) { + r.logger.Info("Skipping already crawled URL", zap.String("URL", URL)) + continue + } + r.logger.Info("Crawling URL", zap.String("URL", URL)) + item, err := r.CrawlByUrl(URL) + if err != nil { + r.logger.Error("Failed to crawl URL", zap.String("URL", URL), zap.Error(err)) + continue + } + err = db.SaveGameItem(item) + if err != nil { +
r.logger.Error("Failed to save game item to database", zap.String("URL", URL), zap.Error(err)) + continue + } + res = append(res, item) + if err := OrganizeGameItem(item); err != nil { + r.logger.Warn("Failed to organize game item", zap.String("URL", URL), zap.Error(err)) + continue + } + } + + r.logger.Info("Finished Crawl", zap.Int("Page", page), zap.Int("ItemsCrawled", len(res))) + return res, nil +} + +func (r *RutrackerCrawler) CrawlAll() (res []*model.GameItem, err error) { + r.logger.Info("Starting CrawlAll", zap.String("Source", r.source)) + totalPage, err := r.GetTotalPageNum() + if err != nil { + return nil, fmt.Errorf("failed to get total page number: %w", err) + } + for i := 1; i <= totalPage; i++ { + items, err := r.Crawl(i) + if err != nil { + return nil, fmt.Errorf("failed to crawl page %d: %w", i, err) + } + res = append(res, items...) + } + return res, nil +} + +func (r *RutrackerCrawler) GetTotalPageNum() (int, error) { + session, err := r.getSession() + if err != nil { + return 0, fmt.Errorf("failed to get session: %w", err) + } + + URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, 0) + resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil) + if err != nil { + r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err)) + return 0, fmt.Errorf("failed to request URL: %w", err) + } + + doc, err := goquery.NewDocumentFromReader(strings.NewReader(resp.Body)) + if err != nil { + r.logger.Error("Failed to parse HTML", zap.Error(err)) + return 0, fmt.Errorf("failed to parse HTML: %w", err) + } + + var pg []string + doc.Find(".pg").Each(func(i int, s *goquery.Selection) { + pg = append(pg, s.Text()) + }) + + if len(pg) == 0 { + r.logger.Error("Failed to find page number") + return 0, fmt.Errorf("failed to find page number") + } + + totalPage := 0 + for _, c := range pg[len(pg)-1] { + if unicode.IsDigit(c) { + totalPage, err = strconv.Atoi(pg[len(pg)-1]) + break + } else { + totalPage, err = strconv.Atoi(pg[len(pg)-2]) + break + } + } + if err != nil { + r.logger.Error("Failed to parse page number", zap.Error(err)) + return 0, fmt.Errorf("failed to parse page number: %w", err) + } + return totalPage, nil +} diff --git a/crawler/steam.go b/crawler/steam.go index 691acfd..c54316a 100644 --- a/crawler/steam.go +++ b/crawler/steam.go @@ -13,15 +13,19 @@ import ( "game-crawler/constant" "game-crawler/model" "game-crawler/utils" + + "go.uber.org/zap" ) +// GetSteamAppDetail fetches the details of a Steam app by its ID. 
func GetSteamAppDetail(id int) (*model.SteamAppDetail, error) { key := fmt.Sprintf("steam_game:%d", id) - val, exist := cache.Get(key) - if exist { + if val, exist := cache.Get(key); exist { + zap.L().Info("Cache hit for Steam app detail", zap.Int("steamID", id)) var detail model.SteamAppDetail if err := json.Unmarshal([]byte(val), &detail); err != nil { - return nil, err + zap.L().Warn("Failed to unmarshal cached Steam app detail", zap.Int("steamID", id), zap.Error(err)) + return nil, fmt.Errorf("failed to unmarshal cached Steam app detail for ID %d: %w", id, err) } return &detail, nil } @@ -29,93 +33,117 @@ func GetSteamAppDetail(id int) (*model.SteamAppDetail, error) { baseURL, _ := url.Parse(constant.SteamAppDetailURL) params := url.Values{} params.Add("appids", strconv.Itoa(id)) - // params.Add("l", "schinese") baseURL.RawQuery = params.Encode() + resp, err := utils.Request().SetHeaders(map[string]string{ "User-Agent": "", }).Get(baseURL.String()) if err != nil { - return nil, err + zap.L().Error("Failed to fetch Steam app detail", zap.Int("steamID", id), zap.String("url", baseURL.String()), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch Steam app detail for ID %d: %w", id, err) } + var detail map[string]*model.SteamAppDetail - if err = json.Unmarshal(resp.Body(), &detail); err != nil { - return nil, err - } - if _, ok := detail[strconv.Itoa(id)]; !ok { - return nil, fmt.Errorf("steam App not found: %d", id) - } - if detail[strconv.Itoa(id)] == nil { - return nil, fmt.Errorf("steam App not found: %d", id) + if err := json.Unmarshal(resp.Body(), &detail); err != nil { + zap.L().Error("Failed to unmarshal Steam app detail response", zap.Int("steamID", id), zap.String("response", string(resp.Body())), zap.Error(err)) + return nil, fmt.Errorf("failed to unmarshal Steam app detail for ID %d: %w", id, err) } - jsonBytes, err := json.Marshal(detail[strconv.Itoa(id)]) - if err == nil { - _ = cache.Set(key, string(jsonBytes)) + if appDetail, ok := detail[strconv.Itoa(id)]; !ok || appDetail == nil { + zap.L().Warn("Steam app detail not found", zap.Int("steamID", id)) + return nil, fmt.Errorf("steam app not found: %d", id) + } else { + // Cache the result + jsonBytes, err := json.Marshal(appDetail) + if err == nil { + _ = cache.Set(key, string(jsonBytes)) + } + return appDetail, nil } - - return detail[strconv.Itoa(id)], nil } +// GenerateSteamGameInfo generates detailed game information based on a Steam App ID. 
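One detail of the generator below worth spelling out: the cover is never taken from the API response; it is derived from the Steam CDN's predictable asset path. A sketch of that convention (whether the 600x900 library capsule actually exists for a given app is not guaranteed):

package main

import "fmt"

// libraryCover returns the predictable Steam CDN path that
// GenerateSteamGameInfo hardcodes for an app's library capsule.
func libraryCover(appID int) string {
	return fmt.Sprintf(
		"https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/%v/library_600x900_2x.jpg",
		appID)
}

func main() {
	fmt.Println(libraryCover(440))
}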
func GenerateSteamGameInfo(id int) (*model.GameInfo, error) { - item := &model.GameInfo{} detail, err := GetSteamAppDetail(id) if err != nil { - return nil, err + zap.L().Error("Failed to fetch Steam app detail for game info generation", zap.Int("steamID", id), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch Steam app detail for ID %d: %w", id, err) } - item.SteamID = id - item.Name = detail.Data.Name - item.Description = detail.Data.ShortDescription - item.Cover = fmt.Sprintf("https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/%v/library_600x900_2x.jpg", id) - item.Developers = detail.Data.Developers - item.Publishers = detail.Data.Publishers - var screenshots []string + + item := &model.GameInfo{ + SteamID: id, + Name: detail.Data.Name, + Description: detail.Data.ShortDescription, + Cover: fmt.Sprintf("https://shared.cloudflare.steamstatic.com/store_item_assets/steam/apps/%v/library_600x900_2x.jpg", id), + Developers: detail.Data.Developers, + Publishers: detail.Data.Publishers, + Screenshots: make([]string, 0, len(detail.Data.Screenshots)), + } + for _, screenshot := range detail.Data.Screenshots { - screenshots = append(screenshots, screenshot.PathFull) + item.Screenshots = append(item.Screenshots, screenshot.PathFull) } - item.Screenshots = screenshots + + zap.L().Info("Generated Steam game info", zap.Int("steamID", id), zap.String("name", item.Name)) return item, nil } +// GetSteamIDByIGDBID retrieves the Steam App ID associated with a given IGDB ID. func GetSteamIDByIGDBID(IGDBID int) (int, error) { key := fmt.Sprintf("steam_game:%d", IGDBID) - val, exist := cache.Get(key) - if exist { + if val, exist := cache.Get(key); exist { + zap.L().Info("Cache hit for Steam ID by IGDB ID", zap.Int("IGDBID", IGDBID)) id, err := strconv.Atoi(val) if err != nil { - return 0, err + zap.L().Warn("Failed to parse cached Steam ID", zap.Int("IGDBID", IGDBID), zap.Error(err)) + return 0, fmt.Errorf("failed to parse cached Steam ID for IGDB ID %d: %w", IGDBID, err) } return id, nil } - var err error - resp, err := igdbRequest(constant.IGDBWebsitesURL, fmt.Sprintf(`where game = %v; fields *; limit 500;`, IGDBID)) + query := fmt.Sprintf(`where game = %v; fields *; limit 500;`, IGDBID) + resp, err := igdbRequest(constant.IGDBWebsitesURL, query) if err != nil { - return 0, err + zap.L().Error("Failed to fetch IGDB websites for Steam ID", zap.Int("IGDBID", IGDBID), zap.Error(err)) + return 0, fmt.Errorf("failed to fetch IGDB websites for IGDB ID %d: %w", IGDBID, err) } + var data []struct { Game int `json:"game"` Url string `json:"url"` } - if err = json.Unmarshal(resp.Body(), &data); err != nil { - return 0, err + if err := json.Unmarshal(resp.Body(), &data); err != nil { + zap.L().Error("Failed to unmarshal IGDB websites response", zap.Int("IGDBID", IGDBID), zap.String("response", string(resp.Body())), zap.Error(err)) + return 0, fmt.Errorf("failed to unmarshal IGDB websites response for IGDB ID %d: %w", IGDBID, err) } + if len(data) == 0 { - return 0, errors.New("not found") + zap.L().Warn("No Steam ID found for IGDB ID", zap.Int("IGDBID", IGDBID)) + return 0, errors.New("steam ID not found") } + for _, v := range data { if strings.HasPrefix(v.Url, "https://store.steampowered.com/app/") { regex := regexp.MustCompile(`https://store.steampowered.com/app/(\d+)/?`) - idStr := regex.FindStringSubmatch(v.Url) - if len(idStr) < 2 { - return 0, errors.New("failed parse") + idMatch := regex.FindStringSubmatch(v.Url) + if len(idMatch) < 2 { + zap.L().Warn("Failed to parse Steam ID from 
URL", zap.String("url", v.Url)) + return 0, errors.New("failed to parse Steam ID from URL") } - steamID, err := strconv.Atoi(idStr[1]) + + steamID, err := strconv.Atoi(idMatch[1]) if err != nil { - return 0, err + zap.L().Error("Failed to convert Steam ID to integer", zap.String("url", v.Url), zap.Error(err)) + return 0, fmt.Errorf("failed to convert Steam ID from URL %s: %w", v.Url, err) } + + // Cache the result _ = cache.Set(key, strconv.Itoa(steamID)) + zap.L().Info("Found Steam ID for IGDB ID", zap.Int("IGDBID", IGDBID), zap.Int("steamID", steamID)) return steamID, nil } } - return 0, errors.New("not found") + + zap.L().Warn("No valid Steam ID found in IGDB websites data", zap.Int("IGDBID", IGDBID)) + return 0, errors.New("steam ID not found") } diff --git a/crawler/steam250.go b/crawler/steam250.go index ee952fa..64061e8 100644 --- a/crawler/steam250.go +++ b/crawler/steam250.go @@ -16,73 +16,125 @@ import ( "game-crawler/utils" "github.com/PuerkitoBio/goquery" + "go.uber.org/zap" ) +// GetSteam250 fetches Steam250 game rankings from the given URL. func GetSteam250(URL string) ([]*model.GameInfo, error) { key := "steam250:" + url.QueryEscape(URL) if val, ok := cache.Get(key); ok { + zap.L().Info("Cache hit for Steam250 rankings", zap.String("url", URL)) var infos []*model.GameInfo - err := json.Unmarshal([]byte(val), &infos) - if err != nil { - return nil, err + if err := json.Unmarshal([]byte(val), &infos); err != nil { + zap.L().Warn("Failed to unmarshal cached Steam250 data", zap.String("url", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to unmarshal cached Steam250 data for URL %s: %w", URL, err) } return infos, nil } + zap.L().Info("Fetching Steam250 rankings from URL", zap.String("url", URL)) resp, err := utils.Request().Get(URL) if err != nil { - return nil, err + zap.L().Error("Failed to fetch Steam250 rankings", zap.String("url", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch Steam250 rankings from URL %s: %w", URL, err) } + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body())) if err != nil { - return nil, err + zap.L().Error("Failed to parse Steam250 HTML document", zap.String("url", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to parse Steam250 HTML document for URL %s: %w", URL, err) } + var rank []model.Steam250Item var item model.Steam250Item steamIDs := make([]int, 0) + doc.Find(".appline").Each(func(i int, s *goquery.Selection) { + // Extract game name item.Name = s.Find(".title>a").First().Text() + if item.Name == "" { + zap.L().Warn("Game name not found in Steam250 rankings", zap.String("url", URL), zap.Int("index", i)) + return + } + + // Extract Steam ID idStr := s.Find(".store").AttrOr("href", "") idSlice := regexp.MustCompile(`app/(\d+)/`).FindStringSubmatch(idStr) if len(idSlice) < 2 { + zap.L().Warn("Failed to extract Steam ID from URL", zap.String("url", idStr), zap.Int("index", i)) return } - item.SteamID, _ = strconv.Atoi(idSlice[1]) + + steamID, err := strconv.Atoi(idSlice[1]) + if err != nil { + zap.L().Warn("Failed to convert Steam ID to integer", zap.String("id", idSlice[1]), zap.Error(err)) + return + } + + item.SteamID = steamID rank = append(rank, item) - steamIDs = append(steamIDs, item.SteamID) + steamIDs = append(steamIDs, steamID) }) - infos, err := db.GetGameInfosByPlatformIDs("steam", steamIDs) - if err != nil { - return nil, err - } - if len(infos) > 10 { - return infos[:10], nil + + if len(steamIDs) == 0 { + zap.L().Warn("No valid Steam IDs found in Steam250 rankings", zap.String("url", 
URL)) + return nil, fmt.Errorf("no valid Steam IDs found in Steam250 rankings for URL %s", URL) } + // Fetch game info from the database + zap.L().Info("Fetching game info from database", zap.Ints("steamIDs", steamIDs)) + infos, err := db.GetGameInfosByPlatformIDs("steam", steamIDs) + if err != nil { + zap.L().Error("Failed to fetch game info from database", zap.Ints("steamIDs", steamIDs), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch game info for Steam IDs %v: %w", steamIDs, err) + } + + // Limit the result to 10 entries (if applicable) + if len(infos) > 10 { + infos = infos[:10] + } + + // Cache the result jsonBytes, err := json.Marshal(infos) if err == nil { - _ = cache.SetWithExpire(key, string(jsonBytes), 12*time.Hour) + cacheErr := cache.SetWithExpire(key, string(jsonBytes), 12*time.Hour) + if cacheErr != nil { + zap.L().Warn("Failed to cache Steam250 rankings", zap.String("url", URL), zap.Error(cacheErr)) + } + } else { + zap.L().Warn("Failed to marshal Steam250 rankings for caching", zap.String("url", URL), zap.Error(err)) } return infos, nil } +// GetSteam250Top250 retrieves the top 250 games from Steam250. func GetSteam250Top250() ([]*model.GameInfo, error) { + zap.L().Info("Fetching Steam250 Top 250 games") return GetSteam250(constant.Steam250Top250URL) } +// GetSteam250BestOfTheYear retrieves the best games of the current year from Steam250. func GetSteam250BestOfTheYear() ([]*model.GameInfo, error) { - return GetSteam250(fmt.Sprintf(constant.Steam250BestOfTheYearURL, time.Now().UTC().Year())) + year := time.Now().UTC().Year() + zap.L().Info("Fetching Steam250 Best of the Year games", zap.Int("year", year)) + return GetSteam250(fmt.Sprintf(constant.Steam250BestOfTheYearURL, year)) } +// GetSteam250WeekTop50 retrieves the top 50 games of the week from Steam250. func GetSteam250WeekTop50() ([]*model.GameInfo, error) { + zap.L().Info("Fetching Steam250 Week Top 50 games") return GetSteam250(constant.Steam250WeekTop50URL) } +// GetSteam250MonthTop50 retrieves the top 50 games of the month from Steam250. func GetSteam250MonthTop50() ([]*model.GameInfo, error) { + zap.L().Info("Fetching Steam250 Month Top 50 games") return GetSteam250(constant.Steam250MonthTop50URL) } +// GetSteam250MostPlayed retrieves the most played games from Steam250. func GetSteam250MostPlayed() ([]*model.GameInfo, error) { + zap.L().Info("Fetching Steam250 Most Played games") return GetSteam250(constant.Steam250MostPlayedURL) } diff --git a/crawler/steamrip.go b/crawler/steamrip.go index 2436e49..3df70b5 100644 --- a/crawler/steamrip.go +++ b/crawler/steamrip.go @@ -17,122 +17,178 @@ import ( "go.uber.org/zap" ) +// SteamRIPCrawler defines a crawler for the SteamRIP website. type SteamRIPCrawler struct { logger *zap.Logger } +// NewSteamRIPCrawler creates a new instance of SteamRIPCrawler. func NewSteamRIPCrawler(logger *zap.Logger) *SteamRIPCrawler { return &SteamRIPCrawler{ logger: logger, } } +// Name returns the name of the crawler. func (c *SteamRIPCrawler) Name() string { return "SteamRIPCrawler" } +// CrawlByUrl crawls a single game page from SteamRIP by URL. 
func (c *SteamRIPCrawler) CrawlByUrl(URL string) (*model.GameItem, error) { + c.logger.Info("Crawling game details", zap.String("URL", URL)) + + // Fetch the page content resp, err := utils.Request().Get(URL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch URL", zap.String("URL", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch URL %s: %w", URL, err) } + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body())) if err != nil { - return nil, err + c.logger.Error("Failed to parse HTML document", zap.String("URL", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to parse HTML document for URL %s: %w", URL, err) } + + // Retrieve or create game item item, err := db.GetGameItemByUrl(URL) if err != nil { - return nil, err + c.logger.Warn("Game item not found in database, creating a new one", zap.String("URL", URL), zap.Error(err)) + item = &model.GameItem{} } + + // Extract game details item.RawName = strings.TrimSpace(doc.Find(".entry-title").First().Text()) + if item.RawName == "" { + c.logger.Warn("Game title not found", zap.String("URL", URL)) + return nil, errors.New("game title not found") + } item.Name = SteamRIPFormatter(item.RawName) item.Url = URL item.Author = "SteamRIP" item.Platform = "windows" + + // Extract game size sizeRegex := regexp.MustCompile(`(?i)<li>Game Size:\s?(.*?)</li>`) - sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body())) - if len(sizeRegexRes) != 0 { - item.Size = strings.TrimSpace(sizeRegexRes[1]) + sizeMatch := sizeRegex.FindStringSubmatch(string(resp.Body())) + if len(sizeMatch) > 1 { + item.Size = strings.TrimSpace(sizeMatch[1]) } else { item.Size = "unknown" } - megadbRegex := regexp.MustCompile(`(?i)(?:https?:)?(//megadb\.net/[^"]+)`) - megadbRegexRes := megadbRegex.FindStringSubmatch(string(resp.Body())) - links := []string{} - if len(megadbRegexRes) != 0 { - links = append(links, fmt.Sprintf("https:%s", megadbRegexRes[1])) - } - gofileRegex := regexp.MustCompile(`(?i)(?:https?:)?(//gofile\.io/d/[^"]+)`) - gofileRegexRes := gofileRegex.FindStringSubmatch(string(resp.Body())) - if len(gofileRegexRes) != 0 { - links = append(links, fmt.Sprintf("https:%s", gofileRegexRes[1])) - } - filecryptRegex := regexp.MustCompile(`(?i)(?:https?:)?(//filecrypt\.co/Container/[^"]+)`) - filecryptRegexRes := filecryptRegex.FindStringSubmatch(string(resp.Body())) - if len(filecryptRegexRes) != 0 { - links = append(links, fmt.Sprintf("https:%s", filecryptRegexRes[1])) - } - item.DownloadLinks = links + + // Extract download links + item.DownloadLinks = c.extractDownloadLinks(string(resp.Body())) if len(item.DownloadLinks) == 0 { + c.logger.Warn("No download links found", zap.String("URL", URL)) return nil, errors.New("failed to find download link") } + c.logger.Info("Crawled game details successfully", zap.String("Name", item.Name), zap.String("URL", URL)) return item, nil } +// extractDownloadLinks extracts download links from the game page HTML. +func (c *SteamRIPCrawler) extractDownloadLinks(pageContent string) []string { + var links []string + + // Match MegaDB links + megadbRegex := regexp.MustCompile(`(?i)(?:https?:)?(//megadb\.net/[^"]+)`) + if matches := megadbRegex.FindStringSubmatch(pageContent); len(matches) > 1 { + links = append(links, fmt.Sprintf("https:%s", matches[1])) + } + + // Match Gofile links + gofileRegex := regexp.MustCompile(`(?i)(?:https?:)?(//gofile\.io/d/[^"]+)`) + if matches := gofileRegex.FindStringSubmatch(pageContent); len(matches) > 1 { + links = append(links, fmt.Sprintf("https:%s", matches[1])) + } + + // Match Filecrypt links + filecryptRegex := regexp.MustCompile(`(?i)(?:https?:)?(//filecrypt\.co/Container/[^"]+)`) + if matches := filecryptRegex.FindStringSubmatch(pageContent); len(matches) > 1 { + links = append(links, fmt.Sprintf("https:%s", matches[1])) + } + + return links +} + +// Crawl crawls a limited number of games from the SteamRIP game list.
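extractDownloadLinks above repeats the same match-and-prefix step once per file host. A table-driven variant keeps a single code path and makes supporting another mirror a one-line change; a sketch under the same protocol-relative-URL assumption:

package main

import (
	"fmt"
	"regexp"
)

// one pattern per supported file host; each captures a protocol-relative URL
var hostPatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?i)(?:https?:)?(//megadb\.net/[^"]+)`),
	regexp.MustCompile(`(?i)(?:https?:)?(//gofile\.io/d/[^"]+)`),
	regexp.MustCompile(`(?i)(?:https?:)?(//filecrypt\.co/Container/[^"]+)`),
}

func extractDownloadLinks(pageContent string) []string {
	var links []string
	for _, re := range hostPatterns {
		if m := re.FindStringSubmatch(pageContent); len(m) > 1 {
			links = append(links, "https:"+m[1])
		}
	}
	return links
}

func main() {
	html := `<a href="//gofile.io/d/abc123">Download</a>`
	fmt.Println(extractDownloadLinks(html)) // [https://gofile.io/d/abc123]
}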
func (c *SteamRIPCrawler) Crawl(num int) ([]*model.GameItem, error) { - count := 0 + c.logger.Info("Starting SteamRIP crawl", zap.Int("limit", num)) + + // Fetch the game list page resp, err := utils.Request().Get(constant.SteamRIPGameListURL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch game list", zap.String("URL", constant.SteamRIPGameListURL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch game list: %w", err) } + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body())) if err != nil { - return nil, err + c.logger.Error("Failed to parse game list HTML document", zap.String("URL", constant.SteamRIPGameListURL), zap.Error(err)) + return nil, fmt.Errorf("failed to parse game list HTML document: %w", err) } + var items []*model.GameItem var urls []string - var updateFlags []string // title + var updateFlags []string + + // Extract game URLs doc.Find(".az-list-item>a").Each(func(i int, s *goquery.Selection) { - u, exist := s.Attr("href") - if !exist { + u, exists := s.Attr("href") + if !exists { return } urls = append(urls, fmt.Sprintf("%s%s", constant.SteamRIPBaseURL, u)) updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Text()))) }) + + // Crawl games + count := 0 for i, u := range urls { - if count == num { + if num > 0 && count == num { break } if db.IsSteamRIPCrawled(updateFlags[i]) { + c.logger.Info("Skipping already crawled game", zap.String("URL", u)) continue } - c.logger.Info("Crawling", zap.String("URL", u)) + + c.logger.Info("Crawling game", zap.String("URL", u)) item, err := c.CrawlByUrl(u) if err != nil { - c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u)) + c.logger.Warn("Failed to crawl game", zap.String("URL", u), zap.Error(err)) continue } + item.UpdateFlag = updateFlags[i] if err := db.SaveGameItem(item); err != nil { - c.logger.Warn("Failed to save item", zap.Error(err)) + c.logger.Warn("Failed to save game item", zap.String("URL", u), zap.Error(err)) continue } + items = append(items, item) count++ + if err := OrganizeGameItem(item); err != nil { - c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u)) + c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err)) continue } } + + c.logger.Info("SteamRIP crawl completed", zap.Int("gamesCrawled", len(items))) return items, nil } +// CrawlAll crawls all games from the SteamRIP game list. func (c *SteamRIPCrawler) CrawlAll() ([]*model.GameItem, error) { + c.logger.Info("Starting full crawl of SteamRIP") return c.Crawl(-1) } +// SteamRIPFormatter formats the game name by removing unnecessary text. func SteamRIPFormatter(name string) string { name = regexp.MustCompile(`\([^\)]+\)`).ReplaceAllString(name, "") name = strings.Replace(name, "Free Download", "", -1) diff --git a/crawler/xatab.go b/crawler/xatab.go index 3baaf22..d1b67fa 100644 --- a/crawler/xatab.go +++ b/crawler/xatab.go @@ -32,145 +32,214 @@ func (c *XatabCrawler) Name() string { return "XatabCrawler" } +// Crawl crawls a single page of the Xatab website. 
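The formatters in this PR all strip parenthesised release tags before name matching, and SteamRIPFormatter additionally drops the trailing "Free Download". A quick behavioral sketch of that cleanup, with the regex copied from the function above:

package main

import (
	"fmt"
	"regexp"
	"strings"
)

// cleanup mirrors the SteamRIPFormatter steps shown above
func cleanup(name string) string {
	name = regexp.MustCompile(`\([^\)]+\)`).ReplaceAllString(name, "")
	name = strings.ReplaceAll(name, "Free Download", "")
	return strings.TrimSpace(name)
}

func main() {
	fmt.Println(cleanup("ELDEN RING Free Download (v1.10 + DLC)"))
	// Output: ELDEN RING
}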
func (c *XatabCrawler) Crawl(page int) ([]*model.GameItem, error) { requestURL := fmt.Sprintf("%s/page/%v", constant.XatabBaseURL, page) + c.logger.Info("Fetching page", zap.String("URL", requestURL)) + + // Fetch the page content resp, err := utils.Request().Get(requestURL) if err != nil { - c.logger.Error("Failed to fetch", zap.Error(err)) - return nil, err + c.logger.Error("Failed to fetch page", zap.String("URL", requestURL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch page %d: %w", page, err) } + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body())) if err != nil { - c.logger.Error("Failed to parse HTML", zap.Error(err)) - return nil, err + c.logger.Error("Failed to parse HTML", zap.String("URL", requestURL), zap.Error(err)) + return nil, fmt.Errorf("failed to parse HTML for page %d: %w", page, err) } + var urls []string - var updateFlags []string // title + var updateFlags []string + + // Extract game URLs and titles doc.Find(".entry").Each(func(i int, s *goquery.Selection) { - u, exist := s.Find(".entry__title.h2 a").Attr("href") - if !exist { + u, exists := s.Find(".entry__title.h2 a").Attr("href") + if !exists { return } urls = append(urls, u) updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Find(".entry__title.h2 a").Text()))) }) + + // Process each game URL var res []*model.GameItem for i, u := range urls { if db.IsXatabCrawled(updateFlags[i]) { + c.logger.Info("Skipping already crawled game", zap.String("URL", u)) continue } - c.logger.Info("Crawling", zap.String("URL", u)) + + c.logger.Info("Crawling game", zap.String("URL", u)) item, err := c.CrawlByUrl(u) if err != nil { - c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u)) + c.logger.Warn("Failed to crawl game", zap.String("URL", u), zap.Error(err)) continue } + item.UpdateFlag = updateFlags[i] - err = db.SaveGameItem(item) - if err != nil { - c.logger.Warn("Failed to save", zap.Error(err)) + if err := db.SaveGameItem(item); err != nil { + c.logger.Warn("Failed to save game item", zap.String("URL", u), zap.Error(err)) continue } + res = append(res, item) + if err := OrganizeGameItem(item); err != nil { - c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u)) + c.logger.Warn("Failed to organize game item", zap.String("URL", u), zap.Error(err)) continue } } + + c.logger.Info("Crawled page successfully", zap.Int("gamesCrawled", len(res)), zap.Int("page", page)) return res, nil } +// CrawlByUrl crawls a single game page from Xatab by URL. 
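+//
+// Hedged usage sketch; the path below is made up for illustration:
+//
+//	item, err := c.CrawlByUrl(constant.XatabBaseURL + "/12345-some-game.html")
+//	if err != nil {
+//		// the page fetch, title extraction, download URL lookup, or
+//		// torrent-to-magnet conversion failed
+//	}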
func (c *XatabCrawler) CrawlByUrl(URL string) (*model.GameItem, error) { + c.logger.Info("Crawling game details", zap.String("URL", URL)) + + // Fetch the game page resp, err := utils.Request().Get(URL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch game page", zap.String("URL", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch game page %s: %w", URL, err) } + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body())) if err != nil { - return nil, err + c.logger.Error("Failed to parse game HTML", zap.String("URL", URL), zap.Error(err)) + return nil, fmt.Errorf("failed to parse game HTML for URL %s: %w", URL, err) } + + // Retrieve or create game item item, err := db.GetGameItemByUrl(URL) if err != nil { - return nil, err + c.logger.Warn("Failed to fetch game item from database, creating new", zap.String("URL", URL), zap.Error(err)) + item = &model.GameItem{} } + item.Url = URL - item.RawName = doc.Find(".inner-entry__title").First().Text() + item.RawName = strings.TrimSpace(doc.Find(".inner-entry__title").First().Text()) + if item.RawName == "" { + c.logger.Warn("Game title not found", zap.String("URL", URL)) + return nil, errors.New("game title not found") + } item.Name = XatabFormatter(item.RawName) item.Author = "Xatab" item.Platform = "windows" + + // Extract download URL downloadURL := doc.Find("#download>a").First().AttrOr("href", "") if downloadURL == "" { + c.logger.Warn("Download URL not found", zap.String("URL", URL)) return nil, errors.New("failed to find download URL") } + + // Fetch torrent file and convert to magnet link resp, err = utils.Request().SetHeaders(map[string]string{"Referer": URL}).Get(downloadURL) if err != nil { - return nil, err + c.logger.Error("Failed to fetch download link", zap.String("URL", downloadURL), zap.Error(err)) + return nil, fmt.Errorf("failed to fetch download link %s: %w", downloadURL, err) } + magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body()) if err != nil { - return nil, err + c.logger.Error("Failed to convert torrent to magnet", zap.String("URL", downloadURL), zap.Error(err)) + return nil, fmt.Errorf("failed to convert torrent to magnet: %w", err) } + item.Size = size item.DownloadLinks = []string{magnet} + + c.logger.Info("Crawled game details successfully", zap.String("Name", item.Name), zap.String("URL", URL)) return item, nil } +// CrawlMulti crawls multiple pages from Xatab. func (c *XatabCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) { + c.logger.Info("Starting multi-page crawl", zap.Ints("pages", pages)) + totalPageNum, err := c.GetTotalPageNum() if err != nil { - return nil, err + c.logger.Error("Failed to get total page number", zap.Error(err)) + return nil, fmt.Errorf("failed to get total page number: %w", err) } + var res []*model.GameItem for _, page := range pages { if page > totalPageNum { + c.logger.Warn("Skipping page out of range", zap.Int("page", page), zap.Int("totalPages", totalPageNum)) continue } + items, err := c.Crawl(page) if err != nil { - return nil, err + c.logger.Warn("Failed to crawl page", zap.Int("page", page), zap.Error(err)) + continue } + res = append(res, items...) } + + c.logger.Info("Multi-page crawl completed", zap.Int("gamesCrawled", len(res))) return res, nil } +// CrawlAll crawls all pages from Xatab. 
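+//
+// Pages that fail are logged and skipped rather than aborting the run, so
+// the returned slice may be partial. Sketch:
+//
+//	items, err := c.CrawlAll() // err is non-nil only if the page count lookup fails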
func (c *XatabCrawler) CrawlAll() ([]*model.GameItem, error) { + c.logger.Info("Starting full crawl of Xatab") + totalPageNum, err := c.GetTotalPageNum() if err != nil { - return nil, err + c.logger.Error("Failed to get total page number", zap.Error(err)) + return nil, fmt.Errorf("failed to get total page number: %w", err) } + var res []*model.GameItem for i := 1; i <= totalPageNum; i++ { items, err := c.Crawl(i) if err != nil { - return nil, err + c.logger.Warn("Failed to crawl page", zap.Int("page", i), zap.Error(err)) + continue } + res = append(res, items...) } + + c.logger.Info("Full crawl completed", zap.Int("gamesCrawled", len(res))) return res, nil } +// GetTotalPageNum retrieves the total number of pages from Xatab. func (c *XatabCrawler) GetTotalPageNum() (int, error) { + c.logger.Info("Fetching total page number") + resp, err := utils.Request().Get(constant.XatabBaseURL) if err != nil { - return 0, err + c.logger.Error("Failed to fetch base URL", zap.String("URL", constant.XatabBaseURL), zap.Error(err)) + return 0, fmt.Errorf("failed to fetch base URL: %w", err) } + doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body())) if err != nil { - return 0, err + c.logger.Error("Failed to parse base HTML", zap.String("URL", constant.XatabBaseURL), zap.Error(err)) + return 0, fmt.Errorf("failed to parse base HTML: %w", err) } + pageStr := doc.Find(".pagination>a").Last().Text() totalPageNum, err := strconv.Atoi(pageStr) if err != nil { - return 0, err + c.logger.Error("Failed to parse total page number", zap.String("pageStr", pageStr), zap.Error(err)) + return 0, fmt.Errorf("failed to parse total page number: %w", err) } + + c.logger.Info("Fetched total page number", zap.Int("totalPages", totalPageNum)) return totalPageNum, nil } -var xatabRegexps = []*regexp.Regexp{ - regexp.MustCompile(`(?i)\sPC$`), -} - +// XatabFormatter formats the game name by removing unnecessary text. 
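+//
+// Made-up inputs to illustrate the stripping rules (version suffixes, a
+// trailing " PC", and keeping the longest segment of a "/"-separated name):
+//
+//	XatabFormatter("Some Game v1.2.3")         // "Some Game"
+//	XatabFormatter("Some Game PC")             // "Some Game"
+//	XatabFormatter("Short / Much Longer Name") // "Much Longer Name"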
func XatabFormatter(name string) string { reg1 := regexp.MustCompile(`(?i)v(er)?\s?(\.)?\d+(\.\d+)*`) if index := reg1.FindIndex([]byte(name)); index != nil { @@ -189,10 +258,13 @@ func XatabFormatter(name string) string { name = name[:index] } name = strings.TrimSpace(name) + + // Remove specific patterns for _, re := range xatabRegexps { name = re.ReplaceAllString(name, "") } + // Handle names separated by "/" if index := strings.Index(name, "/"); index != -1 { names := strings.Split(name, "/") longestLength := 0 @@ -208,3 +280,7 @@ func XatabFormatter(name string) string { return strings.TrimSpace(name) } + +var xatabRegexps = []*regexp.Regexp{ + regexp.MustCompile(`(?i)\sPC$`), +} diff --git a/db/game.go b/db/game.go index aa6dda3..11b1c35 100644 --- a/db/game.go +++ b/db/game.go @@ -396,22 +396,22 @@ func GetGameInfosByPlatformIDs(platform string, ids []int) ([]*model.GameInfo, e return games, nil } -func HasGameItemOrganized(id primitive.ObjectID) (bool, []*model.GameInfo) { +func HasGameItemOrganized(id primitive.ObjectID) bool { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() filter := bson.M{"games": id} var res []*model.GameInfo cursor, err := GameInfoCollection.Find(ctx, filter) if err != nil { - return false, nil + return false } if err = cursor.All(ctx, &res); err != nil { - return false, nil + return false } if len(res) == 0 { - return false, nil + return false } - return true, res + return true } func GetUnorganizedGameItems(num int) ([]*model.GameItem, error) { diff --git a/log/log.go b/log/log.go index 8e013d2..9b65a3c 100644 --- a/log/log.go +++ b/log/log.go @@ -12,16 +12,14 @@ import ( ) var Logger *zap.Logger -var ConsoleLogger *zap.Logger -var FileLogger *zap.Logger var TaskLogger *zap.Logger func init() { - fileCore, consoleCore, combinedCore, taskCore := buildZapCore(getZapLogLevel(config.Config.LogLevel)) - FileLogger = zap.New(fileCore, zap.AddCaller()) - ConsoleLogger = zap.New(consoleCore, zap.AddCaller()) + _, _, combinedCore, taskCore := buildZapCore(getZapLogLevel(config.Config.LogLevel)) Logger = zap.New(combinedCore, zap.AddCaller()) TaskLogger = zap.New(taskCore, zap.AddCaller()) + + zap.ReplaceGlobals(Logger) } func buildZapCore(logLevel zapcore.Level) (fileCore zapcore.Core, consoleCore zapcore.Core, combinedCore zapcore.Core, taskCore zapcore.Core) { diff --git a/server/handler/healthcheck.go b/server/handler/healthcheck.go index df3ebe6..fe0e84a 100644 --- a/server/handler/healthcheck.go +++ b/server/handler/healthcheck.go @@ -24,7 +24,6 @@ type HealthCheckResponse struct { GameItem int64 `json:"game_num"` GameInfo int64 `json:"game_info_num"` Unorganized int64 `json:"unorganized_game_num"` - MegaAvaliable bool `json:"mega_avaliable"` } // HealthCheckHandler performs a health check of the service. @@ -57,6 +56,5 @@ func HealthCheckHandler(c *gin.Context) { GameItem: downloadCount, GameInfo: infoCount, Unorganized: unorganizedCount, - MegaAvaliable: config.Config.MegaAvaliable, }) }
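+
+// Example response body after removing mega_avaliable (illustrative values;
+// fields declared above the hunk shown here are omitted):
+//
+//	{
+//	  "game_num": 1543,
+//	  "game_info_num": 1490,
+//	  "unorganized_game_num": 53
+//	}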