package crawler import ( "bytes" "encoding/base64" "html" "pcgamedb/constant" "pcgamedb/db" "pcgamedb/model" "pcgamedb/utils" "regexp" "strings" "github.com/PuerkitoBio/goquery" "go.uber.org/zap" ) type FreeGOGCrawler struct { logger *zap.Logger } // Deprecated: Unable to get through cloudflare func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler { return &FreeGOGCrawler{ logger: logger, } } func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameDownload, error) { count := 0 resp, err := utils.Fetch(utils.FetchConfig{ Url: constant.FreeGOGListURL, }) if err != nil { c.logger.Error("Failed to fetch", zap.Error(err)) return nil, err } doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data)) if err != nil { c.logger.Error("Failed to parse HTML", zap.Error(err)) return nil, err } urls := []string{} updateFlags := []string{} //rawName+link doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) { urls = append(urls, s.AttrOr("href", "")) updateFlags = append(updateFlags, s.Text()+s.AttrOr("href", "")) }) res := []*model.GameDownload{} for i, u := range urls { if count == num { break } if db.IsFreeGOGCrawled(updateFlags[i]) { continue } c.logger.Info("Crawling", zap.String("URL", u)) item, err := c.CrawlByUrl(u) if err != nil { c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u)) continue } item.UpdateFlag = updateFlags[i] err = db.SaveGameDownload(item) if err != nil { c.logger.Warn("Failed to save", zap.Error(err)) continue } res = append(res, item) count++ info, err := OrganizeGameDownload(item) if err != nil { c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u)) continue } err = db.SaveGameInfo(info) if err != nil { c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u)) continue } } return res, nil } func (c *FreeGOGCrawler) CrawlByUrl(url string) (*model.GameDownload, error) { resp, err := utils.Fetch(utils.FetchConfig{ Url: url, }) if err != nil { return nil, err } item, err := db.GetGameDownloadByUrl(url) if err != nil { return nil, err } item.Url = url rawTitleRegex := regexp.MustCompile(`(?i)