package crawler

import (
	"bytes"
	"encoding/base64"
	"errors"
	"html"
	"regexp"
	"strings"

	"pcgamedb/config"
	"pcgamedb/constant"
	"pcgamedb/db"
	"pcgamedb/model"
	"pcgamedb/utils"

	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
)

// FreeGOGCrawler crawls game entries linked from the FreeGOG list page.
type FreeGOGCrawler struct {
	logger *zap.Logger
}

// NewFreeGOGCrawler returns a FreeGOGCrawler that reports through logger.
func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler {
	return &FreeGOGCrawler{
		logger: logger,
	}
}

// Name returns the crawler's identifier, "FreeGOG".
func (c *FreeGOGCrawler) Name() string {
	return "FreeGOG"
}

// Crawl fetches the FreeGOG list page and crawls up to num entries that
// db.IsFreeGOGCrawled does not report as already crawled.
//
// Every anchor matched by ".items-outer li a" is identified by an update flag
// made of the anchor text followed by its href. Anchors without an href are
// ignored. Each remaining new entry is crawled with CrawlByUrl, stamped with
// its update flag and stored with db.SaveGameItem; only entries stored
// successfully are returned and counted against num. The stored item is then
// passed to OrganizeGameItem and the result to db.SaveGameInfo; a failure in
// either step is logged but the item stays in the returned slice.
//
// The limit check is an equality test against the number of collected items,
// so a negative num never stops the loop early.
//
// An error is returned only when creating the session, fetching the list page
// or parsing its HTML fails. Per-entry failures are logged and skipped.
func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
	session, err := utils.CCSWAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
	if err != nil {
		c.logger.Error("Failed to create session", zap.Error(err))
		return nil, err
	}

	resp, err := utils.FetchWithWAFSession(utils.FetchConfig{
		Url: constant.FreeGOGListURL,
	}, session)
	if err != nil {
		c.logger.Error("Failed to fetch", zap.Error(err))
		return nil, err
	}

	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data))
	if err != nil {
		c.logger.Error("Failed to parse HTML", zap.Error(err))
		return nil, err
	}

	// urls and updateFlags are parallel slices: updateFlags[i] belongs to urls[i].
	urls := []string{}
	updateFlags := []string{} // rawName+link
	doc.Find(".items-outer li a").Each(func(_ int, s *goquery.Selection) {
		href := s.AttrOr("href", "")
		if href == "" {
			// Nothing to crawl; an empty URL would only produce a failed fetch.
			return
		}
		urls = append(urls, href)
		updateFlags = append(updateFlags, s.Text()+href)
	})

	res := []*model.GameItem{}
	for i, u := range urls {
		// Only successfully saved items count towards the limit.
		if len(res) == num {
			break
		}
		if db.IsFreeGOGCrawled(updateFlags[i]) {
			continue
		}

		c.logger.Info("Crawling", zap.String("URL", u))
		item, err := c.CrawlByUrl(u, session)
		if err != nil {
			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
			continue
		}

		item.UpdateFlag = updateFlags[i]
		if err := db.SaveGameItem(item); err != nil {
			c.logger.Warn("Failed to save", zap.Error(err))
			continue
		}
		res = append(res, item)

		// Organizing is best effort: the item is already stored and returned.
		info, err := OrganizeGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
			continue
		}
		if err := db.SaveGameInfo(info); err != nil {
			c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
			continue
		}
	}
	return res, nil
}

func (c *FreeGOGCrawler) CrawlByUrl(url string, session *utils.WAFSession) (*model.GameItem, error) {
	resp, err := utils.FetchWithWAFSession(utils.FetchConfig{
		Url: url,
	}, session)
	if err != nil {
		return nil, err
	}
	item, err := db.GetGameItemByUrl(url)
	if err != nil {
		return nil, err
	}
	item.Url = url
	rawTitleRegex := regexp.MustCompile(`(?i)