package crawler

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"errors"
	"html"
	"net/http"
	"regexp"
	"strings"
	"time"

	"pcgamedb/cache"
	"pcgamedb/config"
	"pcgamedb/constant"
	"pcgamedb/db"
	"pcgamedb/model"

	"git.nite07.com/nite/ccs"
	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
)

// FreeGOGCrawler crawls game items from the FreeGOG site.
type FreeGOGCrawler struct {
	logger *zap.Logger
}

func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler {
	return &FreeGOGCrawler{
		logger: logger,
	}
}

// getWAFSession returns a Cloudflare WAF session from the cache, or requests
// a new one via the CF clearance scraper and caches it for 24 hours.
func (c *FreeGOGCrawler) getWAFSession() (*ccs.Session, error) {
	var session ccs.Session
	var err error
	if val, exist := cache.Get("freegog_waf_session"); exist {
		err := json.Unmarshal([]byte(val), &session)
		if err != nil {
			return nil, err
		}
	} else {
		session, err = ccs.WAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL)
		if err != nil {
			return nil, err
		}
		jsonBytes, _ := json.Marshal(session)
		_ = cache.AddWithExpire("freegog_waf_session", jsonBytes, 24*time.Hour)
	}
	return &session, nil
}

func (c *FreeGOGCrawler) Name() string {
	return "FreeGOG"
}

// Crawl fetches the FreeGOG list page and crawls up to num games that have
// not been crawled before, saving and organizing each item as it goes.
func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
	count := 0
	session, err := c.getWAFSession()
	if err != nil {
		c.logger.Error("Failed to create session", zap.Error(err))
		return nil, err
	}
	resp, err := ccs.RequestWithWAFSession(http.MethodGet, constant.FreeGOGListURL, *session, nil)
	if err != nil {
		c.logger.Error("Failed to fetch", zap.Error(err))
		return nil, err
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp.Body)))
	if err != nil {
		c.logger.Error("Failed to parse HTML", zap.Error(err))
		return nil, err
	}
	var urls []string
	var updateFlags []string // rawName+link
	doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) {
		urls = append(urls, s.AttrOr("href", ""))
		updateFlags = append(updateFlags, s.Text()+s.AttrOr("href", ""))
	})
	var res []*model.GameItem
	for i, u := range urls {
		if count == num {
			break
		}
		if db.IsFreeGOGCrawled(updateFlags[i]) {
			continue
		}
		c.logger.Info("Crawling", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
			continue
		}
		item.UpdateFlag = updateFlags[i]
		err = db.SaveGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to save", zap.Error(err))
			continue
		}
		res = append(res, item)
		count++
		if err := OrganizeGameItem(item); err != nil {
			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
			continue
		}
	}
	return res, nil
}

// CrawlByUrl crawls a single FreeGOG game page.
func (c *FreeGOGCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	session, err := c.getWAFSession()
	if err != nil {
		return nil, err
	}
	resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
	if err != nil {
		return nil, err
	}
	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		return nil, err
	}
	item.Url = URL
	rawTitleRegex := regexp.MustCompile(`(?i)