package crawler import ( "bytes" "encoding/base64" "encoding/json" "errors" "html" "net/http" "regexp" "strings" "time" "pcgamedb/cache" "pcgamedb/config" "pcgamedb/constant" "pcgamedb/db" "pcgamedb/model" "git.nite07.com/nite/ccs" "github.com/PuerkitoBio/goquery" "go.uber.org/zap" ) type FreeGOGCrawler struct { logger *zap.Logger } func NewFreeGOGCrawler(logger *zap.Logger) *FreeGOGCrawler { return &FreeGOGCrawler{ logger: logger, } } func (c *FreeGOGCrawler) getSession() (*ccs.Session, error) { var session ccs.Session var err error if val, exist := cache.Get("freegog_waf_session"); exist { err := json.Unmarshal([]byte(val), &session) if err != nil { return nil, err } } else { session, err = ccs.WAFSession(config.Config.CFClearanceScraper.Url, constant.FreeGOGListURL) if err != nil { return nil, err } jsonBytes, err := json.Marshal(session) if err == nil { _ = cache.SetWithExpire("freegog_waf_session", jsonBytes, 1*time.Hour) } } return &session, nil } func (c *FreeGOGCrawler) Name() string { return "FreeGOG" } func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) { count := 0 session, err := c.getSession() if err != nil { c.logger.Error("Failed to create session", zap.Error(err)) return nil, err } resp, err := ccs.RequestWithWAFSession(http.MethodGet, constant.FreeGOGListURL, *session, nil) if err != nil { c.logger.Error("Failed to fetch", zap.Error(err)) return nil, err } doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp.Body))) if err != nil { c.logger.Error("Failed to parse HTML", zap.Error(err)) return nil, err } var urls []string var updateFlags []string //rawName+link doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) { urls = append(urls, s.AttrOr("href", "")) updateFlags = append(updateFlags, s.Text()+s.AttrOr("href", "")) }) var res []*model.GameItem for i, u := range urls { if count == num { break } if db.IsFreeGOGCrawled(updateFlags[i]) { continue } c.logger.Info("Crawling", zap.String("URL", u)) item, err := c.CrawlByUrl(u) if err != nil { c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u)) continue } item.UpdateFlag = updateFlags[i] err = db.SaveGameItem(item) if err != nil { c.logger.Warn("Failed to save", zap.Error(err)) continue } res = append(res, item) count++ if err := OrganizeGameItem(item); err != nil { c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u)) continue } } return res, nil } func (c *FreeGOGCrawler) CrawlByUrl(URL string) (*model.GameItem, error) { session, err := c.getSession() if err != nil { return nil, err } resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil) if err != nil { return nil, err } item, err := db.GetGameItemByUrl(URL) if err != nil { return nil, err } item.Url = URL rawTitleRegex := regexp.MustCompile(`(?i)

(.*?)

`) rawTitleRegexRes := rawTitleRegex.FindStringSubmatch(string(resp.Body)) rawName := "" if len(rawTitleRegexRes) > 1 { rawName = html.UnescapeString(rawTitleRegexRes[1]) item.RawName = strings.Replace(rawName, "–", "-", -1) } else { return nil, err } item.Name = FreeGOGFormatter(item.RawName) sizeRegex := regexp.MustCompile(`(?i)>Size:\s?(.*?)<`) sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body)) if len(sizeRegexRes) > 1 { item.Size = sizeRegexRes[1] } magnetRegex := regexp.MustCompile(` 1 { magnet, err := base64.StdEncoding.DecodeString(magnetRegexRes[1]) if err != nil { return nil, err } item.Download = string(magnet) } else { return nil, errors.New("failed to find magnet link") } item.Author = "FreeGOG" item.Platform = "windows" return item, nil } func (c *FreeGOGCrawler) CrawlAll() ([]*model.GameItem, error) { return c.Crawl(-1) } var freeGOGRegexps = []*regexp.Regexp{ regexp.MustCompile(`(?i)\(.*\)`), } func FreeGOGFormatter(name string) string { for _, re := range freeGOGRegexps { name = re.ReplaceAllString(name, "") } reg1 := regexp.MustCompile(`(?i)v\d+(\.\d+)*`) if index := reg1.FindIndex([]byte(name)); index != nil { name = name[:index[0]] } if index := strings.Index(name, "+"); index != -1 { name = name[:index] } reg2 := regexp.MustCompile(`(?i):\sgoty`) name = reg2.ReplaceAllString(name, ": Game Of The Year") return strings.TrimSpace(name) }