package crawler

import (
	"bytes"
	"errors"
	"fmt"
	"regexp"
	"strconv"
	"strings"

	"pcgamedb/constant"
	"pcgamedb/db"
	"pcgamedb/model"
	"pcgamedb/utils"

	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
)

type FitGirlCrawler struct {
	logger *zap.Logger
}

func NewFitGirlCrawler(logger *zap.Logger) *FitGirlCrawler {
	return &FitGirlCrawler{
		logger: logger,
	}
}

func (c *FitGirlCrawler) Name() string {
	return "FitGirlCrawler"
}

func (c *FitGirlCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	resp, err := utils.Request().Get(URL)
	if err != nil {
		return nil, err
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		return nil, err
	}
	titleElem := doc.Find("h3").First().Find("strong")
	if titleElem.Length() == 0 {
		return nil, errors.New("failed to find title")
	}
	rawTitle := titleElem.Text()
	titleElem.Children().Remove() // drop nested tags so only the bare title text remains
	title := strings.TrimSpace(titleElem.Text())
	// NOTE: the original pattern `Repack Size: (.*?)` always captured the empty
	// string, because a trailing lazy group with nothing after it matches as
	// little as possible. Anchoring on the surrounding <strong> tags (assumed
	// page markup) makes the capture meaningful.
	sizeRegex := regexp.MustCompile(`Repack Size: <strong>(.*?)</strong>`)
	sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body()))
	if len(sizeRegexRes) == 0 {
		return nil, errors.New("failed to find size")
	}
	size := sizeRegexRes[1]
	magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
	magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body()))
	if len(magnetRegexRes) == 0 {
		return nil, errors.New("failed to find magnet")
	}
	magnet := magnetRegexRes[0]
	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		return nil, err
	}
	item.Name = title // already trimmed above
	item.RawName = rawTitle
	item.Url = URL
	item.Size = size
	item.Author = "FitGirl"
	item.Download = magnet
	item.Platform = "windows"
	return item, nil
}

func (c *FitGirlCrawler) Crawl(page int) ([]*model.GameItem, error) {
	resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, page))
	if err != nil {
		c.logger.Error("Failed to fetch", zap.Error(err))
		return nil, err
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		c.logger.Error("Failed to parse HTML", zap.Error(err))
		return nil, err
	}
	var urls []string
	var updateFlags []string // URL + publish date, used to skip posts that were already crawled
	doc.Find("article").Each(func(i int, s *goquery.Selection) {
		u, exist1 := s.Find(".entry-title>a").First().Attr("href")
		d, exist2 := s.Find("time").First().Attr("datetime")
		if exist1 && exist2 {
			urls = append(urls, u)
			updateFlags = append(updateFlags, fmt.Sprintf("%s%s", u, d))
		}
	})
	var res []*model.GameItem
	for i, u := range urls {
		if db.IsFitgirlCrawled(updateFlags[i]) {
			continue
		}
		c.logger.Info("Crawling", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
			continue
		}
		item.UpdateFlag = updateFlags[i]
		err = db.SaveGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to save", zap.Error(err))
			continue
		}
		res = append(res, item)
		if err := OrganizeGameItem(item); err != nil {
			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
			continue
		}
	}
	return res, nil
}

func (c *FitGirlCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	var res []*model.GameItem
	for _, page := range pages {
		items, err := c.Crawl(page)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}

func (c *FitGirlCrawler) CrawlAll() ([]*model.GameItem, error) {
	var res []*model.GameItem
	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		return nil, err
	}
	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}
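// GetTotalPageNum (below) derives the page count from WordPress-style
// pagination markup. A minimal sketch of the HTML shape its selector
// assumes (inferred from the selector itself, not taken from the live site):
//
//	<a class="page-numbers" href="...">2</a>
//	<span class="page-numbers dots">…</span>
//	<a class="page-numbers" href="...">618</a>
//
// i.e. the element immediately following the ".page-numbers.dots" ellipsis
// is expected to hold the last page number.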
func (c *FitGirlCrawler) GetTotalPageNum() (int, error) {
	resp, err := utils.Request().Get(fmt.Sprintf(constant.FitGirlURL, 1))
	if err != nil {
		return 0, err
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		return 0, err
	}
	page, err := strconv.Atoi(doc.Find(".page-numbers.dots").First().Next().Text())
	if err != nil {
		return 0, err
	}
	return page, nil
}
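// Usage sketch (illustrative, not part of the crawler API). Assumes a
// configured zap.Logger and that constant.FitGirlURL is a page-indexed
// format string for the site's listing pages:
//
//	logger, _ := zap.NewProduction()
//	c := NewFitGirlCrawler(logger)
//	items, err := c.Crawl(1) // fetch, save, and organize new repacks from page 1
//	if err != nil {
//		logger.Fatal("crawl failed", zap.Error(err))
//	}
//	logger.Info("done", zap.Int("newItems", len(items)))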