package crawler

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"os"
	"regexp"
	"strconv"
	"strings"
	"time"

	"pcgamedb/cache"
	"pcgamedb/config"
	"pcgamedb/constant"
	"pcgamedb/db"
	"pcgamedb/model"
	"pcgamedb/utils"

	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
)

type OnlineFixCrawler struct {
	logger *zap.Logger
}

func NewOnlineFixCrawler(logger *zap.Logger) *OnlineFixCrawler {
	return &OnlineFixCrawler{
		logger: logger,
	}
}

func (c *OnlineFixCrawler) Name() string {
	return "OnlineFixCrawler"
}

func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) {
	cookies, err := c.getCookies()
	if err != nil {
		return nil, err
	}
	requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page)
	resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).SetCookies(cookies).Get(requestURL)
	if err != nil {
		c.logger.Error("Failed to fetch", zap.Error(err))
		return nil, err
	}
	body := utils.Windows1251ToUTF8(resp.Body())
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
	if err != nil {
		c.logger.Error("Failed to parse HTML", zap.Error(err))
		return nil, err
	}

	var urls []string
	var updateFlags []string // link + date
	doc.Find("article.news").Each(func(i int, s *goquery.Selection) {
		urls = append(urls, s.Find(".big-link").First().AttrOr("href", ""))
		updateFlags = append(
			updateFlags,
			s.Find(".big-link").First().AttrOr("href", "")+
				s.Find("time").Text(),
		)
	})

	var res []*model.GameItem
	for i, u := range urls {
		if db.IsOnlineFixCrawled(updateFlags[i]) {
			continue
		}
		c.logger.Info("Crawling", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
			continue
		}
		item.UpdateFlag = updateFlags[i]
		err = db.SaveGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to save", zap.Error(err))
			continue
		}
		res = append(res, item)
		if err := OrganizeGameItem(item); err != nil {
			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
			continue
		}
	}
	return res, nil
}

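// CrawlByUrl crawls a single game page: it extracts the raw title and the
// torrent download button, resolves the torrent either directly from
// uploads.online-fix.me or through Mega (for online-fix.me/ext links), and
// converts it to a magnet link.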
func (c *OnlineFixCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	cookies, err := c.getCookies()
	if err != nil {
		return nil, err
	}
	resp, err := utils.Request().SetHeaders(map[string]string{
		"Referer": constant.OnlineFixURL,
	}).SetCookies(cookies).Get(URL)
	if err != nil {
		return nil, err
	}
	body := utils.Windows1251ToUTF8(resp.Body())

	// NOTE: the original pattern lost its HTML tags during extraction; this is
	// a reconstruction that captures the page's <h1> title.
	titleRegex := regexp.MustCompile(`(?i)<h1.*?>(.*?)</h1>`)
	titleRegexRes := titleRegex.FindAllStringSubmatch(string(body), -1)
	if len(titleRegexRes) == 0 {
		return nil, errors.New("failed to find title")
	}
	// NOTE: the leading "<a[^>" of the original pattern was stripped during
	// extraction and has been restored here.
	downloadRegex := regexp.MustCompile(`(?i)<a[^>]+\bhref="([^"]+)"[^>]+>(Скачать Torrent|Скачать торрент)`)
	downloadRegexRes := downloadRegex.FindAllStringSubmatch(string(body), -1)
	if len(downloadRegexRes) == 0 {
		return nil, errors.New("failed to find download button")
	}

	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		return nil, err
	}
	item.RawName = titleRegexRes[0][1]
	item.Name = OnlineFixFormatter(item.RawName)
	item.Url = URL
	item.Author = "OnlineFix"
	item.Size = "0"

	resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1])
	if err != nil {
		return nil, err
	}
	body = utils.Windows1251ToUTF8(resp.Body())

	if strings.Contains(downloadRegexRes[0][1], "uploads.online-fix.me") {
		// Torrent hosted directly on uploads.online-fix.me: find the .torrent
		// link on the listing page, fetch it, and convert it to a magnet link.
		magnetRegex := regexp.MustCompile(`(?i)"(.*?)\.torrent"`)
		magnetRegexRes := magnetRegex.FindAllStringSubmatch(string(body), -1)
		if len(magnetRegexRes) == 0 {
			return nil, errors.New("failed to find magnet")
		}
		resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1] + strings.Trim(magnetRegexRes[0][0], "\""))
		if err != nil {
			return nil, err
		}
		item.Download, item.Size, err = utils.ConvertTorrentToMagnet(resp.Body())
		if err != nil {
			return nil, err
		}
	} else if strings.Contains(downloadRegexRes[0][1], "online-fix.me/ext") {
		// Torrent hosted externally: download the .torrent file via Mega,
		// then convert it to a magnet link.
		if strings.Contains(string(body), "mega.nz") {
			if !config.Config.MegaAvaliable {
				return nil, errors.New("mega is not available")
			}
			megaRegex := regexp.MustCompile(`(?i)location.href=\\'([^\\']*)\\'`)
			megaRegexRes := megaRegex.FindAllStringSubmatch(string(body), -1)
			if len(megaRegexRes) == 0 {
				return nil, errors.New("failed to find download link")
			}
			path, files, err := utils.MegaDownload(megaRegexRes[0][1], "torrent")
			if err != nil {
				return nil, err
			}
			torrent := ""
			for _, file := range files {
				if strings.HasSuffix(file, ".torrent") {
					torrent = file
					break
				}
			}
			dataBytes, err := os.ReadFile(torrent)
			if err != nil {
				return nil, err
			}
			item.Download, item.Size, err = utils.ConvertTorrentToMagnet(dataBytes)
			if err != nil {
				return nil, err
			}
			_ = os.RemoveAll(path)
		} else {
			return nil, errors.New("failed to find download link")
		}
	} else {
		return nil, errors.New("failed to find download link")
	}

	item.Platform = "windows"
	return item, nil
}

func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	var res []*model.GameItem
	for _, page := range pages {
		items, err := c.Crawl(page)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}

func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) {
	var res []*model.GameItem
	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		return nil, err
	}
	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}

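// GetTotalPageNum scrapes the pagination links on the front page and parses
// the page number out of the second-to-last link (the last link is
// presumably a "next page" control rather than the highest page number).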
func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) {
	resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).Get(constant.OnlineFixURL)
	if err != nil {
		return 0, err
	}
	// NOTE: the original pattern lost its HTML tags during extraction; this is
	// a reconstruction that captures page numbers from the pagination links.
	pageRegex := regexp.MustCompile(`(?i)<a href=".*?/page/(\d+)/">.*?</a>`)
	pageRegexRes := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
	if len(pageRegexRes) == 0 {
		return 0, errors.New("failed to find total page number")
	}
	totalPageNum, err := strconv.Atoi(pageRegexRes[len(pageRegexRes)-2][1])
	if err != nil {
		return 0, err
	}
	return totalPageNum, nil
}

type csrf struct {
	Field string `json:"field"`
	Value string `json:"value"`
}

func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
	val, exists := cache.Get("onlinefix_cookies")
	if exists {
		var cookies []*http.Cookie
		if err := json.Unmarshal([]byte(val), &cookies); err != nil {
			return nil, err
		}
		return cookies, nil
	}

	// Fetch the CSRF token, log in with it, and cache the resulting session
	// cookies for an hour.
	resp, err := utils.Request().SetHeaders(map[string]string{
		"X-Requested-With": "XMLHttpRequest",
		"Referer":          constant.OnlineFixURL,
	}).Get(constant.OnlineFixCSRFURL)
	if err != nil {
		return nil, err
	}
	var csrf csrf
	if err = json.Unmarshal(resp.Body(), &csrf); err != nil {
		return nil, err
	}

	cookies := resp.Cookies()
	params := url.Values{}
	params.Add("login_name", config.Config.OnlineFix.User)
	params.Add("login_password", config.Config.OnlineFix.Password)
	params.Add(csrf.Field, csrf.Value)
	params.Add("login", "submit")

	resp, err = utils.Request().SetHeaders(map[string]string{
		"Origin":       constant.OnlineFixURL,
		"Content-Type": "application/x-www-form-urlencoded",
		"Referer":      constant.OnlineFixURL,
	}).SetCookies(cookies).SetBody(params.Encode()).Post(constant.OnlineFixURL)
	if err != nil {
		return nil, err
	}

	cookies = resp.Cookies()
	jsonBytes, _ := json.Marshal(cookies)
	_ = cache.SetWithExpire("onlinefix_cookies", string(jsonBytes), time.Hour)
	return cookies, nil
}

func OnlineFixFormatter(name string) string {
	// Strip the Russian co-op marker "по сети" ("over the network") and any
	// parenthesized suffixes, e.g. "Game Name по сети (v1.2)" -> "Game Name".
	name = strings.Replace(name, "по сети", "", -1)
	reg1 := regexp.MustCompile(`(?i)\(.*?\)`)
	name = reg1.ReplaceAllString(name, "")
	return strings.TrimSpace(name)
}
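
// Usage sketch (not part of the original file; assumes config, cache, and db
// are initialized as elsewhere in pcgamedb):
//
//	logger, _ := zap.NewProduction()
//	c := NewOnlineFixCrawler(logger)
//	items, err := c.Crawl(1) // crawl the first listing page
//	if err != nil {
//	    logger.Fatal("crawl failed", zap.Error(err))
//	}
//	logger.Info("crawled", zap.Int("games", len(items)))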