package crawler

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"net/url"
	"os"
	"regexp"
	"strconv"
	"strings"

	"github.com/nitezs/pcgamedb/config"
	"github.com/nitezs/pcgamedb/constant"
	"github.com/nitezs/pcgamedb/db"
	"github.com/nitezs/pcgamedb/model"
	"github.com/nitezs/pcgamedb/utils"

	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
)

type OnlineFixCrawler struct {
	logger  *zap.Logger
	cookies map[string]string
}

func NewOnlineFixCrawler(logger *zap.Logger) *OnlineFixCrawler {
	return &OnlineFixCrawler{
		logger:  logger,
		cookies: map[string]string{},
	}
}

func (c *OnlineFixCrawler) Name() string {
	return "OnlineFixCrawler"
}

// Crawl fetches one listing page, skips entries whose link+date update flag
// is already in the database, and saves the rest.
func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) {
	if !config.Config.OnlineFixAvaliable {
		c.logger.Error("Need Online Fix account")
		return nil, errors.New("Online Fix is not available")
	}
	if len(c.cookies) == 0 {
		err := c.login()
		if err != nil {
			c.logger.Error("Failed to login", zap.Error(err))
			return nil, err
		}
	}
	requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page)
	resp, err := utils.Fetch(utils.FetchConfig{
		Url:     requestURL,
		Cookies: c.cookies,
		Headers: map[string]string{
			"Referer": constant.OnlineFixURL,
		},
	})
	if err != nil {
		c.logger.Error("Failed to fetch", zap.Error(err))
		return nil, err
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data))
	if err != nil {
		c.logger.Error("Failed to parse HTML", zap.Error(err))
		return nil, err
	}
	urls := []string{}
	updateFlags := []string{} // link + date
	doc.Find("article.news").Each(func(i int, s *goquery.Selection) {
		urls = append(urls, s.Find(".big-link").First().AttrOr("href", ""))
		updateFlags = append(
			updateFlags,
			s.Find(".big-link").First().AttrOr("href", "")+
				s.Find("time").Text(),
		)
	})

	var res []*model.GameItem
	for i, u := range urls {
		if db.IsOnlineFixCrawled(updateFlags[i]) {
			continue
		}
		c.logger.Info("Crawling", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
			continue
		}
		item.UpdateFlag = updateFlags[i]
		err = db.SaveGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to save", zap.Error(err))
			continue
		}
		res = append(res, item)
		info, err := OrganizeGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
			continue
		}
		err = db.SaveGameInfo(info)
		if err != nil {
			c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
			continue
		}
	}
	return res, nil
}

// CrawlByUrl crawls a single game page, locates the torrent download button,
// and converts the torrent into a magnet link.
func (c *OnlineFixCrawler) CrawlByUrl(url string) (*model.GameItem, error) {
	if len(c.cookies) == 0 {
		err := c.login()
		if err != nil {
			c.logger.Error("Failed to login", zap.Error(err))
			return nil, err
		}
	}
	resp, err := utils.Fetch(utils.FetchConfig{
		Url:     url,
		Cookies: c.cookies,
		Headers: map[string]string{
			"Referer": constant.OnlineFixURL,
		},
	})
	if err != nil {
		return nil, err
	}
	titleRegex := regexp.MustCompile(`(?i)<h1.*?>(.*?)</h1>`)
	titleRegexRes := titleRegex.FindAllStringSubmatch(string(resp.Data), -1)
	if len(titleRegexRes) == 0 {
		return nil, errors.New("Failed to find title")
	}
	downloadRegex := regexp.MustCompile(`(?i)<a[^>]*\bhref="([^"]+)"[^>]*>(Скачать Torrent|Скачать торрент)</a>`)
	downloadRegexRes := downloadRegex.FindAllStringSubmatch(string(resp.Data), -1)
	if len(downloadRegexRes) == 0 {
		return nil, errors.New("Failed to find download button")
	}
	item, err := db.GetGameItemByUrl(url)
	if err != nil {
		return nil, err
	}
	item.RawName = titleRegexRes[0][1]
	item.Name = OnlineFixFormatter(item.RawName)
	item.Url = url
	item.Author = "OnlineFix"
	item.Size = "0"
	resp, err = utils.Fetch(utils.FetchConfig{
		Url:     downloadRegexRes[0][1],
		Cookies: c.cookies,
		Headers: map[string]string{
			"Referer": url,
		},
	})
	if err != nil {
		return nil, err
	}

	if strings.Contains(downloadRegexRes[0][1], "uploads.online-fix.me") {
		// Direct upload: scrape the .torrent filename from the directory listing.
		magnetRegex := regexp.MustCompile(`(?i)"(.*?).torrent"`)
		magnetRegexRes := magnetRegex.FindAllStringSubmatch(string(resp.Data), -1)
		if len(magnetRegexRes) == 0 {
			return nil, errors.New("Failed to find magnet")
		}
		resp, err = utils.Fetch(utils.FetchConfig{
			Url:     downloadRegexRes[0][1] + strings.Trim(magnetRegexRes[0][0], "\""),
			Cookies: c.cookies,
			Headers: map[string]string{
				"Referer": url,
			},
		})
		if err != nil {
			return nil, err
		}
		item.Download, item.Size, err = utils.ConvertTorrentToMagnet(resp.Data)
		if err != nil {
			return nil, err
		}
	} else if strings.Contains(downloadRegexRes[0][1], "online-fix.me/ext") {
		// External link: currently only Mega-hosted torrents are supported.
		if strings.Contains(string(resp.Data), "mega.nz") {
			if !config.Config.MegaAvaliable {
				return nil, errors.New("Mega is not available")
			}
			megaRegex := regexp.MustCompile(`(?i)location.href=\\'([^\\']*)\\'`)
			megaRegexRes := megaRegex.FindAllStringSubmatch(string(resp.Data), -1)
			if len(megaRegexRes) == 0 {
				return nil, errors.New("Failed to find download link")
			}
			path, files, err := utils.MegaDownload(megaRegexRes[0][1], "torrent")
			if err != nil {
				return nil, err
			}
			torrent := ""
			for _, file := range files {
				if strings.HasSuffix(file, ".torrent") {
					torrent = file
					break
				}
			}
			dataBytes, err := os.ReadFile(torrent)
			if err != nil {
				return nil, err
			}
			item.Download, item.Size, err = utils.ConvertTorrentToMagnet(dataBytes)
			if err != nil {
				return nil, err
			}
			_ = os.RemoveAll(path)
		} else {
			return nil, errors.New("Failed to find download link")
		}
	} else {
		return nil, errors.New("Failed to find download link")
	}
	return item, nil
}

// CrawlMulti crawls the given listing pages in order.
func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	var res []*model.GameItem
	for _, page := range pages {
		items, err := c.Crawl(page)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}

// CrawlAll crawls every listing page from the first to the last.
func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) {
	var res []*model.GameItem
	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		return nil, err
	}
	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}

// GetTotalPageNum parses the pagination links on the front page and returns
// the last page number.
func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) {
	resp, err := utils.Fetch(utils.FetchConfig{
		Url: constant.OnlineFixURL,
		Headers: map[string]string{
			"Referer": constant.OnlineFixURL,
		},
	})
	if err != nil {
		return 0, err
	}
	pageRegex := regexp.MustCompile(`(?i)<a href=".*?/page/(\d+)/">.*?</a>`)
	pageRegexRes := pageRegex.FindAllStringSubmatch(string(resp.Data), -1)
	if len(pageRegexRes) == 0 {
		return 0, errors.New("Failed to find page numbers")
	}
	// The last pagination link is "next page"; the one before it carries the
	// final page number.
	totalPageNum, err := strconv.Atoi(pageRegexRes[len(pageRegexRes)-2][1])
	if err != nil {
		return 0, err
	}
	return totalPageNum, nil
}

type csrf struct {
	Field string `json:"field"`
	Value string `json:"value"`
}

// login fetches a CSRF token, posts the configured credentials, and stores
// the resulting session cookies on the crawler.
func (c *OnlineFixCrawler) login() error {
	resp, err := utils.Fetch(utils.FetchConfig{
		Url: constant.OnlineFixCSRFURL,
		Headers: map[string]string{
			"X-Requested-With": "XMLHttpRequest",
			"Referer":          constant.OnlineFixURL,
		},
	})
	if err != nil {
		return err
	}

	var csrf csrf
	if err = json.Unmarshal(resp.Data, &csrf); err != nil {
		return err
	}

	for _, cookie := range resp.Cookie {
		c.cookies[cookie.Name] = cookie.Value
	}
	params := url.Values{}
	params.Add("login_name", config.Config.OnlineFix.User)
	params.Add("login_password", config.Config.OnlineFix.Password)
	params.Add(csrf.Field, csrf.Value)
	params.Add("login", "submit")

	resp, err = utils.Fetch(utils.FetchConfig{
		Url:     constant.OnlineFixURL,
		Method:  "POST",
		Cookies: c.cookies,
		Headers: map[string]string{
			"Origin":       constant.OnlineFixURL,
			"Content-Type": "application/x-www-form-urlencoded",
			"Referer":      constant.OnlineFixURL,
		},
		Data: params,
	})
	if err != nil {
		return err
	}

	for _, cookie := range resp.Cookie {
		c.cookies[cookie.Name] = cookie.Value
	}
	return nil
}

// OnlineFixFormatter strips the Russian suffix "по сети" ("online") and any
// parenthesized notes from a raw title.
func OnlineFixFormatter(name string) string {
	name = strings.Replace(name, "по сети", "", -1)
	reg1 := regexp.MustCompile(`(?i)\(.*?\)`)
	name = reg1.ReplaceAllString(name, "")
	return strings.TrimSpace(name)
}
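
// Usage sketch (illustrative, not part of the original file): the crawler is
// constructed with a zap logger and driven page by page; it assumes the
// Online Fix credentials referenced above are present in config.Config.
//
//	logger, _ := zap.NewProduction()
//	c := NewOnlineFixCrawler(logger)
//	items, err := c.Crawl(1) // first listing page
//	if err != nil {
//		logger.Fatal("crawl failed", zap.Error(err))
//	}
//	logger.Info("crawled", zap.Int("count", len(items)))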