// game-crawler/crawler/onlinefix.go

package crawler

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strconv"
	"strings"
	"time"

	"game-crawler/cache"
	"game-crawler/constant"
	"game-crawler/db"
	"game-crawler/model"
	"game-crawler/utils"

	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
)
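
// OnlineFixCrawler scrapes game releases from online-fix.me. The site
// requires a logged-in session, so the crawler keeps the account
// credentials and authenticates on demand (see getCookies).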
type OnlineFixCrawler struct {
	*BaseLogger
	*BaseError
	username string
	password string
	logger   *zap.Logger
}
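
// NewOnlineFixCrawler builds a crawler for the given online-fix.me
// account credentials and logger.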
func NewOnlineFixCrawler(username, password string, logger *zap.Logger) *OnlineFixCrawler {
	return &OnlineFixCrawler{
		BaseLogger: &BaseLogger{
			logger: logger,
		},
		BaseError: &BaseError{},
		username:  username,
		password:  password,
		logger:    logger,
	}
}
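
// Name returns the crawler's identifier.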
func (c *OnlineFixCrawler) Name() string {
	return "OnlineFixCrawler"
}
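
// Crawl fetches one listing page, skips entries whose update flag is
// already recorded in the database, and crawls, saves, and organizes
// the remaining games.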
func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) {
	c.LogCrawlStart(page)
	cookies, err := c.getCookies()
	if err != nil {
		return nil, c.ErrGetSession(err)
	}
	requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page)
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).SetHeader("Referer", constant.OnlineFixURL).SetCookies(cookies).Get(requestURL)
	if err != nil {
		return nil, c.ErrRequest(requestURL, err)
	}
	body := utils.Windows1251ToUTF8(resp.Body())
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
	if err != nil {
		return nil, c.ErrParseDoc(requestURL, err)
	}
	var urls []string
	var updateFlags []string
	// The update flag encodes the post URL plus its timestamp, so an
	// edited post produces a new flag and gets re-crawled.
	doc.Find("article.news").Each(func(i int, s *goquery.Selection) {
		url := s.Find(".big-link").First().AttrOr("href", "")
		if url != "" {
			urls = append(urls, url)
			updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(url+s.Find("time").Text())))
		}
	})
	var res []*model.GameItem
	for i, u := range urls {
		if db.IsOnlineFixCrawled(updateFlags[i]) {
			c.LogCrawlSkip(u)
			continue
		}
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.LogCrawlByUrlError(u, err)
			continue
		}
		item.UpdateFlag = updateFlags[i]
		if err := db.SaveGameItem(item); err != nil {
			c.LogSaveGameItemError(u, err)
			continue
		}
		res = append(res, item)
		if err := OrganizeGameItem(item); err != nil {
			c.LogOrganizeGameWarn(item.ID, item.Name, err)
			continue
		}
	}
	return res, nil
}
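
// CrawlByUrl scrapes a single game page: it extracts the title,
// resolves the torrent and/or hoster download links, and returns the
// populated GameItem without saving it.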
func (c *OnlineFixCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	c.LogCrawlByUrlStart(URL)
	cookies, err := c.getCookies()
	if err != nil {
		return nil, c.ErrGetSession(err)
	}
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).SetHeaders(map[string]string{
		"Referer": constant.OnlineFixURL,
	}).SetCookies(cookies).Get(URL)
	if err != nil {
		return nil, c.ErrRequest(URL, err)
	}
	body := utils.Windows1251ToUTF8(resp.Body())
	titleRegex := regexp.MustCompile(`(?i)<h1.*?>(.*?)</h1>`)
	titleMatch := titleRegex.FindStringSubmatch(string(body))
	if len(titleMatch) == 0 {
		return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to find title"))
	}
	// Reuse the existing record when the URL is already known so the
	// database entry is updated in place rather than duplicated.
	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		item = &model.GameItem{}
	}
	item.RawName = titleMatch[1]
	item.Name = OnlineFixFormatter(item.RawName)
	item.Url = URL
	item.Author = "OnlineFix"
	item.Size = "0"
	item.Platform = "windows"
	// Match the download buttons; the Russian labels all mean
	// "Download ..." (via Online-Fix hosters or via torrent).
	downloadRegex := regexp.MustCompile(`(?i)<a[^>]+\bhref="([^"]+)"[^>]+>(Скачать с Online-Fix Hosters|Скачать Torrent|Скачать торрент)</a>`)
	downloadMatch := downloadRegex.FindAllStringSubmatch(string(body), -1)
	if len(downloadMatch) == 0 {
		return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to find download link"))
	}
	downloads := make(map[string]string)
	for _, match := range downloadMatch {
		downloadURL := match[1]
		resp, err = utils.Request().SetLogger(c.logger.Sugar()).SetHeader("Referer", URL).SetCookies(cookies).Get(downloadURL)
		if err != nil {
			return nil, c.ErrRequest(downloadURL, err)
		}
		// Decode the page only after the request has succeeded; reading
		// resp.Body() before the error check risks a nil-pointer panic.
		body = utils.Windows1251ToUTF8(resp.Body())
		if strings.Contains(downloadURL, "uploads.online-fix.me") {
			// Torrent branch: find the .torrent file referenced by the
			// page, download it, and convert it to a magnet link.
			magnetRegex := regexp.MustCompile(`(?i)"(.*?).torrent"`)
			magnetMatch := magnetRegex.FindStringSubmatch(string(body))
			if len(magnetMatch) == 0 {
				return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to find magnet link"))
			}
			torrentURL := downloadURL + strings.Trim(magnetMatch[0], "\"")
			resp, err = utils.Request().SetLogger(c.logger.Sugar()).SetHeader("Referer", URL).SetCookies(cookies).Get(torrentURL)
			if err != nil {
				return nil, c.ErrRequest(torrentURL, err)
			}
			magnet, size, err := utils.ConvertTorrentToMagnet(resp.Body())
			if err != nil {
				return nil, c.ErrConvertTorrent(err)
			}
			downloads["magnet"] = magnet
			item.Size = size
		}
		if strings.Contains(downloadURL, "hosters.online-fix.me") {
			// Hoster branch: direct links are embedded as JSON in the
			// data-links attribute of each download block.
			doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
			if err != nil {
				return nil, c.ErrParseDoc(downloadURL, err)
			}
			type data struct {
				DirectLink string `json:"direct_link"`
				FileName   string `json:"file_name"`
				ID         int    `json:"id"`
			}
			var dataList []data
			var jsons []string
			doc.Find("div[data-links]").Each(func(i int, s *goquery.Selection) {
				jsons = append(jsons, s.AttrOr("data-links", ""))
			})
			for _, jsonStr := range jsons {
				if err := json.Unmarshal([]byte(jsonStr), &dataList); err != nil {
					return nil, c.ErrUnmarshalJSON(err)
				}
				for _, d := range dataList {
					downloads[fmt.Sprintf("%s(%v)", d.FileName, d.ID)] = d.DirectLink
				}
			}
		}
	}
	item.Downloads = downloads
	return item, nil
}
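
// CrawlMulti crawls the given listing pages, skipping any page number
// outside the range [1, total].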
func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		return nil, c.ErrGetTotalPageNum(err)
	}
	var res []*model.GameItem
	for _, page := range pages {
		if page > totalPageNum || page < 1 {
			c.LogPageExceedWarn(page)
			continue
		}
		items, err := c.Crawl(page)
		if err != nil {
			c.LogCrawlError(page, err)
			continue
		}
		res = append(res, items...)
	}
	return res, nil
}
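
// CrawlAll crawls every listing page from 1 up to the current total.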
func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) {
	var res []*model.GameItem
	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		return nil, c.ErrGetTotalPageNum(err)
	}
	for i := 1; i <= totalPageNum; i++ {
		items, err := c.Crawl(i)
		if err != nil {
			c.LogCrawlError(i, err)
			continue
		}
		res = append(res, items...)
	}
	return res, nil
}
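
// GetTotalPageNum parses the pagination links on the front page. The
// second-to-last match is used because the final link appears to be
// the "next page" arrow rather than the highest page number.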
func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) {
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).SetHeader("Referer", constant.OnlineFixURL).Get(constant.OnlineFixURL)
	if err != nil {
		return 0, c.ErrRequest(constant.OnlineFixURL, err)
	}
	pageRegex := regexp.MustCompile(`(?i)<a href="https://online-fix.me/page/(\d+)/">.*?</a>`)
	pageMatches := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
	if len(pageMatches) < 2 {
		return 0, c.ErrMatchRegex(string(resp.Body()), pageRegex, errors.New("failed to find total page number"))
	}
	totalPageNum, err := strconv.Atoi(pageMatches[len(pageMatches)-2][1])
	if err != nil {
		return 0, c.ErrParseInt(pageMatches[len(pageMatches)-2][1], err)
	}
	return totalPageNum, nil
}
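
// getCookies returns an authenticated session for online-fix.me. It
// reuses the cookie set cached under "onlinefix_cookies" when present;
// otherwise it fetches a CSRF token, posts the login form, and caches
// the resulting cookies for one hour.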
func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
	if c.username == "" || c.password == "" {
		return nil, c.ErrUserOrPassEmpty()
	}
	key := "onlinefix_cookies"
	val, exists := cache.Get(key)
	if exists {
		var cookies []*http.Cookie
		err := json.Unmarshal([]byte(val), &cookies)
		if err != nil {
			return nil, c.ErrUnmarshalJSON(err)
		}
		return cookies, nil
	}
	// Fetch the CSRF token that the login form requires.
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).SetHeaders(map[string]string{
		"X-Requested-With": "XMLHttpRequest",
		"Referer":          constant.OnlineFixURL,
	}).Get(constant.OnlineFixCSRFURL)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch CSRF token: %w", err)
	}
	type csrf struct {
		Field string `json:"field"`
		Value string `json:"value"`
	}
	var csrfToken csrf
	if err = json.Unmarshal(resp.Body(), &csrfToken); err != nil {
		return nil, c.ErrUnmarshalJSON(err)
	}
	// Post the login form with the credentials and CSRF token, carrying
	// over the cookies issued alongside the token.
	cookies := resp.Cookies()
	params := url.Values{}
	params.Add("login_name", c.username)
	params.Add("login_password", c.password)
	params.Add(csrfToken.Field, csrfToken.Value)
	params.Add("login", "submit")
	resp, err = utils.Request().SetLogger(c.logger.Sugar()).SetHeaders(map[string]string{
		"Origin":       constant.OnlineFixURL,
		"Content-Type": "application/x-www-form-urlencoded",
		"Referer":      constant.OnlineFixURL,
	}).SetCookies(cookies).SetBody(params.Encode()).Post(constant.OnlineFixURL)
	if err != nil {
		return nil, c.ErrRequest(constant.OnlineFixURL, err)
	}
	// Cache the authenticated session cookies for an hour.
	cookies = resp.Cookies()
	cookiesJSON, _ := json.Marshal(cookies)
	_ = cache.SetWithExpire(key, string(cookiesJSON), time.Hour)
	return cookies, nil
}
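
// OnlineFixFormatter normalizes a raw title: it strips the Russian
// phrase "по сети" ("over the network", i.e. online multiplayer) and
// removes any parenthesized suffix before trimming whitespace.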
func OnlineFixFormatter(name string) string {
	name = strings.ReplaceAll(name, "по сети", "")
	reg := regexp.MustCompile(`(?i)\(.*?\)`)
	return strings.TrimSpace(reg.ReplaceAllString(name, ""))
}