pcgamedb/crawler/onlinefix.go

285 lines
7.7 KiB
Go
Raw Permalink Normal View History

2024-09-24 06:17:11 -04:00
package crawler
import (
"bytes"
"encoding/json"
"errors"
"fmt"
2024-12-02 03:17:01 -05:00
"net/http"
2024-09-24 06:17:11 -04:00
"net/url"
"os"
"regexp"
"strconv"
"strings"
2024-12-10 08:37:16 -05:00
"time"
2024-09-24 06:17:11 -04:00
2024-12-10 08:37:16 -05:00
"pcgamedb/cache"
2024-11-20 06:09:04 -05:00
"pcgamedb/config"
"pcgamedb/constant"
"pcgamedb/db"
"pcgamedb/model"
"pcgamedb/utils"
2024-11-15 02:02:45 -05:00
2024-09-24 06:17:11 -04:00
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)
type OnlineFixCrawler struct {
2024-12-10 08:37:16 -05:00
logger *zap.Logger
2024-09-24 06:17:11 -04:00
}
func NewOnlineFixCrawler(logger *zap.Logger) *OnlineFixCrawler {
return &OnlineFixCrawler{
2024-12-10 08:37:16 -05:00
logger: logger,
2024-09-24 06:17:11 -04:00
}
}
2024-11-14 12:29:19 -05:00
// Name returns the fixed identifier of this crawler.
func (c *OnlineFixCrawler) Name() string {
	const crawlerName = "OnlineFixCrawler"
	return crawlerName
}
func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) {
2024-12-02 03:17:01 -05:00
cookies, err := c.getCookies()
if err != nil {
return nil, err
2024-09-24 06:17:11 -04:00
}
requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page)
2024-12-02 03:17:01 -05:00
resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).SetCookies(cookies).Get(requestURL)
2024-09-24 06:17:11 -04:00
if err != nil {
c.logger.Error("Failed to fetch", zap.Error(err))
return nil, err
}
2024-12-10 08:37:16 -05:00
body := utils.Windows1251ToUTF8(resp.Body())
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
2024-09-24 06:17:11 -04:00
if err != nil {
c.logger.Error("Failed to parse HTML", zap.Error(err))
return nil, err
}
var urls []string
var updateFlags []string //link+date
2024-09-24 06:17:11 -04:00
doc.Find("article.news").Each(func(i int, s *goquery.Selection) {
urls = append(urls, s.Find(".big-link").First().AttrOr("href", ""))
updateFlags = append(
updateFlags,
s.Find(".big-link").First().AttrOr("href", "")+
s.Find("time").Text(),
)
})
var res []*model.GameItem
2024-09-24 06:17:11 -04:00
for i, u := range urls {
if db.IsOnlineFixCrawled(updateFlags[i]) {
continue
}
c.logger.Info("Crawling", zap.String("URL", u))
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
continue
}
item.UpdateFlag = updateFlags[i]
err = db.SaveGameItem(item)
2024-09-24 06:17:11 -04:00
if err != nil {
c.logger.Warn("Failed to save", zap.Error(err))
continue
}
res = append(res, item)
if err := OrganizeGameItem(item); err != nil {
2024-09-24 06:17:11 -04:00
c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
continue
}
}
return res, nil
}
2024-12-02 03:17:01 -05:00
func (c *OnlineFixCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
cookies, err := c.getCookies()
if err != nil {
return nil, err
2024-09-24 06:17:11 -04:00
}
2024-12-02 03:17:01 -05:00
resp, err := utils.Request().SetHeaders(map[string]string{
"Referer": constant.OnlineFixURL,
}).SetCookies(cookies).Get(URL)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
2024-12-10 08:37:16 -05:00
body := utils.Windows1251ToUTF8(resp.Body())
2024-09-24 06:17:11 -04:00
titleRegex := regexp.MustCompile(`(?i)<h1.*?>(.*?)</h1>`)
2024-12-10 08:37:16 -05:00
titleRegexRes := titleRegex.FindAllStringSubmatch(string(body), -1)
2024-09-24 06:17:11 -04:00
if len(titleRegexRes) == 0 {
return nil, errors.New("failed to find title")
2024-09-24 06:17:11 -04:00
}
2024-12-10 08:37:16 -05:00
downloadRegex := regexp.MustCompile(`(?i)<a[^>]+\bhref="([^"]+)"[^>]+>(Скачать Torrent|Скачать торрент)</a>`)
downloadRegexRes := downloadRegex.FindAllStringSubmatch(string(body), -1)
2024-09-24 06:17:11 -04:00
if len(downloadRegexRes) == 0 {
return nil, errors.New("failed to find download button")
2024-09-24 06:17:11 -04:00
}
2024-12-02 03:17:01 -05:00
item, err := db.GetGameItemByUrl(URL)
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
item.RawName = titleRegexRes[0][1]
item.Name = OnlineFixFormatter(item.RawName)
2024-12-02 03:17:01 -05:00
item.Url = URL
2024-09-24 06:17:11 -04:00
item.Author = "OnlineFix"
item.Size = "0"
2024-12-02 03:17:01 -05:00
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1])
2024-12-10 08:37:16 -05:00
body = utils.Windows1251ToUTF8(resp.Body())
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
if strings.Contains(downloadRegexRes[0][1], "uploads.online-fix.me") {
magnetRegex := regexp.MustCompile(`(?i)"(.*?).torrent"`)
2024-12-10 08:37:16 -05:00
magnetRegexRes := magnetRegex.FindAllStringSubmatch(string(body), -1)
2024-09-24 06:17:11 -04:00
if len(magnetRegexRes) == 0 {
return nil, errors.New("failed to find magnet")
2024-09-24 06:17:11 -04:00
}
2024-12-02 03:17:01 -05:00
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1] + strings.Trim(magnetRegexRes[0][0], "\""))
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
2024-12-02 03:17:01 -05:00
item.Download, item.Size, err = utils.ConvertTorrentToMagnet(resp.Body())
2024-09-24 06:17:11 -04:00
if err != nil {
return nil, err
}
} else if strings.Contains(downloadRegexRes[0][1], "online-fix.me/ext") {
2024-12-10 08:37:16 -05:00
if strings.Contains(string(body), "mega.nz") {
2024-09-24 06:17:11 -04:00
if !config.Config.MegaAvaliable {
return nil, errors.New("mega is not avaliable")
2024-09-24 06:17:11 -04:00
}
megaRegex := regexp.MustCompile(`(?i)location.href=\\'([^\\']*)\\'`)
2024-12-10 08:37:16 -05:00
megaRegexRes := megaRegex.FindAllStringSubmatch(string(body), -1)
2024-09-24 06:17:11 -04:00
if len(megaRegexRes) == 0 {
return nil, errors.New("failed to find download link")
2024-09-24 06:17:11 -04:00
}
path, files, err := utils.MegaDownload(megaRegexRes[0][1], "torrent")
if err != nil {
return nil, err
}
torrent := ""
for _, file := range files {
if strings.HasSuffix(file, ".torrent") {
torrent = file
break
}
}
dataBytes, err := os.ReadFile(torrent)
if err != nil {
return nil, err
}
item.Download, item.Size, err = utils.ConvertTorrentToMagnet(dataBytes)
if err != nil {
return nil, err
}
_ = os.RemoveAll(path)
} else {
return nil, errors.New("failed to find download link")
2024-09-24 06:17:11 -04:00
}
} else {
return nil, errors.New("failed to find download link")
2024-09-24 06:17:11 -04:00
}
2024-12-21 11:37:00 -05:00
item.Platform = "windows"
2024-09-24 06:17:11 -04:00
return item, nil
}
func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
var res []*model.GameItem
2024-09-24 06:17:11 -04:00
for _, page := range pages {
items, err := c.Crawl(page)
if err != nil {
return nil, err
}
res = append(res, items...)
}
return res, nil
}
func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) {
var res []*model.GameItem
2024-09-24 06:17:11 -04:00
totalPageNum, err := c.GetTotalPageNum()
if err != nil {
return nil, err
}
for i := 1; i <= totalPageNum; i++ {
items, err := c.Crawl(i)
if err != nil {
return nil, err
}
res = append(res, items...)
}
return res, nil
}
func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) {
2024-12-02 03:17:01 -05:00
resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).Get(constant.OnlineFixURL)
2024-09-24 06:17:11 -04:00
if err != nil {
return 0, err
}
pageRegex := regexp.MustCompile(`(?i)<a href="https://online-fix.me/page/(\d+)/">.*?</a>`)
2024-12-02 03:17:01 -05:00
pageRegexRes := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
2024-09-24 06:17:11 -04:00
if len(pageRegexRes) == 0 {
return 0, err
}
totalPageNum, err := strconv.Atoi(pageRegexRes[len(pageRegexRes)-2][1])
if err != nil {
return 0, err
}
return totalPageNum, nil
}
// csrf is the JSON payload decoded by getCookies from the response of
// constant.OnlineFixCSRFURL. Its two values are sent back as one form
// parameter of the login request.
type csrf struct {
	// Field is used as the name of the form parameter.
	Field string `json:"field"`
	// Value is used as the value of that form parameter.
	Value string `json:"value"`
}
2024-12-02 03:17:01 -05:00
func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
2024-12-10 08:37:16 -05:00
val, exists := cache.Get("onlinefix_cookies")
if exists {
var cookies []*http.Cookie
if err := json.Unmarshal([]byte(val), &cookies); err != nil {
2024-12-02 03:17:01 -05:00
return nil, err
}
2024-12-10 08:37:16 -05:00
return cookies, nil
}
2024-09-24 06:17:11 -04:00
2024-12-10 08:37:16 -05:00
resp, err := utils.Request().SetHeaders(map[string]string{
"X-Requested-With": "XMLHttpRequest",
"Referer": constant.OnlineFixURL,
}).Get(constant.OnlineFixCSRFURL)
if err != nil {
return nil, err
}
var csrf csrf
if err = json.Unmarshal(resp.Body(), &csrf); err != nil {
return nil, err
}
2024-12-02 03:17:01 -05:00
2024-12-10 08:37:16 -05:00
cookies := resp.Cookies()
params := url.Values{}
params.Add("login_name", config.Config.OnlineFix.User)
params.Add("login_password", config.Config.OnlineFix.Password)
params.Add(csrf.Field, csrf.Value)
params.Add("login", "submit")
resp, err = utils.Request().SetHeaders(map[string]string{
"Origin": constant.OnlineFixURL,
"Content-Type": "application/x-www-form-urlencoded",
"Referer": constant.OnlineFixURL,
}).SetCookies(cookies).SetBody(params.Encode()).Post(constant.OnlineFixURL)
if err != nil {
return nil, err
2024-09-24 06:17:11 -04:00
}
2024-12-10 08:37:16 -05:00
cookies = resp.Cookies()
jsonBytes, _ := json.Marshal(cookies)
_ = cache.SetWithExpire("onlinefix_cookies", string(jsonBytes), time.Hour)
return cookies, nil
2024-09-24 06:17:11 -04:00
}
// onlineFixParenRegex matches a parenthesised group, shortest match first.
var onlineFixParenRegex = regexp.MustCompile(`(?i)\(.*?\)`)

// OnlineFixFormatter turns a raw page title into a game name: it removes
// every occurrence of "по сети", removes every parenthesised group, and
// trims leading and trailing whitespace. Whitespace left in the middle of
// the name by a removal is kept as is.
func OnlineFixFormatter(name string) string {
	name = strings.ReplaceAll(name, "по сети", "")
	name = onlineFixParenRegex.ReplaceAllString(name, "")
	return strings.TrimSpace(name)
}