2024-09-24 06:17:11 -04:00
|
|
|
package crawler
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"encoding/json"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
2024-12-02 03:17:01 -05:00
|
|
|
"net/http"
|
2024-09-24 06:17:11 -04:00
|
|
|
"net/url"
|
|
|
|
"os"
|
|
|
|
"regexp"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
2024-12-10 08:37:16 -05:00
|
|
|
"time"
|
2024-09-24 06:17:11 -04:00
|
|
|
|
2024-12-10 08:37:16 -05:00
|
|
|
"pcgamedb/cache"
|
2024-11-20 06:09:04 -05:00
|
|
|
"pcgamedb/config"
|
|
|
|
"pcgamedb/constant"
|
|
|
|
"pcgamedb/db"
|
|
|
|
"pcgamedb/model"
|
|
|
|
"pcgamedb/utils"
|
2024-11-15 02:02:45 -05:00
|
|
|
|
2024-09-24 06:17:11 -04:00
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
"go.uber.org/zap"
|
|
|
|
)
|
|
|
|
|
|
|
|
// OnlineFixCrawler scrapes game entries from online-fix.me.
// It authenticates via getCookies and persists results through the db package.
type OnlineFixCrawler struct {
	logger *zap.Logger // structured logger for crawl progress and failures
}
|
|
|
|
|
|
|
|
func NewOnlineFixCrawler(logger *zap.Logger) *OnlineFixCrawler {
|
|
|
|
return &OnlineFixCrawler{
|
2024-12-10 08:37:16 -05:00
|
|
|
logger: logger,
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-11-14 12:29:19 -05:00
|
|
|
func (c *OnlineFixCrawler) Name() string {
|
|
|
|
return "OnlineFixCrawler"
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
// Crawl fetches one listing page of online-fix.me, crawls every article on it
// that has not been seen before, saves each resulting game item, and returns
// the items that were successfully saved. Per-item failures are logged and
// skipped; only page-level failures abort the whole call.
func (c *OnlineFixCrawler) Crawl(page int) ([]*model.GameItem, error) {
	// Listing pages require an authenticated session.
	cookies, err := c.getCookies()
	if err != nil {
		return nil, err
	}
	requestURL := fmt.Sprintf("%s/page/%d/", constant.OnlineFixURL, page)
	resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).SetCookies(cookies).Get(requestURL)
	if err != nil {
		c.logger.Error("Failed to fetch", zap.Error(err))
		return nil, err
	}
	// The site serves Windows-1251; convert to UTF-8 before parsing.
	body := utils.Windows1251ToUTF8(resp.Body())
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(body))
	if err != nil {
		c.logger.Error("Failed to parse HTML", zap.Error(err))
		return nil, err
	}
	var urls []string
	var updateFlags []string //link+date
	// Each news article contributes its detail-page URL and a change-detection
	// flag (URL concatenated with the article's timestamp text).
	doc.Find("article.news").Each(func(i int, s *goquery.Selection) {
		urls = append(urls, s.Find(".big-link").First().AttrOr("href", ""))
		updateFlags = append(
			updateFlags,
			s.Find(".big-link").First().AttrOr("href", "")+
				s.Find("time").Text(),
		)
	})

	var res []*model.GameItem
	for i, u := range urls {
		// Skip entries whose link+date flag was already crawled (unchanged).
		if db.IsOnlineFixCrawled(updateFlags[i]) {
			continue
		}
		c.logger.Info("Crawling", zap.String("URL", u))
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
			continue
		}
		item.UpdateFlag = updateFlags[i]
		err = db.SaveGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to save", zap.Error(err))
			continue
		}
		res = append(res, item)
		// Post-processing is best-effort: a failure here does not remove the
		// already-saved item from the results.
		if err := OrganizeGameItem(item); err != nil {
			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
			continue
		}
	}
	return res, nil
}
|
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
func (c *OnlineFixCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
|
|
|
cookies, err := c.getCookies()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err := utils.Request().SetHeaders(map[string]string{
|
|
|
|
"Referer": constant.OnlineFixURL,
|
|
|
|
}).SetCookies(cookies).Get(URL)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-10 08:37:16 -05:00
|
|
|
body := utils.Windows1251ToUTF8(resp.Body())
|
2024-09-24 06:17:11 -04:00
|
|
|
titleRegex := regexp.MustCompile(`(?i)<h1.*?>(.*?)</h1>`)
|
2024-12-10 08:37:16 -05:00
|
|
|
titleRegexRes := titleRegex.FindAllStringSubmatch(string(body), -1)
|
2024-09-24 06:17:11 -04:00
|
|
|
if len(titleRegexRes) == 0 {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("failed to find title")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-12-10 08:37:16 -05:00
|
|
|
downloadRegex := regexp.MustCompile(`(?i)<a[^>]+\bhref="([^"]+)"[^>]+>(Скачать Torrent|Скачать торрент)</a>`)
|
|
|
|
downloadRegexRes := downloadRegex.FindAllStringSubmatch(string(body), -1)
|
2024-09-24 06:17:11 -04:00
|
|
|
if len(downloadRegexRes) == 0 {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("failed to find download button")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
item, err := db.GetGameItemByUrl(URL)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
item.RawName = titleRegexRes[0][1]
|
|
|
|
item.Name = OnlineFixFormatter(item.RawName)
|
2024-12-02 03:17:01 -05:00
|
|
|
item.Url = URL
|
2024-09-24 06:17:11 -04:00
|
|
|
item.Author = "OnlineFix"
|
|
|
|
item.Size = "0"
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1])
|
2024-12-10 08:37:16 -05:00
|
|
|
body = utils.Windows1251ToUTF8(resp.Body())
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if strings.Contains(downloadRegexRes[0][1], "uploads.online-fix.me") {
|
|
|
|
magnetRegex := regexp.MustCompile(`(?i)"(.*?).torrent"`)
|
2024-12-10 08:37:16 -05:00
|
|
|
magnetRegexRes := magnetRegex.FindAllStringSubmatch(string(body), -1)
|
2024-09-24 06:17:11 -04:00
|
|
|
if len(magnetRegexRes) == 0 {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("failed to find magnet")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err = utils.Request().SetHeader("Referer", URL).SetCookies(cookies).Get(downloadRegexRes[0][1] + strings.Trim(magnetRegexRes[0][0], "\""))
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-12-02 03:17:01 -05:00
|
|
|
item.Download, item.Size, err = utils.ConvertTorrentToMagnet(resp.Body())
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
} else if strings.Contains(downloadRegexRes[0][1], "online-fix.me/ext") {
|
2024-12-10 08:37:16 -05:00
|
|
|
if strings.Contains(string(body), "mega.nz") {
|
2024-09-24 06:17:11 -04:00
|
|
|
if !config.Config.MegaAvaliable {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("mega is not avaliable")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
|
|
|
megaRegex := regexp.MustCompile(`(?i)location.href=\\'([^\\']*)\\'`)
|
2024-12-10 08:37:16 -05:00
|
|
|
megaRegexRes := megaRegex.FindAllStringSubmatch(string(body), -1)
|
2024-09-24 06:17:11 -04:00
|
|
|
if len(megaRegexRes) == 0 {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("failed to find download link")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
|
|
|
path, files, err := utils.MegaDownload(megaRegexRes[0][1], "torrent")
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
torrent := ""
|
|
|
|
for _, file := range files {
|
|
|
|
if strings.HasSuffix(file, ".torrent") {
|
|
|
|
torrent = file
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
dataBytes, err := os.ReadFile(torrent)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
item.Download, item.Size, err = utils.ConvertTorrentToMagnet(dataBytes)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
_ = os.RemoveAll(path)
|
|
|
|
} else {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("failed to find download link")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
|
|
|
} else {
|
2024-11-21 12:30:26 -05:00
|
|
|
return nil, errors.New("failed to find download link")
|
2024-09-24 06:17:11 -04:00
|
|
|
}
|
2024-12-21 11:37:00 -05:00
|
|
|
item.Platform = "windows"
|
2024-09-24 06:17:11 -04:00
|
|
|
return item, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *OnlineFixCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
|
|
|
|
var res []*model.GameItem
|
2024-09-24 06:17:11 -04:00
|
|
|
for _, page := range pages {
|
|
|
|
items, err := c.Crawl(page)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
res = append(res, items...)
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
2024-11-16 00:48:48 -05:00
|
|
|
func (c *OnlineFixCrawler) CrawlAll() ([]*model.GameItem, error) {
|
|
|
|
var res []*model.GameItem
|
2024-09-24 06:17:11 -04:00
|
|
|
totalPageNum, err := c.GetTotalPageNum()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
for i := 1; i <= totalPageNum; i++ {
|
|
|
|
items, err := c.Crawl(i)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
res = append(res, items...)
|
|
|
|
}
|
|
|
|
return res, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *OnlineFixCrawler) GetTotalPageNum() (int, error) {
|
2024-12-02 03:17:01 -05:00
|
|
|
resp, err := utils.Request().SetHeader("Referer", constant.OnlineFixURL).Get(constant.OnlineFixURL)
|
2024-09-24 06:17:11 -04:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
pageRegex := regexp.MustCompile(`(?i)<a href="https://online-fix.me/page/(\d+)/">.*?</a>`)
|
2024-12-02 03:17:01 -05:00
|
|
|
pageRegexRes := pageRegex.FindAllStringSubmatch(string(resp.Body()), -1)
|
2024-09-24 06:17:11 -04:00
|
|
|
if len(pageRegexRes) == 0 {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
totalPageNum, err := strconv.Atoi(pageRegexRes[len(pageRegexRes)-2][1])
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
return totalPageNum, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// csrf mirrors the JSON payload returned by the OnlineFix CSRF endpoint:
// a dynamically named form field and its token value, both of which must be
// included in the login POST.
type csrf struct {
	Field string `json:"field"`
	Value string `json:"value"`
}
|
|
|
|
|
2024-12-02 03:17:01 -05:00
|
|
|
// getCookies returns an authenticated cookie set for online-fix.me.
// It first consults the cache; on a miss it fetches a CSRF token, performs
// the login POST with the configured credentials, and caches the resulting
// cookies (as JSON) for one hour.
func (c *OnlineFixCrawler) getCookies() ([]*http.Cookie, error) {
	val, exists := cache.Get("onlinefix_cookies")
	if exists {
		var cookies []*http.Cookie
		if err := json.Unmarshal([]byte(val), &cookies); err != nil {
			return nil, err
		}
		return cookies, nil
	}

	// Cache miss: request a fresh CSRF token. The endpoint only responds to
	// AJAX-style requests, hence the X-Requested-With header.
	resp, err := utils.Request().SetHeaders(map[string]string{
		"X-Requested-With": "XMLHttpRequest",
		"Referer":          constant.OnlineFixURL,
	}).Get(constant.OnlineFixCSRFURL)
	if err != nil {
		return nil, err
	}
	var csrf csrf
	if err = json.Unmarshal(resp.Body(), &csrf); err != nil {
		return nil, err
	}

	// Session cookies issued alongside the CSRF token must accompany the login.
	cookies := resp.Cookies()

	// Build the login form: configured credentials plus the dynamically named
	// CSRF field.
	params := url.Values{}
	params.Add("login_name", config.Config.OnlineFix.User)
	params.Add("login_password", config.Config.OnlineFix.Password)
	params.Add(csrf.Field, csrf.Value)
	params.Add("login", "submit")
	resp, err = utils.Request().SetHeaders(map[string]string{
		"Origin":       constant.OnlineFixURL,
		"Content-Type": "application/x-www-form-urlencoded",
		"Referer":      constant.OnlineFixURL,
	}).SetCookies(cookies).SetBody(params.Encode()).Post(constant.OnlineFixURL)
	if err != nil {
		return nil, err
	}
	// NOTE(review): the login response's cookies are cached without verifying
	// that authentication actually succeeded — confirm the site returns an
	// error status on bad credentials, otherwise bad cookies get cached.
	cookies = resp.Cookies()
	jsonBytes, _ := json.Marshal(cookies) // marshaling []*http.Cookie cannot fail
	_ = cache.SetWithExpire("onlinefix_cookies", string(jsonBytes), time.Hour)

	return cookies, nil
}
|
|
|
|
|
|
|
|
// OnlineFixFormatter normalizes a raw OnlineFix game title: it drops the
// Russian "по сети" (play-online) marker, strips any parenthesized
// annotations, and trims surrounding whitespace.
func OnlineFixFormatter(name string) string {
	cleaned := strings.ReplaceAll(name, "по сети", "")
	parenRe := regexp.MustCompile(`(?i)\(.*?\)`)
	cleaned = parenRe.ReplaceAllString(cleaned, "")
	return strings.TrimSpace(cleaned)
}
|