216 lines
5.0 KiB
Go
216 lines
5.0 KiB
Go
package crawler
|
||
|
||
import (
|
||
"bytes"
|
||
"encoding/base64"
|
||
"encoding/json"
|
||
"errors"
|
||
"html"
|
||
"net/http"
|
||
"regexp"
|
||
"strings"
|
||
"time"
|
||
|
||
"game-crawler/cache"
|
||
"game-crawler/constant"
|
||
"game-crawler/db"
|
||
"game-crawler/model"
|
||
|
||
"git.nite07.com/nite/ccs"
|
||
"github.com/PuerkitoBio/goquery"
|
||
"go.uber.org/zap"
|
||
)
|
||
|
||
type FreeGOGCrawler struct {
|
||
*BaseLogger
|
||
*BaseError
|
||
cfClearanceUrl string
|
||
logger *zap.Logger
|
||
}
|
||
|
||
func NewFreeGOGCrawler(cfClearanceUrl string, logger *zap.Logger) *FreeGOGCrawler {
|
||
return &FreeGOGCrawler{
|
||
BaseLogger: &BaseLogger{
|
||
logger: logger,
|
||
},
|
||
BaseError: &BaseError{},
|
||
cfClearanceUrl: cfClearanceUrl,
|
||
logger: logger,
|
||
}
|
||
}
|
||
|
||
func (c *FreeGOGCrawler) getSession() (*ccs.Session, error) {
|
||
cacheKey := "freegog_waf_session"
|
||
var session ccs.Session
|
||
if val, exist := cache.Get(cacheKey); exist {
|
||
err := json.Unmarshal([]byte(val), &session)
|
||
if err != nil {
|
||
return nil, c.ErrUnmarshalJSON(err)
|
||
}
|
||
} else {
|
||
var err error
|
||
session, err = ccs.WAFSession(c.cfClearanceUrl, constant.FreeGOGListURL)
|
||
if err != nil {
|
||
return nil, c.ErrRequest(constant.FreeGOGListURL, err)
|
||
}
|
||
jsonBytes, err := json.Marshal(session)
|
||
if err == nil {
|
||
_ = cache.SetWithExpire(cacheKey, jsonBytes, 1*time.Hour)
|
||
}
|
||
}
|
||
return &session, nil
|
||
}
|
||
|
||
func (c *FreeGOGCrawler) Name() string {
|
||
return "FreeGOG"
|
||
}
|
||
|
||
func (c *FreeGOGCrawler) Crawl(num int) ([]*model.GameItem, error) {
|
||
c.LogCrawlStart(num)
|
||
count := 0
|
||
session, err := c.getSession()
|
||
if err != nil {
|
||
return nil, c.ErrGetSession(err)
|
||
}
|
||
|
||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, constant.FreeGOGListURL, *session, nil)
|
||
if err != nil {
|
||
return nil, c.ErrRequest(constant.FreeGOGListURL, err)
|
||
}
|
||
|
||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(resp.Body)))
|
||
if err != nil {
|
||
return nil, c.ErrParseDoc(constant.FreeGOGListURL, err)
|
||
}
|
||
|
||
var urls []string
|
||
var updateFlags []string // RawName+Link
|
||
doc.Find(".items-outer li a").Each(func(i int, s *goquery.Selection) {
|
||
url := s.AttrOr("href", "")
|
||
rawName := s.Text()
|
||
if url != "" && rawName != "" {
|
||
urls = append(urls, url)
|
||
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(rawName+url)))
|
||
}
|
||
})
|
||
|
||
var res []*model.GameItem
|
||
for i, u := range urls {
|
||
if count == num {
|
||
break
|
||
}
|
||
if db.IsFreeGOGCrawled(updateFlags[i]) {
|
||
c.LogCrawlSkip(u)
|
||
continue
|
||
}
|
||
|
||
item, err := c.CrawlByUrl(u)
|
||
if err != nil {
|
||
c.LogCrawlByUrlError(u, err)
|
||
continue
|
||
}
|
||
|
||
item.UpdateFlag = updateFlags[i]
|
||
err = db.SaveGameItem(item)
|
||
if err != nil {
|
||
c.LogSaveGameItemError(u, err)
|
||
continue
|
||
}
|
||
|
||
res = append(res, item)
|
||
count++
|
||
|
||
if err := OrganizeGameItem(item); err != nil {
|
||
c.LogOrganizeGameWarn(item.ID, item.Name, err)
|
||
continue
|
||
}
|
||
}
|
||
|
||
return res, nil
|
||
}
|
||
|
||
func (c *FreeGOGCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||
c.LogCrawlByUrlStart(URL)
|
||
session, err := c.getSession()
|
||
if err != nil {
|
||
return nil, c.ErrGetSession(err)
|
||
}
|
||
|
||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
||
if err != nil {
|
||
return nil, c.ErrRequest(URL, err)
|
||
}
|
||
|
||
item, err := db.GetGameItemByUrl(URL)
|
||
if err != nil {
|
||
return nil, c.ErrDBQuery(err)
|
||
}
|
||
|
||
item.Url = URL
|
||
|
||
rawTitleRegex := regexp.MustCompile(`(?i)<h1 class="entry-title">(.*?)</h1>`)
|
||
rawTitleRegexRes := rawTitleRegex.FindStringSubmatch(string(resp.Body))
|
||
if len(rawTitleRegexRes) > 1 {
|
||
rawName := html.UnescapeString(rawTitleRegexRes[1])
|
||
item.RawName = strings.Replace(rawName, "–", "-", -1)
|
||
} else {
|
||
return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to find game name"))
|
||
}
|
||
|
||
item.Name = FreeGOGFormatter(item.RawName)
|
||
|
||
sizeRegex := regexp.MustCompile(`(?i)>Size:\s?(.*?)<`)
|
||
sizeRegexRes := sizeRegex.FindStringSubmatch(string(resp.Body))
|
||
if len(sizeRegexRes) > 1 {
|
||
item.Size = sizeRegexRes[1]
|
||
} else {
|
||
item.Size = "unknown"
|
||
}
|
||
|
||
magnetRegex := regexp.MustCompile(`<a class="download-btn" href="https://gdl.freegogpcgames.xyz/download-gen\.php\?url=(.*?)"`)
|
||
magnetRegexRes := magnetRegex.FindStringSubmatch(string(resp.Body))
|
||
if len(magnetRegexRes) > 1 {
|
||
magnet, err := base64.StdEncoding.DecodeString(magnetRegexRes[1])
|
||
if err != nil {
|
||
return nil, c.ErrBase64Decode(magnetRegexRes[1], err)
|
||
}
|
||
item.Downloads = map[string]string{
|
||
"magnet": string(magnet),
|
||
}
|
||
} else {
|
||
return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to find magnet link"))
|
||
}
|
||
|
||
item.Author = "FreeGOG"
|
||
item.Platform = "windows"
|
||
|
||
return item, nil
|
||
}
|
||
|
||
func (c *FreeGOGCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||
return c.Crawl(-1)
|
||
}
|
||
|
||
// freeGOGRegexps lists the patterns whose matches are deleted from a raw
// title. The single entry removes everything from the first "(" to the last
// ")" (the match is greedy).
var freeGOGRegexps = []*regexp.Regexp{
	regexp.MustCompile(`(?i)\(.*\)`),
}

// Patterns used by FreeGOGFormatter, compiled once at package initialization
// rather than on every call.
var (
	// freeGOGVersionRegexp matches a version marker such as "v1", "v1.5" or "V2.0.1".
	freeGOGVersionRegexp = regexp.MustCompile(`(?i)v\d+(\.\d+)*`)
	// freeGOGGOTYRegexp matches the ": GOTY" abbreviation in any letter case.
	freeGOGGOTYRegexp = regexp.MustCompile(`(?i):\sgoty`)
)

// FreeGOGFormatter derives a clean game name from a raw FreeGOG title.
//
// It applies, in order:
//  1. removal of every match of the patterns in freeGOGRegexps;
//  2. truncation at the first version marker ("v" followed by digits);
//  3. truncation at the first "+";
//  4. expansion of ": GOTY" to ": Game Of The Year";
//  5. trimming of leading and trailing whitespace.
func FreeGOGFormatter(name string) string {
	for _, re := range freeGOGRegexps {
		name = re.ReplaceAllString(name, "")
	}

	if loc := freeGOGVersionRegexp.FindStringIndex(name); loc != nil {
		name = name[:loc[0]]
	}
	if plus := strings.Index(name, "+"); plus != -1 {
		name = name[:plus]
	}

	name = freeGOGGOTYRegexp.ReplaceAllString(name, ": Game Of The Year")

	return strings.TrimSpace(name)
}
|