// game-crawler/crawler/steamrip.go
package crawler

import (
	"bytes"
	"encoding/base64"
	"errors"
	"fmt"
	"net/url"
	"regexp"
	"strings"

	"game-crawler/constant"
	"game-crawler/db"
	"game-crawler/model"
	"game-crawler/utils"

	"github.com/PuerkitoBio/goquery"
	"go.uber.org/zap"
)
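
// SteamRIPCrawler crawls game pages from SteamRIP and extracts per-host
// download links for each game.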
type SteamRIPCrawler struct {
	*BaseLogger
	*BaseError
	logger *zap.Logger
}
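
// NewSteamRIPCrawler constructs a SteamRIPCrawler that logs through the
// given zap logger.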
func NewSteamRIPCrawler(logger *zap.Logger) *SteamRIPCrawler {
	return &SteamRIPCrawler{
		BaseLogger: &BaseLogger{logger: logger},
		BaseError:  &BaseError{},
		logger:     logger,
	}
}
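
// Name returns the crawler's identifier.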
func (c *SteamRIPCrawler) Name() string {
	return "SteamRIPCrawler"
}
func (c *SteamRIPCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	c.LogCrawlByUrlStart(URL)
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(URL)
	if err != nil {
		return nil, c.ErrRequest(URL, err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		return nil, c.ErrParseDoc(URL, err)
	}
	// Look up the item record for this URL in the database.
	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		return nil, c.ErrDBQuery(err)
	}
	item.RawName = strings.TrimSpace(doc.Find(".entry-title").First().Text())
	if item.RawName == "" {
		return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to get game name"))
	}
	item.Name = SteamRIPFormatter(item.RawName)
	item.Url = URL
	item.Author = "SteamRIP"
	item.Platform = "windows"

	// The game size has no stable selector, so extract it with a regexp
	// over the raw HTML body.
	sizeRegex := regexp.MustCompile(`(?i)<li><strong>Game Size:\s?</strong>(.*?)</li>`)
	sizeMatch := sizeRegex.FindStringSubmatch(string(resp.Body()))
	if len(sizeMatch) > 1 {
		item.Size = strings.TrimSpace(sizeMatch[1])
	} else {
		item.Size = "unknown"
	}

	downloadLinks := map[string]string{}
	doc.Find(".shortc-button").Each(func(i int, s *goquery.Selection) {
		downloadLink, exists := s.Attr("href")
		if !exists || downloadLink == "" {
			return
		}
		// Normalize protocol-relative links before parsing so u.Host is set.
		if strings.HasPrefix(downloadLink, "//") {
			downloadLink = "https:" + downloadLink
		}
		u, err := url.Parse(downloadLink)
		if err != nil {
			return
		}
		// Key by host so each download mirror appears at most once.
		downloadLinks[u.Host] = downloadLink
	})
	item.Downloads = downloadLinks
	if len(item.Downloads) == 0 {
		return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to get download links"))
	}
	return item, nil
}
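
// Crawl walks the SteamRIP game list and crawls up to num not-yet-crawled
// games (num <= 0 means no limit), saving each item to the database.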
func (c *SteamRIPCrawler) Crawl(num int) ([]*model.GameItem, error) {
	c.LogCrawlStart(num)
	resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(constant.SteamRIPGameListURL)
	if err != nil {
		return nil, c.ErrRequest(constant.SteamRIPGameListURL, err)
	}
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
	if err != nil {
		return nil, c.ErrParseDoc(constant.SteamRIPGameListURL, err)
	}

	var items []*model.GameItem
	var urls []string
	var updateFlags []string
	doc.Find(".az-list-item>a").Each(func(i int, s *goquery.Selection) {
		u, exists := s.Attr("href")
		if !exists {
			return
		}
		urls = append(urls, fmt.Sprintf("%s%s", constant.SteamRIPBaseURL, u))
		// The base64-encoded link text doubles as an update flag: if the
		// title changes upstream, the entry is treated as not yet crawled.
		updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Text())))
	})

	count := 0
	for i, u := range urls {
		if num > 0 && count >= num {
			break
		}
		if db.IsSteamRIPCrawled(updateFlags[i]) {
			c.LogCrawlSkip(u)
			continue
		}
		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.LogCrawlByUrlError(u, err)
			continue
		}
		item.UpdateFlag = updateFlags[i]
		if err := db.SaveGameItem(item); err != nil {
			c.LogSaveGameItemError(u, err)
			continue
		}
		items = append(items, item)
		count++
		// Organizing is best-effort: a failure is logged but does not
		// exclude the already-saved item from the result set.
		if err := OrganizeGameItem(item); err != nil {
			c.LogOrganizeGameWarn(item.ID, item.Name, err)
		}
	}
	return items, nil
}
func (c *SteamRIPCrawler) CrawlAll() ([]*model.GameItem, error) {
	return c.Crawl(-1)
}
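
// SteamRIPFormatter normalizes a raw SteamRIP page title into a plain game
// name by stripping parenthesized qualifiers and the "Free Download" suffix.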
func SteamRIPFormatter(name string) string {
	// Drop parenthesized qualifiers, e.g. "(v1.2)" or "(Build 123456)".
	name = regexp.MustCompile(`\([^\)]+\)`).ReplaceAllString(name, "")
	name = strings.ReplaceAll(name, "Free Download", "")
	return strings.TrimSpace(name)
}