164 lines
3.7 KiB
Go
164 lines
3.7 KiB
Go
package crawler
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"net/url"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"game-crawler/constant"
|
|
"game-crawler/db"
|
|
"game-crawler/model"
|
|
"game-crawler/utils"
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
type SteamRIPCrawler struct {
|
|
*BaseLogger
|
|
*BaseError
|
|
logger *zap.Logger
|
|
}
|
|
|
|
func NewSteamRIPCrawler(logger *zap.Logger) *SteamRIPCrawler {
|
|
return &SteamRIPCrawler{
|
|
BaseLogger: &BaseLogger{
|
|
logger: logger,
|
|
},
|
|
BaseError: &BaseError{},
|
|
logger: logger,
|
|
}
|
|
}
|
|
|
|
func (c *SteamRIPCrawler) Name() string {
|
|
return "SteamRIPCrawler"
|
|
}
|
|
|
|
func (c *SteamRIPCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
|
c.LogCrawlByUrlStart(URL)
|
|
|
|
resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(URL)
|
|
if err != nil {
|
|
return nil, c.ErrRequest(URL, err)
|
|
}
|
|
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
|
if err != nil {
|
|
return nil, c.ErrParseDoc(URL, err)
|
|
}
|
|
|
|
item, err := db.GetGameItemByUrl(URL)
|
|
if err != nil {
|
|
return nil, c.ErrDBQuery(err)
|
|
}
|
|
|
|
item.RawName = strings.TrimSpace(doc.Find(".entry-title").First().Text())
|
|
if item.RawName == "" {
|
|
return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to get game name"))
|
|
}
|
|
item.Name = SteamRIPFormatter(item.RawName)
|
|
item.Url = URL
|
|
item.Author = "SteamRIP"
|
|
item.Platform = "windows"
|
|
|
|
sizeRegex := regexp.MustCompile(`(?i)<li><strong>Game Size:\s?</strong>(.*?)</li>`)
|
|
sizeMatch := sizeRegex.FindStringSubmatch(string(resp.Body()))
|
|
if len(sizeMatch) > 1 {
|
|
item.Size = strings.TrimSpace(sizeMatch[1])
|
|
} else {
|
|
item.Size = "unknown"
|
|
}
|
|
|
|
downloadLinks := map[string]string{}
|
|
doc.Find(".shortc-button").Each(func(i int, s *goquery.Selection) {
|
|
downloadLink, _ := s.Attr("href")
|
|
u, _ := url.Parse(downloadLink)
|
|
if strings.HasPrefix(downloadLink, "//") {
|
|
downloadLink = "https:" + downloadLink
|
|
}
|
|
downloadLinks[u.Host] = downloadLink
|
|
})
|
|
item.Downloads = downloadLinks
|
|
|
|
if len(item.Downloads) == 0 {
|
|
return nil, c.ErrGetGameItemDetail(URL, errors.New("failed to get download links"))
|
|
}
|
|
|
|
return item, nil
|
|
}
|
|
|
|
func (c *SteamRIPCrawler) Crawl(num int) ([]*model.GameItem, error) {
|
|
c.LogCrawlStart(num)
|
|
|
|
resp, err := utils.Request().SetLogger(c.logger.Sugar()).Get(constant.SteamRIPGameListURL)
|
|
if err != nil {
|
|
return nil, c.ErrRequest(constant.SteamRIPGameListURL, err)
|
|
}
|
|
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
|
if err != nil {
|
|
return nil, c.ErrParseDoc(constant.SteamRIPGameListURL, err)
|
|
}
|
|
|
|
var items []*model.GameItem
|
|
var urls []string
|
|
var updateFlags []string
|
|
|
|
doc.Find(".az-list-item>a").Each(func(i int, s *goquery.Selection) {
|
|
u, exists := s.Attr("href")
|
|
if !exists {
|
|
return
|
|
}
|
|
urls = append(urls, fmt.Sprintf("%s%s", constant.SteamRIPBaseURL, u))
|
|
updateFlags = append(updateFlags, base64.StdEncoding.EncodeToString([]byte(s.Text())))
|
|
})
|
|
|
|
count := 0
|
|
for i, u := range urls {
|
|
if num > 0 && count == num {
|
|
break
|
|
}
|
|
if db.IsSteamRIPCrawled(updateFlags[i]) {
|
|
c.LogCrawlSkip(u)
|
|
continue
|
|
}
|
|
|
|
item, err := c.CrawlByUrl(u)
|
|
if err != nil {
|
|
c.LogCrawlByUrlError(u, err)
|
|
continue
|
|
}
|
|
|
|
item.UpdateFlag = updateFlags[i]
|
|
if err := db.SaveGameItem(item); err != nil {
|
|
c.LogSaveGameItemError(u, err)
|
|
continue
|
|
}
|
|
|
|
items = append(items, item)
|
|
count++
|
|
|
|
if err := OrganizeGameItem(item); err != nil {
|
|
c.LogOrganizeGameWarn(item.ID, item.Name, err)
|
|
continue
|
|
}
|
|
}
|
|
|
|
return items, nil
|
|
}
|
|
|
|
func (c *SteamRIPCrawler) CrawlAll() ([]*model.GameItem, error) {
|
|
return c.Crawl(-1)
|
|
}
|
|
|
|
// steamRIPParenRegex matches a non-empty parenthesised group such as
// "(v1.0)". It is compiled once at package scope instead of on every
// SteamRIPFormatter call.
var steamRIPParenRegex = regexp.MustCompile(`\([^\)]+\)`)

// SteamRIPFormatter turns a raw SteamRIP page title into a game name: it
// removes every non-empty parenthesised group, removes every occurrence of
// "Free Download", and trims surrounding whitespace. Whitespace left inside
// the name by a removed group is not collapsed.
func SteamRIPFormatter(name string) string {
	name = steamRIPParenRegex.ReplaceAllString(name, "")
	name = strings.ReplaceAll(name, "Free Download", "")
	return strings.TrimSpace(name)
}
|