This commit is contained in:
Nite07 2024-12-28 12:35:02 +08:00
parent cd9b7412b8
commit 0d6bae97ab
11 changed files with 123 additions and 63 deletions

View File

@ -6,15 +6,18 @@ game-crawler is a powerful command-line tool designed to scrape and manage repac
- **Data Sources**:
- KaOSKrew(1337x)
- DODI(1337x)
- johncena141(1337x)
- Fitgirl
- KaOSKrew
- DODI
- FreeGOG
- GOGGames
- OnlineFix
- Xatab
- SteamRIP
- Chovka
- Omg_Gods(rutracker)
- LinuxGame(rutracker)
- **Database**:

View File

@ -34,5 +34,4 @@ const (
RutrackerTopicURL = "https://rutracker.org/forum/%s"
RutrackerURL = "https://rutracker.org/forum/index.php"
RutrackerLoginURL = "https://rutracker.org/forum/login.php"
RutrackerAuthorURL = "https://rutracker.org/forum/tracker.php?rid=%s&start=%v"
)

View File

@ -26,17 +26,18 @@ type PagedCrawler interface {
func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
ret := map[string]Crawler{
"fitgirl": NewFitGirlCrawler(logger),
"dodi": NewDODICrawler(logger),
"kaoskrew": NewKaOsKrewCrawler(logger),
"freegog": NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
"xatab": NewXatabCrawler(logger),
"onlinefix": NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
"steamrip": NewSteamRIPCrawler(logger),
"chovka": NewChovkaCrawler(logger),
"goggames": NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
"johncena141": NewJohncena141Crawler(logger),
"omggods": NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
"fitgirl": NewFitGirlCrawler(logger),
"dodi": NewDODICrawler(logger),
"kaoskrew": NewKaOsKrewCrawler(logger),
"freegog": NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
"xatab": NewXatabCrawler(logger),
"onlinefix": NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
"steamrip": NewSteamRIPCrawler(logger),
"chovka": NewChovkaCrawler(logger),
"goggames": NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
"johncena141": NewJohncena141Crawler(logger),
"omggods": NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
"rutracker-linux-game": NewRutrackerLinuxGameCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
// "gnarly": NewGnarlyCrawler(logger),
}
return ret

View File

@ -108,7 +108,7 @@ func getIGDBID(name string) (int, error) {
maxSimilarityIndex := 0
for i, item := range data {
if strings.EqualFold(item.Name, name) {
return item.Game, nil
return GetIGDBAppParent(item.Game)
}
if sim := utils.Similarity(name, item.Name); sim >= 0.8 {
if sim > maxSimilarity {
@ -229,6 +229,13 @@ func GetIGDBAppParent(id int) (int, error) {
return 0, fmt.Errorf("failed to fetch IGDB app detail for parent: %d: %w", id, err)
}
hasParent := false
if detail.ParentGame != 0 {
hasParent = true
detail, err = GetIGDBAppDetail(detail.ParentGame)
if err != nil {
return 0, fmt.Errorf("failed to fetch IGDB version parent: %d: %w", detail.VersionParent, err)
}
}
for detail.VersionParent != 0 {
hasParent = true
detail, err = GetIGDBAppDetail(detail.VersionParent)

View File

@ -2,8 +2,6 @@ package crawler
import (
"game-crawler/model"
"regexp"
"strings"
"go.uber.org/zap"
)
@ -17,13 +15,12 @@ func NewOmgGodsCrawler(cfClearanceUrl, username, password string, logger *zap.Lo
return &OmgGodsCrawler{
logger: logger,
crawler: *NewRutrackerCrawler(
"https://rutracker.org/forum/tracker.php?rid=8994327&start=%v",
"OmgGods",
"switch",
"8994327",
username,
password,
cfClearanceUrl,
omgGodsFormatter,
logger,
),
}
@ -48,21 +45,3 @@ func (c *OmgGodsCrawler) CrawlAll() ([]*model.GameItem, error) {
// GetTotalPageNum reports how many listing pages the OmgGods tracker
// feed currently exposes, as computed by the underlying RutrackerCrawler.
func (c *OmgGodsCrawler) GetTotalPageNum() (int, error) {
	pages, err := c.crawler.GetTotalPageNum()
	return pages, err
}
// omgGodsFormatRegex holds the patterns stripped from raw topic titles:
// any parenthesized segment and any bracketed segment (release tags,
// version strings, group names).
var omgGodsFormatRegex = []*regexp.Regexp{
	regexp.MustCompile(`\(.*?\)`),
	regexp.MustCompile(`\[.*?\]`),
}

// omgGodsFormatter normalizes a raw OmgGods topic title into a clean game
// name: it removes (…) and […] segments, keeps only the part before the
// first " + " or " / " separator, and trims surrounding whitespace.
func omgGodsFormatter(name string) string {
	for _, re := range omgGodsFormatRegex {
		name = re.ReplaceAllString(name, "")
	}
	// Multi-title posts use " + " / " / " separators; keep the first title.
	if i := strings.Index(name, " + "); i >= 0 {
		name = name[:i]
	}
	if i := strings.Index(name, " / "); i >= 0 {
		name = name[:i]
	}
	return strings.TrimSpace(name)
}

View File

@ -11,7 +11,9 @@ import (
"game-crawler/model"
"game-crawler/utils"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"unicode"
@ -24,20 +26,18 @@ import (
type RutrackerCrawler struct {
source string
rid string
url string
platform string
username string
password string
formatter FormatterFunc
logger *zap.Logger
cfClearanceUrl string
}
func NewRutrackerCrawler(source, platform, rid, username, password, cfClearanceUrl string, formatter FormatterFunc, logger *zap.Logger) *RutrackerCrawler {
func NewRutrackerCrawler(url, source, platform, username, password, cfClearanceUrl string, logger *zap.Logger) *RutrackerCrawler {
return &RutrackerCrawler{
source: source,
rid: rid,
formatter: formatter,
url: url,
logger: logger,
platform: platform,
username: username,
@ -118,6 +118,11 @@ func (r *RutrackerCrawler) getSession() (*ccs.Session, error) {
return &session, nil
}
// regexps strips parenthesized and bracketed segments (release tags,
// version info, group names) from a topic title when deriving a clean
// game name from the #topic-title fallback.
var regexps = []*regexp.Regexp{
regexp.MustCompile(`\(.*?\)`),
regexp.MustCompile(`\[.*?\]`),
}
func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
r.logger.Info("Crawling game", zap.String("URL", URL))
session, err := r.getSession()
@ -143,8 +148,25 @@ func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
return nil, fmt.Errorf("failed to get game item by url: %w", err)
}
name := doc.Find(".post_body").First().Find("span").First().Text()
if strings.TrimSpace(name) == "" {
name = doc.Find(".post_body").First().Find("span").Eq(1).Text()
}
name = strings.TrimSpace(name)
if strings.TrimSpace(name) == "" {
name = doc.Find("#topic-title").Text()
for _, regex := range regexps {
name = regex.ReplaceAllString(name, "")
}
name = strings.TrimSpace(name)
if name == "" {
r.logger.Error("Failed to find name")
return nil, fmt.Errorf("failed to find name")
}
}
item.RawName = doc.Find("#topic-title").Text()
item.Name = r.formatter(item.RawName)
item.Name = name
item.Author = r.source
item.Platform = r.platform
item.Url = URL
@ -173,14 +195,13 @@ func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
}
func (r *RutrackerCrawler) Crawl(page int) ([]*model.GameItem, error) {
r.logger.Info("Crawling Rutracker", zap.Int("page", page), zap.String("rid", r.rid))
session, err := r.getSession()
if err != nil {
r.logger.Error("Failed to get session", zap.Error(err))
return nil, fmt.Errorf("failed to get session: %w", err)
}
URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, (page-1)*50)
URL := fmt.Sprintf(r.url, (page-1)*50)
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
if err != nil {
r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
@ -258,7 +279,7 @@ func (r *RutrackerCrawler) GetTotalPageNum() (int, error) {
return 0, fmt.Errorf("failed to get session: %w", err)
}
URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, 0)
URL := fmt.Sprintf(r.url, 0)
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
if err != nil {
r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))

View File

@ -0,0 +1,47 @@
package crawler
import (
"game-crawler/model"
"go.uber.org/zap"
)
// RutrackerLinuxGameCrawler scrapes Linux game torrents from rutracker.org
// by delegating every operation to an embedded RutrackerCrawler configured
// for the Linux games forum.
type RutrackerLinuxGameCrawler struct {
logger *zap.Logger
// crawler does the actual fetching/parsing; configured in the constructor.
crawler RutrackerCrawler
}
// NewRutrackerLinuxGameCrawler builds a crawler for the rutracker.org Linux
// games forum (f=1992). cfClearanceUrl points at the CF-clearance scraper
// service; username/password are rutracker credentials.
func NewRutrackerLinuxGameCrawler(cfClearanceUrl, username, password string, logger *zap.Logger) *RutrackerLinuxGameCrawler {
	inner := NewRutrackerCrawler(
		"https://rutracker.org/forum/tracker.php?f=1992&start=%v",
		"rutracker-linux-game",
		"linux",
		username,
		password,
		cfClearanceUrl,
		logger,
	)
	return &RutrackerLinuxGameCrawler{
		logger:  logger,
		crawler: *inner,
	}
}
// Name returns the unique identifier of this crawler.
// Fixes a copy-paste bug: this crawler previously reported itself as
// "OmgGodsCrawler"; it is the Linux-game crawler, registered in the
// crawler map (and tagged as source) as "rutracker-linux-game".
func (c *RutrackerLinuxGameCrawler) Name() string {
	return "RutrackerLinuxGameCrawler"
}
// CrawlByUrl fetches and parses a single rutracker topic page into a
// GameItem via the embedded RutrackerCrawler.
func (c *RutrackerLinuxGameCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	item, crawlErr := c.crawler.CrawlByUrl(URL)
	return item, crawlErr
}
// Crawl scrapes one listing page (1-based) of the Linux games forum and
// returns the game items found on it.
func (c *RutrackerLinuxGameCrawler) Crawl(page int) ([]*model.GameItem, error) {
	items, crawlErr := c.crawler.Crawl(page)
	return items, crawlErr
}
// CrawlAll walks every listing page of the Linux games forum and returns
// all game items discovered.
func (c *RutrackerLinuxGameCrawler) CrawlAll() ([]*model.GameItem, error) {
	items, crawlErr := c.crawler.CrawlAll()
	return items, crawlErr
}
// GetTotalPageNum reports how many listing pages the Linux games forum
// currently has, as computed by the underlying RutrackerCrawler.
func (c *RutrackerLinuxGameCrawler) GetTotalPageNum() (int, error) {
	pages, err := c.crawler.GetTotalPageNum()
	return pages, err
}

View File

@ -932,7 +932,7 @@ func GetOutdatedGameInfos(maxNum int) ([]*model.GameInfo, error) {
func MergeGameInfo(oldInfo *model.GameInfo, newInfo *model.GameInfo) {
newInfo.ID = oldInfo.ID
newInfo.UpdatedAt = time.Now()
newInfo.GameIDs = oldInfo.GameIDs
newInfo.GameIDs = utils.Unique(append(newInfo.GameIDs, oldInfo.GameIDs...))
newInfo.IGDBID = oldInfo.IGDBID
newInfo.SteamID = oldInfo.SteamID
newInfo.CreatedAt = oldInfo.CreatedAt

View File

@ -65,8 +65,9 @@ type IGDBGameDetail struct {
UpdatedAt int `json:"updated_at"`
Checksum string `json:"checksum"`
} `json:"involved_companies"`
Name string `json:"name"`
Platforms []struct {
Name string `json:"name"`
ParentGame int `json:"parent_game"`
Platforms []struct {
ID int `json:"id"`
Abbreviation string `json:"abbreviation"`
AlternativeName string `json:"alternative_name"`

View File

@ -120,20 +120,6 @@
</div>
{{end}}
{{if .Description}}
<div>
<p>{{.Description}}</p>
</div>
{{end}}
{{if .SteamID}}
<div>
<a href="https://store.steampowered.com/app/{{.SteamID}}" target="_blank" class="btn btn-primary">
Steam
</a>
</div>
{{end}}
{{if .GameEngines}}
<div>
<span class="info-label">Engines:</span>
@ -160,6 +146,20 @@
{{end}}
</div>
{{end}}
{{if .Description}}
<div>
<p>{{.Description}}</p>
</div>
{{end}}
{{if .SteamID}}
<div>
<a href="https://store.steampowered.com/app/{{.SteamID}}" target="_blank" class="btn btn-primary">
Steam
</a>
</div>
{{end}}
</div>
</div>

View File

@ -6,6 +6,8 @@ import (
"go.uber.org/zap"
)
//TODO: IGDB 游戏信息缓存还没有设置有效期
func UpdateOutdatedGameInfos(logger *zap.Logger) {
channel, err := crawler.UpdateGameInfo(10)
count := 0