u
This commit is contained in:
parent
cd9b7412b8
commit
0d6bae97ab
@ -6,15 +6,18 @@ game-crawler is a powerful command-line tool designed to scrape and manage repac
|
||||
|
||||
- **Data Sources**:
|
||||
|
||||
- KaOSKrew(1337x)
|
||||
- DODI(1337x)
|
||||
- johncena141(1337x)
|
||||
- Fitgirl
|
||||
- KaOSKrew
|
||||
- DODI
|
||||
- FreeGOG
|
||||
- GOGGames
|
||||
- OnlineFix
|
||||
- Xatab
|
||||
- SteamRIP
|
||||
- Chovka
|
||||
- Omg_Gods(rutracker)
|
||||
- LinuxGame(rutracker)
|
||||
|
||||
- **Database**:
|
||||
|
||||
|
@ -34,5 +34,4 @@ const (
|
||||
RutrackerTopicURL = "https://rutracker.org/forum/%s"
|
||||
RutrackerURL = "https://rutracker.org/forum/index.php"
|
||||
RutrackerLoginURL = "https://rutracker.org/forum/login.php"
|
||||
RutrackerAuthorURL = "https://rutracker.org/forum/tracker.php?rid=%s&start=%v"
|
||||
)
|
||||
|
@ -26,17 +26,18 @@ type PagedCrawler interface {
|
||||
|
||||
func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
|
||||
ret := map[string]Crawler{
|
||||
"fitgirl": NewFitGirlCrawler(logger),
|
||||
"dodi": NewDODICrawler(logger),
|
||||
"kaoskrew": NewKaOsKrewCrawler(logger),
|
||||
"freegog": NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
|
||||
"xatab": NewXatabCrawler(logger),
|
||||
"onlinefix": NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
|
||||
"steamrip": NewSteamRIPCrawler(logger),
|
||||
"chovka": NewChovkaCrawler(logger),
|
||||
"goggames": NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
|
||||
"johncena141": NewJohncena141Crawler(logger),
|
||||
"omggods": NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
|
||||
"fitgirl": NewFitGirlCrawler(logger),
|
||||
"dodi": NewDODICrawler(logger),
|
||||
"kaoskrew": NewKaOsKrewCrawler(logger),
|
||||
"freegog": NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
|
||||
"xatab": NewXatabCrawler(logger),
|
||||
"onlinefix": NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
|
||||
"steamrip": NewSteamRIPCrawler(logger),
|
||||
"chovka": NewChovkaCrawler(logger),
|
||||
"goggames": NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
|
||||
"johncena141": NewJohncena141Crawler(logger),
|
||||
"omggods": NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
|
||||
"rutracker-linux-game": NewRutrackerLinuxGameCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
|
||||
// "gnarly": NewGnarlyCrawler(logger),
|
||||
}
|
||||
return ret
|
||||
|
@ -108,7 +108,7 @@ func getIGDBID(name string) (int, error) {
|
||||
maxSimilarityIndex := 0
|
||||
for i, item := range data {
|
||||
if strings.EqualFold(item.Name, name) {
|
||||
return item.Game, nil
|
||||
return GetIGDBAppParent(item.Game)
|
||||
}
|
||||
if sim := utils.Similarity(name, item.Name); sim >= 0.8 {
|
||||
if sim > maxSimilarity {
|
||||
@ -229,6 +229,13 @@ func GetIGDBAppParent(id int) (int, error) {
|
||||
return 0, fmt.Errorf("failed to fetch IGDB app detail for parent: %d: %w", id, err)
|
||||
}
|
||||
hasParent := false
|
||||
if detail.ParentGame != 0 {
|
||||
hasParent = true
|
||||
detail, err = GetIGDBAppDetail(detail.ParentGame)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to fetch IGDB version parent: %d: %w", detail.VersionParent, err)
|
||||
}
|
||||
}
|
||||
for detail.VersionParent != 0 {
|
||||
hasParent = true
|
||||
detail, err = GetIGDBAppDetail(detail.VersionParent)
|
||||
|
@ -2,8 +2,6 @@ package crawler
|
||||
|
||||
import (
|
||||
"game-crawler/model"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
@ -17,13 +15,12 @@ func NewOmgGodsCrawler(cfClearanceUrl, username, password string, logger *zap.Lo
|
||||
return &OmgGodsCrawler{
|
||||
logger: logger,
|
||||
crawler: *NewRutrackerCrawler(
|
||||
"https://rutracker.org/forum/tracker.php?rid=8994327&start=%v",
|
||||
"OmgGods",
|
||||
"switch",
|
||||
"8994327",
|
||||
username,
|
||||
password,
|
||||
cfClearanceUrl,
|
||||
omgGodsFormatter,
|
||||
logger,
|
||||
),
|
||||
}
|
||||
@ -48,21 +45,3 @@ func (c *OmgGodsCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
func (c *OmgGodsCrawler) GetTotalPageNum() (int, error) {
|
||||
return c.crawler.GetTotalPageNum()
|
||||
}
|
||||
|
||||
// omgGodsFormatRegex lists the patterns stripped from a raw topic title:
// any "(...)" group and any "[...]" group (both matched non-greedily).
var omgGodsFormatRegex = []*regexp.Regexp{
	regexp.MustCompile(`\(.*?\)`),
	regexp.MustCompile(`\[.*?\]`),
}

// omgGodsFormatter derives a clean game name from a raw topic title.
//
// It removes every parenthesised and bracketed group, then keeps only the
// text before the first " + " separator and, after that, only the text before
// the first " / " separator. Surrounding whitespace is trimmed from the result.
func omgGodsFormatter(name string) string {
	for _, regex := range omgGodsFormatRegex {
		name = regex.ReplaceAllString(name, "")
	}
	// Cut at the first occurrence of each separator, in this order; slicing at
	// the index yields the same prefix as Split(...)[0] without allocating.
	for _, sep := range []string{" + ", " / "} {
		if i := strings.Index(name, sep); i >= 0 {
			name = name[:i]
		}
	}
	return strings.TrimSpace(name)
}
|
||||
|
@ -11,7 +11,9 @@ import (
|
||||
"game-crawler/model"
|
||||
"game-crawler/utils"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
|
||||
@ -24,20 +26,18 @@ import (
|
||||
|
||||
type RutrackerCrawler struct {
|
||||
source string
|
||||
rid string
|
||||
url string
|
||||
platform string
|
||||
username string
|
||||
password string
|
||||
formatter FormatterFunc
|
||||
logger *zap.Logger
|
||||
cfClearanceUrl string
|
||||
}
|
||||
|
||||
func NewRutrackerCrawler(source, platform, rid, username, password, cfClearanceUrl string, formatter FormatterFunc, logger *zap.Logger) *RutrackerCrawler {
|
||||
func NewRutrackerCrawler(url, source, platform, username, password, cfClearanceUrl string, logger *zap.Logger) *RutrackerCrawler {
|
||||
return &RutrackerCrawler{
|
||||
source: source,
|
||||
rid: rid,
|
||||
formatter: formatter,
|
||||
url: url,
|
||||
logger: logger,
|
||||
platform: platform,
|
||||
username: username,
|
||||
@ -118,6 +118,11 @@ func (r *RutrackerCrawler) getSession() (*ccs.Session, error) {
|
||||
return &session, nil
|
||||
}
|
||||
|
||||
// regexps lists the patterns removed from a topic title when it is used as a
// fallback game name: any "(...)" group and any "[...]" group, both matched
// non-greedily.
var regexps = []*regexp.Regexp{
	regexp.MustCompile(`\(.*?\)`),
	regexp.MustCompile(`\[.*?\]`),
}
|
||||
|
||||
func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
r.logger.Info("Crawling game", zap.String("URL", URL))
|
||||
session, err := r.getSession()
|
||||
@ -143,8 +148,25 @@ func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
return nil, fmt.Errorf("failed to get game item by url: %w", err)
|
||||
}
|
||||
|
||||
name := doc.Find(".post_body").First().Find("span").First().Text()
|
||||
if strings.TrimSpace(name) == "" {
|
||||
name = doc.Find(".post_body").First().Find("span").Eq(1).Text()
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
if strings.TrimSpace(name) == "" {
|
||||
name = doc.Find("#topic-title").Text()
|
||||
for _, regex := range regexps {
|
||||
name = regex.ReplaceAllString(name, "")
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
if name == "" {
|
||||
r.logger.Error("Failed to find name")
|
||||
return nil, fmt.Errorf("failed to find name")
|
||||
}
|
||||
}
|
||||
|
||||
item.RawName = doc.Find("#topic-title").Text()
|
||||
item.Name = r.formatter(item.RawName)
|
||||
item.Name = name
|
||||
item.Author = r.source
|
||||
item.Platform = r.platform
|
||||
item.Url = URL
|
||||
@ -173,14 +195,13 @@ func (r *RutrackerCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
}
|
||||
|
||||
func (r *RutrackerCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
r.logger.Info("Crawling Rutracker", zap.Int("page", page), zap.String("rid", r.rid))
|
||||
session, err := r.getSession()
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to get session", zap.Error(err))
|
||||
return nil, fmt.Errorf("failed to get session: %w", err)
|
||||
}
|
||||
|
||||
URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, (page-1)*50)
|
||||
URL := fmt.Sprintf(r.url, (page-1)*50)
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
|
||||
@ -258,7 +279,7 @@ func (r *RutrackerCrawler) GetTotalPageNum() (int, error) {
|
||||
return 0, fmt.Errorf("failed to get session: %w", err)
|
||||
}
|
||||
|
||||
URL := fmt.Sprintf(constant.RutrackerAuthorURL, r.rid, 0)
|
||||
URL := fmt.Sprintf(r.url, 0)
|
||||
resp, err := ccs.RequestWithWAFSession(http.MethodGet, URL, *session, nil)
|
||||
if err != nil {
|
||||
r.logger.Error("Failed to request URL", zap.String("url", URL), zap.Error(err))
|
||||
|
47
crawler/rutracker_linux_game.go
Normal file
47
crawler/rutracker_linux_game.go
Normal file
@ -0,0 +1,47 @@
|
||||
package crawler
|
||||
|
||||
import (
|
||||
"game-crawler/model"
|
||||
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
type RutrackerLinuxGameCrawler struct {
|
||||
logger *zap.Logger
|
||||
crawler RutrackerCrawler
|
||||
}
|
||||
|
||||
func NewRutrackerLinuxGameCrawler(cfClearanceUrl, username, password string, logger *zap.Logger) *RutrackerLinuxGameCrawler {
|
||||
return &RutrackerLinuxGameCrawler{
|
||||
logger: logger,
|
||||
crawler: *NewRutrackerCrawler(
|
||||
"https://rutracker.org/forum/tracker.php?f=1992&start=%v",
|
||||
"rutracker-linux-game",
|
||||
"linux",
|
||||
username,
|
||||
password,
|
||||
cfClearanceUrl,
|
||||
logger,
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func (c *RutrackerLinuxGameCrawler) Name() string {
|
||||
return "OmgGodsCrawler"
|
||||
}
|
||||
|
||||
func (c *RutrackerLinuxGameCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
|
||||
return c.crawler.CrawlByUrl(URL)
|
||||
}
|
||||
|
||||
func (c *RutrackerLinuxGameCrawler) Crawl(page int) ([]*model.GameItem, error) {
|
||||
return c.crawler.Crawl(page)
|
||||
}
|
||||
|
||||
func (c *RutrackerLinuxGameCrawler) CrawlAll() ([]*model.GameItem, error) {
|
||||
return c.crawler.CrawlAll()
|
||||
}
|
||||
|
||||
func (c *RutrackerLinuxGameCrawler) GetTotalPageNum() (int, error) {
|
||||
return c.crawler.GetTotalPageNum()
|
||||
}
|
@ -932,7 +932,7 @@ func GetOutdatedGameInfos(maxNum int) ([]*model.GameInfo, error) {
|
||||
func MergeGameInfo(oldInfo *model.GameInfo, newInfo *model.GameInfo) {
|
||||
newInfo.ID = oldInfo.ID
|
||||
newInfo.UpdatedAt = time.Now()
|
||||
newInfo.GameIDs = oldInfo.GameIDs
|
||||
newInfo.GameIDs = utils.Unique(append(newInfo.GameIDs, oldInfo.GameIDs...))
|
||||
newInfo.IGDBID = oldInfo.IGDBID
|
||||
newInfo.SteamID = oldInfo.SteamID
|
||||
newInfo.CreatedAt = oldInfo.CreatedAt
|
||||
|
@ -65,8 +65,9 @@ type IGDBGameDetail struct {
|
||||
UpdatedAt int `json:"updated_at"`
|
||||
Checksum string `json:"checksum"`
|
||||
} `json:"involved_companies"`
|
||||
Name string `json:"name"`
|
||||
Platforms []struct {
|
||||
Name string `json:"name"`
|
||||
ParentGame int `json:"parent_game"`
|
||||
Platforms []struct {
|
||||
ID int `json:"id"`
|
||||
Abbreviation string `json:"abbreviation"`
|
||||
AlternativeName string `json:"alternative_name"`
|
||||
|
@ -120,20 +120,6 @@
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .Description}}
|
||||
<div>
|
||||
<p>{{.Description}}</p>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .SteamID}}
|
||||
<div>
|
||||
<a href="https://store.steampowered.com/app/{{.SteamID}}" target="_blank" class="btn btn-primary">
|
||||
Steam
|
||||
</a>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .GameEngines}}
|
||||
<div>
|
||||
<span class="info-label">Engines:</span>
|
||||
@ -160,6 +146,20 @@
|
||||
{{end}}
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .Description}}
|
||||
<div>
|
||||
<p>{{.Description}}</p>
|
||||
</div>
|
||||
{{end}}
|
||||
|
||||
{{if .SteamID}}
|
||||
<div>
|
||||
<a href="https://store.steampowered.com/app/{{.SteamID}}" target="_blank" class="btn btn-primary">
|
||||
Steam
|
||||
</a>
|
||||
</div>
|
||||
{{end}}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
@ -6,6 +6,8 @@ import (
|
||||
"go.uber.org/zap"
|
||||
)
|
||||
|
||||
// TODO: the IGDB game info cache does not have an expiration time set yet.
|
||||
|
||||
func UpdateOutdatedGameInfos(logger *zap.Logger) {
|
||||
channel, err := crawler.UpdateGameInfo(10)
|
||||
count := 0
|
||||
|
Loading…
Reference in New Issue
Block a user