fix CleanOrphanGamesInGameInfos

mod SearchGameInfos
fix trigger body missing data
disable goggames crawler
This commit is contained in:
Nite07 2024-11-18 20:21:08 +08:00
parent ebac45ccd2
commit f8e3265a76
11 changed files with 309 additions and 81 deletions

View File

@ -4,9 +4,8 @@ const (
C1337xBaseURL = "https://www.1337x.to"
FreeGOGListURL = "https://freegogpcgames.com/a-z-games-list"
GOGGamesBaseURL = "https://www.gog-games.to"
GOGGamesURL = "https://www.gog-games.to/search/all/%v/date/desc/any"
GOGSearchURL = "https://embed.gog.com/games/ajax/filtered"
GOGDetailsURL = "https://api.gog.com/products"
GOGGamesURL = "https://www.gog-games.to/search?page=%v&search=&is_new=false&is_updated=true&in_dev_filter=none&sort_by=last_update_desc"
GOGGamesPageURL = "https://www.gog-games.to/api/v1/games/%s"
SteamSearchURL = "https://store.steampowered.com/search"
SteamAppDetailURL = "https://store.steampowered.com/api/appdetails"
SteamAllAppsURL = "https://api.steampowered.com/ISteamApps/GetAppList/v2/?format=json"

View File

@ -105,7 +105,7 @@ func (c *ARMGDDNCrawler) crawlGames(data []GameData, platform string, num int) (
}
c.logger.Info("Crawling", zap.String("url", u))
walker := c.conn.Walk(path)
size := int64(0)
size := uint64(0)
for walker.Next() {
if walker.Stat().Type == ftp.EntryTypeFile {
fileSize, err := c.conn.FileSize(walker.Path())
@ -113,7 +113,7 @@ func (c *ARMGDDNCrawler) crawlGames(data []GameData, platform string, num int) (
c.logger.Warn("file size error", zap.Error(err))
break
}
size += fileSize
size += uint64(fileSize)
}
}
item, err := db.GetGameItemByUrl(u)
@ -123,7 +123,7 @@ func (c *ARMGDDNCrawler) crawlGames(data []GameData, platform string, num int) (
item.Url = u
item.Name = ARMGDDNFormatter(v.FolderName)
item.UpdateFlag = updateFlag
item.Size = utils.FormatSize(size)
item.Size = utils.BytesToSize(size)
item.RawName = v.FolderName
item.Author = "ARMGDDN"
item.Download = fmt.Sprintf("ftpes://%s:%s@%s/%s/%s", ftpUsername, ftpPassword, ftpAddress, platform, url.QueryEscape(v.FolderName))

View File

@ -32,8 +32,8 @@ func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
"onlinefix": NewOnlineFixCrawler(logger),
"steamrip": NewSteamRIPCrawler(logger),
// "armgddn": NewARMGDDNCrawler(logger),
"goggames": NewGOGGamesCrawler(logger),
"chovka": NewChovkaCrawler(logger),
// "goggames": NewGOGGamesCrawler(logger),
"chovka": NewChovkaCrawler(logger),
// "gnarly": NewGnarlyCrawler(logger),
}
}

View File

@ -1,19 +1,16 @@
package crawler
import (
"bytes"
"errors"
"encoding/json"
"fmt"
"regexp"
"strconv"
"strings"
"time"
"github.com/nitezs/pcgamedb/constant"
"github.com/nitezs/pcgamedb/db"
"github.com/nitezs/pcgamedb/model"
"github.com/nitezs/pcgamedb/utils"
"github.com/PuerkitoBio/goquery"
"go.uber.org/zap"
)
@ -38,33 +35,37 @@ func (c *GOGGamesCrawler) CrawlByUrl(url string) (*model.GameItem, error) {
if err != nil {
return nil, err
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data))
data := gameResult{}
err = json.Unmarshal(resp.Data, &data)
if err != nil {
return nil, err
}
name := strings.TrimSpace(doc.Find("#game-details>.container>h1").First().Text())
magnetRegex := regexp.MustCompile(`magnet:\?[^"]*`)
magnetRegexRes := magnetRegex.FindString(string(resp.Data))
if magnetRegexRes == "" {
return nil, errors.New("magnet not found")
name := data.Title
links := make([]string, 0)
for _, link := range data.Links.Game.Gofile.Links {
links = append(links, link.Link)
}
sizeStrs := make([]string, 0)
doc.Find(".container>.items-group").First().Find(".filesize").Each(func(i int, s *goquery.Selection) {
sizeStrs = append(sizeStrs, s.Text())
})
size, err := utils.SubSizeStrings(sizeStrs)
if err != nil {
return nil, err
if len(data.Links.Patch.Gofile.Links) > 0 {
for _, link := range data.Links.Patch.Gofile.Links {
links = append(links, link.Link)
}
}
size := uint64(0)
for _, file := range data.Files.Game {
s, _ := utils.SizeToBytes(file.Size)
size += s
}
item, err := db.GetGameItemByUrl(url)
if err != nil {
return nil, err
}
item.Name = name
item.RawName = name
item.Download = magnetRegexRes
item.Download = strings.Join(links, ",")
item.Url = url
item.Size = size
item.Size = utils.BytesToSize(size)
item.Author = "GOGGames"
return item, nil
}
@ -76,21 +77,23 @@ func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
if err != nil {
return nil, err
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data))
data := searchResult{}
err = json.Unmarshal(resp.Data, &data)
if err != nil {
return nil, err
}
urls := make([]string, 0)
doc.Find(".game-blocks>a").Each(func(i int, s *goquery.Selection) {
u, exist := s.Attr("href")
if !exist {
return
}
urls = append(urls, fmt.Sprintf("%s%s", constant.GOGGamesBaseURL, u))
})
updateFlags := []string{} //link+date
for _, item := range data.Data {
urls = append(urls, fmt.Sprintf(constant.GOGGamesPageURL, item.Slug))
updateFlags = append(updateFlags, fmt.Sprintf("%s%s", item.GogURL, item.LastUpdate))
}
res := make([]*model.GameItem, 0)
for _, u := range urls {
for i, u := range urls {
c.logger.Info("Crawling", zap.String("URL", u))
if db.IsGameCrawled(updateFlags[i], "GOGGames") {
continue
}
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
@ -149,10 +152,191 @@ func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
if err != nil {
return 0, err
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Data))
data := searchResult{}
err = json.Unmarshal(resp.Data, &data)
if err != nil {
return 0, err
}
btns := doc.Find(".pagination>.btn")
return strconv.Atoi(strings.TrimSpace(btns.Eq(btns.Length() - 2).Text()))
return data.Meta.LastPage, nil
}
// searchResultEntry is one game row in the gog-games.to search listing.
type searchResultEntry struct {
	ID          string    `json:"id"`
	Slug        string    `json:"slug"`
	Title       string    `json:"title"`
	Image       string    `json:"image"`
	Background  string    `json:"background"`
	GogURL      string    `json:"gog_url"`
	IsIndev     bool      `json:"is_indev"`
	IsNew       bool      `json:"is_new"`
	IsUpdated   bool      `json:"is_updated"`
	IsQueued    bool      `json:"is_queued"`
	IsUploading bool      `json:"is_uploading"`
	VotedOn     bool      `json:"voted_on"`
	LastUpdate  time.Time `json:"last_update"`
	Md5Filename string    `json:"md5_filename"`
	Infohash    string    `json:"infohash"`
	IsVotable   bool      `json:"is_votable"`
}

// searchResult models the paginated JSON response of the gog-games.to
// search endpoint: the matching games plus pagination links/metadata.
type searchResult struct {
	Data []searchResultEntry `json:"data"`
	// Links carries the absolute pagination URLs of the response.
	Links struct {
		First string `json:"first"`
		Last  string `json:"last"`
		Prev  any    `json:"prev"`
		Next  string `json:"next"`
	} `json:"links"`
	// Meta holds the page-window bookkeeping; LastPage is what
	// GetTotalPageNum reports.
	Meta struct {
		CurrentPage int `json:"current_page"`
		From        int `json:"from"`
		LastPage    int `json:"last_page"`
		Links       []struct {
			URL    any    `json:"url"`
			Label  string `json:"label"`
			Active bool   `json:"active"`
		} `json:"links"`
		Path    string `json:"path"`
		PerPage int    `json:"per_page"`
		To      int    `json:"to"`
		Total   int    `json:"total"`
	} `json:"meta"`
}
// hostLink is a single labeled download URL on a file host.
type hostLink struct {
	Label string `json:"label"`
	Link  string `json:"link"`
}

// hostEntry describes one file hoster's upload: its id, display name,
// and the download links it exposes.
type hostEntry struct {
	ID    string     `json:"id"`
	Name  string     `json:"name"`
	Links []hostLink `json:"links"`
}

// hostGroup groups the supported file hosters for one link category
// (goodie / game / patch). Every category exposes the same four hosts.
type hostGroup struct {
	OneFichier hostEntry `json:"1fichier"`
	Vikingfile hostEntry `json:"vikingfile"`
	Pixeldrain hostEntry `json:"pixeldrain"`
	Gofile     hostEntry `json:"gofile"`
}

// gameFile is a single archive listed for a game; Size is a
// human-readable string (parsed with utils.SizeToBytes by the crawler).
type gameFile struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Size string `json:"size"`
}

// gameResult models the gog-games.to per-game API response
// (GOGGamesPageURL). CrawlByUrl reads Title, Links.Game/Patch.Gofile,
// and Files.Game from it.
type gameResult struct {
	ID          string    `json:"id"`
	Slug        string    `json:"slug"`
	Title       string    `json:"title"`
	Image       string    `json:"image"`
	Background  string    `json:"background"`
	GogURL      string    `json:"gog_url"`
	IsIndev     bool      `json:"is_indev"`
	IsNew       bool      `json:"is_new"`
	IsUpdated   bool      `json:"is_updated"`
	IsQueued    bool      `json:"is_queued"`
	IsUploading bool      `json:"is_uploading"`
	VotedOn     bool      `json:"voted_on"`
	LastUpdate  time.Time `json:"last_update"`
	Md5Filename string    `json:"md5_filename"`
	Infohash    string    `json:"infohash"`
	// Links holds the download locations, split by category.
	Links struct {
		Goodie hostGroup `json:"goodie"`
		Game   hostGroup `json:"game"`
		Patch  hostGroup `json:"patch"`
	} `json:"links"`
	// Files lists the archives per category; Game sizes are summed to
	// compute the item size.
	Files struct {
		Game   []gameFile `json:"game"`
		Goodie []gameFile `json:"goodie"`
		Patch  []gameFile `json:"patch"`
	} `json:"files"`
	IsVotable bool `json:"is_votable"`
}

View File

@ -115,12 +115,12 @@ func (c *SteamRIPCrawler) Crawl(num int) ([]*model.GameItem, error) {
c.logger.Info("Crawling", zap.String("URL", u))
item, err := c.CrawlByUrl(u)
if err != nil {
c.logger.Error("Failed to crawl", zap.Error(err), zap.String("URL", u))
c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
continue
}
item.UpdateFlag = updateFlags[i]
if err := db.SaveGameItem(item); err != nil {
c.logger.Error("Failed to save item", zap.Error(err))
c.logger.Warn("Failed to save item", zap.Error(err))
continue
}
items = append(items, item)

View File

@ -16,15 +16,15 @@ import (
)
const (
gameDownloadCollectionName = "games"
gameInfoCollectionName = "game_infos"
gameItemCollectionName = "games"
gameInfoCollectionName = "game_infos"
)
var (
mongoDB *mongo.Client
mutx = &sync.RWMutex{}
GameItemCollection = &CustomCollection{
collName: gameDownloadCollectionName,
collName: gameItemCollectionName,
}
GameInfoCollection = &CustomCollection{
collName: gameInfoCollectionName,
@ -58,7 +58,7 @@ func connect() {
log.Logger.Info("Connected to MongoDB")
mongoDB = client
gameDownloadCollection := mongoDB.Database(config.Config.Database.Database).Collection(gameDownloadCollectionName)
gameDownloadCollection := mongoDB.Database(config.Config.Database.Database).Collection(gameItemCollectionName)
gameInfoCollection := mongoDB.Database(config.Config.Database.Database).Collection(gameInfoCollectionName)
nameIndex := mongo.IndexModel{

View File

@ -21,7 +21,7 @@ import (
)
var (
removeDelimiter = regexp.MustCompile(`[:\-\+]`)
removeNoneAlphaNumeric = regexp.MustCompile(`^[A-Za-z0-9]`)
removeRepeatingSpacesRegex = regexp.MustCompile(`\s+`)
)
@ -221,7 +221,7 @@ func GetGameItemsByIDs(ids []primitive.ObjectID) ([]*model.GameItem, error) {
func SearchGameInfos(name string, page int, pageSize int) ([]*model.GameInfo, int, error) {
var items []*model.GameInfo
name = removeDelimiter.ReplaceAllString(name, " ")
name = removeNoneAlphaNumeric.ReplaceAllString(name, " ")
name = removeRepeatingSpacesRegex.ReplaceAllString(name, " ")
name = strings.TrimSpace(name)
name = strings.Replace(name, " ", ".*", -1)
@ -435,7 +435,7 @@ func CleanOrphanGamesInGameInfos() (map[primitive.ObjectID]primitive.ObjectID, e
pipeline := mongo.Pipeline{
bson.D{{Key: "$unwind", Value: "$games"}},
bson.D{{Key: "$lookup", Value: bson.D{
{Key: "from", Value: GameItemCollection},
{Key: "from", Value: gameItemCollectionName},
{Key: "localField", Value: "games"},
{Key: "foreignField", Value: "_id"},
{Key: "as", Value: "gameDownloads"},

View File

@ -10,7 +10,7 @@ import (
)
type SearchGamesRequest struct {
Keyword string `form:"keyword" json:"keyword" binding:"required,min=4,max=64"`
Keyword string `form:"keyword" json:"keyword" binding:"required,min=1,max=64"`
Page int `form:"page" json:"page"`
PageSize int `form:"page_size" json:"page_size"`
}

View File

@ -24,12 +24,14 @@ func Crawl(logger *zap.Logger) {
logger.Warn("Failed to crawl games", zap.String("crawler", c.Name()), zap.Error(err))
}
games = append(games, g...)
logger.Info("Crawled games", zap.String("crawler", c.Name()), zap.Int("count", len(g)))
} else if c, ok := item.(crawler.SimpleCrawler); ok {
g, err := c.CrawlAll()
if err != nil {
logger.Warn("Failed to crawl games", zap.String("crawler", c.Name()), zap.Error(err))
}
games = append(games, g...)
logger.Info("Crawled games", zap.String("crawler", c.Name()), zap.Int("count", len(g)))
}
}
logger.Info("Crawled finished", zap.Int("count", len(games)))
@ -44,7 +46,7 @@ func Crawl(logger *zap.Logger) {
Clean(logger)
// trigger webhooks
infos := make([]*model.GameInfo, len(games))
infos := []*model.GameInfo{}
for _, game := range games {
info, err := db.GetGameInfoByGameItemID(game.ID)
if err != nil {

View File

@ -2,7 +2,6 @@ package utils
import (
"bytes"
"fmt"
"strconv"
"strings"
@ -18,39 +17,21 @@ func ConvertTorrentToMagnet(torrent []byte) (string, string, error) {
if err != nil {
return "", "", err
}
var size int64 = info.Length
var size uint64 = uint64(info.Length)
if size == 0 {
for _, file := range info.Files {
size += file.Length
size += uint64(file.Length)
}
}
infoHash := minfo.HashInfoBytes()
magnet := minfo.Magnet(&infoHash, &info)
return magnet.String(), FormatSize(size), nil
}
func FormatSize(size int64) string {
const (
_ = iota
KB int64 = 1 << (10 * iota)
MB
GB
TB
)
switch {
case size >= GB:
return fmt.Sprintf("%.1f GB", float64(size)/float64(GB))
case size >= MB:
return fmt.Sprintf("%.1f MB", float64(size)/float64(MB))
case size >= KB:
return fmt.Sprintf("%.1f KB", float64(size)/float64(KB))
default:
return fmt.Sprintf("%d Bytes", size)
magnet, err := minfo.MagnetV2()
if err != nil {
return "", "", err
}
return magnet.String(), BytesToSize(size), nil
}
func SubSizeStrings(sizes []string) (string, error) {
size := int64(0)
size := uint64(0)
for _, sizeStr := range sizes {
sizeStr := strings.ToLower(sizeStr)
if strings.Contains(sizeStr, "gb") {
@ -60,7 +41,7 @@ func SubSizeStrings(sizes []string) (string, error) {
if err != nil {
return "", err
}
size += int64(addSize * 1024 * 1024 * 1024)
size += uint64(addSize * 1024 * 1024 * 1024)
}
if strings.Contains(sizeStr, "mb") {
sizeStr = strings.ReplaceAll(sizeStr, "mb", "")
@ -69,7 +50,7 @@ func SubSizeStrings(sizes []string) (string, error) {
if err != nil {
return "", err
}
size += int64(addSize * 1024 * 1024)
size += uint64(addSize * 1024 * 1024)
}
if strings.Contains(sizeStr, "kb") {
sizeStr = strings.ReplaceAll(sizeStr, "kb", "")
@ -78,8 +59,8 @@ func SubSizeStrings(sizes []string) (string, error) {
if err != nil {
return "", err
}
size += int64(addSize * 1024)
size += uint64(addSize * 1024)
}
}
return FormatSize(size), nil
return BytesToSize(size), nil
}

62
utils/size.go Normal file
View File

@ -0,0 +1,62 @@
package utils
import (
"fmt"
"strconv"
"strings"
)
// SizeToBytes parses a human-readable size string such as "1.5 GB" or
// "512 KB" (case-insensitive, optional whitespace between the number
// and the unit) and returns the size in bytes (1 KB = 1024 B).
//
// BUG FIX: the previous implementation ranged over a map of unit
// suffixes. Go map iteration order is unspecified, and "B" is a suffix
// of "KB"/"MB"/"GB"/"TB", so an input like "10 KB" could
// nondeterministically match "B" first and then fail to parse "10 K"
// as a number. Units are now checked in a fixed, longest-first order.
func SizeToBytes(size string) (uint64, error) {
	size = strings.TrimSpace(strings.ToUpper(size))
	// Longest suffixes first so the bare "B" never shadows a longer unit.
	units := []struct {
		suffix string
		factor uint64
	}{
		{"TB", 1024 * 1024 * 1024 * 1024},
		{"GB", 1024 * 1024 * 1024},
		{"MB", 1024 * 1024},
		{"KB", 1024},
		{"B", 1},
	}
	for _, u := range units {
		if !strings.HasSuffix(size, u.suffix) {
			continue
		}
		numStr := strings.TrimSpace(strings.TrimSuffix(size, u.suffix))
		value, err := strconv.ParseFloat(numStr, 64)
		if err != nil {
			return 0, err
		}
		return uint64(value * float64(u.factor)), nil
	}
	return 0, fmt.Errorf("invalid unit in size: %s", size)
}
// BytesToSize formats a byte count as a human-readable string using
// binary units (1 KB = 1024 B), e.g. 1536 -> "1.5 KB".
//
// BUG FIX: the constant TB was declared but the switch had no TB case,
// so sizes of 1 TB and above were reported as thousands of GB.
func BytesToSize(size uint64) string {
	const (
		_         = iota
		KB uint64 = 1 << (10 * iota)
		MB
		GB
		TB
	)
	switch {
	case size >= TB:
		return fmt.Sprintf("%.1f TB", float64(size)/float64(TB))
	case size >= GB:
		return fmt.Sprintf("%.1f GB", float64(size)/float64(GB))
	case size >= MB:
		return fmt.Sprintf("%.1f MB", float64(size)/float64(MB))
	case size >= KB:
		return fmt.Sprintf("%.1f KB", float64(size)/float64(KB))
	default:
		return fmt.Sprintf("%d Bytes", size)
	}
}