// Package task contains the scheduled background jobs for pcgamedb.
package task

import (
	"net/http"
	"net/url"

	"pcgamedb/config"
	"pcgamedb/crawler"
	"pcgamedb/db"
	"pcgamedb/model"
	"pcgamedb/utils"

	"go.mongodb.org/mongo-driver/bson/primitive"
	"go.uber.org/zap"
)
func Crawl(logger *zap.Logger) {
var games []*model.GameItem
2024-09-24 06:17:11 -04:00
var crawlerMap = crawler.BuildCrawlerMap(logger)
for _, item := range crawlerMap {
2024-11-18 12:54:26 -05:00
logger.Info("Crawler start", zap.String("crawler", item.Name()))
2024-09-24 06:17:11 -04:00
if c, ok := item.(crawler.PagedCrawler); ok {
g, err := c.CrawlMulti([]int{1, 2, 3})
if err != nil {
2024-11-18 12:54:26 -05:00
logger.Debug("Failed to crawl games", zap.String("crawler", c.Name()), zap.Error(err))
2024-09-24 06:17:11 -04:00
}
games = append(games, g...)
2024-11-18 12:54:26 -05:00
logger.Info("Crawler end", zap.String("crawler", c.Name()), zap.Int("count", len(g)))
2024-09-24 06:17:11 -04:00
} else if c, ok := item.(crawler.SimpleCrawler); ok {
g, err := c.CrawlAll()
if err != nil {
2024-11-18 12:54:26 -05:00
logger.Debug("Failed to crawl games", zap.String("crawler", c.Name()), zap.Error(err))
2024-09-24 06:17:11 -04:00
}
games = append(games, g...)
2024-11-18 12:54:26 -05:00
logger.Info("Crawler end", zap.String("crawler", c.Name()), zap.Int("count", len(g)))
2024-09-24 06:17:11 -04:00
}
}
logger.Info("Crawled finished", zap.Int("count", len(games)))
for _, game := range games {
logger.Info(
"Crawled game",
zap.String("name", game.RawName),
zap.String("author", game.Author),
zap.String("url", game.Url),
)
}
Clean(logger)
// trigger webhooks
var ids []primitive.ObjectID
for _, game := range games {
ids = append(ids, game.ID)
}
items, err := db.GetGameItemsByIDs(ids)
if err != nil {
logger.Error("Failed to get game items", zap.Error(err))
return
}
var infos []*model.GameInfo
for _, game := range items {
info, err := db.GetGameInfoByGameItemID(game.ID)
if err != nil {
logger.Error("Failed to get game info", zap.Error(err))
continue
}
info.Games = append(info.Games, game)
infos = append(infos, info)
}
2024-11-14 05:57:11 -05:00
for _, u := range config.Config.Webhooks.CrawlTask {
_, err := url.Parse(u)
if err != nil {
logger.Error("Invalid webhook url", zap.String("url", u), zap.Error(err))
continue
}
logger.Info("webhook triggered", zap.String("task", "crawl"), zap.String("url", u))
_, err = utils.Fetch(utils.FetchConfig{
Url: u,
Method: http.MethodPost,
Headers: map[string]string{
"Content-Type": "application/json",
},
Data: infos,
2024-11-14 05:57:11 -05:00
})
if err != nil {
logger.Error("Failed to trigger webhook", zap.String("task", "crawl"), zap.String("url", u), zap.Error(err))
}
}
2024-09-24 06:17:11 -04:00
}