package task

import (
	"net/http"
	"net/url"

	"pcgamedb/config"
	"pcgamedb/crawler"
	"pcgamedb/db"
	"pcgamedb/model"
	"pcgamedb/utils"

	"go.mongodb.org/mongo-driver/bson/primitive"
	"go.uber.org/zap"
)
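
// Crawl runs every registered crawler, logs what was crawled, runs the
// cleanup task, and finally POSTs the enriched game info to every
// configured crawl-task webhook.
//
// A minimal usage sketch (zap.NewProduction is just one way to build the
// logger; any *zap.Logger works):
//
//	logger, _ := zap.NewProduction()
//	task.Crawl(logger)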
func Crawl(logger *zap.Logger) {
	var games []*model.GameItem
	crawlerMap := crawler.BuildCrawlerMap(logger)

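	// Run every registered crawler. Paged crawlers fetch only the first
	// three pages; simple crawlers fetch everything they expose.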
	for _, item := range crawlerMap {
		logger.Info("Crawler start", zap.String("crawler", item.Name()))
		if c, ok := item.(crawler.PagedCrawler); ok {
			g, err := c.CrawlMulti([]int{1, 2, 3})
			if err != nil {
				logger.Debug("Failed to crawl games", zap.String("crawler", c.Name()), zap.Error(err))
			}
			// A failed crawler does not abort the task: g may hold
			// partial results, and appending a nil slice is a no-op.
			games = append(games, g...)
			logger.Info("Crawler end", zap.String("crawler", c.Name()), zap.Int("count", len(g)))
		} else if c, ok := item.(crawler.SimpleCrawler); ok {
			g, err := c.CrawlAll()
			if err != nil {
				logger.Debug("Failed to crawl games", zap.String("crawler", c.Name()), zap.Error(err))
			}
			games = append(games, g...)
			logger.Info("Crawler end", zap.String("crawler", c.Name()), zap.Int("count", len(g)))
		}
	}

	logger.Info("Crawl finished", zap.Int("count", len(games)))
	for _, game := range games {
		logger.Info(
			"Crawled game",
			zap.String("name", game.RawName),
			zap.String("author", game.Author),
			zap.String("url", game.Url),
		)
	}
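	// Clean is the companion cleanup task defined elsewhere in this
	// package; it runs over the stored data before webhooks fire.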
	Clean(logger)

	// Trigger webhooks: collect the IDs of the crawled games, resolve
	// them to full GameInfo records, and POST those to every configured
	// URL.
	var ids []primitive.ObjectID
	for _, game := range games {
		ids = append(ids, game.ID)
	}

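	// Resolve each crawled item to the GameInfo it belongs to so the
	// webhook payload carries complete records.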
	var infos []*model.GameInfo
	if len(ids) != 0 {
		items, err := db.GetGameItemsByIDs(ids)
		if err != nil {
			logger.Error("Failed to get game items", zap.Error(err))
			return
		}
		for _, game := range items {
			info, err := db.GetGameInfoByGameItemID(game.ID)
			if err != nil {
				logger.Error("Failed to get game info", zap.Error(err))
				continue
			}
			info.Games = append(info.Games, game)
			infos = append(infos, info)
		}
	}

	for _, u := range config.Config.Webhooks.CrawlTask {
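		// url.Parse is lenient, so this only rejects syntactically
		// malformed URLs; unreachable ones surface as fetch errors below.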
		_, err := url.Parse(u)
		if err != nil {
			logger.Error("Invalid webhook url", zap.String("url", u), zap.Error(err))
			continue
		}
		logger.Info("Triggering webhook", zap.String("task", "crawl"), zap.String("url", u))
		_, err = utils.Fetch(utils.FetchConfig{
			Url:    u,
			Method: http.MethodPost,
			Headers: map[string]string{
				"Content-Type": "application/json",
			},
			Data: infos,
		})
		if err != nil {
			logger.Error("Failed to trigger webhook", zap.String("task", "crawl"), zap.String("url", u), zap.Error(err))
		}
	}
}