pcgamedb/task/crawl.go

85 lines
2.3 KiB
Go

package task
import (
"net/url"
"pcgamedb/config"
"pcgamedb/crawler"
"pcgamedb/db"
"pcgamedb/model"
"pcgamedb/utils"
"go.mongodb.org/mongo-driver/bson/primitive"
"go.uber.org/zap"
)
// Crawl runs every crawler returned by crawler.BuildCrawlerMap, logs the
// collected game items, calls Clean, and finally posts the GameInfo records
// that belong to the crawled items to every webhook URL listed in
// config.Config.Webhooks.CrawlTask.
//
// A failing crawler does not abort the run: the failure is logged and whatever
// items that crawler returned are still kept. If the crawled items cannot be
// read back from the database, the webhooks are not triggered at all.
func Crawl(logger *zap.Logger) {
	games := runCrawlers(logger)
	logger.Info("Crawled finished", zap.Int("count", len(games)))
	for _, game := range games {
		logger.Info(
			"Crawled game",
			zap.String("name", game.RawName),
			zap.String("author", game.Author),
			zap.String("url", game.Url),
		)
	}

	Clean(logger)

	// trigger webhooks
	infos, ok := loadCrawledGameInfos(logger, games)
	if !ok {
		return
	}
	notifyCrawlWebhooks(logger, infos)
}

// runCrawlers executes each crawler from crawler.BuildCrawlerMap and returns
// the concatenation of everything they produced.
func runCrawlers(logger *zap.Logger) []*model.GameItem {
	var games []*model.GameItem
	for _, item := range crawler.BuildCrawlerMap(logger) {
		name := item.Name()
		logger.Info("Crawler start", zap.String("crawler", name))

		var (
			found []*model.GameItem
			err   error
		)
		switch c := item.(type) {
		case crawler.PagedCrawler:
			// Paged crawlers are only asked for their first three pages.
			found, err = c.CrawlMulti([]int{1, 2, 3})
		case crawler.SimpleCrawler:
			found, err = c.CrawlAll()
		default:
			// Neither crawling interface is implemented; nothing to run.
			continue
		}

		if err != nil {
			// Warn rather than Debug so a broken crawler stays visible when
			// debug logging is disabled. The run continues regardless.
			logger.Warn("Failed to crawl games", zap.String("crawler", name), zap.Error(err))
		}
		// Items returned alongside an error are kept on purpose (partial results).
		games = append(games, found...)
		logger.Info("Crawler end", zap.String("crawler", name), zap.Int("count", len(found)))
	}
	return games
}

// loadCrawledGameInfos re-reads the given items from the database by ID and
// returns, for each stored item, its GameInfo with that item appended to
// info.Games. Items whose GameInfo lookup fails are logged and skipped.
//
// The second result is false only when the items themselves could not be
// loaded; the caller must then skip the webhooks.
func loadCrawledGameInfos(logger *zap.Logger, games []*model.GameItem) ([]*model.GameInfo, bool) {
	// Always non-nil: a nil slice marshals to JSON `null`, an empty one to `[]`.
	// NOTE(review): assumes utils.Request() JSON-encodes the body, as the
	// Content-Type header set by the caller suggests — confirm.
	infos := make([]*model.GameInfo, 0, len(games))
	if len(games) == 0 {
		return infos, true
	}

	ids := make([]primitive.ObjectID, 0, len(games))
	for _, game := range games {
		ids = append(ids, game.ID)
	}

	items, err := db.GetGameItemsByIDs(ids)
	if err != nil {
		logger.Error("Failed to get game items", zap.Error(err))
		return nil, false
	}

	for _, item := range items {
		info, err := db.GetGameInfoByGameItemID(item.ID)
		if err != nil {
			logger.Error("Failed to get game info", zap.Error(err))
			continue
		}
		info.Games = append(info.Games, item)
		infos = append(infos, info)
	}
	return infos, true
}

// notifyCrawlWebhooks posts infos as the request body to every URL in
// config.Config.Webhooks.CrawlTask. A URL that fails url.Parse is logged and
// skipped; a failed request is logged and the remaining webhooks still run.
func notifyCrawlWebhooks(logger *zap.Logger, infos []*model.GameInfo) {
	for _, u := range config.Config.Webhooks.CrawlTask {
		if _, err := url.Parse(u); err != nil {
			logger.Error("Invalid webhook url", zap.String("url", u), zap.Error(err))
			continue
		}

		logger.Info("webhook triggered", zap.String("task", "crawl"), zap.String("url", u))
		_, err := utils.Request().
			SetHeader("Content-Type", "application/json").
			SetBody(infos).
			Post(u)
		if err != nil {
			logger.Error("Failed to trigger webhook", zap.String("task", "crawl"), zap.String("url", u), zap.Error(err))
		}
	}
}