// game-crawler/crawler/crawler.go
// 2024-12-29 13:17:04 +08:00
//
// 147 lines
// 4.7 KiB
// Go

package crawler
import (
"fmt"
"game-crawler/config"
"game-crawler/model"
"regexp"
"go.mongodb.org/mongo-driver/bson/primitive"
"go.uber.org/zap"
)
// Crawler is the method set shared by every crawler registered in
// BuildCrawlerMap.
type Crawler interface {
	// Name returns the crawler's name as a string.
	Name() string

	// Crawl runs a crawl for the given number and returns the resulting game
	// items, or an error.
	// NOTE(review): BaseLogger.LogCrawlStart labels the matching value
	// "Page/Num", so num is presumably a page index or an item count
	// depending on the implementation — confirm against implementations.
	Crawl(num int) ([]*model.GameItem, error)

	// CrawlAll returns game items without taking any selector argument.
	// NOTE(review): presumably crawls everything the source offers — confirm.
	CrawlAll() ([]*model.GameItem, error)

	// CrawlByUrl returns the single game item found at url, or an error.
	CrawlByUrl(url string) (*model.GameItem, error)
}
// SimpleCrawler has exactly the Crawler method set; it embeds Crawler and
// adds no methods of its own.
type SimpleCrawler interface{ Crawler }
// PagedCrawler is a Crawler that additionally exposes multi-page crawling and
// a total page count.
type PagedCrawler interface {
	Crawler

	// CrawlMulti takes a slice of ints and returns the collected game items,
	// or an error.
	// NOTE(review): the ints are presumably page numbers — confirm.
	CrawlMulti(pages []int) ([]*model.GameItem, error)

	// GetTotalPageNum returns the total page number, or an error.
	GetTotalPageNum() (total int, err error)
}
// BuildCrawlerMap constructs one crawler per supported source and returns
// them keyed by source name. Every crawler receives the given logger; some
// constructors are additionally passed values read from config.Config (the
// CF clearance scraper URL and the OnlineFix / Rutracker credentials).
func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
	crawlers := make(map[string]Crawler, 13)

	// Constructors are invoked in the same order as the registry is listed.
	crawlers["fitgirl"] = NewFitGirlCrawler(logger)
	crawlers["dodi"] = NewDODICrawler(logger)
	crawlers["kaoskrew"] = NewKaOsKrewCrawler(logger)
	crawlers["freegog"] = NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger)
	crawlers["xatab"] = NewXatabCrawler(logger)
	crawlers["onlinefix"] = NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger)
	crawlers["steamrip"] = NewSteamRIPCrawler(logger)
	crawlers["chovka"] = NewChovkaCrawler(logger)
	crawlers["goggames"] = NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger)
	crawlers["johncena141"] = NewJohncena141Crawler(logger)
	crawlers["omggods"] = NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger)
	crawlers["rutracker-linux-game"] = NewRutrackerLinuxGameCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger)
	crawlers["nxbrew"] = NewNxbrewCrawler(config.Config.CFClearanceScraper.Url, logger)
	// Disabled entry, kept for reference:
	// crawlers["gnarly"] = NewGnarlyCrawler(logger)

	return crawlers
}
// BaseLogger wraps a zap logger and provides the fixed set of log statements
// defined below, each with a constant message and structured fields.
type BaseLogger struct {
	logger *zap.Logger
}

// LogCrawlStart logs, at info level, that a crawl is starting; page is
// recorded under the "Page/Num" key.
func (b *BaseLogger) LogCrawlStart(page int) {
	pageField := zap.Int("Page/Num", page)
	b.logger.Info("Starting Crawl", pageField)
}

// LogCrawlByUrlStart logs, at info level, that a crawl of URL is starting.
func (b *BaseLogger) LogCrawlByUrlStart(URL string) {
	urlField := zap.String("URL", URL)
	b.logger.Info("Starting CrawlByUrl", urlField)
}

// LogCrawlSkip logs, at info level, that URL is skipped as already crawled.
func (b *BaseLogger) LogCrawlSkip(URL string) {
	urlField := zap.String("URL", URL)
	b.logger.Info("Skipping already crawled URL", urlField)
}

// LogCrawlByUrlError logs, at error level, that crawling URL failed with err.
func (b *BaseLogger) LogCrawlByUrlError(URL string, err error) {
	fields := []zap.Field{
		zap.String("URL", URL),
		zap.Error(err),
	}
	b.logger.Error("Failed to crawl URL", fields...)
}

// LogSaveGameItemError logs, at error level, that saving the data for URL to
// the database failed with err.
func (b *BaseLogger) LogSaveGameItemError(URL string, err error) {
	fields := []zap.Field{
		zap.String("URL", URL),
		zap.Error(err),
	}
	b.logger.Error("Failed to save data to database", fields...)
}

// LogOrganizeGameWarn logs, at warn level, that organizing the game with the
// given id and name failed with err. The id is recorded in its hex form.
func (b *BaseLogger) LogOrganizeGameWarn(id primitive.ObjectID, name string, err error) {
	fields := []zap.Field{
		zap.String("ID", id.Hex()),
		zap.String("Name", name),
		zap.Error(err),
	}
	b.logger.Warn("Failed to organize game", fields...)
}

// LogPageExceedWarn logs, at warn level, that page exceeds the total page
// number.
func (b *BaseLogger) LogPageExceedWarn(page int) {
	pageField := zap.Int("Page", page)
	b.logger.Warn("Page number exceeds total page number", pageField)
}

// LogCrawlError logs, at error level, that crawling page failed with err.
func (b *BaseLogger) LogCrawlError(page int, err error) {
	fields := []zap.Field{
		zap.Int("Page", page),
		zap.Error(err),
	}
	b.logger.Error("Failed to crawl", fields...)
}
// BaseError is a stateless collection of error constructors. Each method
// returns a new error with a fixed message; the methods that take an err
// argument wrap it with %w so it stays reachable through errors.Is/errors.As.
// None of the methods read the receiver.
type BaseError struct{}

// ErrRequest wraps err with a message naming the URL whose request failed.
func (e *BaseError) ErrRequest(URL string, err error) error {
	return fmt.Errorf("failed to get request for %s: %w", URL, err)
}

// ErrParseDoc wraps err with a message naming the URL whose HTML document
// could not be parsed.
func (e *BaseError) ErrParseDoc(URL string, err error) error {
	return fmt.Errorf("failed to parse HTML document for %s: %w", URL, err)
}

// ErrGetGameItemDetail wraps err with a message naming the URL whose detail
// extraction failed.
func (e *BaseError) ErrGetGameItemDetail(URL string, err error) error {
	return fmt.Errorf("failed to extract detail for %s: %w", URL, err)
}

// ErrDBQuery wraps err as a database query failure.
func (e *BaseError) ErrDBQuery(err error) error {
	return fmt.Errorf("failed to query database: %w", err)
}

// ErrDBSave wraps err with a message naming the URL whose data could not be
// saved to the database.
func (e *BaseError) ErrDBSave(URL string, err error) error {
	return fmt.Errorf("failed to save data to database for %s: %w", URL, err)
}

// ErrParseInt wraps err with a message naming the string s that could not be
// parsed as an int.
func (e *BaseError) ErrParseInt(s string, err error) error {
	return fmt.Errorf("failed to parse int from %s: %w", s, err)
}

// ErrGetTotalPageNum wraps err as a failure to get the total page number.
func (e *BaseError) ErrGetTotalPageNum(err error) error {
	return fmt.Errorf("failed to get total page number: %w", err)
}

// ErrMarshalJSON wraps err as a JSON marshalling failure.
func (e *BaseError) ErrMarshalJSON(err error) error {
	return fmt.Errorf("failed to marshal JSON: %w", err)
}

// ErrUnmarshalJSON wraps err as a JSON unmarshalling failure.
func (e *BaseError) ErrUnmarshalJSON(err error) error {
	return fmt.Errorf("failed to unmarshal JSON: %w", err)
}

// ErrGetSession wraps err as a failure to get a cloudflare/login session.
func (e *BaseError) ErrGetSession(err error) error {
	return fmt.Errorf("failed to get cloudflare/login session: %w", err)
}

// ErrBase64Decode wraps err with a message naming the string s that could
// not be base64-decoded.
func (e *BaseError) ErrBase64Decode(s string, err error) error {
	return fmt.Errorf("failed to base64 decode %s: %w", s, err)
}

// ErrInvalidValue returns an error reporting s as an invalid value.
func (e *BaseError) ErrInvalidValue(s string) error {
	return fmt.Errorf("invalid value: %s", s)
}

// ErrInvailValue is the original, misspelled name of ErrInvalidValue. It is
// kept so existing callers continue to compile and returns the same error.
//
// Deprecated: use ErrInvalidValue instead.
func (e *BaseError) ErrInvailValue(s string) error {
	return e.ErrInvalidValue(s)
}

// ErrConvertTorrent wraps err as a torrent-to-magnet-link conversion failure.
func (e *BaseError) ErrConvertTorrent(err error) error {
	return fmt.Errorf("failed to convert torrent to magnet link: %w", err)
}

// ErrMatchRegex returns an error reporting that regex did not match s,
// wrapping err when one is supplied.
//
// A nil regex is rendered as "<nil>" instead of panicking on the pointer
// dereference inside (*regexp.Regexp).String. A nil err is omitted from the
// message: the regexp match methods do not return an error, so a caller
// reporting a plain non-match may have nothing to wrap, and %w with a nil
// operand would otherwise print "%!w(<nil>)".
func (e *BaseError) ErrMatchRegex(s string, regex *regexp.Regexp, err error) error {
	pattern := "<nil>"
	if regex != nil {
		pattern = regex.String()
	}
	if err == nil {
		return fmt.Errorf("failed to match regex %s with %s", pattern, s)
	}
	return fmt.Errorf("failed to match regex %s with %s: %w", pattern, s, err)
}

// ErrUserOrPassEmpty returns an error reporting that the user or password is
// empty.
func (e *BaseError) ErrUserOrPassEmpty() error {
	return fmt.Errorf("user or password is empty")
}