147 lines
4.7 KiB
Go
147 lines
4.7 KiB
Go
package crawler
|
|
|
|
import (
|
|
"fmt"
|
|
"game-crawler/config"
|
|
"game-crawler/model"
|
|
"regexp"
|
|
|
|
"go.mongodb.org/mongo-driver/bson/primitive"
|
|
"go.uber.org/zap"
|
|
)
|
|
|
|
type Crawler interface {
|
|
Name() string
|
|
Crawl(int) ([]*model.GameItem, error)
|
|
CrawlAll() ([]*model.GameItem, error)
|
|
CrawlByUrl(string) (*model.GameItem, error)
|
|
}
|
|
|
|
type SimpleCrawler interface {
|
|
Crawler
|
|
}
|
|
|
|
type PagedCrawler interface {
|
|
Crawler
|
|
CrawlMulti([]int) ([]*model.GameItem, error)
|
|
GetTotalPageNum() (int, error)
|
|
}
|
|
|
|
func BuildCrawlerMap(logger *zap.Logger) map[string]Crawler {
|
|
ret := map[string]Crawler{
|
|
"fitgirl": NewFitGirlCrawler(logger),
|
|
"dodi": NewDODICrawler(logger),
|
|
"kaoskrew": NewKaOsKrewCrawler(logger),
|
|
"freegog": NewFreeGOGCrawler(config.Config.CFClearanceScraper.Url, logger),
|
|
"xatab": NewXatabCrawler(logger),
|
|
"onlinefix": NewOnlineFixCrawler(config.Config.OnlineFix.User, config.Config.OnlineFix.Password, logger),
|
|
"steamrip": NewSteamRIPCrawler(logger),
|
|
"chovka": NewChovkaCrawler(logger),
|
|
"goggames": NewGOGGamesCrawler(config.Config.CFClearanceScraper.Url, logger),
|
|
"johncena141": NewJohncena141Crawler(logger),
|
|
"omggods": NewOmgGodsCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
|
|
"rutracker-linux-game": NewRutrackerLinuxGameCrawler(config.Config.CFClearanceScraper.Url, config.Config.Rutracker.User, config.Config.Rutracker.Password, logger),
|
|
"nxbrew": NewNxbrewCrawler(config.Config.CFClearanceScraper.Url, logger),
|
|
// "gnarly": NewGnarlyCrawler(logger),
|
|
}
|
|
return ret
|
|
}
|
|
|
|
type BaseLogger struct {
|
|
logger *zap.Logger
|
|
}
|
|
|
|
func (l *BaseLogger) LogCrawlStart(page int) {
|
|
l.logger.Info("Starting Crawl", zap.Int("Page/Num", page))
|
|
}
|
|
|
|
func (l *BaseLogger) LogCrawlByUrlStart(URL string) {
|
|
l.logger.Info("Starting CrawlByUrl", zap.String("URL", URL))
|
|
}
|
|
|
|
func (l *BaseLogger) LogCrawlSkip(URL string) {
|
|
l.logger.Info("Skipping already crawled URL", zap.String("URL", URL))
|
|
}
|
|
|
|
func (l *BaseLogger) LogCrawlByUrlError(URL string, err error) {
|
|
l.logger.Error("Failed to crawl URL", zap.String("URL", URL), zap.Error(err))
|
|
}
|
|
|
|
func (l *BaseLogger) LogSaveGameItemError(URL string, err error) {
|
|
l.logger.Error("Failed to save data to database", zap.String("URL", URL), zap.Error(err))
|
|
}
|
|
|
|
func (l *BaseLogger) LogOrganizeGameWarn(id primitive.ObjectID, name string, err error) {
|
|
l.logger.Warn("Failed to organize game", zap.String("ID", id.Hex()), zap.String("Name", name), zap.Error(err))
|
|
}
|
|
|
|
func (l *BaseLogger) LogPageExceedWarn(page int) {
|
|
l.logger.Warn("Page number exceeds total page number", zap.Int("Page", page))
|
|
}
|
|
|
|
func (l *BaseLogger) LogCrawlError(page int, err error) {
|
|
l.logger.Error("Failed to crawl", zap.Int("Page", page), zap.Error(err))
|
|
}
|
|
|
|
// BaseError is a stateless helper whose methods build consistently worded
// error values. Methods that take an err wrap it with %w, so the cause stays
// reachable through errors.Is / errors.As.
type BaseError struct{}

// ErrRequest wraps err as a failure to perform the request for URL.
func (e *BaseError) ErrRequest(URL string, err error) error {
	return fmt.Errorf("failed to get request for %s: %w", URL, err)
}

// ErrParseDoc wraps err as a failure to parse the HTML document for URL.
func (e *BaseError) ErrParseDoc(URL string, err error) error {
	return fmt.Errorf("failed to parse HTML document for %s: %w", URL, err)
}

// ErrGetGameItemDetail wraps err as a failure to extract the detail for URL.
func (e *BaseError) ErrGetGameItemDetail(URL string, err error) error {
	return fmt.Errorf("failed to extract detail for %s: %w", URL, err)
}

// ErrDBQuery wraps err as a database query failure.
func (e *BaseError) ErrDBQuery(err error) error {
	return fmt.Errorf("failed to query database: %w", err)
}

// ErrDBSave wraps err as a failure to save the data for URL to the database.
func (e *BaseError) ErrDBSave(URL string, err error) error {
	return fmt.Errorf("failed to save data to database for %s: %w", URL, err)
}

// ErrParseInt wraps err as a failure to parse an int from s.
func (e *BaseError) ErrParseInt(s string, err error) error {
	return fmt.Errorf("failed to parse int from %s: %w", s, err)
}

// ErrGetTotalPageNum wraps err as a failure to get the total page number.
func (e *BaseError) ErrGetTotalPageNum(err error) error {
	return fmt.Errorf("failed to get total page number: %w", err)
}

// ErrMarshalJSON wraps err as a JSON marshalling failure.
func (e *BaseError) ErrMarshalJSON(err error) error {
	return fmt.Errorf("failed to marshal JSON: %w", err)
}

// ErrUnmarshalJSON wraps err as a JSON unmarshalling failure.
func (e *BaseError) ErrUnmarshalJSON(err error) error {
	return fmt.Errorf("failed to unmarshal JSON: %w", err)
}

// ErrGetSession wraps err as a failure to get a cloudflare/login session.
func (e *BaseError) ErrGetSession(err error) error {
	return fmt.Errorf("failed to get cloudflare/login session: %w", err)
}

// ErrBase64Decode wraps err as a failure to base64-decode s.
func (e *BaseError) ErrBase64Decode(s string, err error) error {
	return fmt.Errorf("failed to base64 decode %s: %w", s, err)
}

// ErrInvalidValue reports that s is not an acceptable value.
func (e *BaseError) ErrInvalidValue(s string) error {
	return fmt.Errorf("invalid value: %s", s)
}

// ErrInvailValue is the original, misspelled name of ErrInvalidValue. It is
// kept so existing callers continue to compile; new code should call
// ErrInvalidValue.
func (e *BaseError) ErrInvailValue(s string) error {
	return e.ErrInvalidValue(s)
}

// ErrConvertTorrent wraps err as a failure to convert a torrent to a magnet
// link.
func (e *BaseError) ErrConvertTorrent(err error) error {
	return fmt.Errorf("failed to convert torrent to magnet link: %w", err)
}

// ErrMatchRegex reports that s could not be matched against regex.
//
// A nil regex is rendered as "<nil>" instead of being dereferenced. When err
// is nil the message is returned without a wrapped cause, because passing a
// nil operand to %w would append "%!w(<nil>)" to the text.
func (e *BaseError) ErrMatchRegex(s string, regex *regexp.Regexp, err error) error {
	pattern := "<nil>"
	if regex != nil {
		pattern = regex.String()
	}
	if err == nil {
		return fmt.Errorf("failed to match regex %s with %s", pattern, s)
	}
	return fmt.Errorf("failed to match regex %s with %s: %w", pattern, s, err)
}

// ErrUserOrPassEmpty reports that the user name or the password is empty.
func (e *BaseError) ErrUserOrPassEmpty() error {
	return fmt.Errorf("user or password is empty")
}
|