pcgamedb/cmd/crawl.go
2024-11-15 15:02:45 +08:00

142 lines
3.6 KiB
Go

package cmd
import (
	"errors"
	"fmt"
	"sort"
	"strconv"
	"strings"

	"github.com/nitezs/pcgamedb/crawler"
	"github.com/nitezs/pcgamedb/log"
	"github.com/nitezs/pcgamedb/utils"
	"github.com/spf13/cobra"
	"go.uber.org/zap"
)
// crawlCmd is the "crawl" sub-command. Its flags are bound to crawlCmdCfg and
// its work is carried out by crawlRun.
var crawlCmd = &cobra.Command{
	Use:   "crawl",
	Short: "Crawl games from specific platforms",
	Long:  "Crawl games from specific platforms",
	Run:   crawlRun,
}
// CrawlCommandConfig holds the values bound to the flags of the crawl command.
type CrawlCommandConfig struct {
	// Source is the value of --source/-s: the name of the crawler to run.
	// Page is the value of --pages/-p: a page specification such as "1,2,3"
	// or "1-3".
	Source, Page string
	// All is the value of --all/-a: crawl everything instead of a selection.
	All bool
	// Num is the value of --num/-n: the number of items to process.
	Num int
}
var (
	// crawlCmdCfg receives the parsed flag values of the crawl command.
	crawlCmdCfg CrawlCommandConfig

	// crawlerMap looks up a crawler by the name accepted by --source. It
	// starts out empty and is replaced in init.
	crawlerMap = make(map[string]crawler.Crawler)
)
// init fills crawlerMap from crawler.BuildCrawlerMap, declares the flags of
// the crawl command and registers the command on RootCmd.
//
// Each flag's help text lists the crawler names it applies to. The names are
// sorted before being joined because map iteration order is random; without
// the sort the help output would change from one run to the next.
func init() {
	crawlerMap = crawler.BuildCrawlerMap(log.Logger)

	var allNames, pagedNames, simpleNames []string
	for name, c := range crawlerMap {
		allNames = append(allNames, name)
		// PagedCrawler is tested first, so a crawler satisfying both
		// interfaces is listed under the paged crawlers only.
		switch c.(type) {
		case crawler.PagedCrawler:
			pagedNames = append(pagedNames, name)
		case crawler.SimpleCrawler:
			simpleNames = append(simpleNames, name)
		}
	}
	sort.Strings(allNames)
	sort.Strings(pagedNames)
	sort.Strings(simpleNames)

	flags := crawlCmd.Flags()
	flags.StringVarP(&crawlCmdCfg.Source, "source", "s", "",
		fmt.Sprintf("source to crawl (%s)", strings.Join(allNames, ",")))
	flags.StringVarP(&crawlCmdCfg.Page, "pages", "p", "1",
		fmt.Sprintf("pages to crawl (1,2,3 or 1-3) (%s)", strings.Join(pagedNames, ",")))
	flags.BoolVarP(&crawlCmdCfg.All, "all", "a", false, "crawl all page")
	flags.IntVarP(&crawlCmdCfg.Num, "num", "n", -1,
		fmt.Sprintf("number of items to process (%s)", strings.Join(simpleNames, ",")))

	RootCmd.AddCommand(crawlCmd)
}
// crawlRun is the Run handler of crawlCmd.
//
// It lower-cases the requested source, looks the crawler up in crawlerMap and
// dispatches on the interface the crawler implements:
//
//   - crawler.PagedCrawler: CrawlAll when --all is set, otherwise CrawlMulti
//     with the pages parsed from --pages.
//   - crawler.SimpleCrawler: CrawlAll when --all is set, otherwise Crawl with
//     the --num value.
//
// Every failure is logged through log.Logger and the function returns
// without crawling further; nothing is returned to the caller.
func crawlRun(cmd *cobra.Command, args []string) {
	crawlCmdCfg.Source = strings.ToLower(crawlCmdCfg.Source)
	if crawlCmdCfg.Source == "" {
		log.Logger.Error("Source is required")
		return
	}
	item, ok := crawlerMap[crawlCmdCfg.Source]
	if !ok {
		log.Logger.Error("Invalid source", zap.String("source", crawlCmdCfg.Source))
		return
	}

	var err error
	switch c := item.(type) {
	case crawler.PagedCrawler:
		if crawlCmdCfg.All {
			_, err = c.CrawlAll()
		} else {
			pages, perr := pagination(crawlCmdCfg.Page)
			if perr != nil {
				// Include the parse error so the user can see which part of
				// the specification was rejected.
				log.Logger.Error("Invalid page", zap.String("page", crawlCmdCfg.Page), zap.Error(perr))
				return
			}
			// An empty page list leaves nothing to crawl; report it instead
			// of issuing an empty crawl.
			if len(pages) == 0 {
				log.Logger.Error("Invalid page", zap.String("page", crawlCmdCfg.Page))
				return
			}
			_, err = c.CrawlMulti(pages)
		}
	case crawler.SimpleCrawler:
		if crawlCmdCfg.All {
			_, err = c.CrawlAll()
		} else {
			_, err = c.Crawl(crawlCmdCfg.Num)
		}
	default:
		// The crawler implements neither supported interface; say so rather
		// than exiting silently.
		log.Logger.Error("Unsupported crawler type", zap.String("source", crawlCmdCfg.Source))
		return
	}
	if err != nil {
		log.Logger.Error("Crawl error", zap.Error(err))
	}
}
func pagination(pageStr string) ([]int, error) {
if pageStr == "" {
return nil, errors.New("empty page")
}
var pages []int
pageSlice := strings.Split(pageStr, ",")
for i := 0; i < len(pageSlice); i++ {
if strings.Contains(pageSlice[i], "-") {
pageRange := strings.Split(pageSlice[i], "-")
start, err := strconv.Atoi(pageRange[0])
if err != nil {
return nil, err
}
end, err := strconv.Atoi(pageRange[1])
if err != nil {
return nil, err
}
if start > end {
return nil, err
}
for j := start; j <= end; j++ {
pages = append(pages, j)
}
} else {
p, err := strconv.Atoi(pageSlice[i])
if err != nil {
log.Logger.Error("Invalid page", zap.String("page", pageSlice[i]))
return nil, err
}
pages = append(pages, p)
}
}
return utils.Unique(pages), nil
}