// File: pcgamedb/crawler/dodi.go

package crawler
import (
"regexp"
"strings"
"pcgamedb/model"
"pcgamedb/utils"
"go.uber.org/zap"
)
const (
	// DODIName is the value NewDODICrawler passes as the first argument to
	// New1337xCrawler. Presumably it is the 1337x uploader name for DODI
	// releases; confirm against New1337xCrawler.
	DODIName string = "DODI-torrents"
)
// DODICrawler crawls DODI releases by forwarding every operation to an
// s1337xCrawler that it holds by value.
type DODICrawler struct {
	// logger is the zap logger handed to NewDODICrawler.
	logger *zap.Logger
	// crawler is the underlying 1337x crawler that the methods delegate to.
	crawler s1337xCrawler
}
// NewDODICrawler builds a DODICrawler around a 1337x crawler configured with
// DODIName and DODIFormatter. The same logger is stored on the DODICrawler
// and passed to New1337xCrawler.
func NewDODICrawler(logger *zap.Logger) *DODICrawler {
	inner := New1337xCrawler(DODIName, DODIFormatter, logger)

	dodi := new(DODICrawler)
	dodi.logger = logger
	// The 1337x crawler is stored by value, so the returned pointer is
	// dereferenced here.
	dodi.crawler = *inner
	return dodi
}
// Name returns the fixed identifier of this crawler, "DODICrawler".
func (c *DODICrawler) Name() string {
	const crawlerName = "DODICrawler"
	return crawlerName
}
func (c *DODICrawler) Crawl(page int) ([]*model.GameItem, error) {
2024-09-24 06:17:11 -04:00
return c.crawler.Crawl(page)
}
func (c *DODICrawler) CrawlByUrl(url string) (*model.GameItem, error) {
2024-09-24 06:17:11 -04:00
return c.crawler.CrawlByUrl(url)
}
func (c *DODICrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
2024-09-24 06:17:11 -04:00
return c.crawler.CrawlMulti(pages)
}
func (c *DODICrawler) CrawlAll() ([]*model.GameItem, error) {
2024-09-24 06:17:11 -04:00
return c.crawler.CrawlAll()
}
// GetTotalPageNum asks the wrapped 1337x crawler for its total page count and
// passes the count and error through unchanged.
func (c *DODICrawler) GetTotalPageNum() (int, error) {
	total, err := c.crawler.GetTotalPageNum()
	return total, err
}
// dodiRegexps holds the patterns DODIFormatter deletes from a title, in the
// order they are applied. They are compiled once at package initialisation.
var dodiRegexps = func() []*regexp.Regexp {
	patterns := [...]string{
		// Runs of two or more whitespace characters.
		`(?i)\s{2,}`,
		// A "-" or "+" followed by a lazily matched run (containing no ":" or
		// "-") that ends in one of the listed edition-style keywords.
		`(?i)[\-\+]\s?[^:\-]*?\s(Edition|Bundle|Pack|Set|Remake|Collection)`,
	}

	compiled := make([]*regexp.Regexp, 0, len(patterns))
	for _, pattern := range patterns {
		compiled = append(compiled, regexp.MustCompile(pattern))
	}
	return compiled
}()
// DODIFormatter reduces a raw DODI release title to a bare game name.
//
// The steps run in this order:
//  1. remove the "- [DODI Repack]", "- Campaign Remastered" and
//     "- Remastered" tags;
//  2. cut the title at the first "+", en dash, "(", "[", "- AiO" and
//     "- All In One", one marker after another;
//  3. delete every match of each pattern in dodiRegexps, trimming
//     surrounding whitespace after each pattern;
//  4. remove "- Portable" and any remaining "- Remastered";
//  5. if a "/" is present, split on it and keep the longest part (by byte
//     length) for which utils.ContainsRussian reports false; when every part
//     is rejected the result is empty.
//
// The returned string has leading and trailing whitespace trimmed.
func DODIFormatter(name string) string {
	for _, tag := range []string{"- [DODI Repack]", "- Campaign Remastered", "- Remastered"} {
		name = strings.ReplaceAll(name, tag, "")
	}

	// NOTE(review): the second marker was an empty string in the reviewed
	// source. strings.Index(s, "") is always 0, so every title was truncated
	// to "" and the formatter returned an empty name. The character is
	// assumed to be an en dash (U+2013) lost in transit; it is written as an
	// escape so it cannot be stripped again. Confirm against upstream.
	cutMarkers := []string{"+", "\u2013", "(", "[", "- AiO", "- All In One"}
	for _, marker := range cutMarkers {
		if index := strings.Index(name, marker); index != -1 {
			name = name[:index]
		}
	}

	for _, re := range dodiRegexps {
		name = strings.TrimSpace(re.ReplaceAllString(name, ""))
	}
	name = strings.TrimSpace(name)

	for _, tag := range []string{"- Portable", "- Remastered"} {
		name = strings.ReplaceAll(name, tag, "")
	}

	if strings.Contains(name, "/") {
		// Titles may list alternatives separated by "/"; keep the longest
		// one that utils.ContainsRussian does not flag. The comparison is
		// strict, so on ties the earliest alternative wins.
		longest := ""
		for _, part := range strings.Split(name, "/") {
			if !utils.ContainsRussian(part) && len(part) > len(longest) {
				longest = part
			}
		}
		name = longest
	}

	return strings.TrimSpace(name)
}