package crawler

import (
	"encoding/json"
	"fmt"
	"strings"
	"time"

	"pcgamedb/config"
	"pcgamedb/constant"
	"pcgamedb/db"
	"pcgamedb/model"
	"pcgamedb/utils"

	"go.uber.org/zap"
)

// GOGGamesCrawler crawls the GOG Games JSON API and turns its entries into
// model.GameItem records.
type GOGGamesCrawler struct {
	logger *zap.Logger
}

// NewGOGGamesCrawler returns a crawler that reports progress and per-item
// failures through logger.
func NewGOGGamesCrawler(logger *zap.Logger) *GOGGamesCrawler {
	return &GOGGamesCrawler{
		logger: logger,
	}
}

// Name returns the identifier of this crawler.
func (c *GOGGamesCrawler) Name() string {
	return "GOGGamesCrawler"
}

// CrawlByUrl fetches one game from the API and fills a model.GameItem with
// its title, Gofile download links (game links first, then patch links,
// comma-separated) and the summed size of its game files.
//
// URL is the API URL of the game, like
// https://www.gog-games.to/api/v1/games/%s.
//
// The item is obtained from db.GetGameItemByUrl and is returned without being
// saved. A file size that cannot be parsed is logged and left out of the
// total; it does not fail the crawl.
func (c *GOGGamesCrawler) CrawlByUrl(URL string) (*model.GameItem, error) {
	// NOTE(review): the third argument is presumably the Cloudflare Turnstile
	// site key of the target site; confirm against utils.CCSTurnstileToken.
	token, err := utils.CCSTurnstileToken(config.Config.CFClearanceScraper.Url, URL, "0x4AAAAAAAfOlgvCKbOdW1zc")
	if err != nil {
		return nil, fmt.Errorf("getting turnstile token for %s: %w", URL, err)
	}

	resp, err := utils.Fetch(utils.FetchConfig{
		Url: URL,
		Headers: map[string]string{
			"cf-turnstile-response": token,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("fetching %s: %w", URL, err)
	}

	var data gameResult
	if err := json.Unmarshal(resp.Data, &data); err != nil {
		return nil, fmt.Errorf("decoding game response from %s: %w", URL, err)
	}

	// Only the Gofile mirrors are collected; ranging over an empty or nil
	// slice is a no-op, so no length guard is needed.
	gameLinks := data.Links.Game.Gofile.Links
	patchLinks := data.Links.Patch.Gofile.Links
	links := make([]string, 0, len(gameLinks)+len(patchLinks))
	for _, l := range gameLinks {
		links = append(links, l.Link)
	}
	for _, l := range patchLinks {
		links = append(links, l.Link)
	}

	var size uint64
	for _, file := range data.Files.Game {
		n, err := utils.SizeToBytes(file.Size)
		if err != nil {
			// Best effort: one malformed size must not discard the whole item.
			c.logger.Warn("Failed to parse file size",
				zap.Error(err),
				zap.String("URL", URL),
				zap.String("size", file.Size),
			)
			continue
		}
		size += n
	}

	item, err := db.GetGameItemByUrl(URL)
	if err != nil {
		return nil, fmt.Errorf("loading game item for %s: %w", URL, err)
	}
	item.Name = data.Title
	item.RawName = data.Title
	item.Download = strings.Join(links, ",")
	item.Url = URL
	item.Size = utils.BytesToSize(size)
	item.Author = "GOGGames"
	return item, nil
}

// Crawl fetches one page of the game listing, crawls every entry that
// db.IsGameCrawled does not report as already crawled, saves the resulting
// item, then organizes it and saves the resulting game info.
//
// It returns the items that were saved. A failure on a single entry is logged
// and skipped; only a failure to fetch or decode the listing page itself is
// returned as an error.
func (c *GOGGamesCrawler) Crawl(page int) ([]*model.GameItem, error) {
	resp, err := utils.Fetch(utils.FetchConfig{
		Url: fmt.Sprintf(constant.GOGGamesURL, page),
	})
	if err != nil {
		return nil, fmt.Errorf("fetching page %d: %w", page, err)
	}

	var data searchResult
	if err := json.Unmarshal(resp.Data, &data); err != nil {
		return nil, fmt.Errorf("decoding page %d: %w", page, err)
	}

	res := make([]*model.GameItem, 0, len(data.Data))
	for _, entry := range data.Data {
		u := fmt.Sprintf(constant.GOGGamesPageURL, entry.Slug)
		// link+date: the flag changes whenever the entry's last_update changes.
		updateFlag := fmt.Sprintf("%s%s", entry.GogURL, entry.LastUpdate)

		c.logger.Info("Crawling", zap.String("URL", u))
		// NOTE(review): nothing in this file stores updateFlag on the item
		// before it is saved; confirm that db.IsGameCrawled can ever match,
		// otherwise every entry is re-crawled on each run.
		if db.IsGameCrawled(updateFlag, "GOGGames") {
			continue
		}

		item, err := c.CrawlByUrl(u)
		if err != nil {
			c.logger.Warn("Failed to crawl", zap.Error(err), zap.String("URL", u))
			continue
		}
		if err := db.SaveGameItem(item); err != nil {
			c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
			continue
		}
		// The item counts as crawled once it is saved, even if organizing it
		// fails below.
		res = append(res, item)

		info, err := OrganizeGameItem(item)
		if err != nil {
			c.logger.Warn("Failed to organize", zap.Error(err), zap.String("URL", u))
			continue
		}
		if err := db.SaveGameInfo(info); err != nil {
			c.logger.Warn("Failed to save", zap.Error(err), zap.String("URL", u))
		}
	}
	return res, nil
}

// CrawlMulti crawls the given pages in order and returns all saved items.
// It stops at the first page that fails and returns that error with no items.
func (c *GOGGamesCrawler) CrawlMulti(pages []int) ([]*model.GameItem, error) {
	res := make([]*model.GameItem, 0)
	for _, page := range pages {
		items, err := c.Crawl(page)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}

// CrawlAll crawls every page from 1 to the page count reported by
// GetTotalPageNum and returns all saved items. It stops at the first page
// that fails and returns that error with no items.
func (c *GOGGamesCrawler) CrawlAll() ([]*model.GameItem, error) {
	totalPageNum, err := c.GetTotalPageNum()
	if err != nil {
		return nil, err
	}
	var res []*model.GameItem
	for page := 1; page <= totalPageNum; page++ {
		items, err := c.Crawl(page)
		if err != nil {
			return nil, err
		}
		res = append(res, items...)
	}
	return res, nil
}

// GetTotalPageNum fetches the first listing page and returns the last page
// number from its pagination metadata.
func (c *GOGGamesCrawler) GetTotalPageNum() (int, error) {
	resp, err := utils.Fetch(utils.FetchConfig{
		Url: fmt.Sprintf(constant.GOGGamesURL, 1),
	})
	if err != nil {
		return 0, fmt.Errorf("fetching first page: %w", err)
	}

	var data searchResult
	if err := json.Unmarshal(resp.Data, &data); err != nil {
		return 0, fmt.Errorf("decoding first page: %w", err)
	}
	return data.Meta.LastPage, nil
}

// searchResult is the JSON shape of one page of the game listing.
type searchResult struct {
	Data []struct {
		ID          string    `json:"id"`
		Slug        string    `json:"slug"`
		Title       string    `json:"title"`
		Image       string    `json:"image"`
		Background  string    `json:"background"`
		GogURL      string    `json:"gog_url"`
		IsIndev     bool      `json:"is_indev"`
		IsNew       bool      `json:"is_new"`
		IsUpdated   bool      `json:"is_updated"`
		IsQueued    bool      `json:"is_queued"`
		IsUploading bool      `json:"is_uploading"`
		VotedOn     bool      `json:"voted_on"`
		LastUpdate  time.Time `json:"last_update"`
		Md5Filename string    `json:"md5_filename"`
		Infohash    string    `json:"infohash"`
		IsVotable   bool      `json:"is_votable"`
	} `json:"data"`
	Links struct {
		First string `json:"first"`
		Last  string `json:"last"`
		Prev  any    `json:"prev"`
		Next  string `json:"next"`
	} `json:"links"`
	Meta struct {
		CurrentPage int `json:"current_page"`
		From        int `json:"from"`
		LastPage    int `json:"last_page"`
		Links       []struct {
			URL    any    `json:"url"`
			Label  string `json:"label"`
			Active bool   `json:"active"`
		} `json:"links"`
		Path    string `json:"path"`
		PerPage int    `json:"per_page"`
		To      int    `json:"to"`
		Total   int    `json:"total"`
	} `json:"meta"`
}

// gogGamesLink is one labelled download link of a hosted upload.
type gogGamesLink struct {
	Label string `json:"label"`
	Link  string `json:"link"`
}

// gogGamesHost is one upload on a single file host together with its links.
type gogGamesHost struct {
	ID    string         `json:"id"`
	Name  string         `json:"name"`
	Links []gogGamesLink `json:"links"`
}

// gogGamesHosts groups the uploads of one category by file host.
type gogGamesHosts struct {
	OneFichier gogGamesHost `json:"1fichier"`
	Vikingfile gogGamesHost `json:"vikingfile"`
	Pixeldrain gogGamesHost `json:"pixeldrain"`
	Gofile     gogGamesHost `json:"gofile"`
}

// gogGamesFile is one file entry with its size as a human-readable string.
type gogGamesFile struct {
	Name string `json:"name"`
	Type string `json:"type"`
	Size string `json:"size"`
}

// gameResult is the JSON shape of a single game returned by the API.
type gameResult struct {
	ID          string    `json:"id"`
	Slug        string    `json:"slug"`
	Title       string    `json:"title"`
	Image       string    `json:"image"`
	Background  string    `json:"background"`
	GogURL      string    `json:"gog_url"`
	IsIndev     bool      `json:"is_indev"`
	IsNew       bool      `json:"is_new"`
	IsUpdated   bool      `json:"is_updated"`
	IsQueued    bool      `json:"is_queued"`
	IsUploading bool      `json:"is_uploading"`
	VotedOn     bool      `json:"voted_on"`
	LastUpdate  time.Time `json:"last_update"`
	Md5Filename string    `json:"md5_filename"`
	Infohash    string    `json:"infohash"`
	Links       struct {
		Goodie gogGamesHosts `json:"goodie"`
		Game   gogGamesHosts `json:"game"`
		Patch  gogGamesHosts `json:"patch"`
	} `json:"links"`
	Files struct {
		Game   []gogGamesFile `json:"game"`
		Goodie []gogGamesFile `json:"goodie"`
		Patch  []gogGamesFile `json:"patch"`
	} `json:"files"`
	IsVotable bool `json:"is_votable"`
}