mirror of
https://github.com/bestnite/bilinovel-downloader.git
synced 2025-10-26 09:11:01 +00:00
feat: Add concurrency and headless options for downloads
This commit introduces new features for controlling the download process:

- **Concurrency**: Users can now specify the number of concurrent volume downloads using the `--concurrency` flag. This significantly speeds up the download of entire novels.
- **Headless Mode**: A `--headless` flag has been added to control whether the browser operates in headless mode (without a visible UI). This is useful for debugging or running in environments without a display.

**Changes include:**

- Updated `download` command to accept `--concurrency` and `--headless` flags.
- Refactored `bilinovel` downloader to support `BilinovelNewOption` for configuring headless mode and concurrency.
- Implemented a page pool and concurrency control mechanism within the `bilinovel` downloader to manage concurrent browser page usage.
- Added `DownloadNovel` and `DownloadVolume` methods to the `bilinovel` downloader, utilizing goroutines and wait groups for parallel processing.
- Updated `.vscode/launch.json` with new configurations for testing novel and volume downloads with the new options.
This commit is contained in:
21
.vscode/launch.json
vendored
21
.vscode/launch.json
vendored
@@ -2,7 +2,7 @@
|
|||||||
"version": "0.2.0",
|
"version": "0.2.0",
|
||||||
"configurations": [
|
"configurations": [
|
||||||
{
|
{
|
||||||
"name": "download",
|
"name": "novel",
|
||||||
"type": "go",
|
"type": "go",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"mode": "auto",
|
"mode": "auto",
|
||||||
@@ -10,9 +10,22 @@
|
|||||||
"args": [
|
"args": [
|
||||||
"download",
|
"download",
|
||||||
"-n",
|
"-n",
|
||||||
"2321",
|
"2727",
|
||||||
"-v",
|
"--concurrency",
|
||||||
"162759"
|
"5"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "volume",
|
||||||
|
"type": "go",
|
||||||
|
"request": "launch",
|
||||||
|
"mode": "auto",
|
||||||
|
"program": "${workspaceFolder}",
|
||||||
|
"args": [
|
||||||
|
"download",
|
||||||
|
"-n=2727",
|
||||||
|
"-v=150098",
|
||||||
|
"--headless=false"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bilinovel-downloader/downloader"
|
||||||
"bilinovel-downloader/downloader/bilinovel"
|
"bilinovel-downloader/downloader/bilinovel"
|
||||||
"bilinovel-downloader/epub"
|
"bilinovel-downloader/epub"
|
||||||
"bilinovel-downloader/model"
|
"bilinovel-downloader/model"
|
||||||
@@ -31,6 +32,8 @@ type downloadCmdArgs struct {
|
|||||||
VolumeId int `validate:"required"`
|
VolumeId int `validate:"required"`
|
||||||
outputPath string
|
outputPath string
|
||||||
outputType string
|
outputType string
|
||||||
|
headless bool
|
||||||
|
concurrency int
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
@@ -42,11 +45,16 @@ func init() {
|
|||||||
downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id")
|
downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id")
|
||||||
downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path")
|
downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path")
|
||||||
downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text")
|
downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text")
|
||||||
|
downloadCmd.Flags().BoolVar(&downloadArgs.headless, "headless", true, "headless mode")
|
||||||
|
downloadCmd.Flags().IntVar(&downloadArgs.concurrency, "concurrency", 3, "concurrency of downloading volumes")
|
||||||
RootCmd.AddCommand(downloadCmd)
|
RootCmd.AddCommand(downloadCmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
func runDownloadNovel() error {
|
func runDownloadNovel() error {
|
||||||
downloader, err := bilinovel.New()
|
downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{
|
||||||
|
Headless: downloadArgs.headless,
|
||||||
|
Concurrency: downloadArgs.concurrency,
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create downloader: %v", err)
|
return fmt.Errorf("failed to create downloader: %v", err)
|
||||||
}
|
}
|
||||||
@@ -63,16 +71,10 @@ func runDownloadNovel() error {
|
|||||||
|
|
||||||
if downloadArgs.VolumeId == 0 {
|
if downloadArgs.VolumeId == 0 {
|
||||||
// 下载整本小说
|
// 下载整本小说
|
||||||
novel, err := downloader.GetNovel(downloadArgs.NovelId, true)
|
err := downloadNovel(downloader, downloadArgs.NovelId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to get novel: %v", err)
|
return fmt.Errorf("failed to get novel: %v", err)
|
||||||
}
|
}
|
||||||
for _, volume := range novel.Volumes {
|
|
||||||
err = downloadVolume(downloader, volume.Id)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to download volume: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
// 下载单卷
|
// 下载单卷
|
||||||
err = downloadVolume(downloader, downloadArgs.VolumeId)
|
err = downloadVolume(downloader, downloadArgs.VolumeId)
|
||||||
@@ -84,7 +86,59 @@ func runDownloadNovel() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func downloadVolume(downloader model.Downloader, volumeId int) error {
|
func downloadNovel(downloader downloader.Downloader, novelId int) error {
|
||||||
|
novelInfo, err := downloader.GetNovel(novelId, true, nil)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to get novel info: %w", err)
|
||||||
|
}
|
||||||
|
skipVolumes := make([]int, 0)
|
||||||
|
for _, volume := range novelInfo.Volumes {
|
||||||
|
jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volume.Id))
|
||||||
|
err = os.MkdirAll(filepath.Dir(jsonPath), 0755)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create directory: %v", err)
|
||||||
|
}
|
||||||
|
_, err = os.Stat(jsonPath)
|
||||||
|
if err == nil {
|
||||||
|
// 已经下载
|
||||||
|
skipVolumes = append(skipVolumes, volume.Id)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
novel, err := downloader.GetNovel(novelId, false, skipVolumes)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to download novel: %w", err)
|
||||||
|
}
|
||||||
|
for _, volume := range novel.Volumes {
|
||||||
|
jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volume.Id))
|
||||||
|
err = os.MkdirAll(filepath.Dir(jsonPath), 0755)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create directory: %v", err)
|
||||||
|
}
|
||||||
|
jsonFile, err := os.Create(jsonPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create json file: %v", err)
|
||||||
|
}
|
||||||
|
err = json.NewEncoder(jsonFile).Encode(volume)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to encode json file: %v", err)
|
||||||
|
}
|
||||||
|
switch downloadArgs.outputType {
|
||||||
|
case "epub":
|
||||||
|
err = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles())
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to pack volume: %v", err)
|
||||||
|
}
|
||||||
|
case "text":
|
||||||
|
err = text.PackVolumeToText(volume, downloadArgs.outputPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to pack volume: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func downloadVolume(downloader downloader.Downloader, volumeId int) error {
|
||||||
jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volumeId))
|
jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volumeId))
|
||||||
err := os.MkdirAll(filepath.Dir(jsonPath), 0755)
|
err := os.MkdirAll(filepath.Dir(jsonPath), 0755)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -13,8 +13,10 @@ import (
|
|||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
mapper "github.com/bestnite/font-mapper"
|
mapper "github.com/bestnite/font-mapper"
|
||||||
@@ -35,10 +37,17 @@ type Bilinovel struct {
|
|||||||
// 浏览器实例复用
|
// 浏览器实例复用
|
||||||
browser playwright.Browser
|
browser playwright.Browser
|
||||||
browserContext playwright.BrowserContext
|
browserContext playwright.BrowserContext
|
||||||
page playwright.Page
|
pages map[string]playwright.Page
|
||||||
|
concurrency int
|
||||||
|
concurrentChan chan any
|
||||||
}
|
}
|
||||||
|
|
||||||
func New() (*Bilinovel, error) {
|
// BilinovelNewOption holds the settings New uses to build a Bilinovel
// downloader.
type BilinovelNewOption struct {
	// Headless is forwarded to the Chromium launch options: true runs the
	// browser without a visible window.
	Headless bool
	// Concurrency is the capacity of the channel that limits how many volumes
	// are fetched at the same time. It must be at least 1: New passes it
	// straight to make(chan any, Concurrency), so a negative value panics, and
	// with 0 the slot acquisition in getAllVolumes blocks forever because it
	// happens before the goroutine that would release the slot is started.
	Concurrency int
}
|
||||||
|
|
||||||
|
func New(option BilinovelNewOption) (*Bilinovel, error) {
|
||||||
fontMapper, err := mapper.NewGlyphOutlineMapper(readTTF, miLantingTTF)
|
fontMapper, err := mapper.NewGlyphOutlineMapper(readTTF, miLantingTTF)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create font mapper: %v", err)
|
return nil, fmt.Errorf("failed to create font mapper: %v", err)
|
||||||
@@ -49,10 +58,13 @@ func New() (*Bilinovel, error) {
|
|||||||
fontMapper: fontMapper,
|
fontMapper: fontMapper,
|
||||||
textOnly: false,
|
textOnly: false,
|
||||||
restyClient: restyClient,
|
restyClient: restyClient,
|
||||||
|
pages: make(map[string]playwright.Page),
|
||||||
|
concurrency: option.Concurrency,
|
||||||
|
concurrentChan: make(chan any, option.Concurrency),
|
||||||
}
|
}
|
||||||
|
|
||||||
// 初始化浏览器实例
|
// 初始化浏览器实例
|
||||||
err = b.initBrowser()
|
err = b.initBrowser(option.Headless)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to init browser: %v", err)
|
return nil, fmt.Errorf("failed to init browser: %v", err)
|
||||||
}
|
}
|
||||||
@@ -69,13 +81,13 @@ func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// initBrowser 初始化浏览器实例
|
// initBrowser 初始化浏览器实例
|
||||||
func (b *Bilinovel) initBrowser() error {
|
func (b *Bilinovel) initBrowser(headless bool) error {
|
||||||
pw, err := playwright.Run()
|
pw, err := playwright.Run()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("could not start playwright: %w", err)
|
return fmt.Errorf("could not start playwright: %w", err)
|
||||||
}
|
}
|
||||||
b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
||||||
Headless: playwright.Bool(false),
|
Headless: playwright.Bool(headless),
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("could not launch browser: %w", err)
|
return fmt.Errorf("could not launch browser: %w", err)
|
||||||
@@ -86,11 +98,6 @@ func (b *Bilinovel) initBrowser() error {
|
|||||||
return fmt.Errorf("could not create browser context: %w", err)
|
return fmt.Errorf("could not create browser context: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
b.page, err = b.browserContext.NewPage()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("could not create page: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Println("Browser initialized successfully")
|
log.Println("Browser initialized successfully")
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -103,7 +110,6 @@ func (b *Bilinovel) Close() error {
|
|||||||
}
|
}
|
||||||
b.browser = nil
|
b.browser = nil
|
||||||
b.browserContext = nil
|
b.browserContext = nil
|
||||||
b.page = nil
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -115,7 +121,7 @@ func (b *Bilinovel) GetStyleCSS() string {
|
|||||||
return string(styleCSS)
|
return string(styleCSS)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bilinovel) GetNovel(novelId int, skipChapter bool) (*model.Novel, error) {
|
func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) {
|
||||||
log.Printf("Getting novel %v\n", novelId)
|
log.Printf("Getting novel %v\n", novelId)
|
||||||
|
|
||||||
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
|
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
|
||||||
@@ -145,7 +151,7 @@ func (b *Bilinovel) GetNovel(novelId int, skipChapter bool) (*model.Novel, error
|
|||||||
novel.Authors = append(novel.Authors, strings.TrimSpace(s.Text()))
|
novel.Authors = append(novel.Authors, strings.TrimSpace(s.Text()))
|
||||||
})
|
})
|
||||||
|
|
||||||
volumes, err := b.getAllVolumes(novelId, skipChapter)
|
volumes, err := b.getAllVolumes(novelId, skipChapterContent, skipVolumes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get novel volumes: %v", err)
|
return nil, fmt.Errorf("failed to get novel volumes: %v", err)
|
||||||
}
|
}
|
||||||
@@ -154,7 +160,7 @@ func (b *Bilinovel) GetNovel(novelId int, skipChapter bool) (*model.Novel, error
|
|||||||
return novel, nil
|
return novel, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapter bool) (*model.Volume, error) {
|
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) {
|
||||||
log.Printf("Getting volume %v of novel %v\n", volumeId, novelId)
|
log.Printf("Getting volume %v of novel %v\n", volumeId, novelId)
|
||||||
|
|
||||||
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
||||||
@@ -229,7 +235,7 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapter bool) (*mod
|
|||||||
|
|
||||||
idRegexp := regexp.MustCompile(`/novel/(\d+)/(\d+).html`)
|
idRegexp := regexp.MustCompile(`/novel/(\d+)/(\d+).html`)
|
||||||
|
|
||||||
if !skipChapter {
|
if !skipChapterContent {
|
||||||
for i := range volume.Chapters {
|
for i := range volume.Chapters {
|
||||||
matches := idRegexp.FindStringSubmatch(volume.Chapters[i].Url)
|
matches := idRegexp.FindStringSubmatch(volume.Chapters[i].Url)
|
||||||
if len(matches) > 0 {
|
if len(matches) > 0 {
|
||||||
@@ -252,7 +258,7 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapter bool) (*mod
|
|||||||
return volume, nil
|
return volume, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bilinovel) getAllVolumes(novelId int, skipChapter bool) ([]*model.Volume, error) {
|
func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) {
|
||||||
log.Printf("Getting all volumes of novel %v\n", novelId)
|
log.Printf("Getting all volumes of novel %v\n", novelId)
|
||||||
|
|
||||||
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
||||||
@@ -280,27 +286,63 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapter bool) ([]*model.Volum
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|
||||||
volumes := make([]*model.Volume, 0)
|
volumes := make([]*model.Volume, len(volumeIds))
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
var mu sync.Mutex // 保护 volumes 写入的互斥锁
|
||||||
|
|
||||||
for i, volumeIdStr := range volumeIds {
|
for i, volumeIdStr := range volumeIds {
|
||||||
|
wg.Add(1)
|
||||||
|
b.concurrentChan <- struct{}{} // 获取一个并发槽
|
||||||
|
|
||||||
|
go func(i int, volumeIdStr string) {
|
||||||
|
defer wg.Done()
|
||||||
|
defer func() { <-b.concurrentChan }() // 释放并发槽
|
||||||
|
|
||||||
volumeId, err := strconv.Atoi(volumeIdStr)
|
volumeId, err := strconv.Atoi(volumeIdStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to convert volume id: %v", err)
|
log.Printf("failed to convert volume id %s: %v", volumeIdStr, err)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
volume, err := b.GetVolume(novelId, volumeId, skipChapter)
|
if slices.Contains(skipVolumes, volumeId) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
volume, err := b.GetVolume(novelId, volumeId, skipChapterContent)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get volume info: %v", err)
|
log.Printf("failed to get volume info for novel %d, volume %d: %v", novelId, volumeId, err)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
volume.SeriesIdx = i
|
volume.SeriesIdx = i
|
||||||
volumes = append(volumes, volume)
|
|
||||||
|
// 关闭浏览器标签页
|
||||||
|
pwPageKey := fmt.Sprintf("%v-%v", novelId, volumeId)
|
||||||
|
if pwPage, ok := b.pages[pwPageKey]; ok {
|
||||||
|
_ = pwPage.Close()
|
||||||
|
delete(b.pages, pwPageKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
return volumes, nil
|
mu.Lock()
|
||||||
|
volumes[i] = volume
|
||||||
|
mu.Unlock()
|
||||||
|
}(i, volumeIdStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
|
// 过滤掉获取失败的 nil volume
|
||||||
|
filteredVolumes := make([]*model.Volume, 0, len(volumes))
|
||||||
|
for _, vol := range volumes {
|
||||||
|
if vol != nil {
|
||||||
|
filteredVolumes = append(filteredVolumes, vol)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return filteredVolumes, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
|
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
|
||||||
log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId)
|
log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId)
|
||||||
|
|
||||||
page := 1
|
pageNum := 1
|
||||||
chapter := &model.Chapter{
|
chapter := &model.Chapter{
|
||||||
Id: chapterId,
|
Id: chapterId,
|
||||||
NovelId: novelId,
|
NovelId: novelId,
|
||||||
@@ -308,22 +350,30 @@ func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model
|
|||||||
Url: fmt.Sprintf("https://www.bilinovel.com/novel/%v/%v.html", novelId, chapterId),
|
Url: fmt.Sprintf("https://www.bilinovel.com/novel/%v/%v.html", novelId, chapterId),
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
hasNext, err := b.getChapterByPage(chapter, page)
|
pwPageKey := fmt.Sprintf("%v-%v", novelId, volumeId)
|
||||||
|
if _, ok := b.pages[pwPageKey]; !ok {
|
||||||
|
pwPage, err := b.browserContext.NewPage()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create browser page: %w", err)
|
||||||
|
}
|
||||||
|
b.pages[pwPageKey] = pwPage
|
||||||
|
}
|
||||||
|
hasNext, err := b.getChapterByPage(b.pages[pwPageKey], chapter, pageNum)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to download chapter: %w", err)
|
return nil, fmt.Errorf("failed to download chapter: %w", err)
|
||||||
}
|
}
|
||||||
if !hasNext {
|
if !hasNext {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
page++
|
pageNum++
|
||||||
}
|
}
|
||||||
return chapter, nil
|
return chapter, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, error) {
|
func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) {
|
||||||
log.Printf("Getting chapter %v by page %v\n", chapter.Id, page)
|
log.Printf("Getting chapter %v by page %v\n", chapter.Id, pageNum)
|
||||||
|
|
||||||
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", page)
|
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum)
|
||||||
|
|
||||||
hasNext := false
|
hasNext := false
|
||||||
headers := map[string]string{
|
headers := map[string]string{
|
||||||
@@ -344,8 +394,9 @@ func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, er
|
|||||||
}
|
}
|
||||||
|
|
||||||
html := resp.Body()
|
html := resp.Body()
|
||||||
|
|
||||||
// 解决乱序问题
|
// 解决乱序问题
|
||||||
resortedHtml, err := b.processContentWithPlaywright(string(html))
|
resortedHtml, err := b.processContentWithPlaywright(pwPage, string(html))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, fmt.Errorf("failed to process html: %w", err)
|
return false, fmt.Errorf("failed to process html: %w", err)
|
||||||
}
|
}
|
||||||
@@ -354,7 +405,7 @@ func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, er
|
|||||||
return false, fmt.Errorf("failed to parse html: %w", err)
|
return false, fmt.Errorf("failed to parse html: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if page == 1 {
|
if pageNum == 1 {
|
||||||
chapter.Title = doc.Find("#atitle").Text()
|
chapter.Title = doc.Find("#atitle").Text()
|
||||||
}
|
}
|
||||||
content := doc.Find("#acontent").First()
|
content := doc.Find("#acontent").First()
|
||||||
@@ -445,7 +496,7 @@ func (b *Bilinovel) getImg(url string) ([]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// processContentWithPlaywright 使用复用的浏览器实例处理内容
|
// processContentWithPlaywright 使用复用的浏览器实例处理内容
|
||||||
func (b *Bilinovel) processContentWithPlaywright(htmlContent string) (string, error) {
|
func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) {
|
||||||
tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html")
|
tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("failed to create temp file: %w", err)
|
return "", fmt.Errorf("failed to create temp file: %w", err)
|
||||||
@@ -459,10 +510,10 @@ func (b *Bilinovel) processContentWithPlaywright(htmlContent string) (string, er
|
|||||||
tempFile.Close()
|
tempFile.Close()
|
||||||
tempFilePath := tempFile.Name()
|
tempFilePath := tempFile.Name()
|
||||||
|
|
||||||
_, err = b.page.ExpectResponse(func(url string) bool {
|
_, err = page.ExpectResponse(func(url string) bool {
|
||||||
return strings.Contains(url, "chapterlog.js")
|
return strings.Contains(url, "chapterlog.js")
|
||||||
}, func() error {
|
}, func() error {
|
||||||
_, err = b.page.Goto("file://" + filepath.ToSlash(tempFilePath))
|
_, err = page.Goto("file://" + filepath.ToSlash(tempFilePath))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("could not navigate to file: %w", err)
|
return fmt.Errorf("could not navigate to file: %w", err)
|
||||||
}
|
}
|
||||||
@@ -474,7 +525,7 @@ func (b *Bilinovel) processContentWithPlaywright(htmlContent string) (string, er
|
|||||||
return "", fmt.Errorf("failed to wait for network request finish")
|
return "", fmt.Errorf("failed to wait for network request finish")
|
||||||
}
|
}
|
||||||
|
|
||||||
err = b.page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{
|
err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{
|
||||||
State: playwright.WaitForSelectorStateVisible,
|
State: playwright.WaitForSelectorStateVisible,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -482,7 +533,7 @@ func (b *Bilinovel) processContentWithPlaywright(htmlContent string) (string, er
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 遍历所有 #acontent 的子元素, 通过 window.getComputedStyle().display 检测是否是 none, 如果是 none 则从页面删除这个元素
|
// 遍历所有 #acontent 的子元素, 通过 window.getComputedStyle().display 检测是否是 none, 如果是 none 则从页面删除这个元素
|
||||||
result, err := b.page.Evaluate(`
|
result, err := page.Evaluate(`
|
||||||
(function() {
|
(function() {
|
||||||
const acontent = document.getElementById('acontent');
|
const acontent = document.getElementById('acontent');
|
||||||
if (!acontent) {
|
if (!acontent) {
|
||||||
@@ -497,7 +548,7 @@ func (b *Bilinovel) processContentWithPlaywright(htmlContent string) (string, er
|
|||||||
const element = elements[i];
|
const element = elements[i];
|
||||||
const computedStyle = window.getComputedStyle(element);
|
const computedStyle = window.getComputedStyle(element);
|
||||||
|
|
||||||
if (computedStyle.display === 'none') {
|
if (computedStyle.display === 'none' || computedStyle.transform == 'matrix(0, 0, 0, 0, 0, 0)') {
|
||||||
element.remove();
|
element.remove();
|
||||||
removedCount++;
|
removedCount++;
|
||||||
}
|
}
|
||||||
@@ -513,7 +564,7 @@ func (b *Bilinovel) processContentWithPlaywright(htmlContent string) (string, er
|
|||||||
|
|
||||||
log.Printf("Hidden elements removal result: %s", result)
|
log.Printf("Hidden elements removal result: %s", result)
|
||||||
|
|
||||||
processedHTML, err := b.page.Content()
|
processedHTML, err := page.Content()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("could not get page content: %w", err)
|
return "", fmt.Errorf("could not get page content: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
12
downloader/downloader.go
Normal file
12
downloader/downloader.go
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
package downloader
|
||||||
|
|
||||||
|
import "bilinovel-downloader/model"
|
||||||
|
|
||||||
|
type Downloader interface {
|
||||||
|
GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error)
|
||||||
|
GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error)
|
||||||
|
GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error)
|
||||||
|
GetStyleCSS() string
|
||||||
|
GetExtraFiles() []model.ExtraFile
|
||||||
|
Close() error
|
||||||
|
}
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
package model
|
|
||||||
|
|
||||||
type ExtraFile struct {
|
|
||||||
Data []byte
|
|
||||||
Path string
|
|
||||||
ManifestItem ManifestItem
|
|
||||||
}
|
|
||||||
|
|
||||||
type Downloader interface {
|
|
||||||
GetNovel(novelId int, skipChapter bool) (*Novel, error)
|
|
||||||
GetVolume(novelId int, volumeId int, skipChapter bool) (*Volume, error)
|
|
||||||
GetChapter(novelId int, volumeId int, chapterId int) (*Chapter, error)
|
|
||||||
GetStyleCSS() string
|
|
||||||
GetExtraFiles() []ExtraFile
|
|
||||||
Close() error
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,14 @@
|
|||||||
package model
|
package model
|
||||||
|
|
||||||
import "encoding/xml"
|
import (
|
||||||
|
"encoding/xml"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ExtraFile struct {
|
||||||
|
Data []byte
|
||||||
|
Path string
|
||||||
|
ManifestItem ManifestItem
|
||||||
|
}
|
||||||
|
|
||||||
type DublinCoreMetadata struct {
|
type DublinCoreMetadata struct {
|
||||||
XMLName xml.Name `xml:"metadata"`
|
XMLName xml.Name `xml:"metadata"`
|
||||||
|
|||||||
@@ -8,12 +8,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
func TestBilinovel_GetNovel(t *testing.T) {
|
func TestBilinovel_GetNovel(t *testing.T) {
|
||||||
bilinovel, err := bilinovel.New()
|
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 5})
|
||||||
bilinovel.SetTextOnly(true)
|
bilinovel.SetTextOnly(true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to create bilinovel: %v", err)
|
t.Fatalf("failed to create bilinovel: %v", err)
|
||||||
}
|
}
|
||||||
novel, err := bilinovel.GetNovel(4519, false)
|
novel, err := bilinovel.GetNovel(2727, false, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to get novel: %v", err)
|
t.Fatalf("failed to get novel: %v", err)
|
||||||
}
|
}
|
||||||
@@ -25,12 +25,12 @@ func TestBilinovel_GetNovel(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestBilinovel_GetVolume(t *testing.T) {
|
func TestBilinovel_GetVolume(t *testing.T) {
|
||||||
bilinovel, err := bilinovel.New()
|
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1})
|
||||||
bilinovel.SetTextOnly(true)
|
bilinovel.SetTextOnly(true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to create bilinovel: %v", err)
|
t.Fatalf("failed to create bilinovel: %v", err)
|
||||||
}
|
}
|
||||||
volume, err := bilinovel.GetVolume(1410, 52748, false)
|
volume, err := bilinovel.GetVolume(2727, 129092, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to get volume: %v", err)
|
t.Fatalf("failed to get volume: %v", err)
|
||||||
}
|
}
|
||||||
@@ -42,12 +42,12 @@ func TestBilinovel_GetVolume(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestBilinovel_GetChapter(t *testing.T) {
|
func TestBilinovel_GetChapter(t *testing.T) {
|
||||||
bilinovel, err := bilinovel.New()
|
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1})
|
||||||
bilinovel.SetTextOnly(true)
|
bilinovel.SetTextOnly(true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to create bilinovel: %v", err)
|
t.Fatalf("failed to create bilinovel: %v", err)
|
||||||
}
|
}
|
||||||
chapter, err := bilinovel.GetChapter(3095, 154930, 154933)
|
chapter, err := bilinovel.GetChapter(2727, 129092, 129094)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to get chapter: %v", err)
|
t.Fatalf("failed to get chapter: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user