refactor: streamline download process and enhance browser handling in Bilinovel

This commit is contained in:
2025-08-24 19:04:00 +10:00
parent ed5440f5fb
commit 560cdfdec9
5 changed files with 137 additions and 163 deletions

6
.vscode/launch.json vendored
View File

@@ -10,11 +10,7 @@
"args": [
"download",
"-n",
"1410",
"-v",
"52748",
"-t",
"epub"
"3095",
]
}
]

View File

@@ -50,128 +50,88 @@ func runDownloadNovel() error {
if err != nil {
return fmt.Errorf("failed to create downloader: %v", err)
}
// 确保在函数结束时关闭资源
defer func() {
if closeErr := downloader.Close(); closeErr != nil {
log.Printf("Failed to close downloader: %v", closeErr)
}
}()
if downloadArgs.NovelId == 0 {
return fmt.Errorf("novel id is required")
}
if downloadArgs.VolumeId == 0 {
novel, err := downloadNovel(downloader)
// 下载整本小说
novel, err := downloader.GetNovel(downloadArgs.NovelId, true)
if err != nil {
return fmt.Errorf("failed to get novel: %v", err)
}
switch downloadArgs.outputType {
case "epub":
for _, volume := range novel.Volumes {
err = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles())
if err != nil {
return fmt.Errorf("failed to pack volume: %v", err)
}
}
case "text":
for _, volume := range novel.Volumes {
err = text.PackVolumeToText(volume, downloadArgs.outputPath)
if err != nil {
return fmt.Errorf("failed to pack volume: %v", err)
}
for _, volume := range novel.Volumes {
err = downloadVolume(downloader, volume.Id)
if err != nil {
return fmt.Errorf("failed to download volume: %v", err)
}
}
} else {
// 下载单卷
volume, err := downloadVolume(downloader)
err = downloadVolume(downloader, downloadArgs.VolumeId)
if err != nil {
return fmt.Errorf("failed to get volume: %v", err)
}
switch downloadArgs.outputType {
case "epub":
err = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles())
if err != nil {
return fmt.Errorf("failed to pack volume: %v", err)
}
case "text":
err = text.PackVolumeToText(volume, downloadArgs.outputPath)
if err != nil {
return fmt.Errorf("failed to pack volume: %v", err)
}
return fmt.Errorf("failed to download volume: %v", err)
}
}
return nil
}
func downloadNovel(downloader model.Downloader) (*model.Novel, error) {
jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("novel-%d.json", downloadArgs.NovelId))
func downloadVolume(downloader model.Downloader, volumeId int) error {
jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volumeId))
err := os.MkdirAll(filepath.Dir(jsonPath), 0755)
if err != nil {
return nil, fmt.Errorf("failed to create directory: %v", err)
}
_, err = os.Stat(jsonPath)
novel := &model.Novel{}
if err != nil {
if os.IsNotExist(err) {
novel, err = downloader.GetNovel(downloadArgs.NovelId)
if err != nil {
return nil, fmt.Errorf("failed to get novel: %v", err)
}
jsonFile, err := os.Create(jsonPath)
if err != nil {
return nil, fmt.Errorf("failed to create json file: %v", err)
}
defer jsonFile.Close()
err = json.NewEncoder(jsonFile).Encode(novel)
if err != nil {
return nil, fmt.Errorf("failed to encode json file: %v", err)
}
} else {
return nil, fmt.Errorf("failed to get novel: %v", err)
}
} else {
jsonFile, err := os.Open(jsonPath)
if err != nil {
return nil, fmt.Errorf("failed to open json file: %v", err)
}
defer jsonFile.Close()
err = json.NewDecoder(jsonFile).Decode(novel)
if err != nil {
return nil, fmt.Errorf("failed to decode json file: %v", err)
}
}
return novel, nil
}
func downloadVolume(downloader model.Downloader) (*model.Volume, error) {
jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, downloadArgs.VolumeId))
err := os.MkdirAll(filepath.Dir(jsonPath), 0755)
if err != nil {
return nil, fmt.Errorf("failed to create directory: %v", err)
return fmt.Errorf("failed to create directory: %v", err)
}
_, err = os.Stat(jsonPath)
volume := &model.Volume{}
if err != nil {
if os.IsNotExist(err) {
volume, err = downloader.GetVolume(downloadArgs.NovelId, downloadArgs.VolumeId)
volume, err = downloader.GetVolume(downloadArgs.NovelId, volumeId, false)
if err != nil {
return nil, fmt.Errorf("failed to get volume: %v", err)
return fmt.Errorf("failed to get volume: %v", err)
}
jsonFile, err := os.Create(jsonPath)
if err != nil {
return nil, fmt.Errorf("failed to create json file: %v", err)
return fmt.Errorf("failed to create json file: %v", err)
}
err = json.NewEncoder(jsonFile).Encode(volume)
if err != nil {
return nil, fmt.Errorf("failed to encode json file: %v", err)
return fmt.Errorf("failed to encode json file: %v", err)
}
} else {
return nil, fmt.Errorf("failed to get volume: %v", err)
return fmt.Errorf("failed to get volume: %v", err)
}
} else {
jsonFile, err := os.Open(jsonPath)
if err != nil {
return nil, fmt.Errorf("failed to open json file: %v", err)
return fmt.Errorf("failed to open json file: %v", err)
}
defer jsonFile.Close()
err = json.NewDecoder(jsonFile).Decode(volume)
if err != nil {
return nil, fmt.Errorf("failed to decode json file: %v", err)
return fmt.Errorf("failed to decode json file: %v", err)
}
}
return volume, nil
switch downloadArgs.outputType {
case "epub":
err = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles())
if err != nil {
return fmt.Errorf("failed to pack volume: %v", err)
}
case "text":
err = text.PackVolumeToText(volume, downloadArgs.outputPath)
if err != nil {
return fmt.Errorf("failed to pack volume: %v", err)
}
}
return nil
}

View File

@@ -16,7 +16,6 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"time"
"github.com/PuerkitoBio/goquery"
@@ -35,7 +34,12 @@ type Bilinovel struct {
fontMapper *mapper.GlyphOutlineMapper
textOnly bool
restyClient *utils.RestyClient
debug bool
// 浏览器实例复用
allocCtx context.Context
allocCancel context.CancelFunc
browserCtx context.Context
browserCancel context.CancelFunc
}
func New() (*Bilinovel, error) {
@@ -43,23 +47,73 @@ func New() (*Bilinovel, error) {
if err != nil {
return nil, fmt.Errorf("failed to create font mapper: %v", err)
}
restyClient := utils.NewRestyClient(10)
return &Bilinovel{
restyClient := utils.NewRestyClient(50)
b := &Bilinovel{
fontMapper: fontMapper,
textOnly: false,
restyClient: restyClient,
}, nil
}
// 初始化浏览器实例
err = b.initBrowser()
if err != nil {
return nil, fmt.Errorf("failed to init browser: %v", err)
}
return b, nil
}
// SetTextOnly records the text-only preference on the downloader.
//
// The method only stores the flag; the code that reads b.textOnly is not part
// of this excerpt.
func (b *Bilinovel) SetTextOnly(textOnly bool) {
	b.textOnly = textOnly
}
func (b *Bilinovel) SetDebug(debug bool) {
b.debug = debug
func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
return nil
}
func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
// initBrowser creates the browser instance that is kept on b for reuse.
//
// It builds an exec allocator from chromedp's default options plus the flags
// listed below, derives a browser context from that allocator, and stores both
// contexts and their cancel functions on b. The browser is then warmed up by
// navigating to about:blank.
//
// Returns nil on success. If the warm-up navigation fails, the contexts are
// cancelled through closeBrowser and the underlying error is returned wrapped,
// so callers can inspect it with errors.Is / errors.As.
func (b *Bilinovel) initBrowser() error {
	// Extend chromedp's default allocator options with the flags this
	// downloader runs with.
	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.Flag("headless", true),
		chromedp.Flag("disable-gpu", true),
		chromedp.Flag("disable-dev-shm-usage", true),
		chromedp.Flag("disable-extensions", true),
		chromedp.Flag("no-sandbox", true),
		chromedp.Flag("disable-background-timer-throttling", true),
		chromedp.Flag("disable-backgrounding-occluded-windows", true),
		chromedp.Flag("disable-renderer-backgrounding", true),
	)

	b.allocCtx, b.allocCancel = chromedp.NewExecAllocator(context.Background(), opts...)
	b.browserCtx, b.browserCancel = chromedp.NewContext(b.allocCtx)

	// Warm up the browser by navigating to a blank page; a failure here is
	// treated as a failed initialization.
	if err := chromedp.Run(b.browserCtx, chromedp.Navigate("about:blank")); err != nil {
		// Release whatever was allocated before reporting the failure.
		b.closeBrowser()
		// %w keeps the cause in the error chain, consistent with the %w
		// wrapping used elsewhere in this file.
		return fmt.Errorf("failed to initialize browser: %w", err)
	}

	log.Println("Browser initialized successfully")
	return nil
}
// closeBrowser cancels the browser context and then the allocator context.
//
// Cancel functions that were never assigned (still nil) are skipped, so the
// method can be called on a Bilinovel whose browser was not initialized.
func (b *Bilinovel) closeBrowser() {
	// Order matters for readability of shutdown: the browser context is
	// derived from the allocator context, so it is cancelled first.
	cancels := []context.CancelFunc{b.browserCancel, b.allocCancel}
	for _, cancel := range cancels {
		if cancel == nil {
			continue
		}
		cancel()
	}
}
// Close releases the resources held by the downloader.
//
// It shuts down the browser via closeBrowser and always returns nil; the error
// result is part of the signature but no failure is reported by this
// implementation.
func (b *Bilinovel) Close() error {
	b.closeBrowser()
	return nil
}
@@ -70,10 +124,9 @@ func (b *Bilinovel) GetStyleCSS() string {
return string(styleCSS)
}
func (b *Bilinovel) GetNovel(novelId int) (*model.Novel, error) {
if b.debug {
log.Printf("Getting novel %v\n", novelId)
}
func (b *Bilinovel) GetNovel(novelId int, skipChapter bool) (*model.Novel, error) {
log.Printf("Getting novel %v\n", novelId)
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
resp, err := b.restyClient.R().Get(novelUrl)
if err != nil {
@@ -101,7 +154,7 @@ func (b *Bilinovel) GetNovel(novelId int) (*model.Novel, error) {
novel.Authors = append(novel.Authors, strings.TrimSpace(s.Text()))
})
volumes, err := b.getAllVolumes(novelId)
volumes, err := b.getAllVolumes(novelId, skipChapter)
if err != nil {
return nil, fmt.Errorf("failed to get novel volumes: %v", err)
}
@@ -110,10 +163,9 @@ func (b *Bilinovel) GetNovel(novelId int) (*model.Novel, error) {
return novel, nil
}
func (b *Bilinovel) GetVolume(novelId int, volumeId int) (*model.Volume, error) {
if b.debug {
log.Printf("Getting volume %v of novel %v\n", volumeId, novelId)
}
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapter bool) (*model.Volume, error) {
log.Printf("Getting volume %v of novel %v\n", volumeId, novelId)
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := b.restyClient.R().Get(novelUrl)
if err != nil {
@@ -185,48 +237,33 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int) (*model.Volume, error)
})
idRegexp := regexp.MustCompile(`/novel/(\d+)/(\d+).html`)
wg := sync.WaitGroup{}
errChan := make(chan error, len(volume.Chapters))
for i := range volume.Chapters {
wg.Add(1)
go func(i int) {
defer wg.Done()
if !skipChapter {
for i := range volume.Chapters {
matches := idRegexp.FindStringSubmatch(volume.Chapters[i].Url)
if len(matches) > 0 {
chapterId, err := strconv.Atoi(matches[2])
if err != nil {
errChan <- fmt.Errorf("failed to convert chapter id: %v", err)
return
return nil, fmt.Errorf("failed to convert chapter id: %v", err)
}
chapter, err := b.GetChapter(novelId, volumeId, chapterId)
if err != nil {
errChan <- fmt.Errorf("failed to get chapter: %v", err)
return
return nil, fmt.Errorf("failed to get chapter: %v", err)
}
chapter.Id = chapterId
volume.Chapters[i] = chapter
} else {
errChan <- fmt.Errorf("failed to get chapter id: %v", volume.Chapters[i].Url)
return
return nil, fmt.Errorf("failed to get chapter id: %v", volume.Chapters[i].Url)
}
}(i)
}
wg.Wait()
close(errChan)
// 检查是否有错误
for err := range errChan {
if err != nil {
return nil, err
}
}
return volume, nil
}
func (b *Bilinovel) getAllVolumes(novelId int) ([]*model.Volume, error) {
if b.debug {
log.Printf("Getting all volumes of novel %v\n", novelId)
}
func (b *Bilinovel) getAllVolumes(novelId int, skipChapter bool) ([]*model.Volume, error) {
log.Printf("Getting all volumes of novel %v\n", novelId)
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := b.restyClient.R().Get(catelogUrl)
if err != nil {
@@ -258,7 +295,7 @@ func (b *Bilinovel) getAllVolumes(novelId int) ([]*model.Volume, error) {
if err != nil {
return nil, fmt.Errorf("failed to convert volume id: %v", err)
}
volume, err := b.GetVolume(novelId, volumeId)
volume, err := b.GetVolume(novelId, volumeId, skipChapter)
if err != nil {
return nil, fmt.Errorf("failed to get volume info: %v", err)
}
@@ -270,9 +307,8 @@ func (b *Bilinovel) getAllVolumes(novelId int) ([]*model.Volume, error) {
}
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
if b.debug {
log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId)
}
log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId)
page := 1
chapter := &model.Chapter{
Id: chapterId,
@@ -294,9 +330,7 @@ func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model
}
func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, error) {
if b.debug {
log.Printf("Getting chapter %v by page %v\n", chapter.Id, page)
}
log.Printf("Getting chapter %v by page %v\n", chapter.Id, page)
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", page)
@@ -320,7 +354,7 @@ func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, er
html := resp.Body()
// 解决乱序问题
resortedHtml, err := ProcessContentWithChromedp(string(html))
resortedHtml, err := b.processContentWithChromedp(string(html))
if err != nil {
return false, fmt.Errorf("failed to process html: %w", err)
}
@@ -396,9 +430,7 @@ func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, er
}
func (b *Bilinovel) getImg(url string) ([]byte, error) {
if b.debug {
log.Printf("Getting img %v\n", url)
}
log.Printf("Getting img %v\n", url)
resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url)
if err != nil {
return nil, err
@@ -407,12 +439,14 @@ func (b *Bilinovel) getImg(url string) ([]byte, error) {
return resp.Body(), nil
}
func ProcessContentWithChromedp(htmlContent string) (string, error) {
// processContentWithChromedp 使用复用的浏览器实例处理内容
func (b *Bilinovel) processContentWithChromedp(htmlContent string) (string, error) {
tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html")
if err != nil {
return "", fmt.Errorf("failed to create temp file: %w", err)
}
defer os.Remove(tempFile.Name())
_, err = tempFile.WriteString(htmlContent)
if err != nil {
return "", fmt.Errorf("failed to write temp file: %w", err)
@@ -420,28 +454,13 @@ func ProcessContentWithChromedp(htmlContent string) (string, error) {
tempFile.Close()
tempFilePath := tempFile.Name()
// 创建chromedp选项
opts := append(chromedp.DefaultExecAllocatorOptions[:],
chromedp.Flag("headless", true),
chromedp.Flag("disable-gpu", true),
chromedp.Flag("disable-dev-shm-usage", true),
chromedp.Flag("disable-extensions", true),
chromedp.Flag("no-sandbox", true),
)
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
defer cancel()
ctx, cancel := chromedp.NewContext(allocCtx)
defer cancel()
// 设置超时
ctx, cancel = context.WithTimeout(ctx, 30*time.Second)
// 为当前任务创建子上下文
ctx, cancel := context.WithTimeout(b.browserCtx, 30*time.Second)
defer cancel()
var processedHTML string
// 3. 执行chromedp任务并获取页面代码
// 执行处理任务
err = chromedp.Run(ctx,
network.Enable(),

View File

@@ -7,9 +7,10 @@ type ExtraFile struct {
}
type Downloader interface {
GetNovel(novelId int) (*Novel, error)
GetVolume(novelId int, volumeId int) (*Volume, error)
GetNovel(novelId int, skipChapter bool) (*Novel, error)
GetVolume(novelId int, volumeId int, skipChapter bool) (*Volume, error)
GetChapter(novelId int, volumeId int, chapterId int) (*Chapter, error)
GetStyleCSS() string
GetExtraFiles() []ExtraFile
Close() error
}

View File

@@ -10,11 +10,10 @@ import (
func TestBilinovel_GetNovel(t *testing.T) {
bilinovel, err := bilinovel.New()
bilinovel.SetTextOnly(true)
bilinovel.SetDebug(true)
if err != nil {
t.Fatalf("failed to create bilinovel: %v", err)
}
novel, err := bilinovel.GetNovel(4519)
novel, err := bilinovel.GetNovel(4519, false)
if err != nil {
t.Fatalf("failed to get novel: %v", err)
}
@@ -31,7 +30,7 @@ func TestBilinovel_GetVolume(t *testing.T) {
if err != nil {
t.Fatalf("failed to create bilinovel: %v", err)
}
volume, err := bilinovel.GetVolume(1410, 52748)
volume, err := bilinovel.GetVolume(1410, 52748, false)
if err != nil {
t.Fatalf("failed to get volume: %v", err)
}
@@ -47,7 +46,6 @@ func TestBilinovel_GetChapter(t *testing.T) {
if err != nil {
t.Fatalf("failed to create bilinovel: %v", err)
}
bilinovel.SetDebug(true)
chapter, err := bilinovel.GetChapter(1410, 52748, 52752)
if err != nil {
t.Fatalf("failed to get chapter: %v", err)