diff --git a/.vscode/launch.json b/.vscode/launch.json index 4d4ee26..405989e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -10,11 +10,7 @@ "args": [ "download", "-n", - "1410", - "-v", - "52748", - "-t", - "epub" + "3095", ] } ] diff --git a/cmd/download.go b/cmd/download.go index 9b8e9eb..75106c8 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -50,128 +50,88 @@ func runDownloadNovel() error { if err != nil { return fmt.Errorf("failed to create downloader: %v", err) } + // 确保在函数结束时关闭资源 + defer func() { + if closeErr := downloader.Close(); closeErr != nil { + log.Printf("Failed to close downloader: %v", closeErr) + } + }() + if downloadArgs.NovelId == 0 { return fmt.Errorf("novel id is required") } + if downloadArgs.VolumeId == 0 { - novel, err := downloadNovel(downloader) + // 下载整本小说 + novel, err := downloader.GetNovel(downloadArgs.NovelId, true) if err != nil { return fmt.Errorf("failed to get novel: %v", err) } - switch downloadArgs.outputType { - case "epub": - for _, volume := range novel.Volumes { - err = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles()) - if err != nil { - return fmt.Errorf("failed to pack volume: %v", err) - } - } - case "text": - for _, volume := range novel.Volumes { - err = text.PackVolumeToText(volume, downloadArgs.outputPath) - if err != nil { - return fmt.Errorf("failed to pack volume: %v", err) - } + for _, volume := range novel.Volumes { + err = downloadVolume(downloader, volume.Id) + if err != nil { + return fmt.Errorf("failed to download volume: %v", err) } } } else { // 下载单卷 - volume, err := downloadVolume(downloader) + err = downloadVolume(downloader, downloadArgs.VolumeId) if err != nil { - return fmt.Errorf("failed to get volume: %v", err) - } - switch downloadArgs.outputType { - case "epub": - err = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles()) - if err != nil { - return fmt.Errorf("failed to pack volume: %v", err) - } - case "text": - err = text.PackVolumeToText(volume, downloadArgs.outputPath) - if err != nil { - return fmt.Errorf("failed to pack volume: %v", err) - } + return fmt.Errorf("failed to download volume: %v", err) } } return nil } -func downloadNovel(downloader model.Downloader) (*model.Novel, error) { - jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("novel-%d.json", downloadArgs.NovelId)) +func downloadVolume(downloader model.Downloader, volumeId int) error { + jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volumeId)) err := os.MkdirAll(filepath.Dir(jsonPath), 0755) if err != nil { - return nil, fmt.Errorf("failed to create directory: %v", err) - } - _, err = os.Stat(jsonPath) - novel := &model.Novel{} - if err != nil { - if os.IsNotExist(err) { - novel, err = downloader.GetNovel(downloadArgs.NovelId) - if err != nil { - return nil, fmt.Errorf("failed to get novel: %v", err) - } - jsonFile, err := os.Create(jsonPath) - if err != nil { - return nil, fmt.Errorf("failed to create json file: %v", err) - } - defer jsonFile.Close() - err = json.NewEncoder(jsonFile).Encode(novel) - if err != nil { - return nil, fmt.Errorf("failed to encode json file: %v", err) - } - } else { - return nil, fmt.Errorf("failed to get novel: %v", err) - } - } else { - jsonFile, err := os.Open(jsonPath) - if err != nil { - return nil, fmt.Errorf("failed to open json file: %v", err) - } - defer jsonFile.Close() - err = json.NewDecoder(jsonFile).Decode(novel) - if err != nil { - return nil, fmt.Errorf("failed to decode json file: %v", err) - } - } - return novel, nil -} - -func downloadVolume(downloader model.Downloader) (*model.Volume, error) { - jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, downloadArgs.VolumeId)) - err := os.MkdirAll(filepath.Dir(jsonPath), 0755) - if err != nil { - return nil, fmt.Errorf("failed to create directory: %v", err) + return fmt.Errorf("failed to create directory: %v", err) } _, err = os.Stat(jsonPath) volume := &model.Volume{} if err != nil { if os.IsNotExist(err) { - volume, err = downloader.GetVolume(downloadArgs.NovelId, downloadArgs.VolumeId) + volume, err = downloader.GetVolume(downloadArgs.NovelId, volumeId, false) if err != nil { - return nil, fmt.Errorf("failed to get volume: %v", err) + return fmt.Errorf("failed to get volume: %v", err) } jsonFile, err := os.Create(jsonPath) if err != nil { - return nil, fmt.Errorf("failed to create json file: %v", err) + return fmt.Errorf("failed to create json file: %v", err) } err = json.NewEncoder(jsonFile).Encode(volume) if err != nil { - return nil, fmt.Errorf("failed to encode json file: %v", err) + return fmt.Errorf("failed to encode json file: %v", err) } } else { - return nil, fmt.Errorf("failed to get volume: %v", err) + return fmt.Errorf("failed to get volume: %v", err) } } else { jsonFile, err := os.Open(jsonPath) if err != nil { - return nil, fmt.Errorf("failed to open json file: %v", err) + return fmt.Errorf("failed to open json file: %v", err) } defer jsonFile.Close() err = json.NewDecoder(jsonFile).Decode(volume) if err != nil { - return nil, fmt.Errorf("failed to decode json file: %v", err) + return fmt.Errorf("failed to decode json file: %v", err) } } - return volume, nil + + switch downloadArgs.outputType { + case "epub": + err = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles()) + if err != nil { + return fmt.Errorf("failed to pack volume: %v", err) + } + case "text": + err = text.PackVolumeToText(volume, downloadArgs.outputPath) + if err != nil { + return fmt.Errorf("failed to pack volume: %v", err) + } + } + return nil } diff --git a/downloader/bilinovel/bilinovel.go b/downloader/bilinovel/bilinovel.go index 0c54060..e467471 100644 --- a/downloader/bilinovel/bilinovel.go +++ b/downloader/bilinovel/bilinovel.go @@ -16,7 +16,6 @@ import ( "regexp" "strconv" "strings" - "sync" "time" "github.com/PuerkitoBio/goquery" @@ -35,7 +34,12 @@ type Bilinovel struct { fontMapper *mapper.GlyphOutlineMapper textOnly bool restyClient *utils.RestyClient - debug bool + + // 浏览器实例复用 + allocCtx context.Context + allocCancel context.CancelFunc + browserCtx context.Context + browserCancel context.CancelFunc } func New() (*Bilinovel, error) { @@ -43,23 +47,73 @@ func New() (*Bilinovel, error) { if err != nil { return nil, fmt.Errorf("failed to create font mapper: %v", err) } - restyClient := utils.NewRestyClient(10) - return &Bilinovel{ + restyClient := utils.NewRestyClient(50) + + b := &Bilinovel{ fontMapper: fontMapper, textOnly: false, restyClient: restyClient, - }, nil + } + + // 初始化浏览器实例 + err = b.initBrowser() + if err != nil { + return nil, fmt.Errorf("failed to init browser: %v", err) + } + + return b, nil } func (b *Bilinovel) SetTextOnly(textOnly bool) { b.textOnly = textOnly } -func (b *Bilinovel) SetDebug(debug bool) { - b.debug = debug +func (b *Bilinovel) GetExtraFiles() []model.ExtraFile { + return nil } -func (b *Bilinovel) GetExtraFiles() []model.ExtraFile { +// initBrowser 初始化浏览器实例 +func (b *Bilinovel) initBrowser() error { + // 创建chromedp选项 + opts := append(chromedp.DefaultExecAllocatorOptions[:], + chromedp.Flag("headless", true), + chromedp.Flag("disable-gpu", true), + chromedp.Flag("disable-dev-shm-usage", true), + chromedp.Flag("disable-extensions", true), + chromedp.Flag("no-sandbox", true), + chromedp.Flag("disable-background-timer-throttling", true), + chromedp.Flag("disable-backgrounding-occluded-windows", true), + chromedp.Flag("disable-renderer-backgrounding", true), + ) + + var err error + b.allocCtx, b.allocCancel = chromedp.NewExecAllocator(context.Background(), opts...) + b.browserCtx, b.browserCancel = chromedp.NewContext(b.allocCtx) + + // 预热浏览器 - 导航到空白页 + err = chromedp.Run(b.browserCtx, chromedp.Navigate("about:blank")) + if err != nil { + b.closeBrowser() + return fmt.Errorf("failed to initialize browser: %v", err) + } + + log.Println("Browser initialized successfully") + return nil +} + +// closeBrowser 关闭浏览器实例 +func (b *Bilinovel) closeBrowser() { + if b.browserCancel != nil { + b.browserCancel() + } + if b.allocCancel != nil { + b.allocCancel() + } +} + +// Close 关闭下载器时清理资源 +func (b *Bilinovel) Close() error { + b.closeBrowser() return nil } @@ -70,10 +124,9 @@ func (b *Bilinovel) GetStyleCSS() string { return string(styleCSS) } -func (b *Bilinovel) GetNovel(novelId int) (*model.Novel, error) { - if b.debug { - log.Printf("Getting novel %v\n", novelId) - } +func (b *Bilinovel) GetNovel(novelId int, skipChapter bool) (*model.Novel, error) { + log.Printf("Getting novel %v\n", novelId) + novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId) resp, err := b.restyClient.R().Get(novelUrl) if err != nil { @@ -101,7 +154,7 @@ func (b *Bilinovel) GetNovel(novelId int) (*model.Novel, error) { novel.Authors = append(novel.Authors, strings.TrimSpace(s.Text())) }) - volumes, err := b.getAllVolumes(novelId) + volumes, err := b.getAllVolumes(novelId, skipChapter) if err != nil { return nil, fmt.Errorf("failed to get novel volumes: %v", err) } @@ -110,10 +163,9 @@ func (b *Bilinovel) GetNovel(novelId int) (*model.Novel, error) { return novel, nil } -func (b *Bilinovel) GetVolume(novelId int, volumeId int) (*model.Volume, error) { - if b.debug { - log.Printf("Getting volume %v of novel %v\n", volumeId, novelId) - } +func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapter bool) (*model.Volume, error) { + log.Printf("Getting volume %v of novel %v\n", volumeId, novelId) + novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId) resp, err := b.restyClient.R().Get(novelUrl) if err != nil { @@ -185,48 +237,33 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int) (*model.Volume, error) }) idRegexp := regexp.MustCompile(`/novel/(\d+)/(\d+).html`) - wg := sync.WaitGroup{} - errChan := make(chan error, len(volume.Chapters)) - for i := range volume.Chapters { - wg.Add(1) - go func(i int) { - defer wg.Done() + + if !skipChapter { + for i := range volume.Chapters { matches := idRegexp.FindStringSubmatch(volume.Chapters[i].Url) if len(matches) > 0 { chapterId, err := strconv.Atoi(matches[2]) if err != nil { - errChan <- fmt.Errorf("failed to convert chapter id: %v", err) - return + return nil, fmt.Errorf("failed to convert chapter id: %v", err) } chapter, err := b.GetChapter(novelId, volumeId, chapterId) if err != nil { - errChan <- fmt.Errorf("failed to get chapter: %v", err) - return + return nil, fmt.Errorf("failed to get chapter: %v", err) } chapter.Id = chapterId volume.Chapters[i] = chapter } else { - errChan <- fmt.Errorf("failed to get chapter id: %v", volume.Chapters[i].Url) - return + return nil, fmt.Errorf("failed to get chapter id: %v", volume.Chapters[i].Url) } - }(i) - } - wg.Wait() - close(errChan) - - // 检查是否有错误 - for err := range errChan { - if err != nil { - return nil, err } } + return volume, nil } -func (b *Bilinovel) getAllVolumes(novelId int) ([]*model.Volume, error) { - if b.debug { - log.Printf("Getting all volumes of novel %v\n", novelId) - } +func (b *Bilinovel) getAllVolumes(novelId int, skipChapter bool) ([]*model.Volume, error) { + log.Printf("Getting all volumes of novel %v\n", novelId) + catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId) resp, err := b.restyClient.R().Get(catelogUrl) if err != nil { @@ -258,7 +295,7 @@ func (b *Bilinovel) getAllVolumes(novelId int) ([]*model.Volume, error) { if err != nil { return nil, fmt.Errorf("failed to convert volume id: %v", err) } - volume, err := b.GetVolume(novelId, volumeId) + volume, err := b.GetVolume(novelId, volumeId, skipChapter) if err != nil { return nil, fmt.Errorf("failed to get volume info: %v", err) } @@ -270,9 +307,8 @@ func (b *Bilinovel) getAllVolumes(novelId int) ([]*model.Volume, error) { } func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) { - if b.debug { - log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId) - } + log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId) + page := 1 chapter := &model.Chapter{ Id: chapterId, @@ -294,9 +330,7 @@ func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model } func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, error) { - if b.debug { - log.Printf("Getting chapter %v by page %v\n", chapter.Id, page) - } + log.Printf("Getting chapter %v by page %v\n", chapter.Id, page) Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", page) @@ -320,7 +354,7 @@ func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, er html := resp.Body() // 解决乱序问题 - resortedHtml, err := ProcessContentWithChromedp(string(html)) + resortedHtml, err := b.processContentWithChromedp(string(html)) if err != nil { return false, fmt.Errorf("failed to process html: %w", err) } @@ -396,9 +430,7 @@ func (b *Bilinovel) getChapterByPage(chapter *model.Chapter, page int) (bool, er } func (b *Bilinovel) getImg(url string) ([]byte, error) { - if b.debug { - log.Printf("Getting img %v\n", url) - } + log.Printf("Getting img %v\n", url) resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url) if err != nil { return nil, err @@ -407,12 +439,14 @@ func (b *Bilinovel) getImg(url string) ([]byte, error) { return resp.Body(), nil } -func ProcessContentWithChromedp(htmlContent string) (string, error) { +// processContentWithChromedp 使用复用的浏览器实例处理内容 +func (b *Bilinovel) processContentWithChromedp(htmlContent string) (string, error) { tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html") if err != nil { return "", fmt.Errorf("failed to create temp file: %w", err) } defer os.Remove(tempFile.Name()) + _, err = tempFile.WriteString(htmlContent) if err != nil { return "", fmt.Errorf("failed to write temp file: %w", err) @@ -420,28 +454,13 @@ func ProcessContentWithChromedp(htmlContent string) (string, error) { tempFile.Close() tempFilePath := tempFile.Name() - // 创建chromedp选项 - opts := append(chromedp.DefaultExecAllocatorOptions[:], - chromedp.Flag("headless", true), - chromedp.Flag("disable-gpu", true), - chromedp.Flag("disable-dev-shm-usage", true), - chromedp.Flag("disable-extensions", true), - chromedp.Flag("no-sandbox", true), - ) - - allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) - defer cancel() - - ctx, cancel := chromedp.NewContext(allocCtx) - defer cancel() - - // 设置超时 - ctx, cancel = context.WithTimeout(ctx, 30*time.Second) + // 为当前任务创建子上下文 + ctx, cancel := context.WithTimeout(b.browserCtx, 30*time.Second) defer cancel() var processedHTML string - // 3. 执行chromedp任务并获取页面代码 + // 执行处理任务 err = chromedp.Run(ctx, network.Enable(), diff --git a/model/downloader.go b/model/downloader.go index 0df4f9b..a245f1b 100644 --- a/model/downloader.go +++ b/model/downloader.go @@ -7,9 +7,10 @@ type ExtraFile struct { } type Downloader interface { - GetNovel(novelId int) (*Novel, error) - GetVolume(novelId int, volumeId int) (*Volume, error) + GetNovel(novelId int, skipChapter bool) (*Novel, error) + GetVolume(novelId int, volumeId int, skipChapter bool) (*Volume, error) GetChapter(novelId int, volumeId int, chapterId int) (*Chapter, error) GetStyleCSS() string GetExtraFiles() []ExtraFile + Close() error } diff --git a/test/bilinovel_test.go b/test/bilinovel_test.go index 47f0cf0..d1231fc 100644 --- a/test/bilinovel_test.go +++ b/test/bilinovel_test.go @@ -10,11 +10,10 @@ import ( func TestBilinovel_GetNovel(t *testing.T) { bilinovel, err := bilinovel.New() bilinovel.SetTextOnly(true) - bilinovel.SetDebug(true) if err != nil { t.Fatalf("failed to create bilinovel: %v", err) } - novel, err := bilinovel.GetNovel(4519) + novel, err := bilinovel.GetNovel(4519, false) if err != nil { t.Fatalf("failed to get novel: %v", err) } @@ -31,7 +30,7 @@ func TestBilinovel_GetVolume(t *testing.T) { if err != nil { t.Fatalf("failed to create bilinovel: %v", err) } - volume, err := bilinovel.GetVolume(1410, 52748) + volume, err := bilinovel.GetVolume(1410, 52748, false) if err != nil { t.Fatalf("failed to get volume: %v", err) } @@ -47,7 +46,6 @@ func TestBilinovel_GetChapter(t *testing.T) { if err != nil { t.Fatalf("failed to create bilinovel: %v", err) } - bilinovel.SetDebug(true) chapter, err := bilinovel.GetChapter(1410, 52748, 52752) if err != nil { t.Fatalf("failed to get chapter: %v", err)