16 Commits
v0.0.8 ... main

Author SHA1 Message Date
b650030f26 README 2025-10-22 02:25:35 +11:00
b136556822 test(unscrambleParagraphs): try a crawling method without playwright 2025-10-22 02:17:11 +11:00
17c3859e9e feat(logging): Implement structured logging and debug mode
fix: Windows cannot download novels correctly
2025-10-17 01:36:23 +11:00
11fccdb05f ci(goreleaser): Install templ before generating templates
Adds a `go install` hook to `.goreleaser.yaml` to ensure the `templ` binary is
installed and up-to-date before `templ generate` is executed. This prevents
potential build failures in CI/CD environments where `templ` might not be
pre-installed or could be an outdated version, making the release process
more robust and self-contained.
2025-10-06 18:20:47 +11:00
af968cbc9a ci(workflow): Upgrade GitHub Actions in release workflow
Updated the major versions of several GitHub Actions used in the release workflow:
- actions/checkout from v4 to v5
- actions/setup-go from v4 to v5
- goreleaser/goreleaser-action from v5 to v6

This ensures we are using the latest features, bug fixes, and security updates provided by these actions.
2025-10-06 18:11:27 +11:00
08e6280c34 feat: Add NFPM packaging and defer Playwright installation
This commit introduces NFPM configuration in `.goreleaser.yaml` to
generate native packages for various Linux distributions (e.g., .deb,
.rpm, .apk). This provides a more streamlined installation experience
for Linux users.

The Playwright browser installation logic has been moved from `main.go`
to the `Run` function of the `download` command. This change ensures
that Playwright binaries are only downloaded and installed when the
`download` command is actually invoked, improving initial application
startup performance and reducing unnecessary overhead for other commands.

The Goreleaser configuration has also been updated to version 2 syntax
and the `arm` architecture has been removed from builds.
2025-10-06 18:07:54 +11:00
34179b4dc0 Create LICENSE 2025-10-06 18:03:04 +11:00
b0f8f31dcc feat: Add concurrency and headless options for downloads
This commit introduces new features for controlling the download process:

-   **Concurrency**: Users can now specify the number of concurrent volume downloads using the `--concurrency` flag. This significantly speeds up the download of entire novels.
-   **Headless Mode**: A `--headless` flag has been added to control whether the browser operates in headless mode (without a visible UI). This is useful for debugging or running in environments without a display.

**Changes include:**

-   Updated `download` command to accept `--concurrency` and `--headless` flags.
-   Refactored `bilinovel` downloader to support `BilinovelNewOption` for configuring headless mode and concurrency.
-   Implemented a page pool and concurrency control mechanism within the `bilinovel` downloader to manage concurrent browser page usage.
-   Added `DownloadNovel` and `DownloadVolume` methods to the `bilinovel` downloader, utilizing goroutines and wait groups for parallel processing.
-   Updated `.vscode/launch.json` with new configurations for testing novel and volume downloads with the new options.
2025-10-06 10:20:36 +11:00
6084386989 refactor(bilinovel): Migrate browser automation from Chromedp to Playwright
This commit replaces the `chromedp` library with `playwright-go` for browser automation within the Bilinovel downloader.

Changes include:
*   Updated `Bilinovel` struct to manage Playwright browser, context, and page instances.
*   Rewrote `initBrowser` and `Close` methods to use Playwright's API for browser lifecycle management.
*   Refactored `processContentWithChromedp` to `processContentWithPlaywright`, adapting the logic to use Playwright's page evaluation capabilities.
*   Removed unused `context` and `time` imports.
*   Added HTML cleanup in `getChapterByPage` to remove `class` attributes from images and `data-k` attributes from all elements, improving content consistency.
2025-10-06 07:58:31 +11:00
f1320cb978 Merge pull request #2 from sarymo/patch-3
fix: normalize path separators in wrapper.go
2025-09-03 13:02:07 +10:00
sarymo
434d5f54bd Update wrapper.go 2025-09-03 08:39:30 +08:00
b8cd053b00 refactor: improve network event handling and cleanup of hidden elements in Bilinovel processing 2025-08-24 20:51:09 +10:00
560cdfdec9 refactor: streamline download process and enhance browser handling in Bilinovel 2025-08-24 19:04:00 +10:00
ed5440f5fb Update dependencies and templates: bump templ to v0.3.943, update pflag to v1.0.7, and golang.org/x/net to v0.43.0; adjust XML declaration formatting in cover template 2025-08-24 17:11:14 +10:00
26f82dd9ea fix: trim whitespace from text before writing to chapter file 2025-08-24 16:57:51 +10:00
e9fbe5c5db refactor 2025-08-24 16:53:05 +10:00
29 changed files with 1647 additions and 704 deletions

28
.github/workflows/release.yml vendored Normal file
View File

@@ -0,0 +1,28 @@
name: release
on:
push:
tags:
- "v*"
permissions:
contents: write
jobs:
goreleaser:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v5
- name: Set up Go
uses: actions/setup-go@v5
- name: Run GoReleaser
uses: goreleaser/goreleaser-action@v6
with:
distribution: goreleaser
version: latest
args: release --clean
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -1,6 +1,8 @@
version: 2
project_name: bilinovel-downloader
before:
hooks:
- go install github.com/a-h/templ/cmd/templ@latest
- templ generate
builds:
- env:
@@ -12,16 +14,15 @@ builds:
goarch:
- amd64
- arm64
- arm
- "386"
ldflags:
- -s -w -X bilinovel-downloader/cmd.Version={{ .Version }}
flags:
- -trimpath
archives:
- format: tar.gz
- formats: ["tar.gz"]
format_overrides:
- format: zip
- formats: ["zip"]
goos: windows
wrap_in_directory: true
release:
@@ -29,3 +30,17 @@ release:
upx:
- enabled: true
compress: best
nfpms:
- id: bilinovel-downloader
homepage: https://github.com/bestnite/bilinovel-downloader
maintainer: Nite <admin@nite07.com>
license: "MIT"
formats:
- apk
- deb
- rpm
- termux.deb
- archlinux
provides:
- bilinovel-downloader

29
.vscode/launch.json vendored
View File

@@ -1,21 +1,32 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "volume",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}",
"args": ["download", "volume", "-n", "2025", "-v", "72693"]
},
{
"name": "novel",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}",
"args": ["download", "novel", "-n", "4325"]
"args": [
"download",
"-n",
"2727",
"--concurrency",
"5"
]
},
{
"name": "volume",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${workspaceFolder}",
"args": [
"download",
"-n=2388",
"-v=84522",
"--debug=true"
]
}
]
}

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Nite
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -8,13 +8,13 @@
1. 下载整本 `https://www.bilinovel.com/novel/2388.html`
```bash
bilinovel-downloader download novel -n 2388
bilinovel-downloader download -n 2388
```
2. 下载单卷 `https://www.bilinovel.com/novel/2388/vol_84522.html`
```bash
bilinovel-downloader download volume -n 2388 -v 84522
bilinovel-downloader download -n 2388 -v 84522
```
3. 对自动生成的 epub 格式不满意可以自行修改后使用命令打包
@@ -22,3 +22,8 @@
```bash
bilinovel-downloader pack -d <目录路径>
```
## 算法分析
目前程序使用 playwright 进行爬取来规避 bilinovel 的反爬(诱饵段落和段落重排)策略。
但是依然对 bilinovel 的算法进行了简单的分析,具体可以参考[代码](./test/no_playwright_method_test.go),这个代码目前是可行的,但如果 bilinovel 频繁更改初始化种子的计算方式或算法的实现,会让排序方法失效,这也是为什么目前程序使用 playwright。

View File

@@ -1,9 +1,19 @@
package cmd
import (
"bilinovel-downloader/downloader"
"bilinovel-downloader/downloader/bilinovel"
"bilinovel-downloader/epub"
"bilinovel-downloader/model"
"bilinovel-downloader/text"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"github.com/playwright-community/playwright-go"
"github.com/spf13/cobra"
)
@@ -11,74 +21,184 @@ var downloadCmd = &cobra.Command{
Use: "download",
Short: "Download a novel or volume",
Long: "Download a novel or volume",
Run: func(cmd *cobra.Command, args []string) {
slog.Info("Installing playwright")
err := playwright.Install(&playwright.RunOptions{
Browsers: []string{"chromium"},
Stdout: io.Discard,
})
if err != nil {
slog.Error("failed to install playwright")
return
}
var downloadNovelCmd = &cobra.Command{
Use: "novel",
Short: "Download a novel, default download all volumes",
Long: "Download a novel, default download all volumes",
RunE: runDownloadNovel,
err = runDownloadNovel()
if err != nil {
slog.Error("failed to download novel", slog.Any("error", err))
return
}
},
}
var downloadVolumeCmd = &cobra.Command{
Use: "volume",
Short: "Download a volume",
Long: "Download a volume",
RunE: runDownloadVolume,
}
type downloadNovelArgs struct {
NovelId int `validate:"required"`
outputPath string
}
type downloadVolumeArgs struct {
type downloadCmdArgs struct {
NovelId int `validate:"required"`
VolumeId int `validate:"required"`
outputPath string
outputType string
concurrency int
debug bool
}
var (
novelArgs downloadNovelArgs
volumeArgs downloadVolumeArgs
downloadArgs downloadCmdArgs
)
func init() {
downloadNovelCmd.Flags().IntVarP(&novelArgs.NovelId, "novel-id", "n", 0, "novel id")
downloadNovelCmd.Flags().StringVarP(&novelArgs.outputPath, "output-path", "o", "./novels", "output path")
downloadVolumeCmd.Flags().IntVarP(&volumeArgs.NovelId, "novel-id", "n", 0, "novel id")
downloadVolumeCmd.Flags().IntVarP(&volumeArgs.VolumeId, "volume-id", "v", 0, "volume id")
downloadVolumeCmd.Flags().StringVarP(&volumeArgs.outputPath, "output-path", "o", "./novels", "output path")
downloadCmd.AddCommand(downloadNovelCmd)
downloadCmd.AddCommand(downloadVolumeCmd)
downloadCmd.Flags().IntVarP(&downloadArgs.NovelId, "novel-id", "n", 0, "novel id")
downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id")
downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path")
downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text")
downloadCmd.Flags().BoolVar(&downloadArgs.debug, "debug", false, "debug mode")
downloadCmd.Flags().IntVar(&downloadArgs.concurrency, "concurrency", 3, "concurrency of downloading volumes")
RootCmd.AddCommand(downloadCmd)
}
func runDownloadNovel(cmd *cobra.Command, args []string) error {
if novelArgs.NovelId == 0 {
return fmt.Errorf("novel id is required")
}
err := bilinovel.DownloadNovel(novelArgs.NovelId, novelArgs.outputPath)
func runDownloadNovel() error {
downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{
Concurrency: downloadArgs.concurrency,
Debug: downloadArgs.debug,
})
if err != nil {
return fmt.Errorf("failed to download novel: %v", err)
return fmt.Errorf("failed to create downloader: %v", err)
}
return nil
// 确保在函数结束时关闭资源
defer func() {
if closeErr := downloader.Close(); closeErr != nil {
slog.Info("Failed to close downloader", slog.Any("error", closeErr))
}
}()
func runDownloadVolume(cmd *cobra.Command, args []string) error {
if volumeArgs.NovelId == 0 {
if downloadArgs.NovelId == 0 {
return fmt.Errorf("novel id is required")
}
if volumeArgs.VolumeId == 0 {
return fmt.Errorf("volume id is required")
if downloadArgs.VolumeId == 0 {
// 下载整本小说
err := downloadNovel(downloader, downloadArgs.NovelId)
if err != nil {
return fmt.Errorf("failed to get novel: %v", err)
}
err := bilinovel.DownloadVolume(volumeArgs.NovelId, volumeArgs.VolumeId, volumeArgs.outputPath)
} else {
// 下载单卷
err = downloadVolume(downloader, downloadArgs.VolumeId)
if err != nil {
return fmt.Errorf("failed to download volume: %v", err)
}
}
return nil
}
// downloadNovel downloads the volumes of the novel identified by novelId and
// packs each freshly downloaded volume in the format selected by
// downloadArgs.outputType ("epub" or "text").
//
// It works in two passes:
//  1. The novel is fetched with chapter content skipped, and every volume whose
//     volume-<novelId>-<volumeId>.json file already exists under
//     downloadArgs.outputPath is recorded as already downloaded.
//  2. The novel is fetched again with chapter content, passing the recorded
//     volume ids as skipVolumes. Each returned volume is written to its JSON
//     file and then packed.
//
// Any other value of downloadArgs.outputType leaves only the JSON file on disk.
// The first error encountered aborts the whole run and is returned wrapped.
func downloadNovel(downloader downloader.Downloader, novelId int) error {
	novelInfo, err := downloader.GetNovel(novelId, true, nil)
	if err != nil {
		return fmt.Errorf("failed to get novel info: %w", err)
	}

	var skipVolumes []int
	for _, volume := range novelInfo.Volumes {
		jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volume.Id))
		if err := os.MkdirAll(filepath.Dir(jsonPath), 0755); err != nil {
			return fmt.Errorf("failed to create directory: %w", err)
		}
		// An existing JSON file marks the volume as already downloaded.
		if _, err := os.Stat(jsonPath); err == nil {
			skipVolumes = append(skipVolumes, volume.Id)
		}
	}

	novel, err := downloader.GetNovel(novelId, false, skipVolumes)
	if err != nil {
		return fmt.Errorf("failed to download novel: %w", err)
	}

	for _, volume := range novel.Volumes {
		jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volume.Id))
		if err := os.MkdirAll(filepath.Dir(jsonPath), 0755); err != nil {
			return fmt.Errorf("failed to create directory: %w", err)
		}

		jsonFile, err := os.Create(jsonPath)
		if err != nil {
			return fmt.Errorf("failed to create json file: %w", err)
		}
		encodeErr := json.NewEncoder(jsonFile).Encode(volume)
		// Close inside the loop (not deferred) so handles do not pile up
		// across volumes, and so a failed flush is reported.
		closeErr := jsonFile.Close()
		if encodeErr != nil || closeErr != nil {
			// Best effort: a partial file would make the next run treat this
			// volume as already downloaded, so try to remove it.
			_ = os.Remove(jsonPath)
			if encodeErr != nil {
				return fmt.Errorf("failed to encode json file: %w", encodeErr)
			}
			return fmt.Errorf("failed to close json file: %w", closeErr)
		}

		var packErr error
		switch downloadArgs.outputType {
		case "epub":
			packErr = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles())
		case "text":
			packErr = text.PackVolumeToText(volume, downloadArgs.outputPath)
		}
		if packErr != nil {
			return fmt.Errorf("failed to pack volume: %w", packErr)
		}
	}
	return nil
}
// downloadVolume obtains a single volume of downloadArgs.NovelId and packs it
// in the format selected by downloadArgs.outputType ("epub" or "text").
//
// The volume is cached as volume-<novelId>-<volumeId>.json under
// downloadArgs.outputPath. If that file exists it is decoded and no download
// happens; if it does not exist the volume is fetched through downloader and
// the JSON file is written before packing. Any other stat failure is returned.
//
// Any other value of downloadArgs.outputType leaves only the JSON file on disk.
func downloadVolume(downloader downloader.Downloader, volumeId int) error {
	jsonPath := filepath.Join(downloadArgs.outputPath, fmt.Sprintf("volume-%d-%d.json", downloadArgs.NovelId, volumeId))
	if err := os.MkdirAll(filepath.Dir(jsonPath), 0755); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	volume := &model.Volume{}
	_, statErr := os.Stat(jsonPath)
	switch {
	case statErr == nil:
		// Cached: load the previously downloaded volume from disk.
		jsonFile, err := os.Open(jsonPath)
		if err != nil {
			return fmt.Errorf("failed to open json file: %w", err)
		}
		// Read-only handle: a Close error cannot lose data, so it is ignored.
		defer jsonFile.Close()
		if err := json.NewDecoder(jsonFile).Decode(volume); err != nil {
			return fmt.Errorf("failed to decode json file: %w", err)
		}

	case os.IsNotExist(statErr):
		// Not cached yet: download, then persist before packing.
		var err error
		volume, err = downloader.GetVolume(downloadArgs.NovelId, volumeId, false)
		if err != nil {
			return fmt.Errorf("failed to get volume: %w", err)
		}
		jsonFile, err := os.Create(jsonPath)
		if err != nil {
			return fmt.Errorf("failed to create json file: %w", err)
		}
		encodeErr := json.NewEncoder(jsonFile).Encode(volume)
		closeErr := jsonFile.Close()
		if encodeErr != nil || closeErr != nil {
			// Best effort: a partial file would be decoded as a valid cache
			// entry on the next run, so try to remove it.
			_ = os.Remove(jsonPath)
			if encodeErr != nil {
				return fmt.Errorf("failed to encode json file: %w", encodeErr)
			}
			return fmt.Errorf("failed to close json file: %w", closeErr)
		}

	default:
		return fmt.Errorf("failed to get volume: %w", statErr)
	}

	var packErr error
	switch downloadArgs.outputType {
	case "epub":
		packErr = epub.PackVolumeToEpub(volume, downloadArgs.outputPath, downloader.GetStyleCSS(), downloader.GetExtraFiles())
	case "text":
		packErr = text.PackVolumeToText(volume, downloadArgs.outputPath)
	}
	if packErr != nil {
		return fmt.Errorf("failed to pack volume: %w", packErr)
	}
	return nil
}

View File

@@ -1,7 +1,7 @@
package cmd
import (
"bilinovel-downloader/downloader/bilinovel"
"bilinovel-downloader/epub"
"fmt"
"github.com/spf13/cobra"
@@ -28,7 +28,7 @@ func init() {
}
func runPackage(cmd *cobra.Command, args []string) error {
err := bilinovel.CreateEpub(pArgs.DirPath)
err := epub.PackEpub(pArgs.DirPath)
if err != nil {
return fmt.Errorf("failed to create epub: %v", err)
}

View File

@@ -4,4 +4,6 @@ import (
"github.com/spf13/cobra"
)
var RootCmd = &cobra.Command{}
var RootCmd = &cobra.Command{
Use: "bilinovel-downloader",
}

View File

@@ -6,7 +6,7 @@ import (
"github.com/spf13/cobra"
)
const (
var (
Version = "dev"
)

Binary file not shown.

View File

@@ -2,32 +2,148 @@ package bilinovel
import (
"bilinovel-downloader/model"
"bilinovel-downloader/template"
"bilinovel-downloader/utils"
"bytes"
"context"
"crypto/sha256"
_ "embed"
"encoding/json"
"fmt"
"log"
"log/slog"
"net/http"
"os"
"path"
"path/filepath"
"regexp"
"slices"
"strconv"
"strings"
"time"
"sync"
"github.com/PuerkitoBio/goquery"
"github.com/google/uuid"
mapper "github.com/bestnite/font-mapper"
"github.com/playwright-community/playwright-go"
)
func GetNovel(novelId int) (*model.Novel, error) {
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
resp, err := utils.Request().Get(novelUrl)
//go:embed read.ttf
var readTTF []byte
//go:embed "MI LANTING.ttf"
var miLantingTTF []byte
// Bilinovel is the bilinovel.com downloader. It fetches pages over HTTP with
// restyClient and drives a Playwright browser to post-process chapter HTML.
// Create it with New and release the browser with Close.
type Bilinovel struct {
// fontMapper is built in New from the two embedded fonts (read.ttf and "MI LANTING.ttf").
fontMapper *mapper.GlyphOutlineMapper
// textOnly is false after New and is changed through SetTextOnly.
textOnly bool
// restyClient issues the HTTP GET requests for novel, catalog, volume and chapter pages.
restyClient *utils.RestyClient
// Browser instance, reused across requests.
browser playwright.Browser
browserContext playwright.BrowserContext
// pages holds open browser pages keyed by "<novelId>-<volumeId>".
// NOTE(review): the map is read and written from the goroutines started in
// getAllVolumes and no lock around it is visible here — confirm it is safe.
pages map[string]playwright.Page
// concurrency is the value of BilinovelNewOption.Concurrency passed to New.
concurrency int
// concurrentChan is a buffered channel of capacity concurrency; getAllVolumes
// sends to it before starting a volume goroutine and receives when it ends.
concurrentChan chan any
// logger writes text-formatted records to os.Stdout at the level chosen in New.
logger *slog.Logger
}
// BilinovelNewOption carries the settings accepted by New.
type BilinovelNewOption struct {
// Concurrency becomes the capacity of the channel that limits how many
// volumes are fetched at the same time.
Concurrency int
// Debug selects debug-level logging; it is also passed to initBrowser, which
// launches the browser non-headless with devtools when it is true.
Debug bool
}
// New builds a Bilinovel downloader and launches its browser.
//
// It creates the font mapper from the embedded fonts, a resty client, a text
// logger on os.Stdout (debug level when option.Debug is set, info otherwise)
// and then calls initBrowser. The caller must call Close on the returned
// value when done.
//
// option.Concurrency values below 1 are treated as 1: a zero-capacity
// semaphore channel would block forever on the first volume, and a negative
// capacity makes make panic.
func New(option BilinovelNewOption) (*Bilinovel, error) {
	fontMapper, err := mapper.NewGlyphOutlineMapper(readTTF, miLantingTTF)
	if err != nil {
		return nil, fmt.Errorf("failed to create font mapper: %w", err)
	}

	concurrency := max(option.Concurrency, 1)

	logLevel := slog.LevelInfo
	if option.Debug {
		logLevel = slog.LevelDebug
	}
	handlerOptions := &slog.HandlerOptions{Level: logLevel}

	b := &Bilinovel{
		fontMapper:     fontMapper,
		textOnly:       false,
		restyClient:    utils.NewRestyClient(50),
		pages:          make(map[string]playwright.Page),
		concurrency:    concurrency,
		concurrentChan: make(chan any, concurrency),
		logger:         slog.New(slog.NewTextHandler(os.Stdout, handlerOptions)),
	}

	// Initialize the browser instance.
	if err := b.initBrowser(option.Debug); err != nil {
		return nil, fmt.Errorf("failed to init browser: %w", err)
	}
	return b, nil
}
// SetTextOnly stores textOnly in the downloader's textOnly field.
func (b *Bilinovel) SetTextOnly(textOnly bool) {
b.textOnly = textOnly
}
// GetExtraFiles always returns nil: this downloader supplies no extra files.
func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
return nil
}
// initBrowser starts the Playwright driver, launches Chromium and creates the
// browser context stored on b.
//
// When debug is true the browser is launched with a visible window and
// devtools; otherwise it runs headless. b.browser and b.browserContext are
// only assigned when every step succeeded. On failure, whatever was already
// started is shut down again so that no driver or browser is left behind.
func (b *Bilinovel) initBrowser(debug bool) error {
	pw, err := playwright.Run()
	if err != nil {
		return fmt.Errorf("could not start playwright: %w", err)
	}

	browser, err := pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
		Headless: playwright.Bool(!debug),
		Devtools: playwright.Bool(debug),
	})
	if err != nil {
		// Best-effort cleanup; the launch error is the one worth reporting.
		_ = pw.Stop()
		return fmt.Errorf("could not launch browser: %w", err)
	}

	browserContext, err := browser.NewContext()
	if err != nil {
		// Best-effort cleanup; the context error is the one worth reporting.
		_ = browser.Close()
		_ = pw.Stop()
		return fmt.Errorf("could not create browser context: %w", err)
	}

	b.browser = browser
	b.browserContext = browserContext
	b.logger.Info("Browser initialized successfully")
	return nil
}
// Close releases the browser held by b.
//
// It is a no-op when no browser is set, so it may be called more than once.
// The browser and context fields are reset and the page map is emptied even
// when closing fails, because the handles are unusable afterwards. A failure
// to close the browser is returned to the caller instead of being logged and
// dropped.
func (b *Bilinovel) Close() error {
	if b.browser == nil {
		return nil
	}

	err := b.browser.Close()
	b.browser = nil
	b.browserContext = nil
	// Drop the page handles that belonged to the browser that was just closed.
	clear(b.pages)

	if err != nil {
		return fmt.Errorf("could not close browser: %w", err)
	}
	return nil
}
//go:embed style.css
var styleCSS []byte
// GetStyleCSS returns the contents of the embedded style.css as a string.
func (b *Bilinovel) GetStyleCSS() string {
return string(styleCSS)
}
func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) {
b.logger.Info("Getting novel", slog.Int("novelId", novelId))
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
resp, err := b.restyClient.R().Get(novelUrl)
if err != nil {
return nil, fmt.Errorf("failed to get novel info: %w", err)
}
if resp.StatusCode() != http.StatusOK {
return nil, fmt.Errorf("failed to get novel info: %v", resp.Status())
@@ -51,7 +167,7 @@ func GetNovel(novelId int) (*model.Novel, error) {
novel.Authors = append(novel.Authors, strings.TrimSpace(s.Text()))
})
volumes, err := getNovelVolumes(novelId)
volumes, err := b.getAllVolumes(novelId, skipChapterContent, skipVolumes)
if err != nil {
return nil, fmt.Errorf("failed to get novel volumes: %v", err)
}
@@ -60,11 +176,13 @@ func GetNovel(novelId int) (*model.Novel, error) {
return novel, nil
}
func GetVolume(novelId int, volumeId int) (*model.Volume, error) {
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) {
b.logger.Info("Getting volume of novel", slog.Int("volumeId", volumeId), slog.Int("novelId", novelId))
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := utils.Request().Get(novelUrl)
resp, err := b.restyClient.R().Get(novelUrl)
if err != nil {
return nil, fmt.Errorf("failed to get novel info: %v", err)
return nil, fmt.Errorf("failed to get novel info: %w", err)
}
if resp.StatusCode() != http.StatusOK {
return nil, fmt.Errorf("failed to get novel info: %v", resp.Status())
@@ -89,7 +207,7 @@ func GetVolume(novelId int, volumeId int) (*model.Volume, error) {
}
volumeUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/vol_%v.html", novelId, volumeId)
resp, err = utils.Request().Get(volumeUrl)
resp, err = b.restyClient.R().Get(volumeUrl)
if err != nil {
return nil, fmt.Errorf("failed to get novel info: %v", err)
}
@@ -109,9 +227,14 @@ func GetVolume(novelId int, volumeId int) (*model.Volume, error) {
volume.SeriesIdx = seriesIdx
volume.Title = strings.TrimSpace(doc.Find(".book-title").First().Text())
volume.Description = strings.TrimSpace(doc.Find(".book-summary>content").First().Text())
volume.Cover = doc.Find(".book-cover").First().AttrOr("src", "")
volume.Url = volumeUrl
volume.Chapters = make([]*model.Chapter, 0)
volume.CoverUrl = doc.Find(".book-cover").First().AttrOr("src", "")
cover, err := b.getImg(volume.CoverUrl)
if err != nil {
return nil, fmt.Errorf("failed to get cover: %v", err)
}
volume.Cover = cover
doc.Find(".authorname>a").Each(func(i int, s *goquery.Selection) {
volume.Authors = append(volume.Authors, strings.TrimSpace(s.Text()))
@@ -119,7 +242,6 @@ func GetVolume(novelId int, volumeId int) (*model.Volume, error) {
doc.Find(".illname>a").Each(func(i int, s *goquery.Selection) {
volume.Authors = append(volume.Authors, strings.TrimSpace(s.Text()))
})
doc.Find(".chapter-li.jsChapter").Each(func(i int, s *goquery.Selection) {
volume.Chapters = append(volume.Chapters, &model.Chapter{
Title: s.Find("a").Text(),
@@ -127,12 +249,36 @@ func GetVolume(novelId int, volumeId int) (*model.Volume, error) {
})
})
idRegexp := regexp.MustCompile(`/novel/(\d+)/(\d+).html`)
if !skipChapterContent {
for i := range volume.Chapters {
matches := idRegexp.FindStringSubmatch(volume.Chapters[i].Url)
if len(matches) > 0 {
chapterId, err := strconv.Atoi(matches[2])
if err != nil {
return nil, fmt.Errorf("failed to convert chapter id: %v", err)
}
chapter, err := b.GetChapter(novelId, volumeId, chapterId)
if err != nil {
return nil, fmt.Errorf("failed to get chapter: %v", err)
}
chapter.Id = chapterId
volume.Chapters[i] = chapter
} else {
return nil, fmt.Errorf("failed to get chapter id: %v", volume.Chapters[i].Url)
}
}
}
return volume, nil
}
func getNovelVolumes(novelId int) ([]*model.Volume, error) {
func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) {
b.logger.Info("Getting all volumes of novel", slog.Int("novelId", novelId))
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := utils.Request().Get(catelogUrl)
resp, err := b.restyClient.R().Get(catelogUrl)
if err != nil {
return nil, fmt.Errorf("failed to get catelog: %v", err)
}
@@ -156,224 +302,97 @@ func getNovelVolumes(novelId int) ([]*model.Volume, error) {
}
})
volumes := make([]*model.Volume, 0)
volumes := make([]*model.Volume, len(volumeIds))
var wg sync.WaitGroup
var mu sync.Mutex // 保护 volumes 写入的互斥锁
for i, volumeIdStr := range volumeIds {
wg.Add(1)
b.concurrentChan <- struct{}{} // 获取一个并发槽
go func(i int, volumeIdStr string) {
defer wg.Done()
defer func() { <-b.concurrentChan }() // 释放并发槽
volumeId, err := strconv.Atoi(volumeIdStr)
if err != nil {
return nil, fmt.Errorf("failed to convert volume id: %v", err)
b.logger.Error("failed to convert volume id", slog.String("volumeIdStr", volumeIdStr), slog.Any("error", err))
return
}
volume, err := GetVolume(novelId, volumeId)
if slices.Contains(skipVolumes, volumeId) {
return
}
volume, err := b.GetVolume(novelId, volumeId, skipChapterContent)
if err != nil {
return nil, fmt.Errorf("failed to get volume info: %v", err)
b.logger.Error("failed to get volume info", slog.Int("novelId", novelId), slog.Int("volumeId", volumeId), slog.Any("error", err))
return
}
volume.SeriesIdx = i
volumes = append(volumes, volume)
// 关闭浏览器标签页
pwPageKey := fmt.Sprintf("%v-%v", novelId, volumeId)
if pwPage, ok := b.pages[pwPageKey]; ok {
_ = pwPage.Close()
delete(b.pages, pwPageKey)
}
return volumes, nil
mu.Lock()
volumes[i] = volume
mu.Unlock()
}(i, volumeIdStr)
}
func DownloadNovel(novelId int, outputPath string) error {
log.Printf("Downloading Novel: %v", novelId)
wg.Wait()
novel, err := GetNovel(novelId)
if err != nil {
return fmt.Errorf("failed to get novel info: %v", err)
}
outputPath = filepath.Join(outputPath, utils.CleanDirName(novel.Title))
err = os.MkdirAll(outputPath, 0755)
if err != nil {
return fmt.Errorf("failed to create output directory: %v", err)
}
for _, volume := range novel.Volumes {
err := downloadVolume(volume, outputPath)
if err != nil {
return fmt.Errorf("failed to download volume: %v", err)
// 过滤掉获取失败的 nil volume
filteredVolumes := make([]*model.Volume, 0, len(volumes))
for _, vol := range volumes {
if vol != nil {
filteredVolumes = append(filteredVolumes, vol)
}
}
return nil
return filteredVolumes, nil
}
func DownloadVolume(novelId, volumeId int, outputPath string) error {
volume, err := GetVolume(novelId, volumeId)
if err != nil {
return fmt.Errorf("failed to get volume info: %v", err)
}
err = downloadVolume(volume, outputPath)
if err != nil {
return fmt.Errorf("failed to download volume: %v", err)
}
return nil
}
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
b.logger.Info("Getting chapter of novel", slog.Int("chapterId", chapterId), slog.Int("novelId", novelId))
func downloadVolume(volume *model.Volume, outputPath string) error {
log.Printf("Downloading Volume: %s", volume.Title)
outputPath = filepath.Join(outputPath, utils.CleanDirName(volume.Title))
err := os.MkdirAll(outputPath, 0755)
if err != nil {
return fmt.Errorf("failed to create output directory: %v", err)
pageNum := 1
chapter := &model.Chapter{
Id: chapterId,
NovelId: novelId,
VolumeId: volumeId,
Url: fmt.Sprintf("https://www.bilinovel.com/novel/%v/%v.html", novelId, chapterId),
}
_, err = os.Stat(filepath.Join(outputPath, "volume.json"))
if os.IsNotExist(err) {
for idx, chapter := range volume.Chapters {
err := DownloadChapter(idx, chapter, outputPath)
if err != nil {
return fmt.Errorf("failed to download chapter: %v", err)
}
}
} else {
jsonBytes, err := os.ReadFile(filepath.Join(outputPath, "volume.json"))
if err != nil {
return fmt.Errorf("failed to read volume: %v", err)
}
err = json.Unmarshal(jsonBytes, volume)
if err != nil {
return fmt.Errorf("failed to unmarshal volume: %v", err)
}
for idx, chapter := range volume.Chapters {
file, err := os.Create(filepath.Join(outputPath, fmt.Sprintf("OEBPS/Text/chapter-%03v.xhtml", idx+1)))
if err != nil {
return fmt.Errorf("failed to create chapter file: %v", err)
}
err = template.ContentXHTML(chapter).Render(context.Background(), file)
if err != nil {
return fmt.Errorf("failed to render text file: %v", err)
}
}
}
for i := range volume.Chapters {
volume.Chapters[i].ImageFullPaths = utils.Unique(volume.Chapters[i].ImageFullPaths)
volume.Chapters[i].ImageOEBPSPaths = utils.Unique(volume.Chapters[i].ImageOEBPSPaths)
}
jsonBytes, err := json.Marshal(volume)
if err != nil {
return fmt.Errorf("failed to marshal volume: %v", err)
}
err = os.WriteFile(filepath.Join(outputPath, "volume.json"), jsonBytes, 0644)
if err != nil {
return fmt.Errorf("failed to write volume: %v", err)
}
coverPath := filepath.Join(outputPath, "cover.jpeg")
err = os.MkdirAll(path.Dir(coverPath), 0755)
if err != nil {
return fmt.Errorf("failed to create cover directory: %v", err)
}
err = DownloadImg(volume.Cover, coverPath)
if err != nil {
return fmt.Errorf("failed to download cover: %v", err)
}
coverXHTMLPath := filepath.Join(outputPath, "OEBPS/Text/cover.xhtml")
err = os.MkdirAll(path.Dir(coverXHTMLPath), 0755)
if err != nil {
return fmt.Errorf("failed to create cover directory: %v", err)
}
file, err := os.Create(coverXHTMLPath)
if err != nil {
return fmt.Errorf("failed to create cover file: %v", err)
}
err = template.CoverXHTML(fmt.Sprintf(`../../cover%s`, strings.ReplaceAll(path.Ext(volume.Cover), "jpg", "jpeg"))).Render(context.Background(), file)
if err != nil {
return fmt.Errorf("failed to render cover: %v", err)
}
err = DownloadFont(filepath.Join(outputPath, "OEBPS/Fonts"))
if err != nil {
return fmt.Errorf("failed to download font: %v", err)
}
contentsXHTMLPath := filepath.Join(outputPath, "OEBPS/Text/contents.xhtml")
err = os.MkdirAll(path.Dir(contentsXHTMLPath), 0755)
if err != nil {
return fmt.Errorf("failed to create contents directory: %v", err)
}
file, err = os.Create(contentsXHTMLPath)
if err != nil {
return fmt.Errorf("failed to create contents file: %v", err)
}
contents := strings.Builder{}
contents.WriteString(`<nav epub:type="toc" id="toc">`)
contents.WriteString(`<ol>`)
for _, chapter := range volume.Chapters {
contents.WriteString(fmt.Sprintf(`<li><a href="%s">%s</a></li>`, strings.TrimPrefix(chapter.TextOEBPSPath, "Text/"), chapter.Title))
}
contents.WriteString(`</ol>`)
contents.WriteString(`</nav>`)
err = template.ContentXHTML(&model.Chapter{
Title: "目录",
Content: contents.String(),
}).Render(context.Background(), file)
if err != nil {
return fmt.Errorf("failed to render contents: %v", err)
}
err = CreateContainerXML(outputPath)
if err != nil {
return fmt.Errorf("failed to create container xml: %v", err)
}
u, err := uuid.NewV7()
if err != nil {
return fmt.Errorf("failed to generate uuid: %v", err)
}
err = CreateContentOPF(outputPath, u.String(), volume)
if err != nil {
return fmt.Errorf("failed to create content opf: %v", err)
}
err = CreateEpub(outputPath)
if err != nil {
return fmt.Errorf("failed to create epub: %v", err)
}
return nil
}
func DownloadChapter(chapterIdx int, chapter *model.Chapter, outputPath string) error {
chapter.TextFullPath = filepath.Join(outputPath, fmt.Sprintf("OEBPS/Text/chapter-%03v.xhtml", chapterIdx+1))
chapter.TextOEBPSPath = fmt.Sprintf("Text/chapter-%03v.xhtml", chapterIdx+1)
err := os.MkdirAll(path.Dir(chapter.TextFullPath), 0755)
if err != nil {
return fmt.Errorf("failed to create text directory: %v", err)
}
page := 1
for {
hasNext, err := downloadChapterByPage(page, chapterIdx, chapter, outputPath)
pwPageKey := fmt.Sprintf("%v-%v", novelId, volumeId)
if _, ok := b.pages[pwPageKey]; !ok {
pwPage, err := b.browserContext.NewPage()
if err != nil {
return fmt.Errorf("failed to download chapter: %v", err)
return nil, fmt.Errorf("failed to create browser page: %w", err)
}
b.pages[pwPageKey] = pwPage
}
hasNext, err := b.getChapterByPage(b.pages[pwPageKey], chapter, pageNum)
if err != nil {
return nil, fmt.Errorf("failed to download chapter: %w", err)
}
if !hasNext {
break
}
page++
time.Sleep(time.Second)
pageNum++
}
return chapter, nil
}
file, err := os.Create(chapter.TextFullPath)
if err != nil {
return fmt.Errorf("failed to create text file: %v", err)
}
var nextPageUrlRegexp = regexp.MustCompile(`url_next:\s?['"]([^'"]*?)['"]`)
var cleanNextPageUrlRegexp = regexp.MustCompile(`(_\d+)?\.html$`)
err = template.ContentXHTML(chapter).Render(context.Background(), file)
if err != nil {
return fmt.Errorf("failed to render text file: %v", err)
}
func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) {
b.logger.Info("Getting chapter by page", slog.Int("chapter", chapter.Id), slog.Int("page", pageNum))
return nil
}
func downloadChapterByPage(page, chapterIdx int, chapter *model.Chapter, outputPath string) (bool, error) {
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", page)
log.Printf("Downloading Chapter: %s", Url)
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum)
hasNext := false
headers := map[string]string{
@@ -381,38 +400,68 @@ func downloadChapterByPage(page, chapterIdx int, chapter *model.Chapter, outputP
"Accept-Language": "zh-CN,zh;q=0.9,en-GB;q=0.8,en;q=0.7,zh-TW;q=0.6",
"Cookie": "night=1;",
}
resp, err := utils.Request().SetHeaders(headers).Get(Url)
resp, err := b.restyClient.R().SetHeaders(headers).Get(Url)
if err != nil {
return hasNext, err
return false, fmt.Errorf("failed to get chapter: %w", err)
}
if resp.StatusCode() != http.StatusOK {
return hasNext, fmt.Errorf("failed to get chapter: %v", resp.Status())
return false, fmt.Errorf("failed to get chapter: %v", resp.Status())
}
if strings.Contains(resp.String(), `<a onclick="window.location.href = ReadParams.url_next;">下一頁</a>`) {
hasNext = true
}
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
html := resp.Body()
// 解决乱序问题
resortedHtml, err := b.processContentWithPlaywright(pwPage, string(html))
if err != nil {
fmt.Println(err)
return hasNext, err
return false, fmt.Errorf("failed to process html: %w", err)
}
doc, err := goquery.NewDocumentFromReader(strings.NewReader(resortedHtml))
if err != nil {
return false, fmt.Errorf("failed to parse html: %w", err)
}
imgSavePath := fmt.Sprintf("OEBPS/Images/chapter-%03v", chapterIdx+1)
// 判断章节是否有下一页
n := nextPageUrlRegexp.FindStringSubmatch(resortedHtml)
if len(n) != 2 {
return false, fmt.Errorf("failed to determine wether there is a next page")
}
s := cleanNextPageUrlRegexp.ReplaceAllString(n[1], "")
if strings.Contains(Url, s) {
hasNext = true
}
if pageNum == 1 {
chapter.Title = doc.Find("#atitle").Text()
}
content := doc.Find("#acontent").First()
content.Find(".cgo").Remove()
content.Find("center").Remove()
content.Find(".google-auto-placed").Remove()
if strings.Contains(resp.String(), `font-family: "read"`) {
content.Find("p").Last().AddClass("read-font")
if strings.Contains(resortedHtml, `font-family: "read"`) {
html, err := content.Find("p").Last().Html()
if err != nil {
return false, fmt.Errorf("failed to get html: %v", err)
}
builder := strings.Builder{}
for _, r := range html {
_, newRune, ok := b.fontMapper.MappingRune(r)
if ok {
builder.WriteRune(newRune)
}
}
content.Find("p").Last().SetHtml(builder.String())
}
if b.textOnly {
content.Find("img").Remove()
} else {
content.Find("img").Each(func(i int, s *goquery.Selection) {
if err != nil {
return
}
imgUrl := s.AttrOr("data-src", "")
if imgUrl == "" {
imgUrl = s.AttrOr("src", "")
@@ -421,211 +470,171 @@ func downloadChapterByPage(page, chapterIdx int, chapter *model.Chapter, outputP
}
}
fileName := filepath.Join(imgSavePath, fmt.Sprintf("%03v%s", len(chapter.ImageFullPaths)+1, path.Ext(imgUrl)))
err = DownloadImg(imgUrl, filepath.Join(outputPath, fileName))
if err == nil {
s.SetAttr("src", "../"+strings.TrimPrefix(fileName, "OEBPS/"))
imageHash := sha256.Sum256([]byte(imgUrl))
imageFilename := fmt.Sprintf("%x%s", string(imageHash[:]), path.Ext(imgUrl))
s.SetAttr("src", imageFilename)
s.SetAttr("alt", imgUrl)
s.RemoveAttr("class")
s.RemoveAttr("data-src")
chapter.ImageFullPaths = append(chapter.ImageFullPaths, filepath.Join(outputPath, fileName))
chapter.ImageOEBPSPaths = append(chapter.ImageOEBPSPaths, strings.TrimPrefix(fileName, "OEBPS/"))
}
})
img, err := b.getImg(imgUrl)
if err != nil {
return false, fmt.Errorf("failed to download img: %v", err)
return
}
if chapter.Content == nil {
chapter.Content = &model.ChaperContent{}
}
if chapter.Content.Images == nil {
chapter.Content.Images = make(map[string][]byte)
}
chapter.Content.Images[imageFilename] = img
})
}
html, err := content.Html()
doc.Find("*").Each(func(i int, s *goquery.Selection) {
if len(s.Nodes) > 0 && len(s.Nodes[0].Attr) > 0 {
// 遍历元素的所有属性
for _, attr := range s.Nodes[0].Attr {
// 3. 检查属性名是否以 "data-k" 开头,且属性值是否为空
if strings.HasPrefix(attr.Key, "data-k") {
// 4. 如果满足条件,就移除这个属性
s.RemoveAttr(attr.Key)
}
}
}
})
htmlStr, err := content.Html()
if err != nil {
return false, fmt.Errorf("failed to get html: %v", err)
}
chapter.Content += strings.TrimSpace(html)
if chapter.Content == nil {
chapter.Content = &model.ChaperContent{}
}
chapter.Content.Html += strings.TrimSpace(htmlStr)
return hasNext, nil
}
func DownloadImg(url string, fileName string) error {
_, err := os.Stat(fileName)
if !os.IsNotExist(err) {
return nil
func (b *Bilinovel) getImg(url string) ([]byte, error) {
b.logger.Info("Getting img", slog.String("url", url))
resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url)
if err != nil {
return nil, err
}
log.Printf("Downloading Image: %s", url)
dir := filepath.Dir(fileName)
err = os.MkdirAll(dir, 0755)
if err != nil {
return err
return resp.Body(), nil
}
resp, err := utils.Request().SetHeader("Referer", "https://www.bilinovel.com").Get(url)
if err != nil {
return err
}
// processContentWithPlaywright 使用复用的浏览器实例处理内容
func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) {
// 替换 window.location.replace防止页面跳转
htmlContent = strings.ReplaceAll(htmlContent, "window.location.replace", "console.log")
err = os.WriteFile(fileName, resp.Body(), 0644)
tempPath := filepath.Join(os.TempDir(), "bilinovel-downloader")
err := os.MkdirAll(tempPath, 0755)
if err != nil {
return err
return "", fmt.Errorf("failed to create temp dir: %w", err)
}
tempFile, err := os.CreateTemp(tempPath, "temp-*.html")
if err != nil {
return "", fmt.Errorf("failed to create temp file: %w", err)
}
defer os.Remove(tempFile.Name())
return nil
_, err = tempFile.WriteString(htmlContent)
if err != nil {
return "", fmt.Errorf("failed to write temp file: %w", err)
}
tempFile.Close()
tempFilePath := tempFile.Name()
func CreateContainerXML(dirPath string) error {
containerPath := filepath.Join(dirPath, "META-INF/container.xml")
err := os.MkdirAll(path.Dir(containerPath), 0755)
// // 屏蔽请求
// googleAdsDomains := []string{
// "adtrafficquality.google",
// "doubleclick.net",
// "googlesyndication.com",
// "googletagmanager.com",
// "hm.baidu.com",
// "cloudflareinsights.com",
// "fsdoa.js", // adblock 检测
// "https://www.linovelib.com/novel/", // 阻止从本地文件跳转到在线页面
// }
// err = page.Route("**/*", func(route playwright.Route) {
// for _, d := range googleAdsDomains {
// if strings.Contains(route.Request().URL(), d) {
// b.logger.Debug("blocking request", slog.String("url", route.Request().URL()))
// err := route.Abort("aborted")
// if err != nil {
// b.logger.Debug("failed to block request", route.Request().URL(), err)
// }
// return
// }
// }
// _ = route.Continue()
// })
// if err != nil {
// return "", fmt.Errorf("failed to intercept requests: %w", err)
// }
_, err = page.ExpectResponse(func(url string) bool {
return strings.Contains(url, "chapterlog.js")
}, func() error {
_, err = page.Goto("file://" + filepath.ToSlash(tempFilePath))
if err != nil {
return fmt.Errorf("failed to create container directory: %v", err)
}
file, err := os.Create(containerPath)
if err != nil {
return fmt.Errorf("failed to create container file: %v", err)
}
err = template.ContainerXML().Render(context.Background(), file)
if err != nil {
return fmt.Errorf("failed to render container: %v", err)
return fmt.Errorf("could not navigate to file: %w", err)
}
return nil
}
func CreateContentOPF(dirPath string, uuid string, volume *model.Volume) error {
creators := make([]model.DCCreator, 0)
for _, author := range volume.Authors {
creators = append(creators, model.DCCreator{
Value: author,
}, playwright.PageExpectResponseOptions{
Timeout: playwright.Float(10000),
})
}
dc := &model.DublinCoreMetadata{
Titles: []model.DCTitle{
{
Value: volume.Title,
},
},
Identifiers: []model.DCIdentifier{
{
Value: fmt.Sprintf("urn:uuid:%s", uuid),
ID: "book-id",
// Scheme: "UUID",
},
},
Languages: []model.DCLanguage{
{
Value: "zh-CN",
},
},
Descriptions: []model.DCDescription{
{
Value: volume.Description,
},
},
Creators: creators,
Metas: []model.DublinCoreMeta{
{
Name: "cover",
Content: "cover",
},
{
Property: "dcterms:modified",
Value: time.Now().UTC().Format("2006-01-02T15:04:05Z"),
},
{
Name: "calibre:series",
Content: volume.NovelTitle,
},
{
Name: "calibre:series_index",
Content: strconv.Itoa(volume.SeriesIdx),
},
},
}
manifest := &model.Manifest{
Items: make([]model.ManifestItem, 0),
}
manifest.Items = append(manifest.Items, model.ManifestItem{
ID: "cover.xhtml",
Link: "OEBPS/Text/cover.xhtml",
Media: "application/xhtml+xml",
})
manifest.Items = append(manifest.Items, model.ManifestItem{
ID: "contents.xhtml",
Link: "OEBPS/Text/contents.xhtml",
Media: "application/xhtml+xml",
Properties: "nav",
})
manifest.Items = append(manifest.Items, model.ManifestItem{
ID: "cover",
Link: fmt.Sprintf("cover%s", strings.ReplaceAll(path.Ext(volume.Cover), "jpg", "jpeg")),
Media: fmt.Sprintf("image/%s", strings.ReplaceAll(strings.TrimPrefix(path.Ext(volume.Cover), "."), "jpg", "jpeg")),
Properties: "cover-image",
})
manifest.Items = append(manifest.Items, model.ManifestItem{
ID: "read.ttf",
Link: "OEBPS/Fonts/read.ttf",
Media: "application/vnd.ms-opentype",
})
for _, chapter := range volume.Chapters {
manifest.Items = append(manifest.Items, model.ManifestItem{
ID: path.Base(chapter.TextOEBPSPath),
Link: "OEBPS/" + chapter.TextOEBPSPath,
Media: "application/xhtml+xml",
})
for _, image := range chapter.ImageOEBPSPaths {
item := model.ManifestItem{
ID: strings.Join(strings.Split(strings.ToLower(image), string(filepath.Separator)), "-"),
Link: "OEBPS/" + image,
}
item.Media = fmt.Sprintf("image/%s", strings.ReplaceAll(strings.TrimPrefix(path.Ext(volume.Cover), "."), "jpg", "jpeg"))
manifest.Items = append(manifest.Items, item)
}
}
manifest.Items = append(manifest.Items, model.ManifestItem{
ID: "style",
Link: "style.css",
Media: "text/css",
})
spine := &model.Spine{
Items: make([]model.SpineItem, 0),
}
for _, item := range manifest.Items {
if filepath.Ext(item.Link) == ".xhtml" {
spine.Items = append(spine.Items, model.SpineItem{
IDref: item.ID,
})
}
}
contentOPFPath := filepath.Join(dirPath, "content.opf")
err := os.MkdirAll(path.Dir(contentOPFPath), 0755)
if err != nil {
return fmt.Errorf("failed to create content directory: %v", err)
return "", fmt.Errorf("failed to wait for network request finish")
}
file, err := os.Create(contentOPFPath)
err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{
State: playwright.WaitForSelectorStateVisible,
Timeout: playwright.Float(10000),
})
if err != nil {
return fmt.Errorf("failed to create content file: %v", err)
return "", fmt.Errorf("could not wait for #acontent: %w", err)
}
err = template.ContentOPF("book-id", dc, manifest, spine, nil).Render(context.Background(), file)
// 遍历所有 #acontent 的子元素, 通过 window.getComputedStyle().display 检测是否是 none, 如果是 none 则从页面删除这个元素
result, err := page.Evaluate(`
(function() {
const acontent = document.getElementById('acontent');
if (!acontent) {
return 'acontent element not found';
}
let removedCount = 0;
const elements = acontent.querySelectorAll('*');
// 从后往前遍历,避免删除元素时影响索引
for (let i = elements.length - 1; i >= 0; i--) {
const element = elements[i];
const computedStyle = window.getComputedStyle(element);
if (computedStyle.display === 'none' || computedStyle.transform == 'matrix(0, 0, 0, 0, 0, 0)') {
element.remove();
removedCount++;
}
}
return 'Removed ' + removedCount + ' hidden elements';
})()
`)
if err != nil {
return fmt.Errorf("failed to render content: %v", err)
}
return nil
return "", fmt.Errorf("failed to remove hidden elements: %w", err)
}
//go:embed read.ttf
var readTTF []byte
b.logger.Debug("Hidden elements removal result", slog.Any("count", result))
func DownloadFont(outputPath string) error {
log.Printf("Writing Font: %s", outputPath)
fontPath := filepath.Join(outputPath, "read.ttf")
err := os.MkdirAll(path.Dir(fontPath), 0755)
processedHTML, err := page.Content()
if err != nil {
return fmt.Errorf("failed to create font directory: %v", err)
return "", fmt.Errorf("could not get page content: %w", err)
}
err = os.WriteFile(fontPath, readTTF, 0644)
if err != nil {
return fmt.Errorf("failed to write font: %v", err)
}
return nil
return processedHTML, nil
}

View File

@@ -1,122 +0,0 @@
package bilinovel
import (
"archive/zip"
"io"
"log"
"os"
"path/filepath"
)
// CreateEpub zips the directory at path into "<path>.epub".
//
// The archive starts with an uncompressed "mimetype" entry, followed by the
// deflated contents of the directory and finally the package stylesheet as
// "style.css". The returned error also covers failures while finalizing the
// archive: the zip central directory is only written when the writer is
// closed, so a close failure means the .epub on disk is unusable.
func CreateEpub(path string) (err error) {
	log.Printf("Creating epub for %s", path)
	savePath := path + ".epub"
	zipFile, err := os.Create(savePath)
	if err != nil {
		return err
	}
	// Deferred calls run last-in-first-out: the zip writer is finalized first,
	// then the underlying file is closed. The first error encountered wins.
	defer func() {
		if closeErr := zipFile.Close(); err == nil {
			err = closeErr
		}
	}()

	zipWriter := zip.NewWriter(zipFile)
	defer func() {
		if closeErr := zipWriter.Close(); err == nil {
			err = closeErr
		}
	}()

	// The mimetype entry is written first and stored without compression.
	err = addStringToZip(zipWriter, "mimetype", "application/epub+zip", zip.Store)
	if err != nil {
		return err
	}

	err = addDirContentToZip(zipWriter, path, zip.Deflate)
	if err != nil {
		return err
	}

	err = addStringToZip(zipWriter, "style.css", StyleCSS, zip.Deflate)
	if err != nil {
		return err
	}

	return nil
}
// func addFileToZip(zipWriter *zip.Writer, filename string, relPath string, method uint16) error {
// file, err := os.Open(filename)
// if err != nil {
// return err
// }
// defer file.Close()
// info, err := file.Stat()
// if err != nil {
// return err
// }
// header, err := zip.FileInfoHeader(info)
// if err != nil {
// return err
// }
// header.Name = relPath
// header.Method = method
// writer, err := zipWriter.CreateHeader(header)
// if err != nil {
// return err
// }
// _, err = io.Copy(writer, file)
// return err
// }
// addStringToZip appends a single archive entry named relPath whose payload is
// content, using the given zip compression method (e.g. zip.Store or
// zip.Deflate). It returns any error from creating the entry or writing it.
func addStringToZip(zipWriter *zip.Writer, relPath, content string, method uint16) error {
	entry, err := zipWriter.CreateHeader(&zip.FileHeader{
		Name:   relPath,
		Method: method,
	})
	if err != nil {
		return err
	}

	if _, err := entry.Write([]byte(content)); err != nil {
		return err
	}
	return nil
}
// addDirContentToZip walks dirPath and adds every regular file below it to
// zipWriter, compressed with method. Entry names are the files' paths relative
// to dirPath. Files named "volume.json" are left out of the archive, and
// directories themselves produce no entries.
func addDirContentToZip(zipWriter *zip.Writer, dirPath string, method uint16) error {
	return filepath.Walk(dirPath, func(filePath string, info os.FileInfo, err error) error {
		if filepath.Base(filePath) == "volume.json" {
			return nil
		}
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}

		relPath, err := filepath.Rel(dirPath, filePath)
		if err != nil {
			return err
		}
		// Zip entry names must use forward slashes; without this, archives
		// built on Windows contain backslash-separated names that readers
		// treat as flat file names instead of nested paths.
		relPath = filepath.ToSlash(relPath)

		file, err := os.Open(filePath)
		if err != nil {
			return err
		}
		// The defer is scoped to this callback, so each file is closed before
		// the walk moves on to the next one.
		defer file.Close()

		header, err := zip.FileInfoHeader(info)
		if err != nil {
			return err
		}
		header.Name = relPath
		header.Method = method

		writer, err := zipWriter.CreateHeader(header)
		if err != nil {
			return err
		}

		_, err = io.Copy(writer, file)
		return err
	})
}

View File

@@ -1,19 +1,3 @@
package bilinovel
const StyleCSS = `
@font-face {
font-family: "MI LANTING";
src: url(OEBPS/Fonts/read.ttf);
}
.read-font {
display: block;
font-family: "MI LANTING", serif;
font-size: 1.33333em;
text-indent: 2em;
margin: 0.8em 0;
}
body > div {
margin: 0 auto;
padding: 20px;
@@ -53,4 +37,3 @@ img {
margin-top: 1em;
margin-bottom: 1em;
}
`

12
downloader/downloader.go Normal file
View File

@@ -0,0 +1,12 @@
package downloader
import "bilinovel-downloader/model"
// Downloader is the contract a novel source must implement: it fetches
// novels, volumes and chapters as model values and supplies the stylesheet
// and extra files that belong with the fetched content.
type Downloader interface {
// GetNovel returns the novel identified by novelId.
// NOTE(review): skipChapterContent presumably skips downloading chapter
// bodies and skipVolumes presumably lists volumes to leave out — confirm
// against the implementations.
GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error)
// GetVolume returns one volume of a novel.
// NOTE(review): skipChapterContent presumably skips chapter bodies — confirm.
GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error)
// GetChapter returns a single chapter of the given novel and volume.
GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error)
// GetStyleCSS returns stylesheet text for this source.
GetStyleCSS() string
// GetExtraFiles returns additional files supplied by this source.
GetExtraFiles() []model.ExtraFile
// Close releases whatever the implementation holds and reports any error
// from doing so.
Close() error
}

365
epub/wrapper.go Normal file
View File

@@ -0,0 +1,365 @@
package epub
import (
"archive/zip"
"bilinovel-downloader/model"
"bilinovel-downloader/template"
"bilinovel-downloader/utils"
"context"
"fmt"
"io"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"time"
"github.com/google/uuid"
)
// PackVolumeToEpub lays a volume out as an EPUB directory tree under
// outputPath/<cleaned volume title> and then zips that tree into
// "<directory>.epub".
//
// Parameters:
//   - volume: metadata, cover bytes and chapters (HTML plus images) to pack.
//   - outputPath: parent directory; a staging directory named after the
//     volume title is (re)created inside it.
//   - styleCSS: stylesheet text written to style.css in the staging directory.
//   - extraFiles: additional files written relative to the staging directory.
//
// Any existing staging directory is removed first so stale files never end up
// in the archive. An error is returned if a chapter carries no content or if
// any file-system, rendering or packing step fails.
func PackVolumeToEpub(volume *model.Volume, outputPath string, styleCSS string, extraFiles []model.ExtraFile) error {
	outputPath = filepath.Join(outputPath, utils.CleanDirName(volume.Title))

	// Start from an empty staging directory. RemoveAll succeeds when the path
	// does not exist, so no separate existence check is needed.
	if err := os.RemoveAll(outputPath); err != nil {
		return fmt.Errorf("failed to remove output directory: %w", err)
	}
	if err := os.MkdirAll(outputPath, 0755); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}

	// Chapter text goes to OEBPS/Text/chapter-NNN.xhtml and chapter images to
	// OEBPS/Images/chapter-NNN/.
	for i, chapter := range volume.Chapters {
		if chapter == nil || chapter.Content == nil {
			return fmt.Errorf("chapter %d has no content", i)
		}

		text := chapter.Content.Html
		for imgName, imgData := range chapter.Content.Images {
			imgPath := filepath.Join(outputPath, fmt.Sprintf("OEBPS/Images/chapter-%03v/%s", i, imgName))
			if err := os.MkdirAll(filepath.Dir(imgPath), 0755); err != nil {
				return fmt.Errorf("failed to create image directory: %w", err)
			}
			if err := os.WriteFile(imgPath, imgData, 0644); err != nil {
				return fmt.Errorf("failed to write image: %w", err)
			}
			// The chapter HTML refers to images by bare file name; point those
			// references at the location inside the EPUB tree.
			text = strings.ReplaceAll(text, imgName, fmt.Sprintf("../Images/chapter-%03v/%s", i, imgName))
		}

		chapterPath := filepath.Join(outputPath, fmt.Sprintf("OEBPS/Text/chapter-%03v.xhtml", i))
		// renderToFile closes each chapter file right away instead of keeping
		// every handle open until the whole volume has been packed.
		if err := renderToFile(chapterPath, template.ContentXHTML(chapter.Title, text)); err != nil {
			return fmt.Errorf("failed to write chapter: %w", err)
		}
	}

	// Cover image.
	coverPath := filepath.Join(outputPath, fmt.Sprintf("cover%s", filepath.Ext(volume.CoverUrl)))
	if err := os.WriteFile(coverPath, volume.Cover, 0644); err != nil {
		return fmt.Errorf("failed to write cover: %w", err)
	}

	// Cover page: OEBPS/Text/cover.xhtml. The directory is created on demand,
	// so this also works for a volume without chapters.
	coverXHTMLPath := filepath.Join(outputPath, "OEBPS/Text/cover.xhtml")
	coverPage := template.CoverXHTML(fmt.Sprintf("../../%s", filepath.Base(coverPath)))
	if err := renderToFile(coverXHTMLPath, coverPage); err != nil {
		return fmt.Errorf("failed to write cover XHTML: %w", err)
	}

	// Table of contents: OEBPS/Text/contents.xhtml.
	// Titles are plain text placed into hand-built markup, so characters that
	// are special in XML must be escaped to keep the document well-formed.
	escapeText := strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	contents := strings.Builder{}
	contents.WriteString(`<nav epub:type="toc" id="toc">`)
	contents.WriteString(`<ol>`)
	for i, chapter := range volume.Chapters {
		contents.WriteString(fmt.Sprintf(`<li><a href="chapter-%03v.xhtml">%s</a></li>`, i, escapeText.Replace(chapter.Title)))
	}
	contents.WriteString(`</ol>`)
	contents.WriteString(`</nav>`)
	contentsXHTMLPath := filepath.Join(outputPath, "OEBPS/Text/contents.xhtml")
	if err := renderToFile(contentsXHTMLPath, template.ContentXHTML("目录", contents.String())); err != nil {
		return fmt.Errorf("failed to write contents XHTML: %w", err)
	}

	// META-INF/container.xml.
	containerPath := filepath.Join(outputPath, "META-INF/container.xml")
	if err := renderToFile(containerPath, template.ContainerXML()); err != nil {
		return fmt.Errorf("failed to write container: %w", err)
	}

	// content.opf (package document).
	u := uuid.New()
	if err := CreateContentOPF(outputPath, u.String(), volume, extraFiles); err != nil {
		return fmt.Errorf("failed to create content OPF: %w", err)
	}

	// Stylesheet.
	cssPath := filepath.Join(outputPath, "style.css")
	if err := os.WriteFile(cssPath, []byte(styleCSS), 0644); err != nil {
		return fmt.Errorf("failed to write CSS: %w", err)
	}

	// Extra files; their paths may point into directories that nothing else
	// has created yet.
	for _, extra := range extraFiles {
		extraFilePath := filepath.Join(outputPath, extra.Path)
		if err := os.MkdirAll(filepath.Dir(extraFilePath), 0755); err != nil {
			return fmt.Errorf("failed to create extra file directory: %w", err)
		}
		if err := os.WriteFile(extraFilePath, extra.Data, 0644); err != nil {
			return fmt.Errorf("failed to write extra file: %w", err)
		}
	}

	// Zip the staging directory into the .epub file.
	if err := PackEpub(outputPath); err != nil {
		return fmt.Errorf("failed to pack epub: %w", err)
	}

	return nil
}

// componentRenderer is the minimal rendering contract PackVolumeToEpub needs
// from a template component: write its markup to w.
type componentRenderer interface {
	Render(ctx context.Context, w io.Writer) error
}

// renderToFile creates filePath (and any missing parent directories), renders
// component into it and closes the file before returning.
func renderToFile(filePath string, component componentRenderer) error {
	if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}
	file, err := os.Create(filePath)
	if err != nil {
		return fmt.Errorf("failed to create file: %w", err)
	}
	if err := component.Render(context.Background(), file); err != nil {
		file.Close() // best effort; the render error is the one worth reporting
		return fmt.Errorf("failed to render: %w", err)
	}
	if err := file.Close(); err != nil {
		return fmt.Errorf("failed to close file: %w", err)
	}
	return nil
}
// CreateContentOPF writes the package document "content.opf" into outputPath.
//
// The document is assembled from three parts:
//   - Dublin Core metadata built from the volume (title, authors, description,
//     the given uuid as "urn:uuid:<uuid>", language, modification time and
//     calibre series fields);
//   - a manifest listing the cover page, the contents page, the cover image,
//     every chapter page and chapter image, style.css and the manifest items
//     of extraFiles;
//   - a spine referencing every manifest item whose link ends in ".xhtml", in
//     manifest order.
//
// It returns an error if the file cannot be created, rendered or closed.
func CreateContentOPF(outputPath string, uuid string, volume *model.Volume, extraFiles []model.ExtraFile) error {
	creators := make([]model.DCCreator, 0, len(volume.Authors))
	for _, author := range volume.Authors {
		creators = append(creators, model.DCCreator{
			Value: author,
		})
	}
	dc := &model.DublinCoreMetadata{
		Titles: []model.DCTitle{
			{
				Value: volume.Title,
			},
		},
		Identifiers: []model.DCIdentifier{
			{
				Value: fmt.Sprintf("urn:uuid:%s", uuid),
				ID:    "book-id",
				// Scheme: "UUID",
			},
		},
		Languages: []model.DCLanguage{
			{
				Value: "zh-CN",
			},
		},
		Descriptions: []model.DCDescription{
			{
				Value: volume.Description,
			},
		},
		Creators: creators,
		Metas: []model.DublinCoreMeta{
			{
				Name:    "cover",
				Content: "cover",
			},
			{
				Property: "dcterms:modified",
				Value:    time.Now().UTC().Format("2006-01-02T15:04:05Z"),
			},
			{
				Name:    "calibre:series",
				Content: volume.NovelTitle,
			},
			{
				Name:    "calibre:series_index",
				Content: strconv.Itoa(volume.SeriesIdx),
			},
		},
	}

	manifest := &model.Manifest{
		Items: make([]model.ManifestItem, 0),
	}
	manifest.Items = append(manifest.Items, model.ManifestItem{
		ID:    "cover.xhtml",
		Link:  "OEBPS/Text/cover.xhtml",
		Media: "application/xhtml+xml",
	})
	manifest.Items = append(manifest.Items, model.ManifestItem{
		ID:         "contents.xhtml",
		Link:       "OEBPS/Text/contents.xhtml",
		Media:      "application/xhtml+xml",
		Properties: "nav",
	})
	manifest.Items = append(manifest.Items, model.ManifestItem{
		ID:         "cover",
		Link:       fmt.Sprintf("cover%s", filepath.Ext(volume.CoverUrl)),
		Media:      fmt.Sprintf("image/%s", strings.ReplaceAll(strings.TrimPrefix(filepath.Ext(volume.CoverUrl), "."), "jpg", "jpeg")),
		Properties: "cover-image",
	})
	for i, chapter := range volume.Chapters {
		manifest.Items = append(manifest.Items, model.ManifestItem{
			ID:    fmt.Sprintf("chapter-%03v.xhtml", i),
			Link:  fmt.Sprintf("OEBPS/Text/chapter-%03v.xhtml", i),
			Media: "application/xhtml+xml",
		})
		for filename := range chapter.Content.Images {
			item := model.ManifestItem{
				ID:   fmt.Sprintf("chapter-%03v-%s", i, filepath.Base(filename)),
				Link: fmt.Sprintf("OEBPS/Images/chapter-%03v/%s", i, filepath.Base(filename)),
				// The media type is derived from the file extension, with
				// "jpg" spelled out as "jpeg".
				Media: fmt.Sprintf("image/%s", strings.ReplaceAll(strings.TrimPrefix(filepath.Ext(filename), "."), "jpg", "jpeg")),
			}
			manifest.Items = append(manifest.Items, item)
		}
	}
	manifest.Items = append(manifest.Items, model.ManifestItem{
		ID:    "style",
		Link:  "style.css",
		Media: "text/css",
	})

	// ExtraFiles
	for _, file := range extraFiles {
		manifest.Items = append(manifest.Items, file.ManifestItem)
	}

	// Every XHTML document in the manifest becomes part of the reading order.
	spine := &model.Spine{
		Items: make([]model.SpineItem, 0),
	}
	for _, item := range manifest.Items {
		if filepath.Ext(item.Link) == ".xhtml" {
			spine.Items = append(spine.Items, model.SpineItem{
				IDref: item.ID,
			})
		}
	}

	contentOPFPath := filepath.Join(outputPath, "content.opf")
	err := os.MkdirAll(path.Dir(contentOPFPath), 0755)
	if err != nil {
		return fmt.Errorf("failed to create content directory: %v", err)
	}
	file, err := os.Create(contentOPFPath)
	if err != nil {
		return fmt.Errorf("failed to create content file: %v", err)
	}
	err = template.ContentOPF("book-id", dc, manifest, spine, nil).Render(context.Background(), file)
	if err != nil {
		file.Close() // best effort; the render error is the one worth reporting
		return fmt.Errorf("failed to render content: %v", err)
	}
	// Close explicitly so the handle is not leaked and a failed close is not
	// mistaken for success.
	if err := file.Close(); err != nil {
		return fmt.Errorf("failed to close content file: %v", err)
	}
	return nil
}
// PackEpub zips the directory dirPath into a sibling file named
// "<dirPath>.epub" (a trailing path separator on dirPath is ignored).
//
// The archive starts with an uncompressed "mimetype" entry, followed by the
// deflated contents of the directory. The returned error also covers failures
// while finalizing the archive: the zip central directory is only written
// when the writer is closed, so a close failure means the .epub on disk is
// unusable.
func PackEpub(dirPath string) (err error) {
	savePath := strings.TrimSuffix(dirPath, string(filepath.Separator)) + ".epub"
	zipFile, err := os.Create(savePath)
	if err != nil {
		return err
	}
	// Deferred calls run last-in-first-out: the zip writer is finalized first,
	// then the underlying file is closed. The first error encountered wins.
	defer func() {
		if closeErr := zipFile.Close(); err == nil {
			err = closeErr
		}
	}()

	zipWriter := zip.NewWriter(zipFile)
	defer func() {
		if closeErr := zipWriter.Close(); err == nil {
			err = closeErr
		}
	}()

	// The mimetype entry is written first and stored without compression.
	err = addStringToZip(zipWriter, "mimetype", "application/epub+zip", zip.Store)
	if err != nil {
		return err
	}

	err = addDirContentToZip(zipWriter, dirPath, zip.Deflate)
	if err != nil {
		return err
	}

	return nil
}
// addStringToZip stores content in the archive as an entry called relPath,
// compressed with the requested zip method. Errors from creating the entry or
// from writing its bytes are returned unchanged.
func addStringToZip(zipWriter *zip.Writer, relPath, content string, method uint16) error {
	entryHeader := zip.FileHeader{Name: relPath, Method: method}

	entry, createErr := zipWriter.CreateHeader(&entryHeader)
	if createErr != nil {
		return createErr
	}

	_, writeErr := entry.Write([]byte(content))
	return writeErr
}
// addDirContentToZip copies every regular file found under dirPath into
// zipWriter using the given compression method. Entries are named by their
// slash-separated path relative to dirPath; anything named "volume.json" is
// left out and directories produce no entries of their own.
func addDirContentToZip(zipWriter *zip.Writer, dirPath string, method uint16) error {
	visit := func(filePath string, info os.FileInfo, walkErr error) error {
		// The case order matters: the volume.json check comes before the walk
		// error check, exactly as the walk reports them.
		switch {
		case filepath.Base(filePath) == "volume.json":
			return nil
		case walkErr != nil:
			return walkErr
		case info.IsDir():
			return nil
		}

		rel, relErr := filepath.Rel(dirPath, filePath)
		if relErr != nil {
			return relErr
		}

		src, openErr := os.Open(filePath)
		if openErr != nil {
			return openErr
		}
		defer src.Close()

		header, headerErr := zip.FileInfoHeader(info)
		if headerErr != nil {
			return headerErr
		}
		// Archive entry names always use forward slashes.
		header.Name = filepath.ToSlash(rel)
		header.Method = method

		dst, createErr := zipWriter.CreateHeader(header)
		if createErr != nil {
			return createErr
		}

		_, copyErr := io.Copy(dst, src)
		return copyErr
	}

	return filepath.Walk(dirPath, visit)
}

13
go.mod
View File

@@ -4,15 +4,22 @@ go 1.24.2
require (
github.com/PuerkitoBio/goquery v1.10.3
github.com/a-h/templ v0.3.906
github.com/a-h/templ v0.3.943
github.com/bestnite/font-mapper v0.0.0-20250823155658-56c76d820267
github.com/go-resty/resty/v2 v2.16.5
github.com/google/uuid v1.6.0
github.com/playwright-community/playwright-go v0.5200.1
github.com/spf13/cobra v1.9.1
)
require (
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/deckarep/golang-set/v2 v2.8.0 // indirect
github.com/go-jose/go-jose/v3 v3.0.4 // indirect
github.com/go-stack/stack v1.8.1 // indirect
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/spf13/pflag v1.0.6 // indirect
golang.org/x/net v0.39.0 // indirect
github.com/spf13/pflag v1.0.7 // indirect
golang.org/x/image v0.30.0 // indirect
golang.org/x/net v0.43.0 // indirect
)

41
go.sum
View File

@@ -1,25 +1,48 @@
github.com/PuerkitoBio/goquery v1.10.3 h1:pFYcNSqHxBD06Fpj/KsbStFRsgRATgnf3LeXiUkhzPo=
github.com/PuerkitoBio/goquery v1.10.3/go.mod h1:tMUX0zDMHXYlAQk6p35XxQMqMweEKB7iK7iLNd4RH4Y=
github.com/a-h/templ v0.3.857 h1:6EqcJuGZW4OL+2iZ3MD+NnIcG7nGkaQeF2Zq5kf9ZGg=
github.com/a-h/templ v0.3.857/go.mod h1:qhrhAkRFubE7khxLZHsBFHfX+gWwVNKbzKeF9GlPV4M=
github.com/a-h/templ v0.3.906 h1:ZUThc8Q9n04UATaCwaG60pB1AqbulLmYEAMnWV63svg=
github.com/a-h/templ v0.3.906/go.mod h1:FFAu4dI//ESmEN7PQkJ7E7QfnSEMdcnu7QrAY8Dn334=
github.com/a-h/templ v0.3.943 h1:o+mT/4yqhZ33F3ootBiHwaY4HM5EVaOJfIshvd5UNTY=
github.com/a-h/templ v0.3.943/go.mod h1:oCZcnKRf5jjsGpf2yELzQfodLphd2mwecwG4Crk5HBo=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/bestnite/font-mapper v0.0.0-20250823155658-56c76d820267 h1:nmUTJV2u/0XmVjQ++VIy/Hu+MtxdpQvOevvcSZtUATA=
github.com/bestnite/font-mapper v0.0.0-20250823155658-56c76d820267/go.mod h1:cfB1e9YhoI/QWrXPp3h6QVAKU6iCI2ifbjRPHP3xf/0=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set/v2 v2.8.0 h1:swm0rlPCmdWn9mESxKOjWk8hXSqoxOp+ZlfuyaAdFlQ=
github.com/deckarep/golang-set/v2 v2.8.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4=
github.com/go-jose/go-jose/v3 v3.0.4 h1:Wp5HA7bLQcKnf6YYao/4kpRpVMp/yf6+pJKV8WFSaNY=
github.com/go-jose/go-jose/v3 v3.0.4/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
github.com/go-resty/resty/v2 v2.16.5 h1:hBKqmWrr7uRc3euHVqmh1HTHcKn99Smr7o5spptdhTM=
github.com/go-resty/resty/v2 v2.16.5/go.mod h1:hkJtXbA2iKHzJheXYvQ8snQES5ZLGKMwQ07xAwp/fiA=
github.com/go-stack/stack v1.8.1 h1:ntEHSVwIt7PNXNpgPmVfMrNhLtgjlmnZha2kOpuRiDw=
github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/mitchellh/go-ps v1.0.0 h1:i6ampVEEF4wQFF+bkYfwYgY+F/uYJDktmvLPf7qIgjc=
github.com/mitchellh/go-ps v1.0.0/go.mod h1:J4lOc8z8yJs6vUwklHw2XEIiT4z4C40KtWVN3nvg8Pg=
github.com/playwright-community/playwright-go v0.5200.1 h1:Sm2oOuhqt0M5Y4kUi/Qh9w4cyyi3ZIWTBeGKImc2UVo=
github.com/playwright-community/playwright-go v0.5200.1/go.mod h1:UnnyQZaqUOO5ywAZu60+N4EiWReUqX1MQBBA3Oofvf8=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.9.1 h1:CXSaggrXdbHK9CF+8ywj8Amf7PBRmPCOJugH954Nnlo=
github.com/spf13/cobra v1.9.1/go.mod h1:nDyEzZ8ogv936Cinf6g1RU9MRY64Ir93oCnqb9wxYW0=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.7 h1:vN6T9TfwStFPFM5XzjsvmzZkLuaLX+HS+0SeFLRgU6M=
github.com/spf13/pflag v1.0.7/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
@@ -27,6 +50,8 @@ golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliY
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/image v0.30.0 h1:jD5RhkmVAnjqaCUXfbGBrn3lpxbknfN9w2UhHHU+5B4=
golang.org/x/image v0.30.0/go.mod h1:SAEUTxCCMWSrJcCy/4HwavEsfZZJlYxeHLc6tTiAe/c=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
@@ -41,8 +66,8 @@ golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/net v0.39.0 h1:ZCu7HMWDxpXpaiKdhzIfaltL9Lp31x/3fCP11bc6/fY=
golang.org/x/net v0.39.0/go.mod h1:X7NRbYVEA+ewNkCNyJ513WmMdQ3BineSwVtN2zD/d+E=
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -89,4 +114,6 @@ golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,6 +1,14 @@
package model
import "encoding/xml"
import (
"encoding/xml"
)
// ExtraFile bundles the raw bytes of a file with its path and a manifest entry.
// NOTE(review): presumably an additional resource packed into the generated
// EPUB alongside the chapters — confirm against the callers.
type ExtraFile struct {
// Data holds the raw file contents.
Data []byte
// Path is where the file is stored; presumably relative to the package root — TODO confirm.
Path string
// ManifestItem is the manifest entry associated with this file.
ManifestItem ManifestItem
}
type DublinCoreMetadata struct {
XMLName xml.Name `xml:"metadata"`

View File

@@ -1,13 +1,17 @@
package model
// ChaperContent carries the downloaded body of a chapter: its HTML markup
// together with the bytes of the images that belong to it.
// NOTE(review): the name looks like a typo of "ChapterContent"; renaming it
// would affect every user of the type, so it is only flagged here.
type ChaperContent struct {
// Html is the chapter markup.
Html string
// Images maps a string key to raw image bytes; presumably keyed by image
// path or file name — TODO confirm against the downloader.
Images map[string][]byte
}
type Chapter struct {
Id int
NovelId int
VolumeId int
Title string
Url string
Content string
ImageOEBPSPaths []string
ImageFullPaths []string
TextOEBPSPath string
TextFullPath string
Content *ChaperContent
}
type Volume struct {
@@ -15,7 +19,8 @@ type Volume struct {
SeriesIdx int
Title string
Url string
Cover string
CoverUrl string
Cover []byte
Description string
Authors []string
Chapters []*Chapter

View File

@@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.906
// templ: version: v0.3.943
package template
//lint:file-ignore SA4006 This context is only used if a nested component is present.

View File

@@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.906
// templ: version: v0.3.943
package template
//lint:file-ignore SA4006 This context is only used if a nested component is present.

View File

@@ -1,21 +1,19 @@
package template
import "bilinovel-downloader/model"
templ ContentXHTML(content *model.Chapter) {
templ ContentXHTML(title, content string) {
@templ.Raw(`<?xml version='1.0' encoding='utf-8'?>`)
// @templ.Raw(`<!DOCTYPE html>`)
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh-CN">
<head>
<title>{ content.Title }</title>
<title>{ title }</title>
@templ.Raw(`<link href="../../style.css" rel="stylesheet" type="text/css"/>`)
</head>
<body>
<div class="chapter">
<h1>{ content.Title }</h1>
<h1>{ title }</h1>
@templ.Raw(`<hr/>`)
<div class="content">
@templ.Raw(content.Content)
@templ.Raw(content)
</div>
</div>
</body>

View File

@@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.906
// templ: version: v0.3.943
package template
//lint:file-ignore SA4006 This context is only used if a nested component is present.
@@ -8,9 +8,7 @@ package template
import "github.com/a-h/templ"
import templruntime "github.com/a-h/templ/runtime"
import "bilinovel-downloader/model"
func ContentXHTML(content *model.Chapter) templ.Component {
func ContentXHTML(title, content string) templ.Component {
return templruntime.GeneratedTemplate(func(templ_7745c5c3_Input templruntime.GeneratedComponentInput) (templ_7745c5c3_Err error) {
templ_7745c5c3_W, ctx := templ_7745c5c3_Input.Writer, templ_7745c5c3_Input.Context
if templ_7745c5c3_CtxErr := ctx.Err(); templ_7745c5c3_CtxErr != nil {
@@ -40,9 +38,9 @@ func ContentXHTML(content *model.Chapter) templ.Component {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var2 string
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(content.Title)
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(title)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `template/content.xhtml.templ`, Line: 10, Col: 25}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `template/content.xhtml.templ`, Line: 8, Col: 17}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
if templ_7745c5c3_Err != nil {
@@ -61,9 +59,9 @@ func ContentXHTML(content *model.Chapter) templ.Component {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var3 string
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(content.Title)
templ_7745c5c3_Var3, templ_7745c5c3_Err = templ.JoinStringErrs(title)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `template/content.xhtml.templ`, Line: 15, Col: 23}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `template/content.xhtml.templ`, Line: 13, Col: 15}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var3))
if templ_7745c5c3_Err != nil {
@@ -81,7 +79,7 @@ func ContentXHTML(content *model.Chapter) templ.Component {
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templ.Raw(content.Content).Render(ctx, templ_7745c5c3_Buffer)
templ_7745c5c3_Err = templ.Raw(content).Render(ctx, templ_7745c5c3_Buffer)
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}

View File

@@ -1,9 +1,9 @@
package template
templ CoverXHTML(coverPath string) {
@templ.Raw(`
<?xml version='1.0' encoding='utf-8'?>`)
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh-CN">
@templ.Raw(`<?xml version='1.0' encoding='utf-8'?>`)
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"
xml:lang="zh-CN">
<head>
<title>Cover</title>
</head>

View File

@@ -1,6 +1,6 @@
// Code generated by templ - DO NOT EDIT.
// templ: version: v0.3.906
// templ: version: v0.3.943
package template
//lint:file-ignore SA4006 This context is only used if a nested component is present.
@@ -29,19 +29,18 @@ func CoverXHTML(coverPath string) templ.Component {
templ_7745c5c3_Var1 = templ.NopComponent
}
ctx = templ.ClearChildren(ctx)
templ_7745c5c3_Err = templ.Raw(`
<?xml version='1.0' encoding='utf-8'?>`).Render(ctx, templ_7745c5c3_Buffer)
templ_7745c5c3_Err = templ.Raw(`<?xml version='1.0' encoding='utf-8'?>`).Render(ctx, templ_7745c5c3_Buffer)
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 1, "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"zh-CN\"><head><title>Cover</title></head><style type=\"text/css\">\n\t\t@page {\n\t\t\tpadding: 0pt;\n\t\t\tmargin: 0pt\n\t\t}\n\t\tbody {\n\t\t\ttext-align: center;\n\t\t\tpadding: 0pt;\n\t\t\tmargin: 0pt;\n\t\t}\n\t\t</style><body><div><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" width=\"100%\" height=\"100%\" viewBox=\"0 0 400 581\" preserveAspectRatio=\"none\"><image width=\"400\" height=\"581\" xlink:href=\"")
templ_7745c5c3_Err = templruntime.WriteString(templ_7745c5c3_Buffer, 1, "<html xmlns=\"http://www.w3.org/1999/xhtml\" xmlns:epub=\"http://www.idpf.org/2007/ops\" xml:lang=\"zh-CN\"><head><title>Cover</title></head><style type=\"text/css\">\n\t\t@page {\n\t\tpadding: 0pt;\n\t\tmargin: 0pt\n\t\t}\n\t\tbody {\n\t\ttext-align: center;\n\t\tpadding: 0pt;\n\t\tmargin: 0pt;\n\t\t}\n\t</style><body><div><svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" version=\"1.1\" width=\"100%\" height=\"100%\" viewBox=\"0 0 400 581\" preserveAspectRatio=\"none\"><image width=\"400\" height=\"581\" xlink:href=\"")
if templ_7745c5c3_Err != nil {
return templ_7745c5c3_Err
}
var templ_7745c5c3_Var2 string
templ_7745c5c3_Var2, templ_7745c5c3_Err = templ.JoinStringErrs(coverPath)
if templ_7745c5c3_Err != nil {
return templ.Error{Err: templ_7745c5c3_Err, FileName: `template/cover.xhtml.templ`, Line: 32, Col: 59}
return templ.Error{Err: templ_7745c5c3_Err, FileName: `template/cover.xhtml.templ`, Line: 32, Col: 58}
}
_, templ_7745c5c3_Err = templ_7745c5c3_Buffer.WriteString(templ.EscapeString(templ_7745c5c3_Var2))
if templ_7745c5c3_Err != nil {

59
test/bilinovel_test.go Normal file
View File

@@ -0,0 +1,59 @@
package test
import (
"bilinovel-downloader/downloader/bilinovel"
"encoding/json"
"fmt"
"testing"
)
// TestBilinovel_GetNovel builds a text-only bilinovel downloader, requests
// novel 2727 and prints the result as JSON.
// NOTE(review): this presumably talks to the live site, so it needs network
// access — confirm before running it in CI.
func TestBilinovel_GetNovel(t *testing.T) {
	// Use a name that does not shadow the bilinovel package.
	downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 5})
	if err != nil {
		t.Fatalf("failed to create bilinovel: %v", err)
	}
	// Only touch the downloader once construction is known to have succeeded.
	downloader.SetTextOnly(true)

	novel, err := downloader.GetNovel(2727, false, nil)
	if err != nil {
		t.Fatalf("failed to get novel: %v", err)
	}

	jsonBytes, err := json.Marshal(novel)
	if err != nil {
		t.Fatalf("failed to marshal novel: %v", err)
	}
	fmt.Println(string(jsonBytes))
}
// TestBilinovel_GetVolume builds a text-only bilinovel downloader, requests a
// single volume and prints the result as JSON.
// NOTE(review): the arguments are presumably novel ID 2727 and volume ID
// 129092, and the call presumably hits the live site — confirm.
func TestBilinovel_GetVolume(t *testing.T) {
	// Use a name that does not shadow the bilinovel package.
	downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
	if err != nil {
		t.Fatalf("failed to create bilinovel: %v", err)
	}
	// Only touch the downloader once construction is known to have succeeded.
	downloader.SetTextOnly(true)

	volume, err := downloader.GetVolume(2727, 129092, false)
	if err != nil {
		t.Fatalf("failed to get volume: %v", err)
	}

	jsonBytes, err := json.Marshal(volume)
	if err != nil {
		t.Fatalf("failed to marshal volume: %v", err)
	}
	fmt.Println(string(jsonBytes))
}
// TestBilinovel_GetChapter builds a text-only bilinovel downloader, requests a
// single chapter and prints the result as JSON.
// NOTE(review): the arguments are presumably novel ID 2727, volume ID 129092
// and chapter ID 129094, and the call presumably hits the live site — confirm.
func TestBilinovel_GetChapter(t *testing.T) {
	// Use a name that does not shadow the bilinovel package.
	downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
	if err != nil {
		t.Fatalf("failed to create bilinovel: %v", err)
	}
	// Only touch the downloader once construction is known to have succeeded.
	downloader.SetTextOnly(true)

	chapter, err := downloader.GetChapter(2727, 129092, 129094)
	if err != nil {
		t.Fatalf("failed to get chapter: %v", err)
	}

	jsonBytes, err := json.Marshal(chapter)
	if err != nil {
		t.Fatalf("failed to marshal chapter: %v", err)
	}
	fmt.Println(string(jsonBytes))
}

View File

@@ -0,0 +1,318 @@
package test
import (
"fmt"
"log"
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// unscrambleParagraphs takes a list of paragraphs in scrambled order and puts
// them back into reading order, using chapterID to seed the permutation.
//
// Algorithm source: https://www.bilinovel.com/themes/zhmb/js/chapterlog.js?v1006c1
// Deobfuscation tools: https://obf-io.deobfuscate.io http://jsnice.org
//
// The approach works, but it breaks whenever bilinovel changes how the seed is
// computed or how the shuffle is implemented, so playwright may still be the
// more robust option.
func unscrambleParagraphs(scrambledParagraphs []*goquery.Selection, chapterID int) []*goquery.Selection {
	// Following the JS logic, the first 20 paragraphs are never shuffled, and
	// a list of 20 or fewer paragraphs is returned untouched.
	const fixedPrefix = 20

	total := len(scrambledParagraphs)
	if total <= fixedPrefix {
		return scrambledParagraphs
	}

	// targets starts as the identity over the shuffled tail: relative
	// indices 0, 1, 2, ... for the paragraphs from position 20 onwards.
	targets := make([]int, total-fixedPrefix)
	for k := range targets {
		targets[k] = k
	}

	// Replay the JS pseudo-random generator and its Fisher-Yates-like shuffle
	// step for step so the resulting index mapping is the same.
	seed := int64(chapterID*127 + 235)
	for hi := len(targets) - 1; hi > 0; hi-- {
		seed = (seed*9302 + 49397) % 233280
		pick := int(float64(seed) / 233280.0 * float64(hi+1))
		targets[hi], targets[pick] = targets[pick], targets[hi]
	}

	// JS logic: elements[aProperties[i]] = out[i].node — the i-th scrambled
	// paragraph belongs at position aProperties[i] of the restored list.
	// The fixed prefix maps onto itself; the tail uses the shuffled indices
	// shifted by the prefix length.
	restored := make([]*goquery.Selection, total)
	copy(restored, scrambledParagraphs[:fixedPrefix])
	for k, rel := range targets {
		restored[rel+fixedPrefix] = scrambledParagraphs[k+fixedPrefix]
	}
	return restored
}
func TestResortDom(t *testing.T) {
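// --- Step 1: prepare the raw HTML ---
// Paste the complete, unprocessed HTML source obtained with a plain HTTP request here.
// The HTML below is a previously captured raw page used as the sample.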
unprocessedHtmlContent := `
<!DOCTYPE html>
<html lang="zh-Hans">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>女主角? 圣女? 不,我是全业女仆(自豪)! 第1章 第1话 目标成为女仆的少女_哔哩轻小说</title>
<meta name="keywords" content="女主角? 圣女? 不,我是全业女仆(自豪)!,第1话 目标成为女仆的少女,哔哩轻小说" />
<meta name="description" content="哔哩轻小说提供 あてきち 所创作的 女主角? 圣女? 不,我是全业女仆(自豪)! 第1章 第1话 目标成为女仆的少女 在线阅读与TXT,epub下载" />
<meta name="viewport" content="initial-scale=1.0,minimum-scale=1.0,user-scalable=yes,width=device-width" />
<meta name="theme-color" content="#232323" media="(prefers-color-scheme: dark)" />
<meta name="applicable-device" content="mobile" />
<link rel="stylesheet" href="https://www.bilinovel.com/themes/zhmb/css/read.css?v0409c2">
<link rel="stylesheet" href="https://www.bilinovel.com/themes/zhmb/css/chapter.css?v1126a9">
<link rel="dns-preconnect" href="https://www.bilinovel.com">
<link rel="alternate" hreflang="zh-Hant" href="https://tw.linovelib.com/novel/4126/236197.html" />
<script src="https://www.bilinovel.com/themes/zhmb/js/jquery-3.3.1.js"></script>
<script type="text/javascript" src="/scripts/darkmode.js"></script>
<script async src="https://www.bilinovel.com/themes/zhmb/js/lazysizes.min.js"></script>
<script src="https://www.bilinovel.com/scripts/common.js?v0922a3"></script>
<script src="https://www.bilinovel.com/scripts/zation.js?v1004a4"></script>
<style>.center-note{text-align: center; margin: 0; height: 50vh; display: flex ; justify-content: center; align-items: center;}.sum1{display:none}.footlink a{box-shadow: 0 0 1px rgba(150,150,150,.6);}.footlink a:nth-child(1){display: inline-block;margin-bottom: 10px;width: 90%;}.footlink a:nth-child(2){padding: 5px 10px;float: left;width: 35%;margin-left: 5%;}.footlink a:nth-child(3){padding: 5px 10px;float: right;width: 35%;margin-right: 5%;}.footlink a:nth-child(4){display: inline-block;margin-top: 10px;width: 90%;}#acontent{text-align: unset;}</style>
<script type="text/javascript">var ual = navigator.language.toLowerCase();var isWindows = navigator.platform.toLowerCase().includes("win");if(ual == 'zh-tw' || ual == 'zh-hk'){window.location.replace("https://tw.linovelib.com/novel/4126/236197.html");}if (ual === 'zh-cn' && isWindows) { window.location.replace("https://www.linovelib.com/novel/4126/236197.html");}</script>
</head>
<body id="aread">
<script type="text/javascript">var ReadParams={url_previous:'/novel/4126/236196.html',url_next:'/novel/4126/236197_2.html',url_index:'/novel/4126/catalog',url_articleinfo:'/novel/4126/vol_236194.html',url_image:'https://www.bilinovel.com/files/article/image/4/4126/4126s.jpg',url_home:'https://www.bilinovel.com/',articleid:'4126',articlename:'女主角? 圣女? 不,我是全业女仆(自豪)!',subid:'/4',author:'あてきち',chapterid:'236197',page:'1',chaptername:'第1章 第1话 目标成为女仆的少女',chapterisvip:'0',userid:'0',readtime:'1761057661'}</script>
<div class="main">
<div id="abox" class="abox">
<div id="apage" class="apage">
<div class="atitle"><h1 id="atitle">第1话 目标成为女仆的少女</h1><h3>第1章</h3></div>
<div id="acontent" class="contente"><div class="cgo"><!--<script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-8799828951681010"
crossorigin="anonymous"></script>
<ins class="adsbygoogle"
style="display:block"
data-ad-client="ca-pub-8799828951681010"
data-ad-slot="2277430192"
data-ad-format="auto"
data-full-width-responsive="true"></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script>--></div><p>「欢迎回来,老爷。」</p>
<br>
<p>一位少女恭敬地弯腰向走进木质大门的绅士致意。</p>
<p>少女穿着一件做工精致的黑色连衣裙,上面系着花边以及刺绣、并不华丽的纯白围裙,梳成编辫的黑发上系着可爱的蕾丝头带。</p>
<br>
<p>无论从哪个角度看,都是迎接主人归来的女仆样子。</p>
<br>
<p>「啊,我回来了」</p>
<br>
<p>绅士把帽子和大衣交给恭敬地弯腰的女仆,用温柔的语气回答。</p>
<br>
<p>「我马上为您准备茶水。请问您想要哪一款?」</p>
<p>「那么,我想要一杯伯爵红茶。」</p>
<p>「要加牛奶之类的吗?」</p>
<p>「不,不用了。」</p>
<p>「遵命。茶点要什么呢?」</p>
<p>「嗯,就交给你吧。拜托了?」</p>
<br>
<p>对着绅士的话语,身为女仆的少女露出了轻柔的微笑。她可能只有十五、六岁吧。脸上还带着稚气,但未来值得期待,可爱又温柔的容貌。</p>
<br>
<p>「请交给我,我会准备合您口味的茶点。」</p>
<p>「啊,拜托了。」</p>
<br>
<p>女仆少女将帽子和大衣挂在衣架上,然后引导绅士到餐桌。</p>
<br>
<p></p>
<br>
<p>「那么,我要出门了。」</p>
<p>「好的,老爷」</p>
<p>「下次回来时,如果能再让妳接待就好了……」</p>
<br>
<p>「下次她想要带朋友在露台喝茶,也希望你能照顾他们。」</p>
<br>
<p>轻轻敲门后,听到「请进」的回答,少女走进了房间行礼。</p>
<p>一个少女嘟囔着。那是一位身穿简素蓝色连衣裙的少女。闪闪发光的银色头发留到了胸口。有着神秘的琉璃色瞳孔的美丽可爱少女站在母亲身旁。</p>
<p>送走绅士后,女仆少女前往总管的房间。</p>
<br>
<p>「欸,对我不需要用这种说话方式吧?……律子酱。」</p>
<p>薪水丰厚的兼职让她顺利存下了留学费用,留学之日即将到来。</p>
<p>「拜托了!」</p>
<br>
<p>女仆少女律子满脸笑容地回答。</p>
<br>
<p>「话说回来,律子酱。上次来的坂上夫人很喜欢你呢。上次寄来的邮件里相当称赞。她说下次还打算指名。」</p>
<br>
<p>「失礼了Miss 阿曼达。关于刚才离开宅邸的老爷报告……」</p>
<br>
<p>被叫做律子的女仆少女张开眼,刚才还散发着女仆气息的模样一下子变回稚气十足的少女,她嘟起嘴说道。</p>
<br>
<p>「这样很好啊!」</p>
<p>女仆少女律子满脸笑容地回答。</p>
<p>对担忧这一点的父母来说,当时的律子的情况无疑让人开心。</p>
<br>
<p>因此,父母并未反对女儿出人意表的宣言。</p>
<br>
<p>标题叫『深窗的公主的悲恋』。</p>
<p>优雅的动作,没有任何不自然的温柔笑容。仿佛是女仆典范一般的少女。看着她的身影,总管阿曼达皱了皱眉。不,这是因为……</p>
<p>「怎么了?瑟蕾丝蒂?」</p>
<br>
<p>「啊,拜托了。那么……」</p>
<p>「一路顺风,老爷。」</p>
<p>「遵命。我会将您的意愿转达给<ruby>女仆总管<rp>(</rp><rt>家政妇</rt><rp>)</rp></ruby>。」</p>
<p>(公主身后的女仆们是多么的优秀啊!)</p>
<br>
<p>「你真的很喜欢做这种工作呢。这样一来就得早晨开始准备了。下次我会去问问她们的希望。」</p>
<br>
<p>这部电影以旧时英国贵族的故事为题材。描述了一位在呵护下长大的贵族千金,偶然认识一位平民青年,并陷入爱河的故事。最后,因为身份差异,两人自尽,悲剧结局。</p>
<br>
<p>父母看着律子的身影,感到非常开心。</p>
<p>女仆们使出各种手段帮助她与男子相会。</p>
<p>在女仆的影响下,律子对各种事物产生了兴趣,玩耍、笑声、学习,成长为一个非常优秀的女儿。自从遇见女仆以来,好奇心无止境,虽然年龄和性格相比有些幼稚,但对父母来说,女仆这个存在也是让人有好感的。</p>
<br>
<p>她的名字是瑞波律子,二十岁,现在是大学二年级的学生。</p>
<br>
<p>「我讨厌那个名字啊。明明是日本人,却叫阿曼达……」 <span style="color: rgb(61142185);">(*亚万田日语念成阿曼达)</span></p>
<br>
<p>当然,因为主角是英国贵族千金,所以电影里并没有描绘女仆们努力的场景。但正因为如此,律子对在幕后默默支持的女仆们十分感动。</p>
<p>「……本来应该是这样的啊。」</p>
<p>来这家女仆咖啡厅的客人并不仅仅是男性。这家店的男女客人比例几乎是一比一。</p>
<br>
<p>会员制高级女仆咖啡厅『<ruby>贵族的日常<rp>(</rp><rt>Noble's One Day</rt><rp>)</rp></ruby>』。</p>
<br>
<p>生活了六年,律子慢慢的成长,但她却不对事物报持热情。喜欢的玩具和书籍都没有,看电视也不会表现出太多兴趣。</p>
<br>
<p>「拜托了!」</p>
<br>
<p>那是瑞波律子还不懂爱情的六岁春天的事……先不管给一个六岁小孩看悲恋电影的问题。</p>
<p>是被称为女仆总管的女性,亚万田凪沙创建的店。</p>
<br>
<p>「我在大学毕业后,想在英国成为真正的女仆!」</p>
<br>
<p>「好的,请放心交给我!」</p>
<br>
<p>「欸,真的吗!? 就是上周来过的那位温柔的女士吗?」</p>
<p>男士需穿着西装,女士需穿着礼服,这是服装规定。特别为女性客人提供服装租赁服务,因此女性客人可以享受穿着平时难得一穿的贵族少女或贵妇风格的洋装,扮演女主人的角色。</p>
<br>
<p>虽然二十岁了,律子的脸庞略显年幼,她是这家店最受欢迎的女仆。</p>
<br>
<p>看过这部电影的观众都为两人的悲恋流泪,感动不已。</p>
<br>
<p>从那时起,律子就迷上了女仆。她向父母说明了女仆是多么伟大的存在,并激动地宣布有一天她也会成为女仆。</p>
<p>完全预约制,到店时会有指名的女仆迎接。此时店员会完全扮演女仆角色,客人不是客人身份,而是扮演女仆的主人,享受其中。</p>
<br>
<p>一切都是顺风顺水。距离成为女仆只剩下最后一步!</p>
<br>
<p>美丽的行礼后,少女向绅士回以温柔的微笑。绅士推开门离开了。</p>
<br>
<p>律子的梦想是成为女仆。原因非常简单,那是因为她小时候看过的一部电影。</p>
<br>
<p>在父母的支持下,律子在大学学习外语、历史、文学、礼仪等,以成为女仆为目标,在本格派女仆咖啡厅进行女仆训练的日常。</p>
<br>
<p>「那么,我也可以帮忙准备衣服和化妆吗?」</p>
<p>「讨厌!再让我扮一下女仆也没关系嘛,亚万田小姐!」</p>
<br>
<p>绅士略显羞涩地说着,女仆的少女露出了微笑回答。</p>
<p>然而,律子却对另一方面感动不已。</p>
<br>
<br>
<br>
<br>
<br>
<p>支付是预付制,店内不谈金钱。没有菜单,女仆会自然接受点单。客人只需要享受那片刻的主人时光即可。</p>
<p>女主角的贵族千金拥有很温柔的人格,所以她的女仆们也非常喜爱她。</p>
<br>
<p>为了筹集到英国留学的资金,进入大学的律子开始寻找兼职工作。她认为对未来有帮助的工作是最好的,于是找到了这家女仆咖啡厅。</p><div class="cgo"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-8799828951681010"
crossorigin="anonymous"></script>
<ins class="adsbygoogle"
style="display:block"
data-ad-client="ca-pub-8799828951681010"
data-ad-slot="9085546976"
data-ad-format="auto"
data-full-width-responsive="true"></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script></div>
</div>
</div>
</div>
<div id="toptext" class="toptext" style="display:none;"></div>
<div id="bottomtext" class="bottomtext" style="display:none;"></div>
<div id="operatetip" class="operatetip" style="display:none;" onclick="this.style.display='none';">
<div class="tipl"><p>翻上页</p></div>
<div class="tipc"><p>呼出功能<br><br><small>漫画&插图<br>建议使用上下翻页</small><br><br><small>【翻页模式】章评·默认隐藏</small></p></div>
<div class="tipr"><p>翻下页</p></div>
</div>
</div>
<div id="footlink" class="footlink"><a onclick="window.location.href = ReadParams.url_previous;">序章 路多帕克家的大小姐以及万能女仆</a><a onclick="window.location.href = ReadParams.url_index;">目录</a><a onclick="window.location.href = ReadParams.url_articleinfo;">书页</a><a onclick="window.location.href = ReadParams.url_next;">下一頁</a></div>
<script>$(document).ready(function(){var prevpage="/novel/4126/236196.html";var nextpage="/novel/4126/236197_2.html";var bookpage="/novel/4126.html";$("body").keydown(function(event){var isInput=event.target.tagName==='INPUT'||event.target.tagName==='TEXTAREA';if(!isInput){if(event.keyCode==37){location=prevpage}else if(event.keyCode==39){location=nextpage}}})});</script>
<script type="text/javascript" src="https://www.bilinovel.com/themes/zhmb/js/readtools.js?42sfaj-8"></script>
<script type="text/javascript" src="https://www.bilinovel.com/scripts/json2.js"></script>
<script type="text/javascript" src="https://www.bilinovel.com/themes/zhmb/js/chapterlog.js?v1006c1"></script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-1K4JZ603WH"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-1K4JZ603WH');
</script>
<script>
var _hmt = _hmt || [];
(function() {
var hm = document.createElement("script");
hm.src = "https://hm.baidu.com/hm.js?6f9595b2c4b57f95a93aa5f575a77fb0";
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(hm, s);
})();
</script>
<!--<script>
if ('serviceWorker' in navigator) {
navigator.serviceWorker.getRegistrations().then(function(registrations) {
for (let registration of registrations) {
registration.unregister();
}
});
}
</script>-->
<script defer src="https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015" integrity="sha512-ZpsOmlRQV6y907TI0dKBHq9Md29nnaEIPlkf84rnaERnq6zvWvPUqr2ft8M1aS28oN72PdrCzSjY4U6VaAw1EQ==" data-cf-beacon='{"version":"2024.11.0","token":"192783771d59492782cd05bd12eb61b9","r":1,"server_timing":{"name":{"cfCacheStatus":true,"cfEdge":true,"cfExtPri":true,"cfL4":true,"cfOrigin":true,"cfSpeedBrain":true},"location_startswith":null}}' crossorigin="anonymous"></script>
</body>
</html>`
// --- Step 2: parse the HTML and extract the key information ---
doc, err := goquery.NewDocumentFromReader(strings.NewReader(unprocessedHtmlContent))
if err != nil {
log.Fatalf("解析HTML失败: %v", err)
}
chapterID := 236197
// --- Step 3: collect every paragraph that needs to be reordered ---
var scrambledParagraphs []*goquery.Selection
doc.Find("#acontent p").Each(func(i int, s *goquery.Selection) {
// Keep only non-empty paragraphs, to stay consistent with the JS logic.
if len(strings.TrimSpace(s.Text())) > 0 {
scrambledParagraphs = append(scrambledParagraphs, s)
}
})
fmt.Printf("从原始HTML中找到 %d 个乱序段落,准备重排。\n\n", len(scrambledParagraphs))
// --- Step 4: run the reordering algorithm ---
correctlyOrderedParagraphs := unscrambleParagraphs(scrambledParagraphs, chapterID)
// --- Step 5: print the final result ---
fmt.Println("--- 已恢复正确顺序的最终内容 ---")
for i, p := range correctlyOrderedParagraphs {
fmt.Printf("%d: %s\n", i+1, p.Text())
}
}

55
text/wrapper.go Normal file
View File

@@ -0,0 +1,55 @@
package text
import (
"bilinovel-downloader/model"
"bilinovel-downloader/utils"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/PuerkitoBio/goquery"
)
// PackVolumeToText writes every chapter of volume as a plain-text file.
//
// The files are placed in a directory named after the cleaned volume title
// inside outputPath. Any existing directory (or file) at that location is
// removed first, so each call starts from an empty directory. Each chapter
// becomes "<index>-<title>.txt", where index is the zero-based, zero-padded
// position of the chapter in volume.Chapters. The file contains the text of
// the chapter HTML with <img> elements removed and surrounding whitespace
// trimmed.
//
// It returns an error if the directory cannot be reset, if a chapter has no
// content, if the chapter HTML cannot be parsed, or if a file cannot be
// written.
func PackVolumeToText(volume *model.Volume, outputPath string) error {
	outputPath = filepath.Join(outputPath, utils.CleanDirName(volume.Title))

	// RemoveAll succeeds when the path does not exist, so no prior Stat is
	// needed to tell "missing" apart from "present".
	if err := os.RemoveAll(outputPath); err != nil {
		return fmt.Errorf("failed to remove output directory: %w", err)
	}
	if err := os.MkdirAll(outputPath, 0755); err != nil {
		return fmt.Errorf("failed to create output directory: %w", err)
	}

	for i, chapter := range volume.Chapters {
		// Content is a pointer; report a missing body instead of panicking.
		if chapter == nil || chapter.Content == nil {
			return fmt.Errorf("chapter %d has no content", i)
		}

		doc, err := goquery.NewDocumentFromReader(strings.NewReader(chapter.Content.Html))
		if err != nil {
			return fmt.Errorf("failed to parse chapter content: %w", err)
		}
		doc.Find("img").Remove()
		text := strings.TrimSpace(doc.Text())

		// NOTE(review): chapter.Title is used verbatim in the file name; a
		// title containing a path separator would make the write fail —
		// consider sanitizing it.
		chapterPath := filepath.Join(outputPath, fmt.Sprintf("%03d-%s.txt", i, chapter.Title))

		// WriteFile closes the file before returning, so handles do not pile
		// up across iterations and close-time errors are reported. 0666
		// matches the permissions os.Create would have requested.
		if err := os.WriteFile(chapterPath, []byte(text), 0666); err != nil {
			return fmt.Errorf("failed to write chapter file: %w", err)
		}
	}
	return nil
}

View File

@@ -9,11 +9,19 @@ import (
"github.com/go-resty/resty/v2"
)
var client *resty.Client
// RestyClient wraps a resty client together with a counting semaphore that
// NewRestyClient wires into the client's request hooks.
type RestyClient struct {
// client is the underlying resty client that performs the HTTP requests.
client *resty.Client
// concurrency is the value passed to NewRestyClient; it is also the
// capacity of sem.
concurrency int
// sem is a buffered channel used as a semaphore: the OnBeforeRequest hook
// sends into it and the OnAfterResponse hook receives from it.
// NOTE(review): a request that fails without producing a response may never
// reach OnAfterResponse — confirm the slot is released on that path.
sem chan struct{}
}
func init() {
client = resty.New()
client.SetTransport(&http.Transport{
func NewRestyClient(concurrency int) *RestyClient {
client := &RestyClient{
client: resty.New(),
concurrency: concurrency,
sem: make(chan struct{}, concurrency),
}
client.client.SetTransport(&http.Transport{
DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
if addr == "www.bilinovel.com:443" {
addr = "64.140.161.52:443"
@@ -24,7 +32,16 @@ func init() {
},
TLSHandshakeTimeout: 10 * time.Second,
})
client.SetRetryCount(10).
client.client.
OnBeforeRequest(func(c *resty.Client, req *resty.Request) error {
client.sem <- struct{}{}
return nil
}).
OnAfterResponse(func(c *resty.Client, resp *resty.Response) error {
<-client.sem
return nil
})
client.client.SetRetryCount(10).
SetRetryWaitTime(3 * time.Second).
SetRetryAfter(func(client *resty.Client, resp *resty.Response) (time.Duration, error) {
if resp.StatusCode() == http.StatusTooManyRequests {
@@ -43,10 +60,13 @@ func init() {
AddRetryCondition(func(r *resty.Response, err error) bool {
return err != nil || r.StatusCode() == http.StatusTooManyRequests
})
client.client.SetLogger(disableLogger{}).SetHeader("Accept-Charset", "utf-8").SetHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0")
return client
}
func Request() *resty.Request {
return client.R().SetLogger(disableLogger{}).SetHeader("Accept-Charset", "utf-8").SetHeader("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0")
// R returns a new request created by the wrapped resty client.
func (c *RestyClient) R() *resty.Request {
return c.client.R()
}
type disableLogger struct{}