7 Commits

Author SHA1 Message Date
b650030f26 README 2025-10-22 02:25:35 +11:00
b136556822 test(unscrambleParagraphs): try a crawling method without playwright 2025-10-22 02:17:11 +11:00
17c3859e9e feat(logging): Implement structured logging and debug mode
fix: Windows cannot download novels correctly
2025-10-17 01:36:23 +11:00
11fccdb05f ci(goreleaser): Install templ before generating templates
Adds a `go install` hook to `.goreleaser.yaml` to ensure the `templ` binary is
installed and up-to-date before `templ generate` is executed. This prevents
potential build failures in CI/CD environments where `templ` might not be
pre-installed or could be an outdated version, making the release process
more robust and self-contained.
2025-10-06 18:20:47 +11:00
af968cbc9a ci(workflow): Upgrade GitHub Actions in release workflow
Updated the major versions of several GitHub Actions used in the release workflow:
- actions/checkout from v4 to v5
- actions/setup-go from v4 to v5
- goreleaser/goreleaser-action from v5 to v6

This ensures we are using the latest features, bug fixes, and security updates provided by these actions.
2025-10-06 18:11:27 +11:00
08e6280c34 feat: Add NFPM packaging and defer Playwright installation
This commit introduces NFPM configuration in `.goreleaser.yaml` to
generate native packages for various Linux distributions (e.g., .deb,
.rpm, .apk). This provides a more streamlined installation experience
for Linux users.

The Playwright browser installation logic has been moved from `main.go`
to the `Run` function of the `download` command. This change ensures
that Playwright binaries are only downloaded and installed when the
`download` command is actually invoked, improving initial application
startup performance and reducing unnecessary overhead for other commands.

The Goreleaser configuration has also been updated to version 2 syntax
and the `arm` architecture has been removed from builds.
2025-10-06 18:07:54 +11:00
34179b4dc0 Create LICENSE 2025-10-06 18:03:04 +11:00
12 changed files with 507 additions and 50 deletions

28
.github/workflows/release.yml vendored Normal file
View File

@@ -0,0 +1,28 @@
# Release workflow: builds and publishes artifacts with GoReleaser whenever a
# tag starting with "v" is pushed.
name: release

on:
  push:
    tags:
      - "v*"

# GoReleaser creates the GitHub release and uploads assets, which needs write
# access to repository contents.
permissions:
  contents: write

jobs:
  goreleaser:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          # GoReleaser needs the full history and all tags to compute the
          # changelog and the previous version; the default shallow clone
          # (depth 1) does not provide them.
          fetch-depth: 0
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          # Build with the Go version the module declares instead of whatever
          # happens to be pre-installed on the runner image.
          go-version-file: go.mod
      - name: Run GoReleaser
        uses: goreleaser/goreleaser-action@v6
        with:
          distribution: goreleaser
          version: latest
          args: release --clean
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -1,6 +1,8 @@
version: 2
project_name: bilinovel-downloader project_name: bilinovel-downloader
before: before:
hooks: hooks:
- go install github.com/a-h/templ/cmd/templ@latest
- templ generate - templ generate
builds: builds:
- env: - env:
@@ -12,16 +14,15 @@ builds:
goarch: goarch:
- amd64 - amd64
- arm64 - arm64
- arm
- "386" - "386"
ldflags: ldflags:
- -s -w -X bilinovel-downloader/cmd.Version={{ .Version }} - -s -w -X bilinovel-downloader/cmd.Version={{ .Version }}
flags: flags:
- -trimpath - -trimpath
archives: archives:
- format: tar.gz - formats: ["tar.gz"]
format_overrides: format_overrides:
- format: zip - formats: ["zip"]
goos: windows goos: windows
wrap_in_directory: true wrap_in_directory: true
release: release:
@@ -29,3 +30,17 @@ release:
upx: upx:
- enabled: true - enabled: true
compress: best compress: best
nfpms:
- id: bilinovel-downloader
homepage: https://github.com/bestnite/bilinovel-downloader
maintainer: Nite <admin@nite07.com>
license: "MIT"
formats:
- apk
- deb
- rpm
- termux.deb
- archlinux
provides:
- bilinovel-downloader

6
.vscode/launch.json vendored
View File

@@ -23,9 +23,9 @@
"program": "${workspaceFolder}", "program": "${workspaceFolder}",
"args": [ "args": [
"download", "download",
"-n=2727", "-n=2388",
"-v=150098", "-v=84522",
"--headless=false" "--debug=true"
] ]
} }
] ]

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Nite
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -22,3 +22,8 @@
```bash ```bash
bilinovel-downloader pack -d <目录路径> bilinovel-downloader pack -d <目录路径>
``` ```
## 算法分析
目前程序使用 playwright 进行爬取来规避 bilinovel 的反爬(诱饵段落和段落重排)策略。
但是依然对 bilinovel 的算法进行了简单的分析,具体可以参考[代码](./test/no_playwright_method_test.go),这个代码目前是可行的,但如果 bilinovel 频繁更改初始化种子的计算方式或算法的实现,会让排序方法失效,这也是为什么目前程序使用 playwright。

View File

@@ -8,10 +8,12 @@ import (
"bilinovel-downloader/text" "bilinovel-downloader/text"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log" "io"
"log/slog"
"os" "os"
"path/filepath" "path/filepath"
"github.com/playwright-community/playwright-go"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@@ -20,9 +22,20 @@ var downloadCmd = &cobra.Command{
Short: "Download a novel or volume", Short: "Download a novel or volume",
Long: "Download a novel or volume", Long: "Download a novel or volume",
Run: func(cmd *cobra.Command, args []string) { Run: func(cmd *cobra.Command, args []string) {
err := runDownloadNovel() slog.Info("Installing playwright")
err := playwright.Install(&playwright.RunOptions{
Browsers: []string{"chromium"},
Stdout: io.Discard,
})
if err != nil { if err != nil {
log.Printf("failed to download novel: %v", err) slog.Error("failed to install playwright")
return
}
err = runDownloadNovel()
if err != nil {
slog.Error("failed to download novel", slog.Any("error", err))
return
} }
}, },
} }
@@ -32,8 +45,8 @@ type downloadCmdArgs struct {
VolumeId int `validate:"required"` VolumeId int `validate:"required"`
outputPath string outputPath string
outputType string outputType string
headless bool
concurrency int concurrency int
debug bool
} }
var ( var (
@@ -45,15 +58,15 @@ func init() {
downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id") downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id")
downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path") downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path")
downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text") downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text")
downloadCmd.Flags().BoolVar(&downloadArgs.headless, "headless", true, "headless mode") downloadCmd.Flags().BoolVar(&downloadArgs.debug, "debug", false, "debug mode")
downloadCmd.Flags().IntVar(&downloadArgs.concurrency, "concurrency", 3, "concurrency of downloading volumes") downloadCmd.Flags().IntVar(&downloadArgs.concurrency, "concurrency", 3, "concurrency of downloading volumes")
RootCmd.AddCommand(downloadCmd) RootCmd.AddCommand(downloadCmd)
} }
func runDownloadNovel() error { func runDownloadNovel() error {
downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{ downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{
Headless: downloadArgs.headless,
Concurrency: downloadArgs.concurrency, Concurrency: downloadArgs.concurrency,
Debug: downloadArgs.debug,
}) })
if err != nil { if err != nil {
return fmt.Errorf("failed to create downloader: %v", err) return fmt.Errorf("failed to create downloader: %v", err)
@@ -61,7 +74,7 @@ func runDownloadNovel() error {
// 确保在函数结束时关闭资源 // 确保在函数结束时关闭资源
defer func() { defer func() {
if closeErr := downloader.Close(); closeErr != nil { if closeErr := downloader.Close(); closeErr != nil {
log.Printf("Failed to close downloader: %v", closeErr) slog.Info("Failed to close downloader", slog.Any("error", closeErr))
} }
}() }()

View File

@@ -4,4 +4,6 @@ import (
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
var RootCmd = &cobra.Command{} var RootCmd = &cobra.Command{
Use: "bilinovel-downloader",
}

View File

@@ -6,7 +6,7 @@ import (
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
const ( var (
Version = "dev" Version = "dev"
) )

View File

@@ -7,7 +7,7 @@ import (
"crypto/sha256" "crypto/sha256"
_ "embed" _ "embed"
"fmt" "fmt"
"log" "log/slog"
"net/http" "net/http"
"os" "os"
"path" "path"
@@ -40,11 +40,13 @@ type Bilinovel struct {
pages map[string]playwright.Page pages map[string]playwright.Page
concurrency int concurrency int
concurrentChan chan any concurrentChan chan any
logger *slog.Logger
} }
type BilinovelNewOption struct { type BilinovelNewOption struct {
Headless bool
Concurrency int Concurrency int
Debug bool
} }
func New(option BilinovelNewOption) (*Bilinovel, error) { func New(option BilinovelNewOption) (*Bilinovel, error) {
@@ -54,6 +56,17 @@ func New(option BilinovelNewOption) (*Bilinovel, error) {
} }
restyClient := utils.NewRestyClient(50) restyClient := utils.NewRestyClient(50)
var logLevel slog.Level
if option.Debug {
logLevel = slog.LevelDebug
} else {
logLevel = slog.LevelInfo
}
handlerOptions := &slog.HandlerOptions{
Level: logLevel,
}
b := &Bilinovel{ b := &Bilinovel{
fontMapper: fontMapper, fontMapper: fontMapper,
textOnly: false, textOnly: false,
@@ -61,10 +74,11 @@ func New(option BilinovelNewOption) (*Bilinovel, error) {
pages: make(map[string]playwright.Page), pages: make(map[string]playwright.Page),
concurrency: option.Concurrency, concurrency: option.Concurrency,
concurrentChan: make(chan any, option.Concurrency), concurrentChan: make(chan any, option.Concurrency),
logger: slog.New(slog.NewTextHandler(os.Stdout, handlerOptions)),
} }
// 初始化浏览器实例 // 初始化浏览器实例
err = b.initBrowser(option.Headless) err = b.initBrowser(option.Debug)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to init browser: %v", err) return nil, fmt.Errorf("failed to init browser: %v", err)
} }
@@ -81,13 +95,15 @@ func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
} }
// initBrowser 初始化浏览器实例 // initBrowser 初始化浏览器实例
func (b *Bilinovel) initBrowser(headless bool) error { func (b *Bilinovel) initBrowser(debug bool) error {
pw, err := playwright.Run() pw, err := playwright.Run()
if err != nil { if err != nil {
return fmt.Errorf("could not start playwright: %w", err) return fmt.Errorf("could not start playwright: %w", err)
} }
b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{ b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(headless), Headless: playwright.Bool(!debug),
Devtools: playwright.Bool(debug),
}) })
if err != nil { if err != nil {
return fmt.Errorf("could not launch browser: %w", err) return fmt.Errorf("could not launch browser: %w", err)
@@ -98,7 +114,7 @@ func (b *Bilinovel) initBrowser(headless bool) error {
return fmt.Errorf("could not create browser context: %w", err) return fmt.Errorf("could not create browser context: %w", err)
} }
log.Println("Browser initialized successfully") b.logger.Info("Browser initialized successfully")
return nil return nil
} }
@@ -106,7 +122,7 @@ func (b *Bilinovel) initBrowser(headless bool) error {
func (b *Bilinovel) Close() error { func (b *Bilinovel) Close() error {
if b.browser != nil { if b.browser != nil {
if err := b.browser.Close(); err != nil { if err := b.browser.Close(); err != nil {
log.Printf("could not close browser: %v", err) b.logger.Error("could not close browser", slog.Any("error", err))
} }
b.browser = nil b.browser = nil
b.browserContext = nil b.browserContext = nil
@@ -122,7 +138,7 @@ func (b *Bilinovel) GetStyleCSS() string {
} }
func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) { func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) {
log.Printf("Getting novel %v\n", novelId) b.logger.Info("Getting novel", slog.Int("novelId", novelId))
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId) novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
resp, err := b.restyClient.R().Get(novelUrl) resp, err := b.restyClient.R().Get(novelUrl)
@@ -161,7 +177,7 @@ func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes [
} }
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) { func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) {
log.Printf("Getting volume %v of novel %v\n", volumeId, novelId) b.logger.Info("Getting volume of novel", slog.Int("volumeId", volumeId), slog.Int("novelId", novelId))
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId) novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := b.restyClient.R().Get(novelUrl) resp, err := b.restyClient.R().Get(novelUrl)
@@ -259,7 +275,7 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool
} }
func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) { func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) {
log.Printf("Getting all volumes of novel %v\n", novelId) b.logger.Info("Getting all volumes of novel", slog.Int("novelId", novelId))
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId) catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := b.restyClient.R().Get(catelogUrl) resp, err := b.restyClient.R().Get(catelogUrl)
@@ -300,7 +316,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
volumeId, err := strconv.Atoi(volumeIdStr) volumeId, err := strconv.Atoi(volumeIdStr)
if err != nil { if err != nil {
log.Printf("failed to convert volume id %s: %v", volumeIdStr, err) b.logger.Error("failed to convert volume id", slog.String("volumeIdStr", volumeIdStr), slog.Any("error", err))
return return
} }
if slices.Contains(skipVolumes, volumeId) { if slices.Contains(skipVolumes, volumeId) {
@@ -308,7 +324,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
} }
volume, err := b.GetVolume(novelId, volumeId, skipChapterContent) volume, err := b.GetVolume(novelId, volumeId, skipChapterContent)
if err != nil { if err != nil {
log.Printf("failed to get volume info for novel %d, volume %d: %v", novelId, volumeId, err) b.logger.Error("failed to get volume info", slog.Int("novelId", novelId), slog.Int("volumeId", volumeId), slog.Any("error", err))
return return
} }
volume.SeriesIdx = i volume.SeriesIdx = i
@@ -340,7 +356,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
} }
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) { func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId) b.logger.Info("Getting chapter of novel", slog.Int("chapterId", chapterId), slog.Int("novelId", novelId))
pageNum := 1 pageNum := 1
chapter := &model.Chapter{ chapter := &model.Chapter{
@@ -370,8 +386,11 @@ func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model
return chapter, nil return chapter, nil
} }
var nextPageUrlRegexp = regexp.MustCompile(`url_next:\s?['"]([^'"]*?)['"]`)
var cleanNextPageUrlRegexp = regexp.MustCompile(`(_\d+)?\.html$`)
func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) { func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) {
log.Printf("Getting chapter %v by page %v\n", chapter.Id, pageNum) b.logger.Info("Getting chapter by page", slog.Int("chapter", chapter.Id), slog.Int("page", pageNum))
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum) Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum)
@@ -405,6 +424,17 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
return false, fmt.Errorf("failed to parse html: %w", err) return false, fmt.Errorf("failed to parse html: %w", err)
} }
// 判断章节是否有下一页
n := nextPageUrlRegexp.FindStringSubmatch(resortedHtml)
if len(n) != 2 {
return false, fmt.Errorf("failed to determine wether there is a next page")
}
s := cleanNextPageUrlRegexp.ReplaceAllString(n[1], "")
if strings.Contains(Url, s) {
hasNext = true
}
if pageNum == 1 { if pageNum == 1 {
chapter.Title = doc.Find("#atitle").Text() chapter.Title = doc.Find("#atitle").Text()
} }
@@ -413,7 +443,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
content.Find("center").Remove() content.Find("center").Remove()
content.Find(".google-auto-placed").Remove() content.Find(".google-auto-placed").Remove()
if strings.Contains(resp.String(), `font-family: "read"`) { if strings.Contains(resortedHtml, `font-family: "read"`) {
html, err := content.Find("p").Last().Html() html, err := content.Find("p").Last().Html()
if err != nil { if err != nil {
return false, fmt.Errorf("failed to get html: %v", err) return false, fmt.Errorf("failed to get html: %v", err)
@@ -486,7 +516,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
} }
func (b *Bilinovel) getImg(url string) ([]byte, error) { func (b *Bilinovel) getImg(url string) ([]byte, error) {
log.Printf("Getting img %v\n", url) b.logger.Info("Getting img", slog.String("url", url))
resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url) resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -497,7 +527,15 @@ func (b *Bilinovel) getImg(url string) ([]byte, error) {
// processContentWithPlaywright 使用复用的浏览器实例处理内容 // processContentWithPlaywright 使用复用的浏览器实例处理内容
func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) { func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) {
tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html") // 替换 window.location.replace防止页面跳转
htmlContent = strings.ReplaceAll(htmlContent, "window.location.replace", "console.log")
tempPath := filepath.Join(os.TempDir(), "bilinovel-downloader")
err := os.MkdirAll(tempPath, 0755)
if err != nil {
return "", fmt.Errorf("failed to create temp dir: %w", err)
}
tempFile, err := os.CreateTemp(tempPath, "temp-*.html")
if err != nil { if err != nil {
return "", fmt.Errorf("failed to create temp file: %w", err) return "", fmt.Errorf("failed to create temp file: %w", err)
} }
@@ -510,6 +548,34 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
tempFile.Close() tempFile.Close()
tempFilePath := tempFile.Name() tempFilePath := tempFile.Name()
// // 屏蔽请求
// googleAdsDomains := []string{
// "adtrafficquality.google",
// "doubleclick.net",
// "googlesyndication.com",
// "googletagmanager.com",
// "hm.baidu.com",
// "cloudflareinsights.com",
// "fsdoa.js", // adblock 检测
// "https://www.linovelib.com/novel/", // 阻止从本地文件跳转到在线页面
// }
// err = page.Route("**/*", func(route playwright.Route) {
// for _, d := range googleAdsDomains {
// if strings.Contains(route.Request().URL(), d) {
// b.logger.Debug("blocking request", slog.String("url", route.Request().URL()))
// err := route.Abort("aborted")
// if err != nil {
// b.logger.Debug("failed to block request", route.Request().URL(), err)
// }
// return
// }
// }
// _ = route.Continue()
// })
// if err != nil {
// return "", fmt.Errorf("failed to intercept requests: %w", err)
// }
_, err = page.ExpectResponse(func(url string) bool { _, err = page.ExpectResponse(func(url string) bool {
return strings.Contains(url, "chapterlog.js") return strings.Contains(url, "chapterlog.js")
}, func() error { }, func() error {
@@ -519,7 +585,7 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
} }
return nil return nil
}, playwright.PageExpectResponseOptions{ }, playwright.PageExpectResponseOptions{
Timeout: playwright.Float(5000), Timeout: playwright.Float(10000),
}) })
if err != nil { if err != nil {
return "", fmt.Errorf("failed to wait for network request finish") return "", fmt.Errorf("failed to wait for network request finish")
@@ -527,6 +593,7 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{ err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{
State: playwright.WaitForSelectorStateVisible, State: playwright.WaitForSelectorStateVisible,
Timeout: playwright.Float(10000),
}) })
if err != nil { if err != nil {
return "", fmt.Errorf("could not wait for #acontent: %w", err) return "", fmt.Errorf("could not wait for #acontent: %w", err)
@@ -562,7 +629,7 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
return "", fmt.Errorf("failed to remove hidden elements: %w", err) return "", fmt.Errorf("failed to remove hidden elements: %w", err)
} }
log.Printf("Hidden elements removal result: %s", result) b.logger.Debug("Hidden elements removal result", slog.Any("count", result))
processedHTML, err := page.Content() processedHTML, err := page.Content()
if err != nil { if err != nil {

12
main.go
View File

@@ -2,20 +2,8 @@ package main
import ( import (
"bilinovel-downloader/cmd" "bilinovel-downloader/cmd"
"io"
"log"
"github.com/playwright-community/playwright-go"
) )
func main() { func main() {
log.Println("Installing playwright")
err := playwright.Install(&playwright.RunOptions{
Browsers: []string{"chromium"},
Stdout: io.Discard,
})
if err != nil {
log.Panicf("failed to install playwright")
}
_ = cmd.RootCmd.Execute() _ = cmd.RootCmd.Execute()
} }

View File

@@ -8,7 +8,7 @@ import (
) )
func TestBilinovel_GetNovel(t *testing.T) { func TestBilinovel_GetNovel(t *testing.T) {
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 5}) bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 5})
bilinovel.SetTextOnly(true) bilinovel.SetTextOnly(true)
if err != nil { if err != nil {
t.Fatalf("failed to create bilinovel: %v", err) t.Fatalf("failed to create bilinovel: %v", err)
@@ -25,7 +25,7 @@ func TestBilinovel_GetNovel(t *testing.T) {
} }
func TestBilinovel_GetVolume(t *testing.T) { func TestBilinovel_GetVolume(t *testing.T) {
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1}) bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
bilinovel.SetTextOnly(true) bilinovel.SetTextOnly(true)
if err != nil { if err != nil {
t.Fatalf("failed to create bilinovel: %v", err) t.Fatalf("failed to create bilinovel: %v", err)
@@ -42,7 +42,7 @@ func TestBilinovel_GetVolume(t *testing.T) {
} }
func TestBilinovel_GetChapter(t *testing.T) { func TestBilinovel_GetChapter(t *testing.T) {
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1}) bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
bilinovel.SetTextOnly(true) bilinovel.SetTextOnly(true)
if err != nil { if err != nil {
t.Fatalf("failed to create bilinovel: %v", err) t.Fatalf("failed to create bilinovel: %v", err)

View File

@@ -0,0 +1,318 @@
package test
import (
"fmt"
"log"
"strings"
"testing"
"github.com/PuerkitoBio/goquery"
)
// unscrambleParagraphs takes the paragraphs of a chapter in the scrambled order
// in which the server delivers them and returns them in reading order.
//
// The algorithm is a port of the site's client-side script
// https://www.bilinovel.com/themes/zhmb/js/chapterlog.js?v1006c1
// (deobfuscated with https://obf-io.deobfuscate.io and http://jsnice.org).
// It works today, but it breaks as soon as bilinovel changes how the seed is
// derived or how the shuffle is implemented, so driving a real browser with
// playwright may still be the more robust approach.
//
// When there are 20 paragraphs or fewer the input slice itself is returned
// unchanged; otherwise a newly allocated slice of the same length is returned.
func unscrambleParagraphs(scrambledParagraphs []*goquery.Selection, chapterID int) []*goquery.Selection {
	order := unscrambleOrder(len(scrambledParagraphs), chapterID)
	if order == nil {
		return scrambledParagraphs
	}

	// JS logic: elements[aProperties[i]] = out[i].node
	// i.e. the i-th scrambled paragraph belongs at position order[i] of the
	// correctly ordered list.
	correctlyOrdered := make([]*goquery.Selection, len(order))
	for i, target := range order {
		correctlyOrdered[target] = scrambledParagraphs[i]
	}
	return correctlyOrdered
}

// unscrambleOrder computes, for every index of the scrambled list, the index
// that element occupies in the correctly ordered list. It returns nil when
// count is small enough that no reordering is applied.
func unscrambleOrder(count, chapterID int) []int {
	const (
		fixedPrefix = 20 // the first 20 paragraphs are never moved

		// Seed derivation: chapterID*seedFactor + seedOffset.
		seedFactor = 127
		seedOffset = 235

		// Linear congruential generator used by the shuffle.
		lcgMultiplier = 9302
		lcgIncrement  = 49397
		lcgModulus    = 233280
	)

	// Following the JS logic, nothing is reordered for 20 paragraphs or fewer.
	if count <= fixedPrefix {
		return nil
	}

	// Start from the identity mapping; only the part after the fixed prefix
	// is shuffled, so the prefix entries stay as they are.
	order := make([]int, count)
	for i := range order {
		order[i] = i
	}
	tail := order[fixedPrefix:]

	// Widen before multiplying so the seed cannot wrap on platforms where
	// int is 32 bits wide.
	seed := int64(chapterID)*seedFactor + seedOffset

	// Fisher-Yates-style shuffle driven by the LCG, reproducing the JS
	// implementation step for step.
	for i := len(tail) - 1; i > 0; i-- {
		seed = (seed*lcgMultiplier + lcgIncrement) % lcgModulus
		j := int(float64(seed) / lcgModulus * float64(i+1))
		tail[i], tail[j] = tail[j], tail[i]
	}
	return order
}
// TestResortDom feeds a captured, unprocessed chapter page through
// unscrambleParagraphs and prints the paragraphs in their restored order.
// It only prints the result; it makes no assertions about the order.
func TestResortDom(t *testing.T) {
// --- Step 1: prepare the raw HTML ---
// Paste the complete, unprocessed HTML source fetched with a plain HTTP
// request here. The page below is a previously captured raw page used as
// the sample.
unprocessedHtmlContent := `
<!DOCTYPE html>
<html lang="zh-Hans">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>女主角? 圣女? 不,我是全业女仆(自豪)! 第1章 第1话 目标成为女仆的少女_哔哩轻小说</title>
<meta name="keywords" content="女主角? 圣女? 不,我是全业女仆(自豪)!,第1话 目标成为女仆的少女,哔哩轻小说" />
<meta name="description" content="哔哩轻小说提供 あてきち 所创作的 女主角? 圣女? 不,我是全业女仆(自豪)! 第1章 第1话 目标成为女仆的少女 在线阅读与TXT,epub下载" />
<meta name="viewport" content="initial-scale=1.0,minimum-scale=1.0,user-scalable=yes,width=device-width" />
<meta name="theme-color" content="#232323" media="(prefers-color-scheme: dark)" />
<meta name="applicable-device" content="mobile" />
<link rel="stylesheet" href="https://www.bilinovel.com/themes/zhmb/css/read.css?v0409c2">
<link rel="stylesheet" href="https://www.bilinovel.com/themes/zhmb/css/chapter.css?v1126a9">
<link rel="dns-preconnect" href="https://www.bilinovel.com">
<link rel="alternate" hreflang="zh-Hant" href="https://tw.linovelib.com/novel/4126/236197.html" />
<script src="https://www.bilinovel.com/themes/zhmb/js/jquery-3.3.1.js"></script>
<script type="text/javascript" src="/scripts/darkmode.js"></script>
<script async src="https://www.bilinovel.com/themes/zhmb/js/lazysizes.min.js"></script>
<script src="https://www.bilinovel.com/scripts/common.js?v0922a3"></script>
<script src="https://www.bilinovel.com/scripts/zation.js?v1004a4"></script>
<style>.center-note{text-align: center; margin: 0; height: 50vh; display: flex ; justify-content: center; align-items: center;}.sum1{display:none}.footlink a{box-shadow: 0 0 1px rgba(150,150,150,.6);}.footlink a:nth-child(1){display: inline-block;margin-bottom: 10px;width: 90%;}.footlink a:nth-child(2){padding: 5px 10px;float: left;width: 35%;margin-left: 5%;}.footlink a:nth-child(3){padding: 5px 10px;float: right;width: 35%;margin-right: 5%;}.footlink a:nth-child(4){display: inline-block;margin-top: 10px;width: 90%;}#acontent{text-align: unset;}</style>
<script type="text/javascript">var ual = navigator.language.toLowerCase();var isWindows = navigator.platform.toLowerCase().includes("win");if(ual == 'zh-tw' || ual == 'zh-hk'){window.location.replace("https://tw.linovelib.com/novel/4126/236197.html");}if (ual === 'zh-cn' && isWindows) { window.location.replace("https://www.linovelib.com/novel/4126/236197.html");}</script>
</head>
<body id="aread">
<script type="text/javascript">var ReadParams={url_previous:'/novel/4126/236196.html',url_next:'/novel/4126/236197_2.html',url_index:'/novel/4126/catalog',url_articleinfo:'/novel/4126/vol_236194.html',url_image:'https://www.bilinovel.com/files/article/image/4/4126/4126s.jpg',url_home:'https://www.bilinovel.com/',articleid:'4126',articlename:'女主角? 圣女? 不,我是全业女仆(自豪)!',subid:'/4',author:'あてきち',chapterid:'236197',page:'1',chaptername:'第1章 第1话 目标成为女仆的少女',chapterisvip:'0',userid:'0',readtime:'1761057661'}</script>
<div class="main">
<div id="abox" class="abox">
<div id="apage" class="apage">
<div class="atitle"><h1 id="atitle">第1话 目标成为女仆的少女</h1><h3>第1章</h3></div>
<div id="acontent" class="contente"><div class="cgo"><!--<script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-8799828951681010"
crossorigin="anonymous"></script>
<ins class="adsbygoogle"
style="display:block"
data-ad-client="ca-pub-8799828951681010"
data-ad-slot="2277430192"
data-ad-format="auto"
data-full-width-responsive="true"></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script>--></div><p>「欢迎回来,老爷。」</p>
<br>
<p>一位少女恭敬地弯腰向走进木质大门的绅士致意。</p>
<p>少女穿着一件做工精致的黑色连衣裙,上面系着花边以及刺绣、并不华丽的纯白围裙,梳成编辫的黑发上系着可爱的蕾丝头带。</p>
<br>
<p>无论从哪个角度看,都是迎接主人归来的女仆样子。</p>
<br>
<p>「啊,我回来了」</p>
<br>
<p>绅士把帽子和大衣交给恭敬地弯腰的女仆,用温柔的语气回答。</p>
<br>
<p>「我马上为您准备茶水。请问您想要哪一款?」</p>
<p>「那么,我想要一杯伯爵红茶。」</p>
<p>「要加牛奶之类的吗?」</p>
<p>「不,不用了。」</p>
<p>「遵命。茶点要什么呢?」</p>
<p>「嗯,就交给你吧。拜托了?」</p>
<br>
<p>对着绅士的话语,身为女仆的少女露出了轻柔的微笑。她可能只有十五、六岁吧。脸上还带着稚气,但未来值得期待,可爱又温柔的容貌。</p>
<br>
<p>「请交给我,我会准备合您口味的茶点。」</p>
<p>「啊,拜托了。」</p>
<br>
<p>女仆少女将帽子和大衣挂在衣架上,然后引导绅士到餐桌。</p>
<br>
<p></p>
<br>
<p>「那么,我要出门了。」</p>
<p>「好的,老爷」</p>
<p>「下次回来时,如果能再让妳接待就好了……」</p>
<br>
<p>「下次她想要带朋友在露台喝茶,也希望你能照顾他们。」</p>
<br>
<p>轻轻敲门后,听到「请进」的回答,少女走进了房间行礼。</p>
<p>一个少女嘟囔着。那是一位身穿简素蓝色连衣裙的少女。闪闪发光的银色头发留到了胸口。有着神秘的琉璃色瞳孔的美丽可爱少女站在母亲身旁。</p>
<p>送走绅士后,女仆少女前往总管的房间。</p>
<br>
<p>「欸,对我不需要用这种说话方式吧?……律子酱。」</p>
<p>薪水丰厚的兼职让她顺利存下了留学费用,留学之日即将到来。</p>
<p>「拜托了!」</p>
<br>
<p>女仆少女律子满脸笑容地回答。</p>
<br>
<p>「话说回来,律子酱。上次来的坂上夫人很喜欢你呢。上次寄来的邮件里相当称赞。她说下次还打算指名。」</p>
<br>
<p>「失礼了Miss 阿曼达。关于刚才离开宅邸的老爷报告……」</p>
<br>
<p>被叫做律子的女仆少女张开眼,刚才还散发着女仆气息的模样一下子变回稚气十足的少女,她嘟起嘴说道。</p>
<br>
<p>「这样很好啊!」</p>
<p>女仆少女律子满脸笑容地回答。</p>
<p>对担忧这一点的父母来说,当时的律子的情况无疑让人开心。</p>
<br>
<p>因此,父母并未反对女儿出人意表的宣言。</p>
<br>
<p>标题叫『深窗的公主的悲恋』。</p>
<p>优雅的动作,没有任何不自然的温柔笑容。仿佛是女仆典范一般的少女。看着她的身影,总管阿曼达皱了皱眉。不,这是因为……</p>
<p>「怎么了?瑟蕾丝蒂?」</p>
<br>
<p>「啊,拜托了。那么……」</p>
<p>「一路顺风,老爷。」</p>
<p>「遵命。我会将您的意愿转达给<ruby>女仆总管<rp>(</rp><rt>家政妇</rt><rp>)</rp></ruby>。」</p>
<p>(公主身后的女仆们是多么的优秀啊!)</p>
<br>
<p>「你真的很喜欢做这种工作呢。这样一来就得早晨开始准备了。下次我会去问问她们的希望。」</p>
<br>
<p>这部电影以旧时英国贵族的故事为题材。描述了一位在呵护下长大的贵族千金,偶然认识一位平民青年,并陷入爱河的故事。最后,因为身份差异,两人自尽,悲剧结局。</p>
<br>
<p>父母看着律子的身影,感到非常开心。</p>
<p>女仆们使出各种手段帮助她与男子相会。</p>
<p>在女仆的影响下,律子对各种事物产生了兴趣,玩耍、笑声、学习,成长为一个非常优秀的女儿。自从遇见女仆以来,好奇心无止境,虽然年龄和性格相比有些幼稚,但对父母来说,女仆这个存在也是让人有好感的。</p>
<br>
<p>她的名字是瑞波律子,二十岁,现在是大学二年级的学生。</p>
<br>
<p>「我讨厌那个名字啊。明明是日本人,却叫阿曼达……」 <span style="color: rgb(61142185);">(*亚万田日语念成阿曼达)</span></p>
<br>
<p>当然,因为主角是英国贵族千金,所以电影里并没有描绘女仆们努力的场景。但正因为如此,律子对在幕后默默支持的女仆们十分感动。</p>
<p>「……本来应该是这样的啊。」</p>
<p>来这家女仆咖啡厅的客人并不仅仅是男性。这家店的男女客人比例几乎是一比一。</p>
<br>
<p>会员制高级女仆咖啡厅『<ruby>贵族的日常<rp>(</rp><rt>Noble's One Day</rt><rp>)</rp></ruby>』。</p>
<br>
<p>生活了六年,律子慢慢的成长,但她却不对事物报持热情。喜欢的玩具和书籍都没有,看电视也不会表现出太多兴趣。</p>
<br>
<p>「拜托了!」</p>
<br>
<p>那是瑞波律子还不懂爱情的六岁春天的事……先不管给一个六岁小孩看悲恋电影的问题。</p>
<p>是被称为女仆总管的女性,亚万田凪沙创建的店。</p>
<br>
<p>「我在大学毕业后,想在英国成为真正的女仆!」</p>
<br>
<p>「好的,请放心交给我!」</p>
<br>
<p>「欸,真的吗!? 就是上周来过的那位温柔的女士吗?」</p>
<p>男士需穿着西装,女士需穿着礼服,这是服装规定。特别为女性客人提供服装租赁服务,因此女性客人可以享受穿着平时难得一穿的贵族少女或贵妇风格的洋装,扮演女主人的角色。</p>
<br>
<p>虽然二十岁了,律子的脸庞略显年幼,她是这家店最受欢迎的女仆。</p>
<br>
<p>看过这部电影的观众都为两人的悲恋流泪,感动不已。</p>
<br>
<p>从那时起,律子就迷上了女仆。她向父母说明了女仆是多么伟大的存在,并激动地宣布有一天她也会成为女仆。</p>
<p>完全预约制,到店时会有指名的女仆迎接。此时店员会完全扮演女仆角色,客人不是客人身份,而是扮演女仆的主人,享受其中。</p>
<br>
<p>一切都是顺风顺水。距离成为女仆只剩下最后一步!</p>
<br>
<p>美丽的行礼后,少女向绅士回以温柔的微笑。绅士推开门离开了。</p>
<br>
<p>律子的梦想是成为女仆。原因非常简单,那是因为她小时候看过的一部电影。</p>
<br>
<p>在父母的支持下,律子在大学学习外语、历史、文学、礼仪等,以成为女仆为目标,在本格派女仆咖啡厅进行女仆训练的日常。</p>
<br>
<p>「那么,我也可以帮忙准备衣服和化妆吗?」</p>
<p>「讨厌!再让我扮一下女仆也没关系嘛,亚万田小姐!」</p>
<br>
<p>绅士略显羞涩地说着,女仆的少女露出了微笑回答。</p>
<p>然而,律子却对另一方面感动不已。</p>
<br>
<br>
<br>
<br>
<br>
<p>支付是预付制,店内不谈金钱。没有菜单,女仆会自然接受点单。客人只需要享受那片刻的主人时光即可。</p>
<p>女主角的贵族千金拥有很温柔的人格,所以她的女仆们也非常喜爱她。</p>
<br>
<p>为了筹集到英国留学的资金,进入大学的律子开始寻找兼职工作。她认为对未来有帮助的工作是最好的,于是找到了这家女仆咖啡厅。</p><div class="cgo"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-8799828951681010"
crossorigin="anonymous"></script>
<ins class="adsbygoogle"
style="display:block"
data-ad-client="ca-pub-8799828951681010"
data-ad-slot="9085546976"
data-ad-format="auto"
data-full-width-responsive="true"></ins>
<script>
(adsbygoogle = window.adsbygoogle || []).push({});
</script></div>
</div>
</div>
</div>
<div id="toptext" class="toptext" style="display:none;"></div>
<div id="bottomtext" class="bottomtext" style="display:none;"></div>
<div id="operatetip" class="operatetip" style="display:none;" onclick="this.style.display='none';">
<div class="tipl"><p>翻上页</p></div>
<div class="tipc"><p>呼出功能<br><br><small>漫画&插图<br>建议使用上下翻页</small><br><br><small>【翻页模式】章评·默认隐藏</small></p></div>
<div class="tipr"><p>翻下页</p></div>
</div>
</div>
<div id="footlink" class="footlink"><a onclick="window.location.href = ReadParams.url_previous;">序章 路多帕克家的大小姐以及万能女仆</a><a onclick="window.location.href = ReadParams.url_index;">目录</a><a onclick="window.location.href = ReadParams.url_articleinfo;">书页</a><a onclick="window.location.href = ReadParams.url_next;">下一頁</a></div>
<script>$(document).ready(function(){var prevpage="/novel/4126/236196.html";var nextpage="/novel/4126/236197_2.html";var bookpage="/novel/4126.html";$("body").keydown(function(event){var isInput=event.target.tagName==='INPUT'||event.target.tagName==='TEXTAREA';if(!isInput){if(event.keyCode==37){location=prevpage}else if(event.keyCode==39){location=nextpage}}})});</script>
<script type="text/javascript" src="https://www.bilinovel.com/themes/zhmb/js/readtools.js?42sfaj-8"></script>
<script type="text/javascript" src="https://www.bilinovel.com/scripts/json2.js"></script>
<script type="text/javascript" src="https://www.bilinovel.com/themes/zhmb/js/chapterlog.js?v1006c1"></script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-1K4JZ603WH"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-1K4JZ603WH');
</script>
<script>
var _hmt = _hmt || [];
(function() {
var hm = document.createElement("script");
hm.src = "https://hm.baidu.com/hm.js?6f9595b2c4b57f95a93aa5f575a77fb0";
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(hm, s);
})();
</script>
<!--<script>
if ('serviceWorker' in navigator) {
navigator.serviceWorker.getRegistrations().then(function(registrations) {
for (let registration of registrations) {
registration.unregister();
}
});
}
</script>-->
<script defer src="https://static.cloudflareinsights.com/beacon.min.js/vcd15cbe7772f49c399c6a5babf22c1241717689176015" integrity="sha512-ZpsOmlRQV6y907TI0dKBHq9Md29nnaEIPlkf84rnaERnq6zvWvPUqr2ft8M1aS28oN72PdrCzSjY4U6VaAw1EQ==" data-cf-beacon='{"version":"2024.11.0","token":"192783771d59492782cd05bd12eb61b9","r":1,"server_timing":{"name":{"cfCacheStatus":true,"cfEdge":true,"cfExtPri":true,"cfL4":true,"cfOrigin":true,"cfSpeedBrain":true},"location_startswith":null}}' crossorigin="anonymous"></script>
</body>
</html>`
// --- Step 2: parse the HTML and extract the key information ---
doc, err := goquery.NewDocumentFromReader(strings.NewReader(unprocessedHtmlContent))
if err != nil {
// NOTE(review): log.Fatalf exits the whole test binary; t.Fatalf would
// fail only this test.
log.Fatalf("解析HTML失败: %v", err)
}
// Same value as the chapterid field of ReadParams in the fixture above.
chapterID := 236197
// --- Step 3: collect every paragraph that has to be reordered ---
var scrambledParagraphs []*goquery.Selection
doc.Find("#acontent p").Each(func(i int, s *goquery.Selection) {
// Keep only non-empty paragraphs, matching the JS logic.
if len(strings.TrimSpace(s.Text())) > 0 {
scrambledParagraphs = append(scrambledParagraphs, s)
}
})
fmt.Printf("从原始HTML中找到 %d 个乱序段落,准备重排。\n\n", len(scrambledParagraphs))
// --- Step 4: run the reordering algorithm ---
correctlyOrderedParagraphs := unscrambleParagraphs(scrambledParagraphs, chapterID)
// --- Step 5: print the final result ---
fmt.Println("--- 已恢复正确顺序的最终内容 ---")
for i, p := range correctlyOrderedParagraphs {
fmt.Printf("%d: %s\n", i+1, p.Text())
}
}