feat(logging): Implement structured logging and debug mode

fix: Windows cannot download novels correctly
This commit is contained in:
2025-10-17 01:36:23 +11:00
parent 11fccdb05f
commit 17c3859e9e
4 changed files with 103 additions and 34 deletions

6
.vscode/launch.json vendored
View File

@@ -23,9 +23,9 @@
"program": "${workspaceFolder}",
"args": [
"download",
"-n=2727",
"-v=150098",
"--headless=false"
"-n=2388",
"-v=84522",
"--debug=true"
]
}
]

View File

@@ -9,7 +9,7 @@ import (
"encoding/json"
"fmt"
"io"
"log"
"log/slog"
"os"
"path/filepath"
@@ -22,18 +22,20 @@ var downloadCmd = &cobra.Command{
Short: "Download a novel or volume",
Long: "Download a novel or volume",
Run: func(cmd *cobra.Command, args []string) {
log.Println("Installing playwright")
slog.Info("Installing playwright")
err := playwright.Install(&playwright.RunOptions{
Browsers: []string{"chromium"},
Stdout: io.Discard,
})
if err != nil {
log.Panicf("failed to install playwright")
slog.Error("failed to install playwright")
return
}
err = runDownloadNovel()
if err != nil {
log.Printf("failed to download novel: %v", err)
slog.Error("failed to download novel", slog.Any("error", err))
return
}
},
}
@@ -43,8 +45,8 @@ type downloadCmdArgs struct {
VolumeId int `validate:"required"`
outputPath string
outputType string
headless bool
concurrency int
debug bool
}
var (
@@ -56,15 +58,15 @@ func init() {
downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id")
downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path")
downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text")
downloadCmd.Flags().BoolVar(&downloadArgs.headless, "headless", true, "headless mode")
downloadCmd.Flags().BoolVar(&downloadArgs.debug, "debug", false, "debug mode")
downloadCmd.Flags().IntVar(&downloadArgs.concurrency, "concurrency", 3, "concurrency of downloading volumes")
RootCmd.AddCommand(downloadCmd)
}
func runDownloadNovel() error {
downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{
Headless: downloadArgs.headless,
Concurrency: downloadArgs.concurrency,
Debug: downloadArgs.debug,
})
if err != nil {
return fmt.Errorf("failed to create downloader: %v", err)
@@ -72,7 +74,7 @@ func runDownloadNovel() error {
// 确保在函数结束时关闭资源
defer func() {
if closeErr := downloader.Close(); closeErr != nil {
log.Printf("Failed to close downloader: %v", closeErr)
slog.Info("Failed to close downloader", slog.Any("error", closeErr))
}
}()

View File

@@ -7,7 +7,7 @@ import (
"crypto/sha256"
_ "embed"
"fmt"
"log"
"log/slog"
"net/http"
"os"
"path"
@@ -40,11 +40,13 @@ type Bilinovel struct {
pages map[string]playwright.Page
concurrency int
concurrentChan chan any
logger *slog.Logger
}
type BilinovelNewOption struct {
Headless bool
Concurrency int
Debug bool
}
func New(option BilinovelNewOption) (*Bilinovel, error) {
@@ -54,6 +56,17 @@ func New(option BilinovelNewOption) (*Bilinovel, error) {
}
restyClient := utils.NewRestyClient(50)
var logLevel slog.Level
if option.Debug {
logLevel = slog.LevelDebug
} else {
logLevel = slog.LevelInfo
}
handlerOptions := &slog.HandlerOptions{
Level: logLevel,
}
b := &Bilinovel{
fontMapper: fontMapper,
textOnly: false,
@@ -61,10 +74,11 @@ func New(option BilinovelNewOption) (*Bilinovel, error) {
pages: make(map[string]playwright.Page),
concurrency: option.Concurrency,
concurrentChan: make(chan any, option.Concurrency),
logger: slog.New(slog.NewTextHandler(os.Stdout, handlerOptions)),
}
// 初始化浏览器实例
err = b.initBrowser(option.Headless)
err = b.initBrowser(option.Debug)
if err != nil {
return nil, fmt.Errorf("failed to init browser: %v", err)
}
@@ -81,13 +95,15 @@ func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
}
// initBrowser 初始化浏览器实例
func (b *Bilinovel) initBrowser(headless bool) error {
func (b *Bilinovel) initBrowser(debug bool) error {
pw, err := playwright.Run()
if err != nil {
return fmt.Errorf("could not start playwright: %w", err)
}
b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
Headless: playwright.Bool(headless),
Headless: playwright.Bool(!debug),
Devtools: playwright.Bool(debug),
})
if err != nil {
return fmt.Errorf("could not launch browser: %w", err)
@@ -98,7 +114,7 @@ func (b *Bilinovel) initBrowser(headless bool) error {
return fmt.Errorf("could not create browser context: %w", err)
}
log.Println("Browser initialized successfully")
b.logger.Info("Browser initialized successfully")
return nil
}
@@ -106,7 +122,7 @@ func (b *Bilinovel) initBrowser(headless bool) error {
func (b *Bilinovel) Close() error {
if b.browser != nil {
if err := b.browser.Close(); err != nil {
log.Printf("could not close browser: %v", err)
b.logger.Error("could not close browser", slog.Any("error", err))
}
b.browser = nil
b.browserContext = nil
@@ -122,7 +138,7 @@ func (b *Bilinovel) GetStyleCSS() string {
}
func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) {
log.Printf("Getting novel %v\n", novelId)
b.logger.Info("Getting novel", slog.Int("novelId", novelId))
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
resp, err := b.restyClient.R().Get(novelUrl)
@@ -161,7 +177,7 @@ func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes [
}
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) {
log.Printf("Getting volume %v of novel %v\n", volumeId, novelId)
b.logger.Info("Getting volume of novel", slog.Int("volumeId", volumeId), slog.Int("novelId", novelId))
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := b.restyClient.R().Get(novelUrl)
@@ -259,7 +275,7 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool
}
func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) {
log.Printf("Getting all volumes of novel %v\n", novelId)
b.logger.Info("Getting all volumes of novel", slog.Int("novelId", novelId))
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
resp, err := b.restyClient.R().Get(catelogUrl)
@@ -300,7 +316,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
volumeId, err := strconv.Atoi(volumeIdStr)
if err != nil {
log.Printf("failed to convert volume id %s: %v", volumeIdStr, err)
b.logger.Error("failed to convert volume id", slog.String("volumeIdStr", volumeIdStr), slog.Any("error", err))
return
}
if slices.Contains(skipVolumes, volumeId) {
@@ -308,7 +324,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
}
volume, err := b.GetVolume(novelId, volumeId, skipChapterContent)
if err != nil {
log.Printf("failed to get volume info for novel %d, volume %d: %v", novelId, volumeId, err)
b.logger.Error("failed to get volume info", slog.Int("novelId", novelId), slog.Int("volumeId", volumeId), slog.Any("error", err))
return
}
volume.SeriesIdx = i
@@ -340,7 +356,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
}
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId)
b.logger.Info("Getting chapter of novel", slog.Int("chapterId", chapterId), slog.Int("novelId", novelId))
pageNum := 1
chapter := &model.Chapter{
@@ -370,8 +386,11 @@ func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model
return chapter, nil
}
var nextPageUrlRegexp = regexp.MustCompile(`url_next:\s?['"]([^'"]*?)['"]`)
var cleanNextPageUrlRegexp = regexp.MustCompile(`(_\d+)?\.html$`)
func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) {
log.Printf("Getting chapter %v by page %v\n", chapter.Id, pageNum)
b.logger.Info("Getting chapter by page", slog.Int("chapter", chapter.Id), slog.Int("page", pageNum))
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum)
@@ -405,6 +424,17 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
return false, fmt.Errorf("failed to parse html: %w", err)
}
// 判断章节是否有下一页
n := nextPageUrlRegexp.FindStringSubmatch(resortedHtml)
if len(n) != 2 {
return false, fmt.Errorf("failed to determine wether there is a next page")
}
s := cleanNextPageUrlRegexp.ReplaceAllString(n[1], "")
if strings.Contains(Url, s) {
hasNext = true
}
if pageNum == 1 {
chapter.Title = doc.Find("#atitle").Text()
}
@@ -413,7 +443,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
content.Find("center").Remove()
content.Find(".google-auto-placed").Remove()
if strings.Contains(resp.String(), `font-family: "read"`) {
if strings.Contains(resortedHtml, `font-family: "read"`) {
html, err := content.Find("p").Last().Html()
if err != nil {
return false, fmt.Errorf("failed to get html: %v", err)
@@ -486,7 +516,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
}
func (b *Bilinovel) getImg(url string) ([]byte, error) {
log.Printf("Getting img %v\n", url)
b.logger.Info("Getting img", slog.String("url", url))
resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url)
if err != nil {
return nil, err
@@ -497,7 +527,15 @@ func (b *Bilinovel) getImg(url string) ([]byte, error) {
// processContentWithPlaywright 使用复用的浏览器实例处理内容
func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) {
tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html")
// 替换 window.location.replace防止页面跳转
htmlContent = strings.ReplaceAll(htmlContent, "window.location.replace", "console.log")
tempPath := filepath.Join(os.TempDir(), "bilinovel-downloader")
err := os.MkdirAll(tempPath, 0755)
if err != nil {
return "", fmt.Errorf("failed to create temp dir: %w", err)
}
tempFile, err := os.CreateTemp(tempPath, "temp-*.html")
if err != nil {
return "", fmt.Errorf("failed to create temp file: %w", err)
}
@@ -510,6 +548,34 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
tempFile.Close()
tempFilePath := tempFile.Name()
// // 屏蔽请求
// googleAdsDomains := []string{
// "adtrafficquality.google",
// "doubleclick.net",
// "googlesyndication.com",
// "googletagmanager.com",
// "hm.baidu.com",
// "cloudflareinsights.com",
// "fsdoa.js", // adblock 检测
// "https://www.linovelib.com/novel/", // 阻止从本地文件跳转到在线页面
// }
// err = page.Route("**/*", func(route playwright.Route) {
// for _, d := range googleAdsDomains {
// if strings.Contains(route.Request().URL(), d) {
// b.logger.Debug("blocking request", slog.String("url", route.Request().URL()))
// err := route.Abort("aborted")
// if err != nil {
// b.logger.Debug("failed to block request", route.Request().URL(), err)
// }
// return
// }
// }
// _ = route.Continue()
// })
// if err != nil {
// return "", fmt.Errorf("failed to intercept requests: %w", err)
// }
_, err = page.ExpectResponse(func(url string) bool {
return strings.Contains(url, "chapterlog.js")
}, func() error {
@@ -519,14 +585,15 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
}
return nil
}, playwright.PageExpectResponseOptions{
Timeout: playwright.Float(5000),
Timeout: playwright.Float(10000),
})
if err != nil {
return "", fmt.Errorf("failed to wait for network request finish")
}
err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{
State: playwright.WaitForSelectorStateVisible,
State: playwright.WaitForSelectorStateVisible,
Timeout: playwright.Float(10000),
})
if err != nil {
return "", fmt.Errorf("could not wait for #acontent: %w", err)
@@ -562,7 +629,7 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
return "", fmt.Errorf("failed to remove hidden elements: %w", err)
}
log.Printf("Hidden elements removal result: %s", result)
b.logger.Debug("Hidden elements removal result", slog.Any("count", result))
processedHTML, err := page.Content()
if err != nil {

View File

@@ -8,7 +8,7 @@ import (
)
func TestBilinovel_GetNovel(t *testing.T) {
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 5})
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 5})
bilinovel.SetTextOnly(true)
if err != nil {
t.Fatalf("failed to create bilinovel: %v", err)
@@ -25,7 +25,7 @@ func TestBilinovel_GetNovel(t *testing.T) {
}
func TestBilinovel_GetVolume(t *testing.T) {
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1})
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
bilinovel.SetTextOnly(true)
if err != nil {
t.Fatalf("failed to create bilinovel: %v", err)
@@ -42,7 +42,7 @@ func TestBilinovel_GetVolume(t *testing.T) {
}
func TestBilinovel_GetChapter(t *testing.T) {
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1})
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
bilinovel.SetTextOnly(true)
if err != nil {
t.Fatalf("failed to create bilinovel: %v", err)