From 17c3859e9e54bb4bfe58516feb46b387b27a6d17 Mon Sep 17 00:00:00 2001 From: nite Date: Fri, 17 Oct 2025 01:36:23 +1100 Subject: [PATCH] feat(logging): Implement structured logging and debug mode fix: Windows cannot download novels correctly --- .vscode/launch.json | 6 +- cmd/download.go | 18 ++--- downloader/bilinovel/bilinovel.go | 107 ++++++++++++++++++++++++------ test/bilinovel_test.go | 6 +- 4 files changed, 103 insertions(+), 34 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 3dd9a89..e6e64c0 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -23,9 +23,9 @@ "program": "${workspaceFolder}", "args": [ "download", - "-n=2727", - "-v=150098", - "--headless=false" + "-n=2388", + "-v=84522", + "--debug=true" ] } ] diff --git a/cmd/download.go b/cmd/download.go index 31e31a1..01ad03a 100644 --- a/cmd/download.go +++ b/cmd/download.go @@ -9,7 +9,7 @@ import ( "encoding/json" "fmt" "io" - "log" + "log/slog" "os" "path/filepath" @@ -22,18 +22,20 @@ var downloadCmd = &cobra.Command{ Short: "Download a novel or volume", Long: "Download a novel or volume", Run: func(cmd *cobra.Command, args []string) { - log.Println("Installing playwright") + slog.Info("Installing playwright") err := playwright.Install(&playwright.RunOptions{ Browsers: []string{"chromium"}, Stdout: io.Discard, }) if err != nil { - log.Panicf("failed to install playwright") + slog.Error("failed to install playwright") + return } err = runDownloadNovel() if err != nil { - log.Printf("failed to download novel: %v", err) + slog.Error("failed to download novel", slog.Any("error", err)) + return } }, } @@ -43,8 +45,8 @@ type downloadCmdArgs struct { VolumeId int `validate:"required"` outputPath string outputType string - headless bool concurrency int + debug bool } var ( @@ -56,15 +58,15 @@ func init() { downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id") downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path") downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text") - downloadCmd.Flags().BoolVar(&downloadArgs.headless, "headless", true, "headless mode") + downloadCmd.Flags().BoolVar(&downloadArgs.debug, "debug", false, "debug mode") downloadCmd.Flags().IntVar(&downloadArgs.concurrency, "concurrency", 3, "concurrency of downloading volumes") RootCmd.AddCommand(downloadCmd) } func runDownloadNovel() error { downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{ - Headless: downloadArgs.headless, Concurrency: downloadArgs.concurrency, + Debug: downloadArgs.debug, }) if err != nil { return fmt.Errorf("failed to create downloader: %v", err) @@ -72,7 +74,7 @@ func runDownloadNovel() error { // 确保在函数结束时关闭资源 defer func() { if closeErr := downloader.Close(); closeErr != nil { - log.Printf("Failed to close downloader: %v", closeErr) + slog.Info("Failed to close downloader", slog.Any("error", closeErr)) } }() diff --git a/downloader/bilinovel/bilinovel.go b/downloader/bilinovel/bilinovel.go index 56f5b93..2e397d4 100644 --- a/downloader/bilinovel/bilinovel.go +++ b/downloader/bilinovel/bilinovel.go @@ -7,7 +7,7 @@ import ( "crypto/sha256" _ "embed" "fmt" - "log" + "log/slog" "net/http" "os" "path" @@ -40,11 +40,13 @@ type Bilinovel struct { pages map[string]playwright.Page concurrency int concurrentChan chan any + + logger *slog.Logger } type BilinovelNewOption struct { - Headless bool Concurrency int + Debug bool } func New(option BilinovelNewOption) (*Bilinovel, error) { @@ -54,6 +56,17 @@ func New(option BilinovelNewOption) (*Bilinovel, error) { } restyClient := utils.NewRestyClient(50) + var logLevel slog.Level + if option.Debug { + logLevel = slog.LevelDebug + } else { + logLevel = slog.LevelInfo + } + + handlerOptions := &slog.HandlerOptions{ + Level: logLevel, + } + b := &Bilinovel{ fontMapper: fontMapper, textOnly: false, @@ -61,10 +74,11 @@ func New(option BilinovelNewOption) (*Bilinovel, error) { pages: make(map[string]playwright.Page), concurrency: option.Concurrency, concurrentChan: make(chan any, option.Concurrency), + logger: slog.New(slog.NewTextHandler(os.Stdout, handlerOptions)), } // 初始化浏览器实例 - err = b.initBrowser(option.Headless) + err = b.initBrowser(option.Debug) if err != nil { return nil, fmt.Errorf("failed to init browser: %v", err) } @@ -81,13 +95,15 @@ func (b *Bilinovel) GetExtraFiles() []model.ExtraFile { } // initBrowser 初始化浏览器实例 -func (b *Bilinovel) initBrowser(headless bool) error { +func (b *Bilinovel) initBrowser(debug bool) error { pw, err := playwright.Run() if err != nil { return fmt.Errorf("could not start playwright: %w", err) } + b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{ - Headless: playwright.Bool(headless), + Headless: playwright.Bool(!debug), + Devtools: playwright.Bool(debug), }) if err != nil { return fmt.Errorf("could not launch browser: %w", err) @@ -98,7 +114,7 @@ func (b *Bilinovel) initBrowser(headless bool) error { return fmt.Errorf("could not create browser context: %w", err) } - log.Println("Browser initialized successfully") + b.logger.Info("Browser initialized successfully") return nil } @@ -106,7 +122,7 @@ func (b *Bilinovel) initBrowser(headless bool) error { func (b *Bilinovel) Close() error { if b.browser != nil { if err := b.browser.Close(); err != nil { - log.Printf("could not close browser: %v", err) + b.logger.Error("could not close browser", slog.Any("error", err)) } b.browser = nil b.browserContext = nil @@ -122,7 +138,7 @@ func (b *Bilinovel) GetStyleCSS() string { } func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) { - log.Printf("Getting novel %v\n", novelId) + b.logger.Info("Getting novel", slog.Int("novelId", novelId)) novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId) resp, err := b.restyClient.R().Get(novelUrl) @@ -161,7 +177,7 @@ func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes [ } func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) { - log.Printf("Getting volume %v of novel %v\n", volumeId, novelId) + b.logger.Info("Getting volume of novel", slog.Int("volumeId", volumeId), slog.Int("novelId", novelId)) novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId) resp, err := b.restyClient.R().Get(novelUrl) @@ -259,7 +275,7 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool } func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) { - log.Printf("Getting all volumes of novel %v\n", novelId) + b.logger.Info("Getting all volumes of novel", slog.Int("novelId", novelId)) catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId) resp, err := b.restyClient.R().Get(catelogUrl) @@ -300,7 +316,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu volumeId, err := strconv.Atoi(volumeIdStr) if err != nil { - log.Printf("failed to convert volume id %s: %v", volumeIdStr, err) + b.logger.Error("failed to convert volume id", slog.String("volumeIdStr", volumeIdStr), slog.Any("error", err)) return } if slices.Contains(skipVolumes, volumeId) { @@ -308,7 +324,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu } volume, err := b.GetVolume(novelId, volumeId, skipChapterContent) if err != nil { - log.Printf("failed to get volume info for novel %d, volume %d: %v", novelId, volumeId, err) + b.logger.Error("failed to get volume info", slog.Int("novelId", novelId), slog.Int("volumeId", volumeId), slog.Any("error", err)) return } volume.SeriesIdx = i @@ -340,7 +356,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu } func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) { - log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId) + b.logger.Info("Getting chapter of novel", slog.Int("chapterId", chapterId), slog.Int("novelId", novelId)) pageNum := 1 chapter := &model.Chapter{ @@ -370,8 +386,11 @@ func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model return chapter, nil } +var nextPageUrlRegexp = regexp.MustCompile(`url_next:\s?['"]([^'"]*?)['"]`) +var cleanNextPageUrlRegexp = regexp.MustCompile(`(_\d+)?\.html$`) + func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) { - log.Printf("Getting chapter %v by page %v\n", chapter.Id, pageNum) + b.logger.Info("Getting chapter by page", slog.Int("chapter", chapter.Id), slog.Int("page", pageNum)) Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum) @@ -405,6 +424,17 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap return false, fmt.Errorf("failed to parse html: %w", err) } + // 判断章节是否有下一页 + n := nextPageUrlRegexp.FindStringSubmatch(resortedHtml) + if len(n) != 2 { + return false, fmt.Errorf("failed to determine wether there is a next page") + } + + s := cleanNextPageUrlRegexp.ReplaceAllString(n[1], "") + if strings.Contains(Url, s) { + hasNext = true + } + if pageNum == 1 { chapter.Title = doc.Find("#atitle").Text() } @@ -413,7 +443,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap content.Find("center").Remove() content.Find(".google-auto-placed").Remove() - if strings.Contains(resp.String(), `font-family: "read"`) { + if strings.Contains(resortedHtml, `font-family: "read"`) { html, err := content.Find("p").Last().Html() if err != nil { return false, fmt.Errorf("failed to get html: %v", err) @@ -486,7 +516,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap } func (b *Bilinovel) getImg(url string) ([]byte, error) { - log.Printf("Getting img %v\n", url) + b.logger.Info("Getting img", slog.String("url", url)) resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url) if err != nil { return nil, err @@ -497,7 +527,15 @@ func (b *Bilinovel) getImg(url string) ([]byte, error) { // processContentWithPlaywright 使用复用的浏览器实例处理内容 func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) { - tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html") + // 替换 window.location.replace,防止页面跳转 + htmlContent = strings.ReplaceAll(htmlContent, "window.location.replace", "console.log") + + tempPath := filepath.Join(os.TempDir(), "bilinovel-downloader") + err := os.MkdirAll(tempPath, 0755) + if err != nil { + return "", fmt.Errorf("failed to create temp dir: %w", err) + } + tempFile, err := os.CreateTemp(tempPath, "temp-*.html") if err != nil { return "", fmt.Errorf("failed to create temp file: %w", err) } @@ -510,6 +548,34 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte tempFile.Close() tempFilePath := tempFile.Name() + // // 屏蔽请求 + // googleAdsDomains := []string{ + // "adtrafficquality.google", + // "doubleclick.net", + // "googlesyndication.com", + // "googletagmanager.com", + // "hm.baidu.com", + // "cloudflareinsights.com", + // "fsdoa.js", // adblock 检测 + // "https://www.linovelib.com/novel/", // 阻止从本地文件跳转到在线页面 + // } + // err = page.Route("**/*", func(route playwright.Route) { + // for _, d := range googleAdsDomains { + // if strings.Contains(route.Request().URL(), d) { + // b.logger.Debug("blocking request", slog.String("url", route.Request().URL())) + // err := route.Abort("aborted") + // if err != nil { + // b.logger.Debug("failed to block request", route.Request().URL(), err) + // } + // return + // } + // } + // _ = route.Continue() + // }) + // if err != nil { + // return "", fmt.Errorf("failed to intercept requests: %w", err) + // } + _, err = page.ExpectResponse(func(url string) bool { return strings.Contains(url, "chapterlog.js") }, func() error { @@ -519,14 +585,15 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte } return nil }, playwright.PageExpectResponseOptions{ - Timeout: playwright.Float(5000), + Timeout: playwright.Float(10000), }) if err != nil { return "", fmt.Errorf("failed to wait for network request finish") } err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{ - State: playwright.WaitForSelectorStateVisible, + State: playwright.WaitForSelectorStateVisible, + Timeout: playwright.Float(10000), }) if err != nil { return "", fmt.Errorf("could not wait for #acontent: %w", err) @@ -562,7 +629,7 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte return "", fmt.Errorf("failed to remove hidden elements: %w", err) } - log.Printf("Hidden elements removal result: %s", result) + b.logger.Debug("Hidden elements removal result", slog.Any("count", result)) processedHTML, err := page.Content() if err != nil { diff --git a/test/bilinovel_test.go b/test/bilinovel_test.go index e186b3d..024e51b 100644 --- a/test/bilinovel_test.go +++ b/test/bilinovel_test.go @@ -8,7 +8,7 @@ import ( ) func TestBilinovel_GetNovel(t *testing.T) { - bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 5}) + bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 5}) bilinovel.SetTextOnly(true) if err != nil { t.Fatalf("failed to create bilinovel: %v", err) @@ -25,7 +25,7 @@ func TestBilinovel_GetNovel(t *testing.T) { } func TestBilinovel_GetVolume(t *testing.T) { - bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1}) + bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1}) bilinovel.SetTextOnly(true) if err != nil { t.Fatalf("failed to create bilinovel: %v", err) @@ -42,7 +42,7 @@ func TestBilinovel_GetVolume(t *testing.T) { } func TestBilinovel_GetChapter(t *testing.T) { - bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1}) + bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1}) bilinovel.SetTextOnly(true) if err != nil { t.Fatalf("failed to create bilinovel: %v", err)