mirror of
https://github.com/bestnite/bilinovel-downloader.git
synced 2025-10-25 16:51:01 +00:00
feat(logging): Implement structured logging and debug mode
fix: Windows cannot download novels correctly
This commit is contained in:
6
.vscode/launch.json
vendored
6
.vscode/launch.json
vendored
@@ -23,9 +23,9 @@
|
||||
"program": "${workspaceFolder}",
|
||||
"args": [
|
||||
"download",
|
||||
"-n=2727",
|
||||
"-v=150098",
|
||||
"--headless=false"
|
||||
"-n=2388",
|
||||
"-v=84522",
|
||||
"--debug=true"
|
||||
]
|
||||
}
|
||||
]
|
||||
|
||||
@@ -9,7 +9,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
@@ -22,18 +22,20 @@ var downloadCmd = &cobra.Command{
|
||||
Short: "Download a novel or volume",
|
||||
Long: "Download a novel or volume",
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
log.Println("Installing playwright")
|
||||
slog.Info("Installing playwright")
|
||||
err := playwright.Install(&playwright.RunOptions{
|
||||
Browsers: []string{"chromium"},
|
||||
Stdout: io.Discard,
|
||||
})
|
||||
if err != nil {
|
||||
log.Panicf("failed to install playwright")
|
||||
slog.Error("failed to install playwright")
|
||||
return
|
||||
}
|
||||
|
||||
err = runDownloadNovel()
|
||||
if err != nil {
|
||||
log.Printf("failed to download novel: %v", err)
|
||||
slog.Error("failed to download novel", slog.Any("error", err))
|
||||
return
|
||||
}
|
||||
},
|
||||
}
|
||||
@@ -43,8 +45,8 @@ type downloadCmdArgs struct {
|
||||
VolumeId int `validate:"required"`
|
||||
outputPath string
|
||||
outputType string
|
||||
headless bool
|
||||
concurrency int
|
||||
debug bool
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -56,15 +58,15 @@ func init() {
|
||||
downloadCmd.Flags().IntVarP(&downloadArgs.VolumeId, "volume-id", "v", 0, "volume id")
|
||||
downloadCmd.Flags().StringVarP(&downloadArgs.outputPath, "output-path", "o", "novels", "output path")
|
||||
downloadCmd.Flags().StringVarP(&downloadArgs.outputType, "output-type", "t", "epub", "output type, epub or text")
|
||||
downloadCmd.Flags().BoolVar(&downloadArgs.headless, "headless", true, "headless mode")
|
||||
downloadCmd.Flags().BoolVar(&downloadArgs.debug, "debug", false, "debug mode")
|
||||
downloadCmd.Flags().IntVar(&downloadArgs.concurrency, "concurrency", 3, "concurrency of downloading volumes")
|
||||
RootCmd.AddCommand(downloadCmd)
|
||||
}
|
||||
|
||||
func runDownloadNovel() error {
|
||||
downloader, err := bilinovel.New(bilinovel.BilinovelNewOption{
|
||||
Headless: downloadArgs.headless,
|
||||
Concurrency: downloadArgs.concurrency,
|
||||
Debug: downloadArgs.debug,
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create downloader: %v", err)
|
||||
@@ -72,7 +74,7 @@ func runDownloadNovel() error {
|
||||
// 确保在函数结束时关闭资源
|
||||
defer func() {
|
||||
if closeErr := downloader.Close(); closeErr != nil {
|
||||
log.Printf("Failed to close downloader: %v", closeErr)
|
||||
slog.Info("Failed to close downloader", slog.Any("error", closeErr))
|
||||
}
|
||||
}()
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"crypto/sha256"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"log"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
@@ -40,11 +40,13 @@ type Bilinovel struct {
|
||||
pages map[string]playwright.Page
|
||||
concurrency int
|
||||
concurrentChan chan any
|
||||
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
// BilinovelNewOption is the set of options accepted by New.
//
// Concurrency is stored on the downloader and is also used as the capacity of
// its concurrency channel. Debug selects the debug log level for the
// downloader's logger and is forwarded to initBrowser.
//
// NOTE(review): this diff view lists both Headless and Debug; confirm which
// fields exist in the final file before relying on either.
type BilinovelNewOption struct {
|
||||
Headless bool
|
||||
Concurrency int
|
||||
Debug bool
|
||||
}
|
||||
|
||||
func New(option BilinovelNewOption) (*Bilinovel, error) {
|
||||
@@ -54,6 +56,17 @@ func New(option BilinovelNewOption) (*Bilinovel, error) {
|
||||
}
|
||||
restyClient := utils.NewRestyClient(50)
|
||||
|
||||
var logLevel slog.Level
|
||||
if option.Debug {
|
||||
logLevel = slog.LevelDebug
|
||||
} else {
|
||||
logLevel = slog.LevelInfo
|
||||
}
|
||||
|
||||
handlerOptions := &slog.HandlerOptions{
|
||||
Level: logLevel,
|
||||
}
|
||||
|
||||
b := &Bilinovel{
|
||||
fontMapper: fontMapper,
|
||||
textOnly: false,
|
||||
@@ -61,10 +74,11 @@ func New(option BilinovelNewOption) (*Bilinovel, error) {
|
||||
pages: make(map[string]playwright.Page),
|
||||
concurrency: option.Concurrency,
|
||||
concurrentChan: make(chan any, option.Concurrency),
|
||||
logger: slog.New(slog.NewTextHandler(os.Stdout, handlerOptions)),
|
||||
}
|
||||
|
||||
// 初始化浏览器实例
|
||||
err = b.initBrowser(option.Headless)
|
||||
err = b.initBrowser(option.Debug)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to init browser: %v", err)
|
||||
}
|
||||
@@ -81,13 +95,15 @@ func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
|
||||
}
|
||||
|
||||
// initBrowser 初始化浏览器实例
|
||||
func (b *Bilinovel) initBrowser(headless bool) error {
|
||||
func (b *Bilinovel) initBrowser(debug bool) error {
|
||||
pw, err := playwright.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not start playwright: %w", err)
|
||||
}
|
||||
|
||||
b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
||||
Headless: playwright.Bool(headless),
|
||||
Headless: playwright.Bool(!debug),
|
||||
Devtools: playwright.Bool(debug),
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not launch browser: %w", err)
|
||||
@@ -98,7 +114,7 @@ func (b *Bilinovel) initBrowser(headless bool) error {
|
||||
return fmt.Errorf("could not create browser context: %w", err)
|
||||
}
|
||||
|
||||
log.Println("Browser initialized successfully")
|
||||
b.logger.Info("Browser initialized successfully")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -106,7 +122,7 @@ func (b *Bilinovel) initBrowser(headless bool) error {
|
||||
func (b *Bilinovel) Close() error {
|
||||
if b.browser != nil {
|
||||
if err := b.browser.Close(); err != nil {
|
||||
log.Printf("could not close browser: %v", err)
|
||||
b.logger.Error("could not close browser", slog.Any("error", err))
|
||||
}
|
||||
b.browser = nil
|
||||
b.browserContext = nil
|
||||
@@ -122,7 +138,7 @@ func (b *Bilinovel) GetStyleCSS() string {
|
||||
}
|
||||
|
||||
func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) {
|
||||
log.Printf("Getting novel %v\n", novelId)
|
||||
b.logger.Info("Getting novel", slog.Int("novelId", novelId))
|
||||
|
||||
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
|
||||
resp, err := b.restyClient.R().Get(novelUrl)
|
||||
@@ -161,7 +177,7 @@ func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes [
|
||||
}
|
||||
|
||||
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) {
|
||||
log.Printf("Getting volume %v of novel %v\n", volumeId, novelId)
|
||||
b.logger.Info("Getting volume of novel", slog.Int("volumeId", volumeId), slog.Int("novelId", novelId))
|
||||
|
||||
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
||||
resp, err := b.restyClient.R().Get(novelUrl)
|
||||
@@ -259,7 +275,7 @@ func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool
|
||||
}
|
||||
|
||||
func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) {
|
||||
log.Printf("Getting all volumes of novel %v\n", novelId)
|
||||
b.logger.Info("Getting all volumes of novel", slog.Int("novelId", novelId))
|
||||
|
||||
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
||||
resp, err := b.restyClient.R().Get(catelogUrl)
|
||||
@@ -300,7 +316,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
|
||||
|
||||
volumeId, err := strconv.Atoi(volumeIdStr)
|
||||
if err != nil {
|
||||
log.Printf("failed to convert volume id %s: %v", volumeIdStr, err)
|
||||
b.logger.Error("failed to convert volume id", slog.String("volumeIdStr", volumeIdStr), slog.Any("error", err))
|
||||
return
|
||||
}
|
||||
if slices.Contains(skipVolumes, volumeId) {
|
||||
@@ -308,7 +324,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
|
||||
}
|
||||
volume, err := b.GetVolume(novelId, volumeId, skipChapterContent)
|
||||
if err != nil {
|
||||
log.Printf("failed to get volume info for novel %d, volume %d: %v", novelId, volumeId, err)
|
||||
b.logger.Error("failed to get volume info", slog.Int("novelId", novelId), slog.Int("volumeId", volumeId), slog.Any("error", err))
|
||||
return
|
||||
}
|
||||
volume.SeriesIdx = i
|
||||
@@ -340,7 +356,7 @@ func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolu
|
||||
}
|
||||
|
||||
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
|
||||
log.Printf("Getting chapter %v of novel %v\n", chapterId, novelId)
|
||||
b.logger.Info("Getting chapter of novel", slog.Int("chapterId", chapterId), slog.Int("novelId", novelId))
|
||||
|
||||
pageNum := 1
|
||||
chapter := &model.Chapter{
|
||||
@@ -370,8 +386,11 @@ func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model
|
||||
return chapter, nil
|
||||
}
|
||||
|
||||
// nextPageUrlRegexp finds a `url_next:` entry followed by at most one
// whitespace character and a single- or double-quoted value; capture group 1
// is the quoted value. getChapterByPage runs it over the processed page HTML
// and returns an error when it does not yield exactly one capture.
var nextPageUrlRegexp = regexp.MustCompile(`url_next:\s?['"]([^'"]*?)['"]`)
|
||||
// cleanNextPageUrlRegexp matches a trailing ".html", optionally preceded by a
// "_<digits>" page suffix. getChapterByPage removes that match from the
// captured next-page URL and then checks whether the current page URL contains
// the remainder to decide if the chapter has another page.
var cleanNextPageUrlRegexp = regexp.MustCompile(`(_\d+)?\.html$`)
|
||||
|
||||
func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) {
|
||||
log.Printf("Getting chapter %v by page %v\n", chapter.Id, pageNum)
|
||||
b.logger.Info("Getting chapter by page", slog.Int("chapter", chapter.Id), slog.Int("page", pageNum))
|
||||
|
||||
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum)
|
||||
|
||||
@@ -405,6 +424,17 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
|
||||
return false, fmt.Errorf("failed to parse html: %w", err)
|
||||
}
|
||||
|
||||
// 判断章节是否有下一页
|
||||
n := nextPageUrlRegexp.FindStringSubmatch(resortedHtml)
|
||||
if len(n) != 2 {
|
||||
return false, fmt.Errorf("failed to determine wether there is a next page")
|
||||
}
|
||||
|
||||
s := cleanNextPageUrlRegexp.ReplaceAllString(n[1], "")
|
||||
if strings.Contains(Url, s) {
|
||||
hasNext = true
|
||||
}
|
||||
|
||||
if pageNum == 1 {
|
||||
chapter.Title = doc.Find("#atitle").Text()
|
||||
}
|
||||
@@ -413,7 +443,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
|
||||
content.Find("center").Remove()
|
||||
content.Find(".google-auto-placed").Remove()
|
||||
|
||||
if strings.Contains(resp.String(), `font-family: "read"`) {
|
||||
if strings.Contains(resortedHtml, `font-family: "read"`) {
|
||||
html, err := content.Find("p").Last().Html()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to get html: %v", err)
|
||||
@@ -486,7 +516,7 @@ func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chap
|
||||
}
|
||||
|
||||
func (b *Bilinovel) getImg(url string) ([]byte, error) {
|
||||
log.Printf("Getting img %v\n", url)
|
||||
b.logger.Info("Getting img", slog.String("url", url))
|
||||
resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -497,7 +527,15 @@ func (b *Bilinovel) getImg(url string) ([]byte, error) {
|
||||
|
||||
// processContentWithPlaywright 使用复用的浏览器实例处理内容
|
||||
func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) {
|
||||
tempFile, err := os.CreateTemp("", "bilinovel-temp-*.html")
|
||||
// 替换 window.location.replace,防止页面跳转
|
||||
htmlContent = strings.ReplaceAll(htmlContent, "window.location.replace", "console.log")
|
||||
|
||||
tempPath := filepath.Join(os.TempDir(), "bilinovel-downloader")
|
||||
err := os.MkdirAll(tempPath, 0755)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create temp dir: %w", err)
|
||||
}
|
||||
tempFile, err := os.CreateTemp(tempPath, "temp-*.html")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
@@ -510,6 +548,34 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
|
||||
tempFile.Close()
|
||||
tempFilePath := tempFile.Name()
|
||||
|
||||
// // 屏蔽请求
|
||||
// googleAdsDomains := []string{
|
||||
// "adtrafficquality.google",
|
||||
// "doubleclick.net",
|
||||
// "googlesyndication.com",
|
||||
// "googletagmanager.com",
|
||||
// "hm.baidu.com",
|
||||
// "cloudflareinsights.com",
|
||||
// "fsdoa.js", // adblock 检测
|
||||
// "https://www.linovelib.com/novel/", // 阻止从本地文件跳转到在线页面
|
||||
// }
|
||||
// err = page.Route("**/*", func(route playwright.Route) {
|
||||
// for _, d := range googleAdsDomains {
|
||||
// if strings.Contains(route.Request().URL(), d) {
|
||||
// b.logger.Debug("blocking request", slog.String("url", route.Request().URL()))
|
||||
// err := route.Abort("aborted")
|
||||
// if err != nil {
|
||||
// b.logger.Debug("failed to block request", route.Request().URL(), err)
|
||||
// }
|
||||
// return
|
||||
// }
|
||||
// }
|
||||
// _ = route.Continue()
|
||||
// })
|
||||
// if err != nil {
|
||||
// return "", fmt.Errorf("failed to intercept requests: %w", err)
|
||||
// }
|
||||
|
||||
_, err = page.ExpectResponse(func(url string) bool {
|
||||
return strings.Contains(url, "chapterlog.js")
|
||||
}, func() error {
|
||||
@@ -519,14 +585,15 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
|
||||
}
|
||||
return nil
|
||||
}, playwright.PageExpectResponseOptions{
|
||||
Timeout: playwright.Float(5000),
|
||||
Timeout: playwright.Float(10000),
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to wait for network request finish")
|
||||
}
|
||||
|
||||
err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{
|
||||
State: playwright.WaitForSelectorStateVisible,
|
||||
State: playwright.WaitForSelectorStateVisible,
|
||||
Timeout: playwright.Float(10000),
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("could not wait for #acontent: %w", err)
|
||||
@@ -562,7 +629,7 @@ func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlConte
|
||||
return "", fmt.Errorf("failed to remove hidden elements: %w", err)
|
||||
}
|
||||
|
||||
log.Printf("Hidden elements removal result: %s", result)
|
||||
b.logger.Debug("Hidden elements removal result", slog.Any("count", result))
|
||||
|
||||
processedHTML, err := page.Content()
|
||||
if err != nil {
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
)
|
||||
|
||||
func TestBilinovel_GetNovel(t *testing.T) {
|
||||
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 5})
|
||||
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 5})
|
||||
bilinovel.SetTextOnly(true)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create bilinovel: %v", err)
|
||||
@@ -25,7 +25,7 @@ func TestBilinovel_GetNovel(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestBilinovel_GetVolume(t *testing.T) {
|
||||
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1})
|
||||
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
|
||||
bilinovel.SetTextOnly(true)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create bilinovel: %v", err)
|
||||
@@ -42,7 +42,7 @@ func TestBilinovel_GetVolume(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestBilinovel_GetChapter(t *testing.T) {
|
||||
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Headless: false, Concurrency: 1})
|
||||
bilinovel, err := bilinovel.New(bilinovel.BilinovelNewOption{Concurrency: 1})
|
||||
bilinovel.SetTextOnly(true)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create bilinovel: %v", err)
|
||||
|
||||
Reference in New Issue
Block a user