mirror of
https://github.com/bestnite/bilinovel-downloader.git
synced 2025-10-26 09:11:01 +00:00
641 lines
18 KiB
Go
641 lines
18 KiB
Go
package bilinovel
|
||
|
||
import (
|
||
"bilinovel-downloader/model"
|
||
"bilinovel-downloader/utils"
|
||
"bytes"
|
||
"crypto/sha256"
|
||
_ "embed"
|
||
"fmt"
|
||
"log/slog"
|
||
"net/http"
|
||
"os"
|
||
"path"
|
||
"path/filepath"
|
||
"regexp"
|
||
"slices"
|
||
"strconv"
|
||
"strings"
|
||
"sync"
|
||
|
||
"github.com/PuerkitoBio/goquery"
|
||
mapper "github.com/bestnite/font-mapper"
|
||
"github.com/playwright-community/playwright-go"
|
||
)
|
||
|
||
//go:embed read.ttf
|
||
var readTTF []byte
|
||
|
||
//go:embed "MI LANTING.ttf"
|
||
var miLantingTTF []byte
|
||
|
||
type Bilinovel struct {
|
||
fontMapper *mapper.GlyphOutlineMapper
|
||
textOnly bool
|
||
restyClient *utils.RestyClient
|
||
|
||
// 浏览器实例复用
|
||
browser playwright.Browser
|
||
browserContext playwright.BrowserContext
|
||
pages map[string]playwright.Page
|
||
concurrency int
|
||
concurrentChan chan any
|
||
|
||
logger *slog.Logger
|
||
}
|
||
|
||
type BilinovelNewOption struct {
|
||
Concurrency int
|
||
Debug bool
|
||
}
|
||
|
||
func New(option BilinovelNewOption) (*Bilinovel, error) {
|
||
fontMapper, err := mapper.NewGlyphOutlineMapper(readTTF, miLantingTTF)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to create font mapper: %v", err)
|
||
}
|
||
restyClient := utils.NewRestyClient(50)
|
||
|
||
var logLevel slog.Level
|
||
if option.Debug {
|
||
logLevel = slog.LevelDebug
|
||
} else {
|
||
logLevel = slog.LevelInfo
|
||
}
|
||
|
||
handlerOptions := &slog.HandlerOptions{
|
||
Level: logLevel,
|
||
}
|
||
|
||
b := &Bilinovel{
|
||
fontMapper: fontMapper,
|
||
textOnly: false,
|
||
restyClient: restyClient,
|
||
pages: make(map[string]playwright.Page),
|
||
concurrency: option.Concurrency,
|
||
concurrentChan: make(chan any, option.Concurrency),
|
||
logger: slog.New(slog.NewTextHandler(os.Stdout, handlerOptions)),
|
||
}
|
||
|
||
// 初始化浏览器实例
|
||
err = b.initBrowser(option.Debug)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to init browser: %v", err)
|
||
}
|
||
|
||
return b, nil
|
||
}
|
||
|
||
func (b *Bilinovel) SetTextOnly(textOnly bool) {
|
||
b.textOnly = textOnly
|
||
}
|
||
|
||
func (b *Bilinovel) GetExtraFiles() []model.ExtraFile {
|
||
return nil
|
||
}
|
||
|
||
// initBrowser 初始化浏览器实例
|
||
func (b *Bilinovel) initBrowser(debug bool) error {
|
||
pw, err := playwright.Run()
|
||
if err != nil {
|
||
return fmt.Errorf("could not start playwright: %w", err)
|
||
}
|
||
|
||
b.browser, err = pw.Chromium.Launch(playwright.BrowserTypeLaunchOptions{
|
||
Headless: playwright.Bool(!debug),
|
||
Devtools: playwright.Bool(debug),
|
||
})
|
||
if err != nil {
|
||
return fmt.Errorf("could not launch browser: %w", err)
|
||
}
|
||
|
||
b.browserContext, err = b.browser.NewContext()
|
||
if err != nil {
|
||
return fmt.Errorf("could not create browser context: %w", err)
|
||
}
|
||
|
||
b.logger.Info("Browser initialized successfully")
|
||
return nil
|
||
}
|
||
|
||
// Close 清理资源
|
||
func (b *Bilinovel) Close() error {
|
||
if b.browser != nil {
|
||
if err := b.browser.Close(); err != nil {
|
||
b.logger.Error("could not close browser", slog.Any("error", err))
|
||
}
|
||
b.browser = nil
|
||
b.browserContext = nil
|
||
}
|
||
return nil
|
||
}
|
||
|
||
//go:embed style.css
|
||
var styleCSS []byte
|
||
|
||
func (b *Bilinovel) GetStyleCSS() string {
|
||
return string(styleCSS)
|
||
}
|
||
|
||
func (b *Bilinovel) GetNovel(novelId int, skipChapterContent bool, skipVolumes []int) (*model.Novel, error) {
|
||
b.logger.Info("Getting novel", slog.Int("novelId", novelId))
|
||
|
||
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v.html", novelId)
|
||
resp, err := b.restyClient.R().Get(novelUrl)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get novel info: %w", err)
|
||
}
|
||
if resp.StatusCode() != http.StatusOK {
|
||
return nil, fmt.Errorf("failed to get novel info: %v", resp.Status())
|
||
}
|
||
|
||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to parse html: %v", err)
|
||
}
|
||
|
||
novel := &model.Novel{}
|
||
|
||
novel.Title = strings.TrimSpace(doc.Find(".book-title").First().Text())
|
||
novel.Description = strings.TrimSpace(doc.Find(".book-summary>content").First().Text())
|
||
novel.Id = novelId
|
||
|
||
doc.Find(".authorname>a").Each(func(i int, s *goquery.Selection) {
|
||
novel.Authors = append(novel.Authors, strings.TrimSpace(s.Text()))
|
||
})
|
||
doc.Find(".illname>a").Each(func(i int, s *goquery.Selection) {
|
||
novel.Authors = append(novel.Authors, strings.TrimSpace(s.Text()))
|
||
})
|
||
|
||
volumes, err := b.getAllVolumes(novelId, skipChapterContent, skipVolumes)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get novel volumes: %v", err)
|
||
}
|
||
novel.Volumes = volumes
|
||
|
||
return novel, nil
|
||
}
|
||
|
||
func (b *Bilinovel) GetVolume(novelId int, volumeId int, skipChapterContent bool) (*model.Volume, error) {
|
||
b.logger.Info("Getting volume of novel", slog.Int("volumeId", volumeId), slog.Int("novelId", novelId))
|
||
|
||
novelUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
||
resp, err := b.restyClient.R().Get(novelUrl)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get novel info: %w", err)
|
||
}
|
||
if resp.StatusCode() != http.StatusOK {
|
||
return nil, fmt.Errorf("failed to get novel info: %v", resp.Status())
|
||
}
|
||
|
||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to parse html: %v", err)
|
||
}
|
||
|
||
seriesIdx := 0
|
||
doc.Find("a.volume-cover-img").Each(func(i int, s *goquery.Selection) {
|
||
if s.AttrOr("href", "") == fmt.Sprintf("/novel/%v/vol_%v.html", novelId, volumeId) {
|
||
seriesIdx = i + 1
|
||
}
|
||
})
|
||
|
||
novelTitle := strings.TrimSpace(doc.Find(".book-title").First().Text())
|
||
|
||
if seriesIdx == 0 {
|
||
return nil, fmt.Errorf("volume not found: %v", volumeId)
|
||
}
|
||
|
||
volumeUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/vol_%v.html", novelId, volumeId)
|
||
resp, err = b.restyClient.R().Get(volumeUrl)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get novel info: %v", err)
|
||
}
|
||
if resp.StatusCode() != http.StatusOK {
|
||
return nil, fmt.Errorf("failed to get novel info: %v", resp.Status())
|
||
}
|
||
|
||
doc, err = goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to parse html: %v", err)
|
||
}
|
||
|
||
volume := &model.Volume{}
|
||
volume.NovelId = novelId
|
||
volume.NovelTitle = novelTitle
|
||
volume.Id = volumeId
|
||
volume.SeriesIdx = seriesIdx
|
||
volume.Title = strings.TrimSpace(doc.Find(".book-title").First().Text())
|
||
volume.Description = strings.TrimSpace(doc.Find(".book-summary>content").First().Text())
|
||
volume.Url = volumeUrl
|
||
volume.Chapters = make([]*model.Chapter, 0)
|
||
volume.CoverUrl = doc.Find(".book-cover").First().AttrOr("src", "")
|
||
cover, err := b.getImg(volume.CoverUrl)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get cover: %v", err)
|
||
}
|
||
volume.Cover = cover
|
||
|
||
doc.Find(".authorname>a").Each(func(i int, s *goquery.Selection) {
|
||
volume.Authors = append(volume.Authors, strings.TrimSpace(s.Text()))
|
||
})
|
||
doc.Find(".illname>a").Each(func(i int, s *goquery.Selection) {
|
||
volume.Authors = append(volume.Authors, strings.TrimSpace(s.Text()))
|
||
})
|
||
doc.Find(".chapter-li.jsChapter").Each(func(i int, s *goquery.Selection) {
|
||
volume.Chapters = append(volume.Chapters, &model.Chapter{
|
||
Title: s.Find("a").Text(),
|
||
Url: fmt.Sprintf("https://www.bilinovel.com%v", s.Find("a").AttrOr("href", "")),
|
||
})
|
||
})
|
||
|
||
idRegexp := regexp.MustCompile(`/novel/(\d+)/(\d+).html`)
|
||
|
||
if !skipChapterContent {
|
||
for i := range volume.Chapters {
|
||
matches := idRegexp.FindStringSubmatch(volume.Chapters[i].Url)
|
||
if len(matches) > 0 {
|
||
chapterId, err := strconv.Atoi(matches[2])
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to convert chapter id: %v", err)
|
||
}
|
||
chapter, err := b.GetChapter(novelId, volumeId, chapterId)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get chapter: %v", err)
|
||
}
|
||
chapter.Id = chapterId
|
||
volume.Chapters[i] = chapter
|
||
} else {
|
||
return nil, fmt.Errorf("failed to get chapter id: %v", volume.Chapters[i].Url)
|
||
}
|
||
}
|
||
}
|
||
|
||
return volume, nil
|
||
}
|
||
|
||
func (b *Bilinovel) getAllVolumes(novelId int, skipChapterContent bool, skipVolumes []int) ([]*model.Volume, error) {
|
||
b.logger.Info("Getting all volumes of novel", slog.Int("novelId", novelId))
|
||
|
||
catelogUrl := fmt.Sprintf("https://www.bilinovel.com/novel/%v/catalog", novelId)
|
||
resp, err := b.restyClient.R().Get(catelogUrl)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to get catelog: %v", err)
|
||
}
|
||
if resp.StatusCode() != http.StatusOK {
|
||
return nil, fmt.Errorf("failed to get catelog: %v", resp.Status())
|
||
}
|
||
|
||
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(resp.Body()))
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to parse html: %v", err)
|
||
}
|
||
|
||
volumeRegexp := regexp.MustCompile(fmt.Sprintf(`/novel/%v/vol_(\d+).html`, novelId))
|
||
|
||
volumeIds := make([]string, 0)
|
||
doc.Find("a.volume-cover-img").Each(func(i int, s *goquery.Selection) {
|
||
link := s.AttrOr("href", "")
|
||
matches := volumeRegexp.FindStringSubmatch(link)
|
||
if len(matches) > 0 {
|
||
volumeIds = append(volumeIds, matches[1])
|
||
}
|
||
})
|
||
|
||
volumes := make([]*model.Volume, len(volumeIds))
|
||
var wg sync.WaitGroup
|
||
var mu sync.Mutex // 保护 volumes 写入的互斥锁
|
||
|
||
for i, volumeIdStr := range volumeIds {
|
||
wg.Add(1)
|
||
b.concurrentChan <- struct{}{} // 获取一个并发槽
|
||
|
||
go func(i int, volumeIdStr string) {
|
||
defer wg.Done()
|
||
defer func() { <-b.concurrentChan }() // 释放并发槽
|
||
|
||
volumeId, err := strconv.Atoi(volumeIdStr)
|
||
if err != nil {
|
||
b.logger.Error("failed to convert volume id", slog.String("volumeIdStr", volumeIdStr), slog.Any("error", err))
|
||
return
|
||
}
|
||
if slices.Contains(skipVolumes, volumeId) {
|
||
return
|
||
}
|
||
volume, err := b.GetVolume(novelId, volumeId, skipChapterContent)
|
||
if err != nil {
|
||
b.logger.Error("failed to get volume info", slog.Int("novelId", novelId), slog.Int("volumeId", volumeId), slog.Any("error", err))
|
||
return
|
||
}
|
||
volume.SeriesIdx = i
|
||
|
||
// 关闭浏览器标签页
|
||
pwPageKey := fmt.Sprintf("%v-%v", novelId, volumeId)
|
||
if pwPage, ok := b.pages[pwPageKey]; ok {
|
||
_ = pwPage.Close()
|
||
delete(b.pages, pwPageKey)
|
||
}
|
||
|
||
mu.Lock()
|
||
volumes[i] = volume
|
||
mu.Unlock()
|
||
}(i, volumeIdStr)
|
||
}
|
||
|
||
wg.Wait()
|
||
|
||
// 过滤掉获取失败的 nil volume
|
||
filteredVolumes := make([]*model.Volume, 0, len(volumes))
|
||
for _, vol := range volumes {
|
||
if vol != nil {
|
||
filteredVolumes = append(filteredVolumes, vol)
|
||
}
|
||
}
|
||
|
||
return filteredVolumes, nil
|
||
}
|
||
|
||
func (b *Bilinovel) GetChapter(novelId int, volumeId int, chapterId int) (*model.Chapter, error) {
|
||
b.logger.Info("Getting chapter of novel", slog.Int("chapterId", chapterId), slog.Int("novelId", novelId))
|
||
|
||
pageNum := 1
|
||
chapter := &model.Chapter{
|
||
Id: chapterId,
|
||
NovelId: novelId,
|
||
VolumeId: volumeId,
|
||
Url: fmt.Sprintf("https://www.bilinovel.com/novel/%v/%v.html", novelId, chapterId),
|
||
}
|
||
for {
|
||
pwPageKey := fmt.Sprintf("%v-%v", novelId, volumeId)
|
||
if _, ok := b.pages[pwPageKey]; !ok {
|
||
pwPage, err := b.browserContext.NewPage()
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to create browser page: %w", err)
|
||
}
|
||
b.pages[pwPageKey] = pwPage
|
||
}
|
||
hasNext, err := b.getChapterByPage(b.pages[pwPageKey], chapter, pageNum)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("failed to download chapter: %w", err)
|
||
}
|
||
if !hasNext {
|
||
break
|
||
}
|
||
pageNum++
|
||
}
|
||
return chapter, nil
|
||
}
|
||
|
||
var nextPageUrlRegexp = regexp.MustCompile(`url_next:\s?['"]([^'"]*?)['"]`)
|
||
var cleanNextPageUrlRegexp = regexp.MustCompile(`(_\d+)?\.html$`)
|
||
|
||
func (b *Bilinovel) getChapterByPage(pwPage playwright.Page, chapter *model.Chapter, pageNum int) (bool, error) {
|
||
b.logger.Info("Getting chapter by page", slog.Int("chapter", chapter.Id), slog.Int("page", pageNum))
|
||
|
||
Url := strings.TrimSuffix(chapter.Url, ".html") + fmt.Sprintf("_%v.html", pageNum)
|
||
|
||
hasNext := false
|
||
headers := map[string]string{
|
||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
|
||
"Accept-Language": "zh-CN,zh;q=0.9,en-GB;q=0.8,en;q=0.7,zh-TW;q=0.6",
|
||
"Cookie": "night=1;",
|
||
}
|
||
resp, err := b.restyClient.R().SetHeaders(headers).Get(Url)
|
||
if err != nil {
|
||
return false, fmt.Errorf("failed to get chapter: %w", err)
|
||
}
|
||
if resp.StatusCode() != http.StatusOK {
|
||
return false, fmt.Errorf("failed to get chapter: %v", resp.Status())
|
||
}
|
||
|
||
if strings.Contains(resp.String(), `<a onclick="window.location.href = ReadParams.url_next;">下一頁</a>`) {
|
||
hasNext = true
|
||
}
|
||
|
||
html := resp.Body()
|
||
|
||
// 解决乱序问题
|
||
resortedHtml, err := b.processContentWithPlaywright(pwPage, string(html))
|
||
if err != nil {
|
||
return false, fmt.Errorf("failed to process html: %w", err)
|
||
}
|
||
doc, err := goquery.NewDocumentFromReader(strings.NewReader(resortedHtml))
|
||
if err != nil {
|
||
return false, fmt.Errorf("failed to parse html: %w", err)
|
||
}
|
||
|
||
// 判断章节是否有下一页
|
||
n := nextPageUrlRegexp.FindStringSubmatch(resortedHtml)
|
||
if len(n) != 2 {
|
||
return false, fmt.Errorf("failed to determine wether there is a next page")
|
||
}
|
||
|
||
s := cleanNextPageUrlRegexp.ReplaceAllString(n[1], "")
|
||
if strings.Contains(Url, s) {
|
||
hasNext = true
|
||
}
|
||
|
||
if pageNum == 1 {
|
||
chapter.Title = doc.Find("#atitle").Text()
|
||
}
|
||
content := doc.Find("#acontent").First()
|
||
content.Find(".cgo").Remove()
|
||
content.Find("center").Remove()
|
||
content.Find(".google-auto-placed").Remove()
|
||
|
||
if strings.Contains(resortedHtml, `font-family: "read"`) {
|
||
html, err := content.Find("p").Last().Html()
|
||
if err != nil {
|
||
return false, fmt.Errorf("failed to get html: %v", err)
|
||
}
|
||
builder := strings.Builder{}
|
||
for _, r := range html {
|
||
_, newRune, ok := b.fontMapper.MappingRune(r)
|
||
if ok {
|
||
builder.WriteRune(newRune)
|
||
}
|
||
}
|
||
content.Find("p").Last().SetHtml(builder.String())
|
||
}
|
||
|
||
if b.textOnly {
|
||
content.Find("img").Remove()
|
||
} else {
|
||
content.Find("img").Each(func(i int, s *goquery.Selection) {
|
||
imgUrl := s.AttrOr("data-src", "")
|
||
if imgUrl == "" {
|
||
imgUrl = s.AttrOr("src", "")
|
||
if imgUrl == "" {
|
||
return
|
||
}
|
||
}
|
||
|
||
imageHash := sha256.Sum256([]byte(imgUrl))
|
||
imageFilename := fmt.Sprintf("%x%s", string(imageHash[:]), path.Ext(imgUrl))
|
||
s.SetAttr("src", imageFilename)
|
||
s.SetAttr("alt", imgUrl)
|
||
s.RemoveAttr("class")
|
||
img, err := b.getImg(imgUrl)
|
||
if err != nil {
|
||
return
|
||
}
|
||
if chapter.Content == nil {
|
||
chapter.Content = &model.ChaperContent{}
|
||
}
|
||
if chapter.Content.Images == nil {
|
||
chapter.Content.Images = make(map[string][]byte)
|
||
}
|
||
chapter.Content.Images[imageFilename] = img
|
||
})
|
||
}
|
||
|
||
doc.Find("*").Each(func(i int, s *goquery.Selection) {
|
||
if len(s.Nodes) > 0 && len(s.Nodes[0].Attr) > 0 {
|
||
// 遍历元素的所有属性
|
||
for _, attr := range s.Nodes[0].Attr {
|
||
// 3. 检查属性名是否以 "data-k" 开头,且属性值是否为空
|
||
if strings.HasPrefix(attr.Key, "data-k") {
|
||
// 4. 如果满足条件,就移除这个属性
|
||
s.RemoveAttr(attr.Key)
|
||
}
|
||
}
|
||
}
|
||
})
|
||
|
||
htmlStr, err := content.Html()
|
||
if err != nil {
|
||
return false, fmt.Errorf("failed to get html: %v", err)
|
||
}
|
||
|
||
if chapter.Content == nil {
|
||
chapter.Content = &model.ChaperContent{}
|
||
}
|
||
chapter.Content.Html += strings.TrimSpace(htmlStr)
|
||
|
||
return hasNext, nil
|
||
}
|
||
|
||
func (b *Bilinovel) getImg(url string) ([]byte, error) {
|
||
b.logger.Info("Getting img", slog.String("url", url))
|
||
resp, err := b.restyClient.R().SetHeader("Referer", "https://www.bilinovel.com").Get(url)
|
||
if err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
return resp.Body(), nil
|
||
}
|
||
|
||
// processContentWithPlaywright 使用复用的浏览器实例处理内容
|
||
func (b *Bilinovel) processContentWithPlaywright(page playwright.Page, htmlContent string) (string, error) {
|
||
// 替换 window.location.replace,防止页面跳转
|
||
htmlContent = strings.ReplaceAll(htmlContent, "window.location.replace", "console.log")
|
||
|
||
tempPath := filepath.Join(os.TempDir(), "bilinovel-downloader")
|
||
err := os.MkdirAll(tempPath, 0755)
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to create temp dir: %w", err)
|
||
}
|
||
tempFile, err := os.CreateTemp(tempPath, "temp-*.html")
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to create temp file: %w", err)
|
||
}
|
||
defer os.Remove(tempFile.Name())
|
||
|
||
_, err = tempFile.WriteString(htmlContent)
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to write temp file: %w", err)
|
||
}
|
||
tempFile.Close()
|
||
tempFilePath := tempFile.Name()
|
||
|
||
// // 屏蔽请求
|
||
// googleAdsDomains := []string{
|
||
// "adtrafficquality.google",
|
||
// "doubleclick.net",
|
||
// "googlesyndication.com",
|
||
// "googletagmanager.com",
|
||
// "hm.baidu.com",
|
||
// "cloudflareinsights.com",
|
||
// "fsdoa.js", // adblock 检测
|
||
// "https://www.linovelib.com/novel/", // 阻止从本地文件跳转到在线页面
|
||
// }
|
||
// err = page.Route("**/*", func(route playwright.Route) {
|
||
// for _, d := range googleAdsDomains {
|
||
// if strings.Contains(route.Request().URL(), d) {
|
||
// b.logger.Debug("blocking request", slog.String("url", route.Request().URL()))
|
||
// err := route.Abort("aborted")
|
||
// if err != nil {
|
||
// b.logger.Debug("failed to block request", route.Request().URL(), err)
|
||
// }
|
||
// return
|
||
// }
|
||
// }
|
||
// _ = route.Continue()
|
||
// })
|
||
// if err != nil {
|
||
// return "", fmt.Errorf("failed to intercept requests: %w", err)
|
||
// }
|
||
|
||
_, err = page.ExpectResponse(func(url string) bool {
|
||
return strings.Contains(url, "chapterlog.js")
|
||
}, func() error {
|
||
_, err = page.Goto("file://" + filepath.ToSlash(tempFilePath))
|
||
if err != nil {
|
||
return fmt.Errorf("could not navigate to file: %w", err)
|
||
}
|
||
return nil
|
||
}, playwright.PageExpectResponseOptions{
|
||
Timeout: playwright.Float(10000),
|
||
})
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to wait for network request finish")
|
||
}
|
||
|
||
err = page.Locator("#acontent").WaitFor(playwright.LocatorWaitForOptions{
|
||
State: playwright.WaitForSelectorStateVisible,
|
||
Timeout: playwright.Float(10000),
|
||
})
|
||
if err != nil {
|
||
return "", fmt.Errorf("could not wait for #acontent: %w", err)
|
||
}
|
||
|
||
// 遍历所有 #acontent 的子元素, 通过 window.getComputedStyle().display 检测是否是 none, 如果是 none 则从页面删除这个元素
|
||
result, err := page.Evaluate(`
|
||
(function() {
|
||
const acontent = document.getElementById('acontent');
|
||
if (!acontent) {
|
||
return 'acontent element not found';
|
||
}
|
||
|
||
let removedCount = 0;
|
||
const elements = acontent.querySelectorAll('*');
|
||
|
||
// 从后往前遍历,避免删除元素时影响索引
|
||
for (let i = elements.length - 1; i >= 0; i--) {
|
||
const element = elements[i];
|
||
const computedStyle = window.getComputedStyle(element);
|
||
|
||
if (computedStyle.display === 'none' || computedStyle.transform == 'matrix(0, 0, 0, 0, 0, 0)') {
|
||
element.remove();
|
||
removedCount++;
|
||
}
|
||
}
|
||
|
||
return 'Removed ' + removedCount + ' hidden elements';
|
||
})()
|
||
`)
|
||
|
||
if err != nil {
|
||
return "", fmt.Errorf("failed to remove hidden elements: %w", err)
|
||
}
|
||
|
||
b.logger.Debug("Hidden elements removal result", slog.Any("count", result))
|
||
|
||
processedHTML, err := page.Content()
|
||
if err != nil {
|
||
return "", fmt.Errorf("could not get page content: %w", err)
|
||
}
|
||
|
||
return processedHTML, nil
|
||
}
|