package main

import (
	"errors"
	"fmt"
	"io"
	"log/slog"
	"net/http"

	"github.com/PuerkitoBio/goquery"
	"resty.dev/v3"
)

var (
	errArticleContentNotFound    = errors.New("article content not found")
	errArticleContentParseFailed = errors.New("failed to parse article content")
)

// getArticleHTML returns the article HTML, reading from the cache first and
// falling back to fetching from the origin site on a cache miss.
func getArticleHTML(db *DB, client *resty.Client, articleID string) (string, error) {
	html, err := db.GetArticleHtml(articleID)
	if err == nil {
		return html, nil
	}

	cookies, err := getLoginCookies(db, client)
	if err != nil {
		return "", err
	}

	html, err = fetchRemoteArticleHTML(client, articleID, cookies)
	if err != nil {
		return "", err
	}

	// A cache-write failure is logged but not returned: the fetched HTML is
	// still valid and should be served to the caller.
	if err := db.SetArticleHtml(articleID, html, articleCacheTTL); err != nil {
		slog.Warn("failed to cache article",
			slog.String("article_id", articleID),
			slog.String("error", err.Error()))
	}
	return html, nil
}

// fetchRemoteArticleHTML downloads the article page from the origin site and
// extracts the body HTML.
func fetchRemoteArticleHTML(client *resty.Client, articleID string, cookies []*http.Cookie) (string, error) {
	request := client.R()
	if len(cookies) > 0 {
		request.SetCookies(cookies)
	}

	resp, err := request.Get(fmt.Sprintf(articleURLFormat, articleID))
	if err != nil {
		return "", err
	}
	defer func() { _ = resp.Body.Close() }()

	return extractArticleHTML(resp.Body)
}

// extractArticleHTML extracts the article body section from the origin page.
func extractArticleHTML(reader io.Reader) (string, error) {
	doc, err := goquery.NewDocumentFromReader(reader)
	if err != nil {
		return "", err
	}

	selection := doc.Find("div.entry")
	if selection.Length() == 0 {
		return "", errArticleContentNotFound
	}

	html, err := selection.Html()
	if err != nil {
		return "", fmt.Errorf("%w: %v", errArticleContentParseFailed, err)
	}
	return html, nil
}
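// handleArticle is a hypothetical sketch (not part of the original code) of
// how getArticleHTML might be wired into an HTTP handler; the route pattern,
// the "id" path parameter, and the error-to-status mapping are all
// assumptions for illustration. It only uses packages already imported above.
//
// Example registration, assuming Go 1.22+ pattern routing:
//
//	http.Handle("GET /articles/{id}", handleArticle(db, client))
func handleArticle(db *DB, client *resty.Client) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		articleID := r.PathValue("id") // requires Go 1.22+; hypothetical route shape
		html, err := getArticleHTML(db, client, articleID)
		if err != nil {
			// Surface upstream fetch/parse failures as 502; cache details stay internal.
			http.Error(w, err.Error(), http.StatusBadGateway)
			return
		}
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		_, _ = io.WriteString(w, html)
	}
}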