Initial 423down proxy service

This commit is contained in:
2026-05-12 07:58:30 +00:00
commit 6e6e836d09
14 changed files with 865 additions and 0 deletions
+77
View File
@@ -0,0 +1,77 @@
package main
import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"github.com/PuerkitoBio/goquery"
"resty.dev/v3"
)
// Sentinel errors produced while extracting an article body from a source
// page. Callers can match them with errors.Is.
var (
// errArticleContentNotFound is returned by extractArticleHTML when the page
// contains no element matching the article selector. The message reads
// "unable to find article content".
errArticleContentNotFound = errors.New("无法找到文章内容")
// errArticleContentParseFailed is wrapped by extractArticleHTML when the
// matched element cannot be rendered back to HTML. The message reads
// "unable to parse article content".
errArticleContentParseFailed = errors.New("无法解析文章内容")
)
// getArticleHTML returns the HTML for articleID, preferring the cached copy
// and falling back to fetching it from the source site.
//
// Any error from the cache lookup is treated as a miss. On a miss the login
// cookies are obtained via getLoginCookies, the article is fetched with
// fetchRemoteArticleHTML, and the result is written back to the cache with
// articleCacheTTL. A failed cache write is only logged at warn level; the
// freshly fetched HTML is still returned. Errors from obtaining cookies or
// from the fetch are returned unchanged.
func getArticleHTML(db *DB, client *resty.Client, articleID string) (string, error) {
	// Fast path: serve from the cache when the lookup succeeds.
	if cached, lookupErr := db.GetArticleHtml(articleID); lookupErr == nil {
		return cached, nil
	}

	cookies, err := getLoginCookies(db, client)
	if err != nil {
		return "", err
	}

	content, err := fetchRemoteArticleHTML(client, articleID, cookies)
	if err != nil {
		return "", err
	}

	// Caching is best-effort: a write failure must not fail the request.
	if storeErr := db.SetArticleHtml(articleID, content, articleCacheTTL); storeErr != nil {
		slog.Warn(
			"failed to cache article",
			slog.String("article_id", articleID),
			slog.String("error", storeErr.Error()),
		)
	}

	return content, nil
}
// fetchRemoteArticleHTML downloads the article page from the source site and
// returns the HTML of its content region as extracted by extractArticleHTML.
//
// The request URL is built by formatting articleID into articleURLFormat.
// When cookies is non-empty they are attached to the request.
//
// Errors:
//   - a request failure is returned wrapped with the article ID;
//   - a response that resty reports as an HTTP error status is rejected
//     before parsing, so an upstream error page is never mistaken for (and
//     later cached as) an article. That error wraps errArticleContentNotFound
//     so existing errors.Is checks against the sentinel keep matching;
//   - any error from extractArticleHTML is returned unchanged.
func fetchRemoteArticleHTML(client *resty.Client, articleID string, cookies []*http.Cookie) (string, error) {
	req := client.R()
	if len(cookies) > 0 {
		req.SetCookies(cookies)
	}

	resp, err := req.Get(fmt.Sprintf(articleURLFormat, articleID))
	if err != nil {
		return "", fmt.Errorf("fetching article %q: %w", articleID, err)
	}
	// The close error is deliberately ignored: the body is only read here and
	// there is nothing useful to do if closing it fails.
	defer func() { _ = resp.Body.Close() }()

	// Check the status before parsing; an error page may still be valid HTML.
	if resp.IsError() {
		return "", fmt.Errorf("%w: source responded with %s for article %q",
			errArticleContentNotFound, resp.Status(), articleID)
	}

	return extractArticleHTML(resp.Body)
}
// extractArticleHTML parses a page from the source site and returns the HTML
// of its article content region, selected with "div.entry".
//
// Errors:
//   - a document parse error from goquery is returned unchanged;
//   - errArticleContentNotFound when no element matches the selector;
//   - an error wrapping errArticleContentParseFailed, followed by the
//     underlying cause, when the matched element cannot be rendered.
func extractArticleHTML(reader io.Reader) (string, error) {
	doc, err := goquery.NewDocumentFromReader(reader)
	if err != nil {
		return "", err
	}

	entry := doc.Find("div.entry")
	if entry.Length() == 0 {
		return "", errArticleContentNotFound
	}

	content, err := entry.Html()
	if err != nil {
		// Separate the sentinel message from the cause so the combined text
		// stays readable; only the sentinel is wrapped, as before.
		return "", fmt.Errorf("%w: %v", errArticleContentParseFailed, err)
	}

	return content, nil
}