341 lines
8.1 KiB
Go
341 lines
8.1 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"flag"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"path/filepath"
|
|
"regexp"
|
|
"runtime"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
type galleryResponse struct {
|
|
ID int64 `json:"id"`
|
|
NHID string `json:"nh_id"`
|
|
Source string `json:"source"`
|
|
MediaID string `json:"media_id"`
|
|
TitleEn string `json:"title_en"`
|
|
TitleJp string `json:"title_jp"`
|
|
Title string `json:"title"`
|
|
NumPages int `json:"num_pages"`
|
|
Images []galleryImage `json:"images"`
|
|
}
|
|
|
|
// galleryImage describes one entry of the "images" array in the gallery
// info JSON.
type galleryImage struct {
	ID        int64  `json:"id"`
	Type      string `json:"type"`
	Extension string `json:"extension"`
	Width     int    `json:"width"`
	Height    int    `json:"height"`
	// URL is the address downloadOne fetches; the on-disk file extension is
	// derived from its path.
	URL string `json:"url"`
}
|
|
|
|
type job struct {
|
|
index int
|
|
image galleryImage
|
|
}
|
|
|
|
// result is what a download worker reports back for a single job.
type result struct {
	// index is the page number copied from the job.
	index int
	// path is the file path returned by downloadOne; empty when err is set.
	path string
	// err is nil when the page was downloaded or skipped successfully.
	err error
}
|
|
|
|
func main() {
|
|
var (
|
|
galleryArg string
|
|
outputDir string
|
|
concurrency int
|
|
timeoutSec int
|
|
skipExisting bool
|
|
metadataOnly bool
|
|
)
|
|
|
|
flag.StringVar(&galleryArg, "url", "", "Hitomi gallery URL or ID")
|
|
flag.StringVar(&outputDir, "out", "downloads", "Output directory")
|
|
flag.IntVar(&concurrency, "workers", min(8, max(2, runtime.NumCPU())), "Concurrent download workers")
|
|
flag.IntVar(&timeoutSec, "timeout", 30, "Per-request timeout in seconds")
|
|
flag.BoolVar(&skipExisting, "skip-existing", true, "Skip files that already exist with non-zero size")
|
|
flag.BoolVar(&metadataOnly, "metadata-only", false, "Only fetch gallery metadata and write info.json")
|
|
flag.Parse()
|
|
|
|
if galleryArg == "" && flag.NArg() > 0 {
|
|
galleryArg = flag.Arg(0)
|
|
}
|
|
if galleryArg == "" {
|
|
exitf("usage: hitomi-downloader -url https://hitomi.moe/g/123456 [-out downloads]")
|
|
}
|
|
if concurrency < 1 {
|
|
exitf("workers must be >= 1")
|
|
}
|
|
if timeoutSec < 1 {
|
|
exitf("timeout must be >= 1")
|
|
}
|
|
|
|
galleryID, err := normalizeGalleryID(galleryArg)
|
|
if err != nil {
|
|
exitf("invalid gallery identifier: %v", err)
|
|
}
|
|
|
|
client := &http.Client{Timeout: time.Duration(timeoutSec) * time.Second}
|
|
ctx := context.Background()
|
|
|
|
gallery, err := fetchGalleryInfo(ctx, client, galleryID)
|
|
if err != nil {
|
|
exitf("fetch gallery info failed: %v", err)
|
|
}
|
|
if len(gallery.Images) == 0 {
|
|
exitf("gallery %s returned no images", galleryID)
|
|
}
|
|
|
|
dirName := fmt.Sprintf("%s_%s", galleryID, sanitizeFilename(preferredTitle(gallery)))
|
|
root := filepath.Join(outputDir, dirName)
|
|
if err := os.MkdirAll(root, 0o755); err != nil {
|
|
exitf("create output dir failed: %v", err)
|
|
}
|
|
|
|
if err := writeMetadata(root, gallery); err != nil {
|
|
exitf("write metadata failed: %v", err)
|
|
}
|
|
|
|
fmt.Printf("gallery: %s\npages: %d\noutput: %s\n", preferredTitle(gallery), len(gallery.Images), root)
|
|
if metadataOnly {
|
|
return
|
|
}
|
|
|
|
if err := downloadAll(ctx, client, root, gallery.Images, concurrency, skipExisting); err != nil {
|
|
exitf("download failed: %v", err)
|
|
}
|
|
|
|
fmt.Println("done")
|
|
}
|
|
|
|
func fetchGalleryInfo(ctx context.Context, client *http.Client, galleryID string) (*galleryResponse, error) {
|
|
endpoint := fmt.Sprintf("https://hitomi.moe/reader/info/%s", galleryID)
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() { _ = resp.Body.Close() }()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
|
return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
|
}
|
|
|
|
var gallery galleryResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&gallery); err != nil {
|
|
return nil, err
|
|
}
|
|
return &gallery, nil
|
|
}
|
|
|
|
func writeMetadata(root string, gallery *galleryResponse) error {
|
|
data, err := json.MarshalIndent(gallery, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.WriteFile(filepath.Join(root, "info.json"), data, 0o644)
|
|
}
|
|
|
|
func downloadAll(ctx context.Context, client *http.Client, root string, images []galleryImage, workers int, skipExisting bool) error {
|
|
jobs := make(chan job)
|
|
results := make(chan result)
|
|
|
|
var wg sync.WaitGroup
|
|
for range workers {
|
|
wg.Go(func() {
|
|
for j := range jobs {
|
|
path, err := downloadOne(ctx, client, root, j.index, j.image, skipExisting)
|
|
results <- result{index: j.index, path: path, err: err}
|
|
}
|
|
})
|
|
}
|
|
|
|
go func() {
|
|
for i, image := range images {
|
|
jobs <- job{index: i + 1, image: image}
|
|
}
|
|
close(jobs)
|
|
wg.Wait()
|
|
close(results)
|
|
}()
|
|
|
|
var failed []string
|
|
var completed atomic.Int64
|
|
total := int64(len(images))
|
|
|
|
for res := range results {
|
|
if res.err != nil {
|
|
failed = append(failed, fmt.Sprintf("page %d: %v", res.index, res.err))
|
|
fmt.Printf("[ERR] %03d %v\n", res.index, res.err)
|
|
continue
|
|
}
|
|
done := completed.Add(1)
|
|
fmt.Printf("[OK ] %03d %s (%d/%d)\n", res.index, filepath.Base(res.path), done, total)
|
|
}
|
|
|
|
if len(failed) > 0 {
|
|
sort.Strings(failed)
|
|
return errors.New(strings.Join(failed, "; "))
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func downloadOne(ctx context.Context, client *http.Client, root string, page int, image galleryImage, skipExisting bool) (string, error) {
|
|
if image.URL == "" {
|
|
return "", errors.New("empty image url")
|
|
}
|
|
ext := extensionFromURL(image.URL)
|
|
if ext == "" {
|
|
ext = ".bin"
|
|
}
|
|
filename := fmt.Sprintf("%03d%s", page, ext)
|
|
target := filepath.Join(root, filename)
|
|
|
|
if skipExisting {
|
|
if info, err := os.Stat(target); err == nil && info.Size() > 0 {
|
|
return target, nil
|
|
}
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, image.URL, nil)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
|
|
req.Header.Set("Referer", "https://hitomi.moe/")
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer func() { _ = resp.Body.Close() }()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
|
return "", fmt.Errorf("status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
|
}
|
|
|
|
tmp := target + ".part"
|
|
file, err := os.Create(tmp)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
_, copyErr := io.Copy(file, resp.Body)
|
|
closeErr := file.Close()
|
|
if copyErr != nil {
|
|
_ = os.Remove(tmp)
|
|
return "", copyErr
|
|
}
|
|
if closeErr != nil {
|
|
_ = os.Remove(tmp)
|
|
return "", closeErr
|
|
}
|
|
if err := os.Rename(tmp, target); err != nil {
|
|
_ = os.Remove(tmp)
|
|
return "", err
|
|
}
|
|
return target, nil
|
|
}
|
|
|
|
func preferredTitle(g *galleryResponse) string {
|
|
for _, s := range []string{g.TitleEn, g.Title, g.TitleJp, g.NHID} {
|
|
s = strings.TrimSpace(s)
|
|
if s != "" {
|
|
return s
|
|
}
|
|
}
|
|
return "gallery"
|
|
}
|
|
|
|
func normalizeGalleryID(input string) (string, error) {
|
|
input = strings.TrimSpace(input)
|
|
if input == "" {
|
|
return "", errors.New("empty input")
|
|
}
|
|
if digitsOnly(input) {
|
|
return input, nil
|
|
}
|
|
u, err := url.Parse(input)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
re := regexp.MustCompile(`/g/(\d+)`)
|
|
match := re.FindStringSubmatch(u.Path)
|
|
if len(match) != 2 {
|
|
return "", fmt.Errorf("could not extract /g/<id> from %q", input)
|
|
}
|
|
return match[1], nil
|
|
}
|
|
|
|
// digitsOnly reports whether s is non-empty and made up solely of the ASCII
// digits '0' through '9'.
func digitsOnly(s string) bool {
	if len(s) == 0 {
		return false
	}
	// Every byte of a multi-byte UTF-8 sequence is >= 0x80, so a byte-wise
	// scan rejects non-ASCII input just like a rune-wise one.
	for i := 0; i < len(s); i++ {
		if c := s[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
|
|
|
// sanitizeFilename converts an arbitrary title into a name that is safe to
// use as a single path component.
//
// The input is lower-cased and trimmed; the characters \ / : * ? " < > | '
// are replaced by "_"; runs of whitespace collapse into a single "_"; the
// result is limited to 120 bytes without ever splitting a multi-byte UTF-8
// character; and leading/trailing '.', '_' and '-' are stripped. When nothing
// is left, "gallery" is returned.
func sanitizeFilename(s string) string {
	const maxBytes = 120

	s = strings.TrimSpace(strings.ToLower(s))
	replacer := strings.NewReplacer(
		"\\", "_",
		"/", "_",
		":", "_",
		"*", "_",
		"?", "_",
		"\"", "_",
		"<", "_",
		">", "_",
		"|", "_",
		"'", "_",
	)
	s = replacer.Replace(s)
	s = strings.Join(strings.Fields(s), "_")

	if len(s) > maxBytes {
		// Ranging over a string yields the byte offset of each character, so
		// the last offset <= maxBytes is a safe cut point. A plain s[:120]
		// could slice through a multi-byte character (e.g. in a Japanese
		// title) and leave invalid UTF-8 in the directory name.
		cut := 0
		for i := range s {
			if i > maxBytes {
				break
			}
			cut = i
		}
		s = s[:cut]
	}

	s = strings.Trim(s, "._-")
	if s == "" {
		return "gallery"
	}
	return s
}
|
|
|
|
// extensionFromURL returns the lower-cased file extension (including the
// leading dot) of the URL's path component, or "" when there is none. If raw
// cannot be parsed as a URL, the extension is taken from the raw string
// itself.
func extensionFromURL(raw string) string {
	candidate := raw
	if parsed, err := url.Parse(raw); err == nil {
		// Using only the path keeps query strings and fragments out of the
		// extension.
		candidate = parsed.Path
	}
	return strings.ToLower(filepath.Ext(candidate))
}
|
|
|
|
// exitf writes the formatted message followed by a newline to standard error
// and terminates the process with exit status 1. It never returns.
func exitf(format string, args ...any) {
	line := fmt.Sprintf(format+"\n", args...)
	// Nothing useful can be done if stderr is unwritable; exit regardless.
	_, _ = os.Stderr.WriteString(line)
	os.Exit(1)
}
|