Files
2026-05-12 01:30:35 +10:00

341 lines
8.1 KiB
Go

package main
import (
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unicode/utf8"
)
// galleryResponse is the JSON document returned by the hitomi.moe
// reader info endpoint (see fetchGalleryInfo). Images carries one
// entry per page; the three title fields may each be blank (see
// preferredTitle for the fallback order).
type galleryResponse struct {
	ID       int64  `json:"id"`
	NHID     string `json:"nh_id"`
	Source   string `json:"source"`
	MediaID  string `json:"media_id"`
	TitleEn  string `json:"title_en"`
	TitleJp  string `json:"title_jp"`
	Title    string `json:"title"`
	NumPages int    `json:"num_pages"`
	Images   []galleryImage `json:"images"`
}
// galleryImage describes a single page image as reported by the API.
type galleryImage struct {
	ID        int64  `json:"id"`
	Type      string `json:"type"`
	Extension string `json:"extension"`
	Width     int    `json:"width"`
	Height    int    `json:"height"`
	// URL is the direct download location; an empty URL is rejected
	// by downloadOne.
	URL string `json:"url"`
}
// job is one unit of work handed to a download worker.
type job struct {
	index int          // 1-based page number
	image galleryImage // page descriptor to fetch
}
// result reports one finished download attempt back to the collector
// loop in downloadAll.
type result struct {
	index int    // 1-based page number
	path  string // final file path; empty when err is non-nil
	err   error  // nil on success
}
// main wires together flag parsing, gallery-ID normalization, metadata
// retrieval, and (unless -metadata-only) the page download run. Any
// failure terminates the process via exitf with a message on stderr.
func main() {
	var (
		rawGallery string
		outDir     string
		workers    int
		timeout    int
		skipDone   bool
		infoOnly   bool
	)
	flag.StringVar(&rawGallery, "url", "", "Hitomi gallery URL or ID")
	flag.StringVar(&outDir, "out", "downloads", "Output directory")
	flag.IntVar(&workers, "workers", min(8, max(2, runtime.NumCPU())), "Concurrent download workers")
	flag.IntVar(&timeout, "timeout", 30, "Per-request timeout in seconds")
	flag.BoolVar(&skipDone, "skip-existing", true, "Skip files that already exist with non-zero size")
	flag.BoolVar(&infoOnly, "metadata-only", false, "Only fetch gallery metadata and write info.json")
	flag.Parse()
	// The gallery may also be supplied as a bare positional argument.
	if rawGallery == "" && flag.NArg() > 0 {
		rawGallery = flag.Arg(0)
	}
	// exitf never returns, so only the first failing check fires.
	switch {
	case rawGallery == "":
		exitf("usage: hitomi-downloader -url https://hitomi.moe/g/123456 [-out downloads]")
	case workers < 1:
		exitf("workers must be >= 1")
	case timeout < 1:
		exitf("timeout must be >= 1")
	}
	galleryID, err := normalizeGalleryID(rawGallery)
	if err != nil {
		exitf("invalid gallery identifier: %v", err)
	}
	httpClient := &http.Client{Timeout: time.Duration(timeout) * time.Second}
	ctx := context.Background()
	gallery, err := fetchGalleryInfo(ctx, httpClient, galleryID)
	if err != nil {
		exitf("fetch gallery info failed: %v", err)
	}
	if len(gallery.Images) == 0 {
		exitf("gallery %s returned no images", galleryID)
	}
	// Output directory: <out>/<id>_<sanitized title>.
	root := filepath.Join(outDir, fmt.Sprintf("%s_%s", galleryID, sanitizeFilename(preferredTitle(gallery))))
	if err := os.MkdirAll(root, 0o755); err != nil {
		exitf("create output dir failed: %v", err)
	}
	if err := writeMetadata(root, gallery); err != nil {
		exitf("write metadata failed: %v", err)
	}
	fmt.Printf("gallery: %s\npages: %d\noutput: %s\n", preferredTitle(gallery), len(gallery.Images), root)
	if infoOnly {
		return
	}
	if err := downloadAll(ctx, httpClient, root, gallery.Images, workers, skipDone); err != nil {
		exitf("download failed: %v", err)
	}
	fmt.Println("done")
}
// fetchGalleryInfo retrieves and decodes the JSON gallery description
// for galleryID from the hitomi.moe reader info endpoint. The caller's
// client supplies the overall request timeout.
func fetchGalleryInfo(ctx context.Context, client *http.Client, galleryID string) (*galleryResponse, error) {
	endpoint := fmt.Sprintf("https://hitomi.moe/reader/info/%s", galleryID)
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	// Browser-like User-Agent; the download path sends the same one.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
	req.Header.Set("Accept", "application/json")
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode != http.StatusOK {
		// Include a bounded slice of the body in the error to aid debugging.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(snippet)))
	}
	gallery := new(galleryResponse)
	if err := json.NewDecoder(resp.Body).Decode(gallery); err != nil {
		return nil, err
	}
	return gallery, nil
}
// writeMetadata serializes the gallery as pretty-printed JSON into
// root/info.json with 0644 permissions.
func writeMetadata(root string, gallery *galleryResponse) error {
	payload, err := json.MarshalIndent(gallery, "", " ")
	if err != nil {
		return err
	}
	target := filepath.Join(root, "info.json")
	return os.WriteFile(target, payload, 0o644)
}
// downloadAll downloads every image into root using a bounded pool of
// worker goroutines, printing per-page progress as results arrive.
// It returns nil when every page succeeded, otherwise a single error
// listing each failed page in ascending page order.
func downloadAll(ctx context.Context, client *http.Client, root string, images []galleryImage, workers int, skipExisting bool) error {
	jobs := make(chan job)
	results := make(chan result)
	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for j := range jobs {
				path, err := downloadOne(ctx, client, root, j.index, j.image, skipExisting)
				results <- result{index: j.index, path: path, err: err}
			}
		})
	}
	// Feeder: enqueue every page, then close results once all workers
	// have drained so the collector loop below terminates.
	go func() {
		for i, image := range images {
			jobs <- job{index: i + 1, image: image}
		}
		close(jobs)
		wg.Wait()
		close(results)
	}()
	type failure struct {
		page int
		err  error
	}
	var failed []failure
	var completed atomic.Int64
	total := int64(len(images))
	for res := range results {
		if res.err != nil {
			failed = append(failed, failure{page: res.index, err: res.err})
			fmt.Printf("[ERR] %03d %v\n", res.index, res.err)
			continue
		}
		done := completed.Add(1)
		fmt.Printf("[OK ] %03d %s (%d/%d)\n", res.index, filepath.Base(res.path), done, total)
	}
	if len(failed) > 0 {
		// Sort numerically by page: sorting the formatted strings put
		// "page 10" before "page 2".
		sort.Slice(failed, func(i, j int) bool { return failed[i].page < failed[j].page })
		msgs := make([]string, len(failed))
		for i, f := range failed {
			msgs[i] = fmt.Sprintf("page %d: %v", f.page, f.err)
		}
		return errors.New(strings.Join(msgs, "; "))
	}
	return nil
}
// downloadOne fetches a single page image into root as %03d<ext>,
// streaming the body into a ".part" temp file and renaming it into
// place only on success so a partial download never masquerades as a
// complete file. When skipExisting is set, an existing non-empty
// target short-circuits without any network request. It returns the
// final file path.
func downloadOne(ctx context.Context, client *http.Client, root string, page int, image galleryImage, skipExisting bool) (string, error) {
	if image.URL == "" {
		return "", errors.New("empty image url")
	}
	ext := extensionFromURL(image.URL)
	if ext == "" {
		ext = ".bin"
	}
	target := filepath.Join(root, fmt.Sprintf("%03d%s", page, ext))
	if skipExisting {
		if st, statErr := os.Stat(target); statErr == nil && st.Size() > 0 {
			return target, nil
		}
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, image.URL, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
	req.Header.Set("Referer", "https://hitomi.moe/")
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode != http.StatusOK {
		// Bounded body snippet for the error message.
		snippet, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
		return "", fmt.Errorf("status %d: %s", resp.StatusCode, strings.TrimSpace(string(snippet)))
	}
	tmp := target + ".part"
	out, err := os.Create(tmp)
	if err != nil {
		return "", err
	}
	// Report the copy error first, falling back to the close error —
	// and remove the partial file in either case.
	_, werr := io.Copy(out, resp.Body)
	if cerr := out.Close(); werr == nil {
		werr = cerr
	}
	if werr != nil {
		_ = os.Remove(tmp)
		return "", werr
	}
	if err := os.Rename(tmp, target); err != nil {
		_ = os.Remove(tmp)
		return "", err
	}
	return target, nil
}
// preferredTitle returns the first non-blank title for the gallery,
// preferring English, then the generic title, then Japanese, then the
// NH identifier. It falls back to "gallery" when all are blank.
func preferredTitle(g *galleryResponse) string {
	candidates := []string{g.TitleEn, g.Title, g.TitleJp, g.NHID}
	for _, c := range candidates {
		if title := strings.TrimSpace(c); title != "" {
			return title
		}
	}
	return "gallery"
}
// galleryIDPattern extracts the numeric ID from a /g/<id> URL path
// segment. Compiled once at package scope rather than on every call.
var galleryIDPattern = regexp.MustCompile(`/g/(\d+)`)

// normalizeGalleryID accepts either a bare numeric gallery ID or a
// gallery URL containing a /g/<id> path segment and returns the
// numeric ID as a string. It returns an error for empty input,
// unparseable URLs, or URLs without a /g/<id> segment.
func normalizeGalleryID(input string) (string, error) {
	input = strings.TrimSpace(input)
	if input == "" {
		return "", errors.New("empty input")
	}
	// A pure-digit argument is already an ID.
	if digitsOnly(input) {
		return input, nil
	}
	u, err := url.Parse(input)
	if err != nil {
		return "", err
	}
	match := galleryIDPattern.FindStringSubmatch(u.Path)
	if len(match) != 2 {
		return "", fmt.Errorf("could not extract /g/<id> from %q", input)
	}
	return match[1], nil
}
// digitsOnly reports whether s is non-empty and consists solely of
// the ASCII digits '0'-'9' (non-ASCII digit runes are rejected).
func digitsOnly(s string) bool {
	if s == "" {
		return false
	}
	nonDigit := func(r rune) bool { return r < '0' || r > '9' }
	return strings.IndexFunc(s, nonDigit) == -1
}
// sanitizeFilename converts a gallery title into a safe, lowercase,
// underscore-separated directory name capped at 120 bytes. It returns
// "gallery" when nothing usable remains.
func sanitizeFilename(s string) string {
	s = strings.TrimSpace(strings.ToLower(s))
	// Replace characters that are invalid or awkward in Windows/Unix
	// path components.
	replacer := strings.NewReplacer(
		"\\", "_",
		"/", "_",
		":", "_",
		"*", "_",
		"?", "_",
		"\"", "_",
		"<", "_",
		">", "_",
		"|", "_",
		"'", "_",
	)
	s = replacer.Replace(s)
	// Collapse whitespace runs into single underscores.
	s = strings.Join(strings.Fields(s), "_")
	// Cap the length without splitting a multi-byte UTF-8 rune:
	// plain s[:120] could cut a Japanese title mid-rune and produce
	// an invalid-UTF-8 directory name. Back off to a rune boundary.
	const maxLen = 120
	if len(s) > maxLen {
		cut := maxLen
		for cut > 0 && !utf8.RuneStart(s[cut]) {
			cut--
		}
		s = s[:cut]
	}
	s = strings.Trim(s, "._-")
	if s == "" {
		return "gallery"
	}
	return s
}
// extensionFromURL returns the lowercase file extension (including the
// leading dot) of the URL's path component, ignoring any query string.
// If raw does not parse as a URL, the raw string itself is used.
func extensionFromURL(raw string) string {
	source := raw
	if u, err := url.Parse(raw); err == nil {
		source = u.Path
	}
	return strings.ToLower(filepath.Ext(source))
}
// exitf prints a formatted message (with trailing newline) to stderr
// and terminates the process with exit status 1. It never returns.
func exitf(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(1)
}