first commit

This commit is contained in:
2026-05-12 01:30:35 +10:00
commit 13818ae6cd
4 changed files with 459 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
downloads/
+115
View File
@@ -0,0 +1,115 @@
# hitomi-downloader
A small Go command-line downloader for `hitomi.moe` galleries.
The tool fetches gallery metadata from `https://hitomi.moe/reader/info/<id>`,
writes it to `info.json` inside the gallery's output directory, and downloads all gallery images with concurrent
workers.
## Requirements
- Go 1.26 or newer
- Network access to `hitomi.moe`
## Build
```sh
go build -o hitomi-downloader .
```
## Usage
Download by gallery URL:
```sh
go run . -url https://hitomi.moe/g/123456
```
Download by gallery ID:
```sh
go run . -url 123456
```
The gallery identifier can also be provided as a positional argument:
```sh
go run . 123456
```
After building:
```sh
./hitomi-downloader -url 123456 -out downloads
```
## Options
```text
-url string
Hitomi gallery URL or ID.
-out string
Output directory. Defaults to "downloads".
-workers int
Number of concurrent download workers. Defaults to a value based on CPU
count, with a minimum of 2 and a maximum of 8.
-timeout int
Per-request timeout in seconds. Defaults to 30.
-skip-existing bool
Skip existing non-empty files. Defaults to true.
Use -skip-existing=false to force re-downloads.
-metadata-only bool
Fetch metadata and write info.json without downloading images.
```
## Output
Files are written under:
```text
<out>/<gallery-id>_<sanitized-title>/
```
Each gallery directory contains:
```text
info.json
001.<ext>
002.<ext>
003.<ext>
...
```
Images are first downloaded to a temporary `.part` file and then renamed into
place when the download completes successfully.
## Examples
Fetch metadata only:
```sh
go run . -url 123456 -metadata-only
```
Use more workers and a longer timeout:
```sh
go run . -url 123456 -workers 12 -timeout 60
```
Force re-downloading existing files:
```sh
go run . -url 123456 -skip-existing=false
```
## Notes
Use this tool responsibly and follow the terms and rules of the site you are
accessing. Very high worker counts can increase load on the remote server and
may make downloads less reliable.
+3
View File
@@ -0,0 +1,3 @@
module hitomi-downloader
go 1.26.0
+340
View File
@@ -0,0 +1,340 @@
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
"runtime"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
)
// galleryResponse is the JSON document returned by the hitomi.moe
// reader info endpoint for a single gallery.
// NOTE(review): semantics beyond the JSON tags (e.g. NHID, MediaID,
// Source) are inferred from field names — confirm against the API.
type galleryResponse struct {
	ID       int64  `json:"id"`
	NHID     string `json:"nh_id"`
	Source   string `json:"source"`
	MediaID  string `json:"media_id"`
	TitleEn  string `json:"title_en"`
	TitleJp  string `json:"title_jp"`
	Title    string `json:"title"`
	NumPages int    `json:"num_pages"`
	// Images lists one entry per page; each carries its own download URL.
	Images []galleryImage `json:"images"`
}

// galleryImage describes a single downloadable page image.
type galleryImage struct {
	ID        int64  `json:"id"`
	Type      string `json:"type"`
	Extension string `json:"extension"`
	Width     int    `json:"width"`
	Height    int    `json:"height"`
	// URL is the direct image URL fetched by downloadOne.
	URL string `json:"url"`
}

// job is a unit of work sent to download workers: a 1-based page
// index plus the image to fetch.
type job struct {
	index int
	image galleryImage
}

// result reports the outcome of one download attempt; path is the
// final file path on success, err is non-nil on failure.
type result struct {
	index int
	path  string
	err   error
}
// main parses command-line flags, resolves the gallery identifier,
// fetches and stores the gallery metadata, and — unless -metadata-only
// is set — downloads every page image into the output directory.
func main() {
	urlFlag := flag.String("url", "", "Hitomi gallery URL or ID")
	outFlag := flag.String("out", "downloads", "Output directory")
	workersFlag := flag.Int("workers", min(8, max(2, runtime.NumCPU())), "Concurrent download workers")
	timeoutFlag := flag.Int("timeout", 30, "Per-request timeout in seconds")
	skipFlag := flag.Bool("skip-existing", true, "Skip files that already exist with non-zero size")
	metaOnlyFlag := flag.Bool("metadata-only", false, "Only fetch gallery metadata and write info.json")
	flag.Parse()

	// The gallery may be given via -url or as the first positional argument.
	galleryArg := *urlFlag
	if galleryArg == "" && flag.NArg() > 0 {
		galleryArg = flag.Arg(0)
	}
	if galleryArg == "" {
		exitf("usage: hitomi-downloader -url https://hitomi.moe/g/123456 [-out downloads]")
	}
	if *workersFlag < 1 {
		exitf("workers must be >= 1")
	}
	if *timeoutFlag < 1 {
		exitf("timeout must be >= 1")
	}

	galleryID, err := normalizeGalleryID(galleryArg)
	if err != nil {
		exitf("invalid gallery identifier: %v", err)
	}

	// One shared client; the timeout applies per request.
	client := &http.Client{Timeout: time.Duration(*timeoutFlag) * time.Second}
	ctx := context.Background()

	gallery, err := fetchGalleryInfo(ctx, client, galleryID)
	if err != nil {
		exitf("fetch gallery info failed: %v", err)
	}
	if len(gallery.Images) == 0 {
		exitf("gallery %s returned no images", galleryID)
	}

	// Output directory: <out>/<id>_<sanitized-title>/
	dirName := fmt.Sprintf("%s_%s", galleryID, sanitizeFilename(preferredTitle(gallery)))
	root := filepath.Join(*outFlag, dirName)
	if err := os.MkdirAll(root, 0o755); err != nil {
		exitf("create output dir failed: %v", err)
	}
	if err := writeMetadata(root, gallery); err != nil {
		exitf("write metadata failed: %v", err)
	}
	fmt.Printf("gallery: %s\npages: %d\noutput: %s\n", preferredTitle(gallery), len(gallery.Images), root)

	if *metaOnlyFlag {
		return
	}
	if err := downloadAll(ctx, client, root, gallery.Images, *workersFlag, *skipFlag); err != nil {
		exitf("download failed: %v", err)
	}
	fmt.Println("done")
}
// fetchGalleryInfo GETs https://hitomi.moe/reader/info/<id> and decodes
// the JSON response into a galleryResponse. Non-200 responses are turned
// into an error that includes up to 4 KiB of the response body.
func fetchGalleryInfo(ctx context.Context, client *http.Client, galleryID string) (*galleryResponse, error) {
	endpoint := fmt.Sprintf("https://hitomi.moe/reader/info/%s", galleryID)
	req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if reqErr != nil {
		return nil, reqErr
	}
	// Browser-like headers; some hosts reject requests without them.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
	req.Header.Set("Accept", "application/json")

	res, doErr := client.Do(req)
	if doErr != nil {
		return nil, doErr
	}
	defer func() { _ = res.Body.Close() }()

	if res.StatusCode != http.StatusOK {
		// Include a bounded snippet of the body to aid debugging.
		snippet, _ := io.ReadAll(io.LimitReader(res.Body, 4096))
		return nil, fmt.Errorf("unexpected status %d: %s", res.StatusCode, strings.TrimSpace(string(snippet)))
	}

	var info galleryResponse
	if decErr := json.NewDecoder(res.Body).Decode(&info); decErr != nil {
		return nil, decErr
	}
	return &info, nil
}
// writeMetadata serializes the gallery as indented JSON and writes it
// to <root>/info.json with mode 0644.
func writeMetadata(root string, gallery *galleryResponse) error {
	payload, err := json.MarshalIndent(gallery, "", " ")
	if err != nil {
		return err
	}
	target := filepath.Join(root, "info.json")
	return os.WriteFile(target, payload, 0o644)
}
// downloadAll downloads every image using a bounded pool of worker
// goroutines, printing per-page progress as results arrive. It returns
// nil when all pages succeed, or a single error listing each failed
// page in ascending page order.
func downloadAll(ctx context.Context, client *http.Client, root string, images []galleryImage, workers int, skipExisting bool) error {
	jobs := make(chan job)
	results := make(chan result)

	// Worker pool: each worker drains jobs until the channel is closed.
	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for j := range jobs {
				path, err := downloadOne(ctx, client, root, j.index, j.image, skipExisting)
				results <- result{index: j.index, path: path, err: err}
			}
		})
	}

	// Feeder: enqueue all pages (1-based indices), then close results
	// once every worker has finished, which ends the consumer loop below.
	go func() {
		for i, image := range images {
			jobs <- job{index: i + 1, image: image}
		}
		close(jobs)
		wg.Wait()
		close(results)
	}()

	var failed []result
	var completed atomic.Int64
	total := int64(len(images))
	for res := range results {
		if res.err != nil {
			failed = append(failed, res)
			fmt.Printf("[ERR] %03d %v\n", res.index, res.err)
			continue
		}
		done := completed.Add(1)
		fmt.Printf("[OK ] %03d %s (%d/%d)\n", res.index, filepath.Base(res.path), done, total)
	}

	if len(failed) > 0 {
		// Sort failures numerically by page index; sorting the formatted
		// strings would order "page 10" before "page 2".
		sort.Slice(failed, func(i, j int) bool { return failed[i].index < failed[j].index })
		msgs := make([]string, 0, len(failed))
		for _, res := range failed {
			msgs = append(msgs, fmt.Sprintf("page %d: %v", res.index, res.err))
		}
		return errors.New(strings.Join(msgs, "; "))
	}
	return nil
}
// downloadOne fetches a single page image into <root>/<page as %03d><ext>.
// The body is first written to a ".part" temp file and renamed into place
// only after a complete, successful download, so a final file is never
// left half-written. With skipExisting, an existing non-empty target is
// returned immediately without any network traffic.
func downloadOne(ctx context.Context, client *http.Client, root string, page int, image galleryImage, skipExisting bool) (string, error) {
	if image.URL == "" {
		return "", errors.New("empty image url")
	}

	ext := extensionFromURL(image.URL)
	if ext == "" {
		ext = ".bin" // no extension in the URL; fall back to a generic one
	}
	dest := filepath.Join(root, fmt.Sprintf("%03d%s", page, ext))

	if skipExisting {
		if st, statErr := os.Stat(dest); statErr == nil && st.Size() > 0 {
			return dest, nil
		}
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, image.URL, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
	req.Header.Set("Referer", "https://hitomi.moe/")

	res, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer func() { _ = res.Body.Close() }()

	if res.StatusCode != http.StatusOK {
		snippet, _ := io.ReadAll(io.LimitReader(res.Body, 2048))
		return "", fmt.Errorf("status %d: %s", res.StatusCode, strings.TrimSpace(string(snippet)))
	}

	partial := dest + ".part"
	out, err := os.Create(partial)
	if err != nil {
		return "", err
	}
	_, werr := io.Copy(out, res.Body)
	if cerr := out.Close(); werr == nil {
		// Copy succeeded; surface any close error (e.g. flush failure).
		werr = cerr
	}
	if werr != nil {
		_ = os.Remove(partial)
		return "", werr
	}
	if err := os.Rename(partial, dest); err != nil {
		_ = os.Remove(partial)
		return "", err
	}
	return dest, nil
}
// preferredTitle returns the first non-blank title, preferring English,
// then the generic title, then Japanese, then the nh_id. It falls back
// to "gallery" when every candidate is empty or whitespace.
func preferredTitle(g *galleryResponse) string {
	candidates := []string{g.TitleEn, g.Title, g.TitleJp, g.NHID}
	for _, c := range candidates {
		if t := strings.TrimSpace(c); t != "" {
			return t
		}
	}
	return "gallery"
}
// galleryPathRe extracts the numeric gallery ID from a /g/<id> URL path.
// Compiled once at package init rather than on every call.
var galleryPathRe = regexp.MustCompile(`/g/(\d+)`)

// normalizeGalleryID converts user input — either a bare numeric ID or a
// gallery URL containing /g/<id> — into the numeric ID string. It returns
// an error for empty input, unparseable URLs, and URLs whose path lacks
// a /g/<id> segment.
func normalizeGalleryID(input string) (string, error) {
	input = strings.TrimSpace(input)
	if input == "" {
		return "", errors.New("empty input")
	}
	// A bare all-digit argument is already a gallery ID.
	if digitsOnly(input) {
		return input, nil
	}
	u, err := url.Parse(input)
	if err != nil {
		return "", err
	}
	match := galleryPathRe.FindStringSubmatch(u.Path)
	if len(match) != 2 {
		return "", fmt.Errorf("could not extract /g/<id> from %q", input)
	}
	return match[1], nil
}
// digitsOnly reports whether s is non-empty and consists solely of the
// ASCII digits '0'–'9'. Non-ASCII digit runes are rejected.
func digitsOnly(s string) bool {
	if s == "" {
		return false
	}
	nonDigit := func(r rune) bool { return r < '0' || r > '9' }
	return strings.IndexFunc(s, nonDigit) < 0
}
func sanitizeFilename(s string) string {
s = strings.TrimSpace(strings.ToLower(s))
replacer := strings.NewReplacer(
"\\", "_",
"/", "_",
":", "_",
"*", "_",
"?", "_",
"\"", "_",
"<", "_",
">", "_",
"|", "_",
"'", "_",
)
s = replacer.Replace(s)
s = strings.Join(strings.Fields(s), "_")
if len(s) > 120 {
s = s[:120]
}
s = strings.Trim(s, "._-")
if s == "" {
return "gallery"
}
return s
}
// extensionFromURL returns the lowercased file extension (including the
// dot) of the URL's path component, ignoring query/fragment. If the URL
// does not parse, the raw string's extension is used instead; an absent
// extension yields "".
func extensionFromURL(raw string) string {
	source := raw
	if u, err := url.Parse(raw); err == nil {
		source = u.Path
	}
	return strings.ToLower(filepath.Ext(source))
}
// exitf writes a formatted message plus newline to stderr and terminates
// the process with exit code 1. It never returns.
func exitf(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(1)
}