first commit

This commit is contained in:
2026-05-12 01:30:35 +10:00
commit 13818ae6cd
4 changed files with 459 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
downloads/
+115
View File
@@ -0,0 +1,115 @@
# hitomi-downloader
A small Go command-line downloader for `hitomi.moe` galleries.
The tool fetches gallery metadata from `https://hitomi.moe/reader/info/<id>`,
writes it to `info.json` inside the gallery's output directory, and downloads all gallery images with concurrent
workers.
## Requirements
- Go 1.26 or newer
- Network access to `hitomi.moe`
## Build
```sh
go build -o hitomi-downloader .
```
## Usage
Download by gallery URL:
```sh
go run . -url https://hitomi.moe/g/123456
```
Download by gallery ID:
```sh
go run . -url 123456
```
The gallery identifier can also be provided as a positional argument:
```sh
go run . 123456
```
After building:
```sh
./hitomi-downloader -url 123456 -out downloads
```
## Options
```text
-url string
Hitomi gallery URL or ID.
-out string
Output directory. Defaults to "downloads".
-workers int
Number of concurrent download workers. Defaults to a value based on CPU
count, with a minimum of 2 and a maximum of 8.
-timeout int
Per-request timeout in seconds. Defaults to 30.
-skip-existing bool
Skip existing non-empty files. Defaults to true.
Use -skip-existing=false to force re-downloads.
-metadata-only bool
Fetch metadata and write info.json without downloading images.
```
## Output
Files are written under:
```text
<out>/<gallery-id>_<sanitized-title>/
```
Each gallery directory contains:
```text
info.json
001.<ext>
002.<ext>
003.<ext>
...
```
Images are first downloaded to a temporary `.part` file and then renamed into
place when the download completes successfully.
## Examples
Fetch metadata only:
```sh
go run . -url 123456 -metadata-only
```
Use more workers and a longer timeout:
```sh
go run . -url 123456 -workers 12 -timeout 60
```
Force re-downloading existing files:
```sh
go run . -url 123456 -skip-existing=false
```
## Notes
Use this tool responsibly and follow the terms and rules of the site you are
accessing. Very high worker counts can increase load on the remote server and
may make downloads less reliable.
+3
View File
@@ -0,0 +1,3 @@
module hitomi-downloader
go 1.26.0
+340
View File
@@ -0,0 +1,340 @@
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
"runtime"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
)
// galleryResponse is the JSON document returned by the hitomi.moe
// reader info endpoint for a single gallery.
// NOTE(review): semantics beyond the JSON tags (e.g. NHID, MediaID,
// Source) are inferred from field names — confirm against the API.
type galleryResponse struct {
	ID       int64  `json:"id"`
	NHID     string `json:"nh_id"`
	Source   string `json:"source"`
	MediaID  string `json:"media_id"`
	TitleEn  string `json:"title_en"`
	TitleJp  string `json:"title_jp"`
	Title    string `json:"title"`
	NumPages int    `json:"num_pages"`
	// Images lists one entry per page; each carries its own download URL.
	Images []galleryImage `json:"images"`
}

// galleryImage describes a single downloadable page image.
type galleryImage struct {
	ID        int64  `json:"id"`
	Type      string `json:"type"`
	Extension string `json:"extension"`
	Width     int    `json:"width"`
	Height    int    `json:"height"`
	// URL is the direct image URL fetched by downloadOne.
	URL string `json:"url"`
}

// job is a unit of work sent to download workers: a 1-based page
// index plus the image to fetch.
type job struct {
	index int
	image galleryImage
}

// result reports the outcome of one download attempt; path is the
// final file path on success, err is non-nil on failure.
type result struct {
	index int
	path  string
	err   error
}
// main parses command-line flags, resolves the gallery identifier,
// fetches and stores the gallery metadata, and — unless -metadata-only
// is set — downloads every page image into the output directory.
func main() {
	urlFlag := flag.String("url", "", "Hitomi gallery URL or ID")
	outFlag := flag.String("out", "downloads", "Output directory")
	workersFlag := flag.Int("workers", min(8, max(2, runtime.NumCPU())), "Concurrent download workers")
	timeoutFlag := flag.Int("timeout", 30, "Per-request timeout in seconds")
	skipFlag := flag.Bool("skip-existing", true, "Skip files that already exist with non-zero size")
	metaOnlyFlag := flag.Bool("metadata-only", false, "Only fetch gallery metadata and write info.json")
	flag.Parse()

	// The gallery may be given via -url or as the first positional argument.
	galleryArg := *urlFlag
	if galleryArg == "" && flag.NArg() > 0 {
		galleryArg = flag.Arg(0)
	}
	if galleryArg == "" {
		exitf("usage: hitomi-downloader -url https://hitomi.moe/g/123456 [-out downloads]")
	}
	if *workersFlag < 1 {
		exitf("workers must be >= 1")
	}
	if *timeoutFlag < 1 {
		exitf("timeout must be >= 1")
	}

	galleryID, err := normalizeGalleryID(galleryArg)
	if err != nil {
		exitf("invalid gallery identifier: %v", err)
	}

	// One shared client; the timeout applies per request.
	client := &http.Client{Timeout: time.Duration(*timeoutFlag) * time.Second}
	ctx := context.Background()

	gallery, err := fetchGalleryInfo(ctx, client, galleryID)
	if err != nil {
		exitf("fetch gallery info failed: %v", err)
	}
	if len(gallery.Images) == 0 {
		exitf("gallery %s returned no images", galleryID)
	}

	// Output directory: <out>/<id>_<sanitized-title>/
	dirName := fmt.Sprintf("%s_%s", galleryID, sanitizeFilename(preferredTitle(gallery)))
	root := filepath.Join(*outFlag, dirName)
	if err := os.MkdirAll(root, 0o755); err != nil {
		exitf("create output dir failed: %v", err)
	}
	if err := writeMetadata(root, gallery); err != nil {
		exitf("write metadata failed: %v", err)
	}
	fmt.Printf("gallery: %s\npages: %d\noutput: %s\n", preferredTitle(gallery), len(gallery.Images), root)

	if *metaOnlyFlag {
		return
	}
	if err := downloadAll(ctx, client, root, gallery.Images, *workersFlag, *skipFlag); err != nil {
		exitf("download failed: %v", err)
	}
	fmt.Println("done")
}
// fetchGalleryInfo GETs https://hitomi.moe/reader/info/<id> and decodes
// the JSON response into a galleryResponse. Non-200 responses are turned
// into an error that includes up to 4 KiB of the response body.
func fetchGalleryInfo(ctx context.Context, client *http.Client, galleryID string) (*galleryResponse, error) {
	endpoint := fmt.Sprintf("https://hitomi.moe/reader/info/%s", galleryID)
	req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if reqErr != nil {
		return nil, reqErr
	}
	// Browser-like headers; some hosts reject requests without them.
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
	req.Header.Set("Accept", "application/json")

	res, doErr := client.Do(req)
	if doErr != nil {
		return nil, doErr
	}
	defer func() { _ = res.Body.Close() }()

	if res.StatusCode != http.StatusOK {
		// Include a bounded snippet of the body to aid debugging.
		snippet, _ := io.ReadAll(io.LimitReader(res.Body, 4096))
		return nil, fmt.Errorf("unexpected status %d: %s", res.StatusCode, strings.TrimSpace(string(snippet)))
	}

	var info galleryResponse
	if decErr := json.NewDecoder(res.Body).Decode(&info); decErr != nil {
		return nil, decErr
	}
	return &info, nil
}
// writeMetadata serializes the gallery as indented JSON and writes it
// to <root>/info.json with mode 0644.
func writeMetadata(root string, gallery *galleryResponse) error {
	payload, err := json.MarshalIndent(gallery, "", " ")
	if err != nil {
		return err
	}
	target := filepath.Join(root, "info.json")
	return os.WriteFile(target, payload, 0o644)
}
// downloadAll downloads every image using a bounded pool of worker
// goroutines, printing per-page progress as results arrive. It returns
// nil when all pages succeed, or a single error listing each failed
// page in ascending page order.
func downloadAll(ctx context.Context, client *http.Client, root string, images []galleryImage, workers int, skipExisting bool) error {
	jobs := make(chan job)
	results := make(chan result)

	// Worker pool: each worker drains jobs until the channel is closed.
	var wg sync.WaitGroup
	for range workers {
		wg.Go(func() {
			for j := range jobs {
				path, err := downloadOne(ctx, client, root, j.index, j.image, skipExisting)
				results <- result{index: j.index, path: path, err: err}
			}
		})
	}

	// Feeder: enqueue all pages (1-based indices), then close results
	// once every worker has finished, which ends the consumer loop below.
	go func() {
		for i, image := range images {
			jobs <- job{index: i + 1, image: image}
		}
		close(jobs)
		wg.Wait()
		close(results)
	}()

	var failed []result
	var completed atomic.Int64
	total := int64(len(images))
	for res := range results {
		if res.err != nil {
			failed = append(failed, res)
			fmt.Printf("[ERR] %03d %v\n", res.index, res.err)
			continue
		}
		done := completed.Add(1)
		fmt.Printf("[OK ] %03d %s (%d/%d)\n", res.index, filepath.Base(res.path), done, total)
	}

	if len(failed) > 0 {
		// Sort failures numerically by page index; sorting the formatted
		// strings would order "page 10" before "page 2".
		sort.Slice(failed, func(i, j int) bool { return failed[i].index < failed[j].index })
		msgs := make([]string, 0, len(failed))
		for _, res := range failed {
			msgs = append(msgs, fmt.Sprintf("page %d: %v", res.index, res.err))
		}
		return errors.New(strings.Join(msgs, "; "))
	}
	return nil
}
// downloadOne fetches a single page image into <root>/<page as %03d><ext>.
// The body is first written to a ".part" temp file and renamed into place
// only after a complete, successful download, so a final file is never
// left half-written. With skipExisting, an existing non-empty target is
// returned immediately without any network traffic.
func downloadOne(ctx context.Context, client *http.Client, root string, page int, image galleryImage, skipExisting bool) (string, error) {
	if image.URL == "" {
		return "", errors.New("empty image url")
	}

	ext := extensionFromURL(image.URL)
	if ext == "" {
		ext = ".bin" // no extension in the URL; fall back to a generic one
	}
	dest := filepath.Join(root, fmt.Sprintf("%03d%s", page, ext))

	if skipExisting {
		if st, statErr := os.Stat(dest); statErr == nil && st.Size() > 0 {
			return dest, nil
		}
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, image.URL, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
	req.Header.Set("Referer", "https://hitomi.moe/")

	res, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer func() { _ = res.Body.Close() }()

	if res.StatusCode != http.StatusOK {
		snippet, _ := io.ReadAll(io.LimitReader(res.Body, 2048))
		return "", fmt.Errorf("status %d: %s", res.StatusCode, strings.TrimSpace(string(snippet)))
	}

	partial := dest + ".part"
	out, err := os.Create(partial)
	if err != nil {
		return "", err
	}
	_, werr := io.Copy(out, res.Body)
	if cerr := out.Close(); werr == nil {
		// Copy succeeded; surface any close error (e.g. flush failure).
		werr = cerr
	}
	if werr != nil {
		_ = os.Remove(partial)
		return "", werr
	}
	if err := os.Rename(partial, dest); err != nil {
		_ = os.Remove(partial)
		return "", err
	}
	return dest, nil
}
// preferredTitle returns the first non-blank title, preferring English,
// then the generic title, then Japanese, then the nh_id. It falls back
// to "gallery" when every candidate is empty or whitespace.
func preferredTitle(g *galleryResponse) string {
	candidates := []string{g.TitleEn, g.Title, g.TitleJp, g.NHID}
	for _, c := range candidates {
		if t := strings.TrimSpace(c); t != "" {
			return t
		}
	}
	return "gallery"
}
// galleryPathRe extracts the numeric gallery ID from a /g/<id> URL path.
// Compiled once at package init rather than on every call.
var galleryPathRe = regexp.MustCompile(`/g/(\d+)`)

// normalizeGalleryID converts user input — either a bare numeric ID or a
// gallery URL containing /g/<id> — into the numeric ID string. It returns
// an error for empty input, unparseable URLs, and URLs whose path lacks
// a /g/<id> segment.
func normalizeGalleryID(input string) (string, error) {
	input = strings.TrimSpace(input)
	if input == "" {
		return "", errors.New("empty input")
	}
	// A bare all-digit argument is already a gallery ID.
	if digitsOnly(input) {
		return input, nil
	}
	u, err := url.Parse(input)
	if err != nil {
		return "", err
	}
	match := galleryPathRe.FindStringSubmatch(u.Path)
	if len(match) != 2 {
		return "", fmt.Errorf("could not extract /g/<id> from %q", input)
	}
	return match[1], nil
}
// digitsOnly reports whether s is non-empty and consists solely of the
// ASCII digits '0'–'9'. Non-ASCII digit runes are rejected.
func digitsOnly(s string) bool {
	if s == "" {
		return false
	}
	nonDigit := func(r rune) bool { return r < '0' || r > '9' }
	return strings.IndexFunc(s, nonDigit) < 0
}
func sanitizeFilename(s string) string {
s = strings.TrimSpace(strings.ToLower(s))
replacer := strings.NewReplacer(
"\\", "_",
"/", "_",
":", "_",
"*", "_",
"?", "_",
"\"", "_",
"<", "_",
">", "_",
"|", "_",
"'", "_",
)
s = replacer.Replace(s)
s = strings.Join(strings.Fields(s), "_")
if len(s) > 120 {
s = s[:120]
}
s = strings.Trim(s, "._-")
if s == "" {
return "gallery"
}
return s
}
// extensionFromURL returns the lowercased file extension (including the
// dot) of the URL's path component, ignoring query/fragment. If the URL
// does not parse, the raw string's extension is used instead; an absent
// extension yields "".
func extensionFromURL(raw string) string {
	source := raw
	if u, err := url.Parse(raw); err == nil {
		source = u.Path
	}
	return strings.ToLower(filepath.Ext(source))
}
// exitf writes a formatted message plus newline to stderr and terminates
// the process with exit code 1. It never returns.
func exitf(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(1)
}