first commit
This commit is contained in:
@@ -0,0 +1 @@
|
||||
downloads/
|
||||
@@ -0,0 +1,115 @@
|
||||
# hitomi-downloader
|
||||
|
||||
A small Go command-line downloader for `hitomi.moe` galleries.
|
||||
|
||||
The tool fetches gallery metadata from `https://hitomi.moe/reader/info/<id>`,
|
||||
writes it to `info.json`, and downloads all gallery images with concurrent
|
||||
workers.
|
||||
|
||||
## Requirements
|
||||
|
||||
- Go 1.25 or newer (the code uses `sync.WaitGroup.Go`, added in Go 1.25)
|
||||
- Network access to `hitomi.moe`
|
||||
|
||||
## Build
|
||||
|
||||
```sh
|
||||
go build -o hitomi-downloader .
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
Download by gallery URL:
|
||||
|
||||
```sh
|
||||
go run . -url https://hitomi.moe/g/123456
|
||||
```
|
||||
|
||||
Download by gallery ID:
|
||||
|
||||
```sh
|
||||
go run . -url 123456
|
||||
```
|
||||
|
||||
The gallery identifier can also be provided as a positional argument:
|
||||
|
||||
```sh
|
||||
go run . 123456
|
||||
```
|
||||
|
||||
After building:
|
||||
|
||||
```sh
|
||||
./hitomi-downloader -url 123456 -out downloads
|
||||
```
|
||||
|
||||
## Options
|
||||
|
||||
```text
|
||||
-url string
|
||||
Hitomi gallery URL or ID.
|
||||
|
||||
-out string
|
||||
Output directory. Defaults to "downloads".
|
||||
|
||||
-workers int
|
||||
Number of concurrent download workers. Defaults to a value based on CPU
|
||||
count, with a minimum of 2 and a maximum of 8.
|
||||
|
||||
-timeout int
|
||||
Per-request timeout in seconds. Defaults to 30.
|
||||
|
||||
-skip-existing bool
|
||||
Skip existing non-empty files. Defaults to true.
|
||||
Use -skip-existing=false to force re-downloads.
|
||||
|
||||
-metadata-only bool
|
||||
Fetch metadata and write info.json without downloading images.
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
Files are written under:
|
||||
|
||||
```text
|
||||
<out>/<gallery-id>_<sanitized-title>/
|
||||
```
|
||||
|
||||
Each gallery directory contains:
|
||||
|
||||
```text
|
||||
info.json
|
||||
001.<ext>
|
||||
002.<ext>
|
||||
003.<ext>
|
||||
...
|
||||
```
|
||||
|
||||
Images are first downloaded to a temporary `.part` file and then renamed into
|
||||
place when the download completes successfully.
|
||||
|
||||
## Examples
|
||||
|
||||
Fetch metadata only:
|
||||
|
||||
```sh
|
||||
go run . -url 123456 -metadata-only
|
||||
```
|
||||
|
||||
Use more workers and a longer timeout:
|
||||
|
||||
```sh
|
||||
go run . -url 123456 -workers 12 -timeout 60
|
||||
```
|
||||
|
||||
Force re-downloading existing files:
|
||||
|
||||
```sh
|
||||
go run . -url 123456 -skip-existing=false
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
Use this tool responsibly and follow the terms and rules of the site you are
|
||||
accessing. Very high worker counts can increase load on the remote server and
|
||||
may make downloads less reliable.
|
||||
@@ -0,0 +1,340 @@
|
||||
package main
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"regexp"
	"runtime"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"
	"unicode/utf8"
)
|
||||
|
||||
// galleryResponse mirrors the JSON document returned by
// https://hitomi.moe/reader/info/<id>.
type galleryResponse struct {
	ID       int64  `json:"id"`
	NHID     string `json:"nh_id"` // source-site identifier; used as a last-resort title fallback
	Source   string `json:"source"`
	MediaID  string `json:"media_id"`
	TitleEn  string `json:"title_en"` // preferred title, see preferredTitle
	TitleJp  string `json:"title_jp"`
	Title    string `json:"title"`
	NumPages int    `json:"num_pages"`
	// Images drives the download loop; page numbers are derived from the
	// slice order (1-based), not from this count.
	Images []galleryImage `json:"images"`
}
|
||||
|
||||
// galleryImage describes a single page image of a gallery as reported by the
// reader API.
type galleryImage struct {
	ID        int64  `json:"id"`
	Type      string `json:"type"`
	Extension string `json:"extension"`
	Width     int    `json:"width"`
	Height    int    `json:"height"`
	URL       string `json:"url"` // direct download URL; also determines the local file extension
}
|
||||
|
||||
// job is one unit of work for a download worker: a page image together with
// its 1-based page number.
type job struct {
	index int
	image galleryImage
}
|
||||
|
||||
// result reports the outcome of one download attempt. On success path holds
// the final file location and err is nil; on failure err is set and path is
// empty.
type result struct {
	index int
	path  string
	err   error
}
|
||||
|
||||
// main parses flags, fetches gallery metadata, writes info.json, and (unless
// -metadata-only is set) downloads every page image concurrently. All fatal
// errors go through exitf, which prints to stderr and exits with status 1.
func main() {
	var (
		galleryArg   string // gallery URL or bare numeric ID
		outputDir    string
		concurrency  int
		timeoutSec   int
		skipExisting bool
		metadataOnly bool
	)

	flag.StringVar(&galleryArg, "url", "", "Hitomi gallery URL or ID")
	flag.StringVar(&outputDir, "out", "downloads", "Output directory")
	// Default worker count scales with CPU count, clamped to [2, 8].
	flag.IntVar(&concurrency, "workers", min(8, max(2, runtime.NumCPU())), "Concurrent download workers")
	flag.IntVar(&timeoutSec, "timeout", 30, "Per-request timeout in seconds")
	flag.BoolVar(&skipExisting, "skip-existing", true, "Skip files that already exist with non-zero size")
	flag.BoolVar(&metadataOnly, "metadata-only", false, "Only fetch gallery metadata and write info.json")
	flag.Parse()

	// The identifier may also be supplied as the first positional argument;
	// -url wins when both are present.
	if galleryArg == "" && flag.NArg() > 0 {
		galleryArg = flag.Arg(0)
	}
	if galleryArg == "" {
		exitf("usage: hitomi-downloader -url https://hitomi.moe/g/123456 [-out downloads]")
	}
	if concurrency < 1 {
		exitf("workers must be >= 1")
	}
	if timeoutSec < 1 {
		exitf("timeout must be >= 1")
	}

	galleryID, err := normalizeGalleryID(galleryArg)
	if err != nil {
		exitf("invalid gallery identifier: %v", err)
	}

	// One shared client for all requests; Timeout bounds each request
	// end-to-end (including body read).
	client := &http.Client{Timeout: time.Duration(timeoutSec) * time.Second}
	ctx := context.Background()

	gallery, err := fetchGalleryInfo(ctx, client, galleryID)
	if err != nil {
		exitf("fetch gallery info failed: %v", err)
	}
	if len(gallery.Images) == 0 {
		exitf("gallery %s returned no images", galleryID)
	}

	// Output layout: <out>/<gallery-id>_<sanitized-title>/
	dirName := fmt.Sprintf("%s_%s", galleryID, sanitizeFilename(preferredTitle(gallery)))
	root := filepath.Join(outputDir, dirName)
	if err := os.MkdirAll(root, 0o755); err != nil {
		exitf("create output dir failed: %v", err)
	}

	// info.json is written even in metadata-only mode.
	if err := writeMetadata(root, gallery); err != nil {
		exitf("write metadata failed: %v", err)
	}

	fmt.Printf("gallery: %s\npages: %d\noutput: %s\n", preferredTitle(gallery), len(gallery.Images), root)
	if metadataOnly {
		return
	}

	if err := downloadAll(ctx, client, root, gallery.Images, concurrency, skipExisting); err != nil {
		exitf("download failed: %v", err)
	}

	fmt.Println("done")
}
|
||||
|
||||
func fetchGalleryInfo(ctx context.Context, client *http.Client, galleryID string) (*galleryResponse, error) {
|
||||
endpoint := fmt.Sprintf("https://hitomi.moe/reader/info/%s", galleryID)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
|
||||
return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
|
||||
var gallery galleryResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&gallery); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &gallery, nil
|
||||
}
|
||||
|
||||
func writeMetadata(root string, gallery *galleryResponse) error {
|
||||
data, err := json.MarshalIndent(gallery, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(filepath.Join(root, "info.json"), data, 0o644)
|
||||
}
|
||||
|
||||
func downloadAll(ctx context.Context, client *http.Client, root string, images []galleryImage, workers int, skipExisting bool) error {
|
||||
jobs := make(chan job)
|
||||
results := make(chan result)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for range workers {
|
||||
wg.Go(func() {
|
||||
for j := range jobs {
|
||||
path, err := downloadOne(ctx, client, root, j.index, j.image, skipExisting)
|
||||
results <- result{index: j.index, path: path, err: err}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
go func() {
|
||||
for i, image := range images {
|
||||
jobs <- job{index: i + 1, image: image}
|
||||
}
|
||||
close(jobs)
|
||||
wg.Wait()
|
||||
close(results)
|
||||
}()
|
||||
|
||||
var failed []string
|
||||
var completed atomic.Int64
|
||||
total := int64(len(images))
|
||||
|
||||
for res := range results {
|
||||
if res.err != nil {
|
||||
failed = append(failed, fmt.Sprintf("page %d: %v", res.index, res.err))
|
||||
fmt.Printf("[ERR] %03d %v\n", res.index, res.err)
|
||||
continue
|
||||
}
|
||||
done := completed.Add(1)
|
||||
fmt.Printf("[OK ] %03d %s (%d/%d)\n", res.index, filepath.Base(res.path), done, total)
|
||||
}
|
||||
|
||||
if len(failed) > 0 {
|
||||
sort.Strings(failed)
|
||||
return errors.New(strings.Join(failed, "; "))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func downloadOne(ctx context.Context, client *http.Client, root string, page int, image galleryImage, skipExisting bool) (string, error) {
|
||||
if image.URL == "" {
|
||||
return "", errors.New("empty image url")
|
||||
}
|
||||
ext := extensionFromURL(image.URL)
|
||||
if ext == "" {
|
||||
ext = ".bin"
|
||||
}
|
||||
filename := fmt.Sprintf("%03d%s", page, ext)
|
||||
target := filepath.Join(root, filename)
|
||||
|
||||
if skipExisting {
|
||||
if info, err := os.Stat(target); err == nil && info.Size() > 0 {
|
||||
return target, nil
|
||||
}
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, image.URL, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
|
||||
req.Header.Set("Referer", "https://hitomi.moe/")
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
||||
return "", fmt.Errorf("status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
||||
}
|
||||
|
||||
tmp := target + ".part"
|
||||
file, err := os.Create(tmp)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
_, copyErr := io.Copy(file, resp.Body)
|
||||
closeErr := file.Close()
|
||||
if copyErr != nil {
|
||||
_ = os.Remove(tmp)
|
||||
return "", copyErr
|
||||
}
|
||||
if closeErr != nil {
|
||||
_ = os.Remove(tmp)
|
||||
return "", closeErr
|
||||
}
|
||||
if err := os.Rename(tmp, target); err != nil {
|
||||
_ = os.Remove(tmp)
|
||||
return "", err
|
||||
}
|
||||
return target, nil
|
||||
}
|
||||
|
||||
func preferredTitle(g *galleryResponse) string {
|
||||
for _, s := range []string{g.TitleEn, g.Title, g.TitleJp, g.NHID} {
|
||||
s = strings.TrimSpace(s)
|
||||
if s != "" {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return "gallery"
|
||||
}
|
||||
|
||||
func normalizeGalleryID(input string) (string, error) {
|
||||
input = strings.TrimSpace(input)
|
||||
if input == "" {
|
||||
return "", errors.New("empty input")
|
||||
}
|
||||
if digitsOnly(input) {
|
||||
return input, nil
|
||||
}
|
||||
u, err := url.Parse(input)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
re := regexp.MustCompile(`/g/(\d+)`)
|
||||
match := re.FindStringSubmatch(u.Path)
|
||||
if len(match) != 2 {
|
||||
return "", fmt.Errorf("could not extract /g/<id> from %q", input)
|
||||
}
|
||||
return match[1], nil
|
||||
}
|
||||
|
||||
// digitsOnly reports whether s is non-empty and consists solely of the ASCII
// digits '0'-'9'. A byte scan suffices: any byte of a multi-byte UTF-8
// sequence is >= 0x80 and therefore fails the digit range test.
func digitsOnly(s string) bool {
	if len(s) == 0 {
		return false
	}
	for i := 0; i < len(s); i++ {
		if c := s[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
func sanitizeFilename(s string) string {
|
||||
s = strings.TrimSpace(strings.ToLower(s))
|
||||
replacer := strings.NewReplacer(
|
||||
"\\", "_",
|
||||
"/", "_",
|
||||
":", "_",
|
||||
"*", "_",
|
||||
"?", "_",
|
||||
"\"", "_",
|
||||
"<", "_",
|
||||
">", "_",
|
||||
"|", "_",
|
||||
"'", "_",
|
||||
)
|
||||
s = replacer.Replace(s)
|
||||
s = strings.Join(strings.Fields(s), "_")
|
||||
if len(s) > 120 {
|
||||
s = s[:120]
|
||||
}
|
||||
s = strings.Trim(s, "._-")
|
||||
if s == "" {
|
||||
return "gallery"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// extensionFromURL returns the lowercased file extension (including the dot)
// of the URL's path component, e.g. ".jpg". If the URL cannot be parsed, the
// extension is taken from the raw string instead. Returns "" when there is
// no extension.
func extensionFromURL(raw string) string {
	if u, err := url.Parse(raw); err == nil {
		return strings.ToLower(filepath.Ext(u.Path))
	}
	return strings.ToLower(filepath.Ext(raw))
}
|
||||
|
||||
// exitf prints a formatted message plus a newline to stderr and terminates
// the process with exit status 1. Used for all fatal CLI errors.
func exitf(format string, args ...any) {
	msg := fmt.Sprintf(format, args...)
	fmt.Fprintln(os.Stderr, msg)
	os.Exit(1)
}
|
||||
Reference in New Issue
Block a user