From 13818ae6cd4c5fb891cc7122f15905831bf8eece Mon Sep 17 00:00:00 2001 From: nite Date: Tue, 12 May 2026 01:30:35 +1000 Subject: [PATCH] first commit --- .gitignore | 1 + README.md | 115 ++++++++++++++++++ go.mod | 3 + main.go | 340 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 459 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 go.mod create mode 100644 main.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..361ae0f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +downloads/ \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..0d84dd6 --- /dev/null +++ b/README.md @@ -0,0 +1,115 @@ +# hitomi-downloader + +A small Go command-line downloader for `hitomi.moe` galleries. + +The tool fetches gallery metadata from `https://hitomi.moe/reader/info/`, +writes it to `info.json`, and downloads all gallery images with concurrent +workers. + +## Requirements + +- Go 1.26 or newer +- Network access to `hitomi.moe` + +## Build + +```sh +go build -o hitomi-downloader . +``` + +## Usage + +Download by gallery URL: + +```sh +go run . -url https://hitomi.moe/g/123456 +``` + +Download by gallery ID: + +```sh +go run . -url 123456 +``` + +The gallery identifier can also be provided as a positional argument: + +```sh +go run . 123456 +``` + +After building: + +```sh +./hitomi-downloader -url 123456 -out downloads +``` + +## Options + +```text +-url string + Hitomi gallery URL or ID. + +-out string + Output directory. Defaults to "downloads". + +-workers int + Number of concurrent download workers. Defaults to a value based on CPU + count, with a minimum of 2 and a maximum of 8. + +-timeout int + Per-request timeout in seconds. Defaults to 30. + +-skip-existing bool + Skip existing non-empty files. Defaults to true. + Use -skip-existing=false to force re-downloads. + +-metadata-only bool + Fetch metadata and write info.json without downloading images. 
+```
+
+## Output
+
+Files are written under:
+
+```text
+<out>/<gallery-id>_<sanitized-title>/
+```
+
+Each gallery directory contains:
+
+```text
+info.json
+001.<ext>
+002.<ext>
+003.<ext>
+...
+```
+
+Images are first downloaded to a temporary `.part` file and then renamed into
+place when the download completes successfully.
+
+## Examples
+
+Fetch metadata only:
+
+```sh
+go run . -url 123456 -metadata-only
+```
+
+Use more workers and a longer timeout:
+
+```sh
+go run . -url 123456 -workers 12 -timeout 60
+```
+
+Force re-downloading existing files:
+
+```sh
+go run . -url 123456 -skip-existing=false
+```
+
+## Notes
+
+Use this tool responsibly and follow the terms and rules of the site you are
+accessing. Very high worker counts can increase load on the remote server and
+may make downloads less reliable.
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..6643eda
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module hitomi-downloader
+
+go 1.26.0
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..a998910
--- /dev/null
+++ b/main.go
@@ -0,0 +1,340 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"regexp"
+	"runtime"
+	"sort"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// galleryResponse mirrors the JSON document returned by
+// https://hitomi.moe/reader/info/<id>.
+type galleryResponse struct {
+	ID       int64          `json:"id"`
+	NHID     string         `json:"nh_id"`
+	Source   string         `json:"source"`
+	MediaID  string         `json:"media_id"`
+	TitleEn  string         `json:"title_en"`
+	TitleJp  string         `json:"title_jp"`
+	Title    string         `json:"title"`
+	NumPages int            `json:"num_pages"`
+	Images   []galleryImage `json:"images"`
+}
+
+// galleryImage describes one page image in the gallery metadata.
+type galleryImage struct {
+	ID        int64  `json:"id"`
+	Type      string `json:"type"`
+	Extension string `json:"extension"`
+	Width     int    `json:"width"`
+	Height    int    `json:"height"`
+	URL       string `json:"url"`
+}
+
+// job is one unit of work for a download worker: a 1-based page index
+// plus the image metadata for that page.
+type job struct {
+	index int
+	image galleryImage
+}
+
+// result reports the outcome of one download job.
+type result struct {
+	index int
+	path  string
+	err   error
+}
+
+func main() {
+	var (
+		galleryArg   string
+		outputDir    string
+		concurrency  int
+		timeoutSec   int
+		skipExisting bool
+		metadataOnly bool
+	)
+
+	flag.StringVar(&galleryArg, "url", "", "Hitomi gallery URL or ID")
+	flag.StringVar(&outputDir, "out", "downloads", "Output directory")
+	flag.IntVar(&concurrency, "workers", min(8, max(2, runtime.NumCPU())), "Concurrent download workers")
+	flag.IntVar(&timeoutSec, "timeout", 30, "Per-request timeout in seconds")
+	flag.BoolVar(&skipExisting, "skip-existing", true, "Skip files that already exist with non-zero size")
+	flag.BoolVar(&metadataOnly, "metadata-only", false, "Only fetch gallery metadata and write info.json")
+	flag.Parse()
+
+	// The gallery identifier may also be given as a positional argument.
+	if galleryArg == "" && flag.NArg() > 0 {
+		galleryArg = flag.Arg(0)
+	}
+	if galleryArg == "" {
+		exitf("usage: hitomi-downloader -url https://hitomi.moe/g/123456 [-out downloads]")
+	}
+	if concurrency < 1 {
+		exitf("workers must be >= 1")
+	}
+	if timeoutSec < 1 {
+		exitf("timeout must be >= 1")
+	}
+
+	galleryID, err := normalizeGalleryID(galleryArg)
+	if err != nil {
+		exitf("invalid gallery identifier: %v", err)
+	}
+
+	client := &http.Client{Timeout: time.Duration(timeoutSec) * time.Second}
+
+	// Cancel in-flight requests cleanly on Ctrl-C instead of dying
+	// mid-download; partially written files stay behind as .part only.
+	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
+	defer stop()
+
+	gallery, err := fetchGalleryInfo(ctx, client, galleryID)
+	if err != nil {
+		exitf("fetch gallery info failed: %v", err)
+	}
+	if len(gallery.Images) == 0 {
+		exitf("gallery %s returned no images", galleryID)
+	}
+
+	dirName := fmt.Sprintf("%s_%s", galleryID, sanitizeFilename(preferredTitle(gallery)))
+	root := filepath.Join(outputDir, dirName)
+	if err := os.MkdirAll(root, 0o755); err != nil {
+		exitf("create output dir failed: %v", err)
+	}
+
+	// Metadata is written before any image so an interrupted run still
+	// keeps info.json.
+	if err := writeMetadata(root, gallery); err != nil {
+		exitf("write metadata failed: %v", err)
+	}
+
+	fmt.Printf("gallery: %s\npages: %d\noutput: %s\n", preferredTitle(gallery), len(gallery.Images), root)
+	if metadataOnly {
+		return
+	}
+
+	if err := downloadAll(ctx, client, root, gallery.Images, concurrency, skipExisting); err != nil {
+		exitf("download failed: %v", err)
+	}
+	fmt.Println("done")
+}
+
+// fetchGalleryInfo fetches https://hitomi.moe/reader/info/<id> and decodes
+// the JSON response into a galleryResponse.
+func fetchGalleryInfo(ctx context.Context, client *http.Client, galleryID string) (*galleryResponse, error) {
+	endpoint := fmt.Sprintf("https://hitomi.moe/reader/info/%s", galleryID)
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	// Browser-like User-Agent; some hosts reject default Go clients.
+	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
+	req.Header.Set("Accept", "application/json")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		// Include a bounded slice of the body to aid debugging.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
+		return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	var gallery galleryResponse
+	if err := json.NewDecoder(resp.Body).Decode(&gallery); err != nil {
+		return nil, err
+	}
+	return &gallery, nil
+}
+
+// writeMetadata writes the gallery metadata as indented JSON to
+// <root>/info.json.
+func writeMetadata(root string, gallery *galleryResponse) error {
+	data, err := json.MarshalIndent(gallery, "", " ")
+	if err != nil {
+		return err
+	}
+	return os.WriteFile(filepath.Join(root, "info.json"), data, 0o644)
+}
+
+// downloadAll downloads every image with a bounded pool of workers. It
+// returns nil when all pages succeed, otherwise an error listing the
+// failed pages in page order.
+func downloadAll(ctx context.Context, client *http.Client, root string, images []galleryImage, workers int, skipExisting bool) error {
+	jobs := make(chan job)
+	results := make(chan result)
+
+	var wg sync.WaitGroup
+	for range workers {
+		wg.Go(func() {
+			for j := range jobs {
+				path, err := downloadOne(ctx, client, root, j.index, j.image, skipExisting)
+				results <- result{index: j.index, path: path, err: err}
+			}
+		})
+	}
+
+	// Feed jobs, then close results once every worker has drained.
+	go func() {
+		for i, image := range images {
+			jobs <- job{index: i + 1, image: image}
+		}
+		close(jobs)
+		wg.Wait()
+		close(results)
+	}()
+
+	var failures []result
+	var completed atomic.Int64
+	total := int64(len(images))
+
+	for res := range results {
+		if res.err != nil {
+			failures = append(failures, res)
+			fmt.Printf("[ERR] %03d %v\n", res.index, res.err)
+			continue
+		}
+		done := completed.Add(1)
+		fmt.Printf("[OK ] %03d %s (%d/%d)\n", res.index, filepath.Base(res.path), done, total)
+	}
+
+	if len(failures) > 0 {
+		// Sort numerically by page index; sorting the formatted strings
+		// would order "page 10" before "page 2".
+		sort.Slice(failures, func(i, j int) bool { return failures[i].index < failures[j].index })
+		msgs := make([]string, 0, len(failures))
+		for _, f := range failures {
+			msgs = append(msgs, fmt.Sprintf("page %d: %v", f.index, f.err))
+		}
+		return errors.New(strings.Join(msgs, "; "))
+	}
+	return nil
+}
+
+// downloadOne downloads a single page to <root>/NNN<ext>, streaming into a
+// .part temp file and renaming into place on success. When skipExisting is
+// true, an existing non-empty target is returned untouched.
+func downloadOne(ctx context.Context, client *http.Client, root string, page int, image galleryImage, skipExisting bool) (string, error) {
+	if image.URL == "" {
+		return "", errors.New("empty image url")
+	}
+	ext := extensionFromURL(image.URL)
+	if ext == "" {
+		ext = ".bin"
+	}
+	filename := fmt.Sprintf("%03d%s", page, ext)
+	target := filepath.Join(root, filename)
+
+	if skipExisting {
+		if info, err := os.Stat(target); err == nil && info.Size() > 0 {
+			return target, nil
+		}
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, image.URL, nil)
+	if err != nil {
+		return "", err
+	}
+	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36")
+	req.Header.Set("Referer", "https://hitomi.moe/")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
+		return "", fmt.Errorf("status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
+	}
+
+	// Write to a temp file so an interrupted download never leaves a
+	// truncated file at the final path.
+	tmp := target + ".part"
+	file, err := os.Create(tmp)
+	if err != nil {
+		return "", err
+	}
+
+	_, copyErr := io.Copy(file, resp.Body)
+	closeErr := file.Close()
+	if copyErr != nil {
+		_ = os.Remove(tmp)
+		return "", copyErr
+	}
+	if closeErr != nil {
+		_ = os.Remove(tmp)
+		return "", closeErr
+	}
+	if err := os.Rename(tmp, target); err != nil {
+		_ = os.Remove(tmp)
+		return "", err
+	}
+	return target, nil
+}
+
+// preferredTitle returns the first non-empty title, preferring English,
+// then the generic title, then Japanese, then nh_id, falling back to
+// "gallery".
+func preferredTitle(g *galleryResponse) string {
+	for _, s := range []string{g.TitleEn, g.Title, g.TitleJp, g.NHID} {
+		s = strings.TrimSpace(s)
+		if s != "" {
+			return s
+		}
+	}
+	return "gallery"
+}
+
+// galleryPathRe extracts the numeric gallery ID from a /g/<id> URL path.
+// Compiled once at package scope rather than on every call.
+var galleryPathRe = regexp.MustCompile(`/g/(\d+)`)
+
+// normalizeGalleryID accepts either a bare numeric ID or a gallery URL
+// containing /g/<id> and returns the numeric ID.
+func normalizeGalleryID(input string) (string, error) {
+	input = strings.TrimSpace(input)
+	if input == "" {
+		return "", errors.New("empty input")
+	}
+	if digitsOnly(input) {
+		return input, nil
+	}
+	u, err := url.Parse(input)
+	if err != nil {
+		return "", err
+	}
+	match := galleryPathRe.FindStringSubmatch(u.Path)
+	if len(match) != 2 {
+		return "", fmt.Errorf("could not extract /g/ from %q", input)
+	}
+	return match[1], nil
+}
+
+// digitsOnly reports whether s is non-empty and consists solely of ASCII
+// digits.
+func digitsOnly(s string) bool {
+	if s == "" {
+		return false
+	}
+	for _, r := range s {
+		if r < '0' || r > '9' {
+			return false
+		}
+	}
+	return true
+}
+
+// sanitizeFilename lowercases s, replaces filesystem-hostile characters,
+// collapses whitespace to underscores, and caps the result at 120 bytes
+// without splitting a UTF-8 rune. Returns "gallery" if nothing survives.
+func sanitizeFilename(s string) string {
+	s = strings.TrimSpace(strings.ToLower(s))
+	replacer := strings.NewReplacer(
+		"\\", "_",
+		"/", "_",
+		":", "_",
+		"*", "_",
+		"?", "_",
+		"\"", "_",
+		"<", "_",
+		">", "_",
+		"|", "_",
+		"'", "_",
+	)
+	s = replacer.Replace(s)
+	s = strings.Join(strings.Fields(s), "_")
+	if len(s) > 120 {
+		// Back up past UTF-8 continuation bytes (0b10xxxxxx) so a
+		// multi-byte rune (common in Japanese titles) is never split,
+		// which would leave invalid UTF-8 in the directory name.
+		cut := 120
+		for cut > 0 && s[cut]&0xC0 == 0x80 {
+			cut--
+		}
+		s = s[:cut]
+	}
+	s = strings.Trim(s, "._-")
+	if s == "" {
+		return "gallery"
+	}
+	return s
+}
+
+// extensionFromURL returns the lowercased file extension of the URL path
+// (falling back to the raw string when the URL does not parse).
+func extensionFromURL(raw string) string {
+	u, err := url.Parse(raw)
+	if err != nil {
+		return strings.ToLower(filepath.Ext(raw))
+	}
+	return strings.ToLower(filepath.Ext(u.Path))
+}
+
+// exitf prints a formatted message to stderr and exits with status 1.
+func exitf(format string, args ...any) {
+	fmt.Fprintf(os.Stderr, format+"\n", args...)
+	os.Exit(1)
+}