// Provenance: main.go as added by commit
// bf8525fa04c72eade4cbb3ff98b6091659339c93 (sneak, Mon, 7 Apr 2025,
// subject "'cause it splits and hashes, avi."). The same commit also adds
// .gitignore (one entry: hashingsplitter) and go.mod (module
// git.eeqj.de/sneak/hashingsplitter, go 1.23.4).

// sizeMultipliers maps an upper-cased size suffix to the number of bytes one
// unit of that suffix represents.
var sizeMultipliers = map[string]int64{
	"B": 1,
	// Single-letter SI.
	"K": 1000,
	"M": 1000 * 1000,
	"G": 1000 * 1000 * 1000,
	"T": 1000 * 1000 * 1000 * 1000,
	// Two-letter SI.
	"KB": 1000,
	"MB": 1000 * 1000,
	"GB": 1000 * 1000 * 1000,
	"TB": 1000 * 1000 * 1000 * 1000,
	// Binary (IEC).
	"KIB": 1024,
	"MIB": 1024 * 1024,
	"GIB": 1024 * 1024 * 1024,
	"TIB": 1024 * 1024 * 1024 * 1024,
}

// parseByteSize parses a string like "100MB", "1M", "10MiB", or "1024" into a
// byte count.
//
// Supported suffixes (case-insensitive, optional whitespace before them):
//   - Single-letter SI: K=1e3, M=1e6, G=1e9, T=1e12
//   - Two-letter SI:    KB=1e3, MB=1e6, GB=1e9, TB=1e12
//   - Binary suffixes:  KiB=2^10, MiB=2^20, GiB=2^30, TiB=2^40
//   - B, or no suffix:  plain bytes
//
// It returns an error when the string is empty, has no leading decimal digits,
// carries an unknown suffix, or describes a size that does not fit in an int64.
func parseByteSize(s string) (int64, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0, errors.New("empty size string")
	}

	// The numeric portion is the leading run of ASCII digits; whatever
	// follows is the suffix. A sign or decimal point therefore ends up in
	// the suffix (or leaves the numeric portion empty) and is rejected below.
	idx := 0
	for idx < len(s) && s[idx] >= '0' && s[idx] <= '9' {
		idx++
	}
	numStr := s[:idx]
	suffixStr := strings.ToUpper(strings.TrimSpace(s[idx:]))

	baseVal, err := strconv.ParseInt(numStr, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("invalid numeric portion %q: %w", numStr, err)
	}

	// No suffix means the value is already in bytes.
	if suffixStr == "" {
		return baseVal, nil
	}

	factor, found := sizeMultipliers[suffixStr]
	if !found {
		return 0, fmt.Errorf("unrecognized size suffix %q", suffixStr)
	}

	// baseVal is non-negative (digits only), so a single division is enough
	// to detect that the product would wrap around int64.
	if baseVal > math.MaxInt64/factor {
		return 0, fmt.Errorf("size %q overflows int64", s)
	}
	return baseVal * factor, nil
}

// shellQuote wraps s in single quotes for POSIX sh, escaping embedded single
// quotes, so the result is always treated as one literal word.
func shellQuote(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}

// writeChunk creates chunkPath and copies at most chunkSize bytes from r into
// it while hashing the same bytes with SHA-1. It returns the number of bytes
// written and the hex-encoded digest. Reaching the end of r early is not an
// error; the caller detects it from the returned byte count.
func writeChunk(r io.Reader, chunkPath string, chunkSize int64) (int64, string, error) {
	outFile, err := os.Create(chunkPath)
	if err != nil {
		return 0, "", fmt.Errorf("Failed to create file '%s': %w", chunkPath, err)
	}

	// SHA-1 is used so the digests can be verified with "shasum -c".
	hasher := sha1.New()

	// io.CopyN either copies exactly chunkSize bytes or returns a non-nil
	// error (io.EOF when r ran dry), so one call per chunk is sufficient.
	written, copyErr := io.CopyN(io.MultiWriter(outFile, hasher), r, chunkSize)
	closeErr := outFile.Close()

	if copyErr != nil && !errors.Is(copyErr, io.EOF) {
		return written, "", fmt.Errorf("Error reading stdin: %w", copyErr)
	}
	// A failed Close can mean buffered data never reached the disk.
	if closeErr != nil {
		return written, "", fmt.Errorf("Failed to close file '%s': %w", chunkPath, closeErr)
	}
	return written, hex.EncodeToString(hasher.Sum(nil)), nil
}

// writeSumFile writes the single checksum line to shasumPath, replacing any
// existing file.
func writeSumFile(shasumPath, line string) error {
	sumFile, err := os.Create(shasumPath)
	if err != nil {
		return fmt.Errorf("Failed to create shasum file '%s': %w", shasumPath, err)
	}
	_, err = sumFile.WriteString(line)
	if closeErr := sumFile.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		return fmt.Errorf("Failed to write to '%s': %w", shasumPath, err)
	}
	return nil
}

// splitStream reads r until EOF and writes it into dir as consecutive files
// named "<prefix>.00001", "<prefix>.00002", ... of at most chunkSize bytes
// each. Next to every chunk it writes "<chunk>.shasum.txt" holding the
// chunk's SHA-1 digest. It returns the base names of the checksum files in
// the order they were created.
func splitStream(r io.Reader, dir, prefix string, chunkSize int64) ([]string, error) {
	var sumFiles []string

	for chunkIndex := 1; ; chunkIndex++ {
		chunkName := fmt.Sprintf("%s.%05d", prefix, chunkIndex)
		chunkPath := filepath.Join(dir, chunkName)

		written, sumHex, err := writeChunk(r, chunkPath, chunkSize)
		if err != nil {
			return nil, err
		}

		// The input was empty or ended exactly on a chunk boundary: drop
		// the empty file. Removal is best-effort; a leftover empty file
		// does not invalidate the chunks already written.
		if written == 0 {
			_ = os.Remove(chunkPath)
			return sumFiles, nil
		}

		// Checksum line format: digest, two spaces, file name.
		line := fmt.Sprintf("%s  %s\n", sumHex, chunkName)
		shasumPath := chunkPath + ".shasum.txt"
		if err := writeSumFile(shasumPath, line); err != nil {
			return nil, err
		}
		sumFiles = append(sumFiles, filepath.Base(shasumPath))

		// A short chunk means r hit EOF while filling it.
		if written < chunkSize {
			return sumFiles, nil
		}
	}
}

// writeCheckScript creates an executable "check.sh" in dir that verifies
// every listed checksum file with "shasum -c".
func writeCheckScript(dir string, sumFiles []string) error {
	var script strings.Builder
	script.WriteString("#!/bin/sh\n\n")
	// The checksum files name chunks by base name, so verification has to
	// run from the directory that holds them, wherever the script is
	// invoked from.
	script.WriteString("cd -- \"$(dirname -- \"$0\")\" || exit 1\n")
	script.WriteString("echo \"Checking all generated .shasum.txt files...\"\n")

	if len(sumFiles) == 0 {
		// A bare "shasum -c" would block reading standard input.
		script.WriteString("echo \"No chunks were generated; nothing to check.\"\n")
	} else {
		// List every file explicitly (no wildcards) and quote each name
		// so unusual prefixes cannot be reinterpreted by the shell.
		script.WriteString("shasum -c")
		for _, f := range sumFiles {
			script.WriteString(" ")
			script.WriteString(shellQuote(f))
		}
		script.WriteString("\n")
	}

	checkScriptPath := filepath.Join(dir, "check.sh")
	checkScriptFile, err := os.Create(checkScriptPath)
	if err != nil {
		return fmt.Errorf("Failed to create check.sh script: %w", err)
	}
	_, err = checkScriptFile.WriteString(script.String())
	if closeErr := checkScriptFile.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		return fmt.Errorf("Failed to write check.sh: %w", err)
	}

	// Best-effort: the script is still usable via "sh check.sh" if the
	// executable bit cannot be set.
	_ = os.Chmod(checkScriptPath, 0755)
	return nil
}

// main splits standard input into fixed-size chunk files, writes a SHA-1
// checksum file per chunk, and generates a check.sh script that verifies them.
//
// Flags:
//
//	-size    chunk size, parsed by parseByteSize (required, must be > 0)
//	-dir     destination directory for all output files (default ".")
//	-prefix  base name of the chunk files (default "outfile")
//
// Any failure is reported on standard error and the process exits with
// status 1.
func main() {
	sizeStr := flag.String("size", "", "Split size (e.g. 64MiB, 128KB, 1M, 65536)")
	dirFlag := flag.String("dir", ".", "Destination directory for output files")
	prefixFlag := flag.String("prefix", "outfile", "Prefix (base name) for split files")
	flag.Parse()

	chunkSize, err := parseByteSize(*sizeStr)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error parsing -size: %v\n", err)
		os.Exit(1)
	}
	if chunkSize <= 0 {
		fmt.Fprintf(os.Stderr, "Error: parsed size must be a positive integer.\n")
		os.Exit(1)
	}

	sumFiles, err := splitStream(os.Stdin, *dirFlag, *prefixFlag, chunkSize)
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		os.Exit(1)
	}

	if err := writeCheckScript(*dirFlag, sumFiles); err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		os.Exit(1)
	}
}