'cause it splits and hashes, avi.

This commit is contained in:
Jeffrey Paul 2025-04-07 09:14:46 -07:00
parent 14a446f4d9
commit bf8525fa04
3 changed files with 196 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
hashingsplitter

3
go.mod Normal file
View File

@ -0,0 +1,3 @@
module git.eeqj.de/sneak/hashingsplitter
go 1.23.4

192
main.go Normal file
View File

@ -0,0 +1,192 @@
package main
import (
"crypto/sha1"
"encoding/hex"
"errors"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"unicode"
)
// parseByteSize parses a size string such as "100MB", "1M", "10MiB", or "1024"
// into a number of bytes.
//
// The input is a run of decimal digits optionally followed by a unit suffix.
// Leading and trailing whitespace, and whitespace between the number and the
// suffix, are ignored; the suffix is matched case-insensitively.
//
// Supported suffixes:
//   - B: bytes
//   - K, M, G, T: decimal (SI) multiples, 1e3, 1e6, 1e9, 1e12
//   - KB, MB, GB, TB: the same decimal multiples
//   - KiB, MiB, GiB, TiB: binary multiples, 2^10, 2^20, 2^30, 2^40
//
// A missing suffix means bytes. An error is returned when the string is empty,
// when the numeric part is missing or does not fit in an int64, when the
// suffix is not recognized, or when the scaled result would overflow an int64.
func parseByteSize(s string) (int64, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return 0, errors.New("empty size string")
	}

	// Find the first non-digit character (we'll allow digits only).
	// Everything before it is the numeric portion; the remainder is the suffix.
	idx := 0
	for idx < len(s) && unicode.IsDigit(rune(s[idx])) {
		idx++
	}
	numStr := s[:idx]
	suffixStr := strings.ToUpper(strings.TrimSpace(s[idx:]))

	// Parse the numeric portion as an integer. An empty numeric portion
	// (e.g. the input "MB") is rejected here by strconv.
	baseVal, err := strconv.ParseInt(numStr, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("invalid numeric portion %q: %w", numStr, err)
	}

	// Known multipliers, keyed by upper-cased suffix. The empty key covers
	// the "no suffix, it's just bytes" case.
	multipliers := map[string]int64{
		"":  1,
		"B": 1,
		// Single-letter SI:
		"K": 1000,
		"M": 1000 * 1000,
		"G": 1000 * 1000 * 1000,
		"T": 1000 * 1000 * 1000 * 1000,
		// Two-letter SI:
		"KB": 1000,
		"MB": 1000 * 1000,
		"GB": 1000 * 1000 * 1000,
		"TB": 1000 * 1000 * 1000 * 1000,
		// Binary:
		"KIB": 1024,
		"MIB": 1024 * 1024,
		"GIB": 1024 * 1024 * 1024,
		"TIB": 1024 * 1024 * 1024 * 1024,
	}
	factor, found := multipliers[suffixStr]
	if !found {
		return 0, fmt.Errorf("unrecognized size suffix %q", suffixStr)
	}

	// baseVal is never negative (a sign is not a digit) and factor is always
	// positive, so a single division is enough to detect int64 overflow
	// before multiplying.
	const maxInt64 = 1<<63 - 1
	if baseVal > maxInt64/factor {
		return 0, fmt.Errorf("size %q overflows int64", s)
	}
	return baseVal * factor, nil
}
// main reads stdin and splits it into consecutive files of at most -size
// bytes each, named <prefix>.NNNNN inside -dir. Next to every chunk it writes
// a <chunk>.shasum.txt file holding the chunk's SHA-1 digest in the format
// accepted by "shasum -c", and finally it writes a check.sh script in -dir
// that verifies all chunks. The checksum files refer to the chunks by bare
// file name, so check.sh has to be run from inside -dir.
//
// Any failure is reported on stderr and terminates the program with exit
// status 1.
func main() {
	// Command-line flags
	sizeStr := flag.String("size", "", "Split size (e.g. 64MiB, 128KB, 1M, 65536)")
	dirFlag := flag.String("dir", ".", "Destination directory for output files")
	prefixFlag := flag.String("prefix", "outfile", "Prefix (base name) for split files")
	flag.Parse()

	// Parse the size from the -size argument
	chunkSize, err := parseByteSize(*sizeStr)
	if err != nil {
		fatalf("Error parsing -size: %v\n", err)
	}
	if chunkSize <= 0 {
		fatalf("Error: parsed size must be a positive integer.\n")
	}

	// Names of the .shasum.txt files written so far, for check.sh.
	var sumFiles []string

	// Read stdin and split into chunks
	for chunkIndex := 1; ; chunkIndex++ {
		chunkName := fmt.Sprintf("%s.%05d", *prefixFlag, chunkIndex)
		chunkPath := filepath.Join(*dirFlag, chunkName)

		outFile, err := os.Create(chunkPath)
		if err != nil {
			fatalf("Failed to create file '%s': %v\n", chunkPath, err)
		}

		// Use SHA-1 hasher (same default algorithm used by many "shasum" tools)
		// and feed it the same bytes that go to the chunk file.
		hasher := sha1.New()

		// io.CopyN returns a nil error only when exactly chunkSize bytes were
		// copied; a short copy at end of input is reported as io.EOF. Any
		// other error may come from reading stdin or from writing the chunk.
		written, copyErr := io.CopyN(io.MultiWriter(outFile, hasher), os.Stdin, chunkSize)
		closeErr := outFile.Close()
		if copyErr != nil && !errors.Is(copyErr, io.EOF) {
			fatalf("Error copying stdin to '%s': %v\n", chunkPath, copyErr)
		}
		// A failed Close can mean buffered data never reached the disk, so it
		// must not be ignored.
		if closeErr != nil {
			fatalf("Failed to close file '%s': %v\n", chunkPath, closeErr)
		}

		// If empty chunk, remove file and end. This happens when the input
		// is empty or its length is an exact multiple of the chunk size.
		if written == 0 {
			if err := os.Remove(chunkPath); err != nil {
				fmt.Fprintf(os.Stderr, "Warning: could not remove empty file '%s': %v\n", chunkPath, err)
			}
			break
		}

		// Write the SHA-1 sum in "shasum" format: the hex digest, a space, a
		// mode character (space for text mode), then the file name.
		sumHex := hex.EncodeToString(hasher.Sum(nil))
		line := fmt.Sprintf("%s  %s\n", sumHex, chunkName)
		shasumPath := chunkPath + ".shasum.txt"
		// os.WriteFile creates, writes and closes, reporting a failure in any
		// of the three steps. 0o666 matches what os.Create would have used.
		if err := os.WriteFile(shasumPath, []byte(line), 0o666); err != nil {
			fatalf("Failed to write shasum file '%s': %v\n", shasumPath, err)
		}

		// Collect this .shasum.txt filename so we can reference it in check.sh
		sumFiles = append(sumFiles, filepath.Base(shasumPath))

		// If we wrote less than chunkSize, we're at EOF
		if written < chunkSize {
			break
		}
	}

	// Create a "check.sh" script in the destination directory
	checkScriptPath := filepath.Join(*dirFlag, "check.sh")
	var script strings.Builder
	script.WriteString("#!/bin/sh\n\n")
	if len(sumFiles) == 0 {
		// With no file arguments "shasum -c" would read checksums from stdin
		// and block, so emit a script that only reports there is nothing to do.
		script.WriteString("echo \"No chunks were written; nothing to check.\"\n")
	} else {
		script.WriteString("echo \"Checking all generated .shasum.txt files...\"\n")
		// Instead of wildcards, list all .shasum.txt files by name. Each name
		// is quoted because it is derived from the user-supplied -prefix.
		script.WriteString("shasum -c")
		for _, f := range sumFiles {
			script.WriteString(" " + shellQuote(f))
		}
		script.WriteString("\n")
	}
	if err := os.WriteFile(checkScriptPath, []byte(script.String()), 0o666); err != nil {
		fatalf("Failed to write check.sh: %v\n", err)
	}

	// Attempt to make check.sh executable. This is deliberately best-effort:
	// if it fails the script can still be run as "sh check.sh".
	_ = os.Chmod(checkScriptPath, 0755)
}

// fatalf writes a formatted message to stderr and exits with status 1.
func fatalf(format string, args ...any) {
	fmt.Fprintf(os.Stderr, format, args...)
	os.Exit(1)
}

// shellQuote wraps s in single quotes so that a POSIX shell treats it as one
// literal word. A single quote inside s is written as '\'' (close the quoted
// string, emit an escaped quote, reopen the quoted string).
func shellQuote(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}