hashingsplitter/main.go

192 lines
5.8 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package main
import (
"crypto/sha1"
"encoding/hex"
"errors"
"flag"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"unicode"
)
// parseByteSize parses a human-readable size such as "100MB", "1M", "10MiB",
// or "1024" into a byte count.
//
// The input is a run of decimal digits optionally followed by a unit suffix.
// Suffixes are matched case-insensitively, and whitespace around the input or
// between the number and the suffix is ignored.
//
// Supported suffixes:
//   - none or B: bytes
//   - Single-letter SI: K=1e3, M=1e6, G=1e9, T=1e12
//   - Two-letter SI: KB=1e3, MB=1e6, GB=1e9, TB=1e12
//   - Binary: KiB=2^10, MiB=2^20, GiB=2^30, TiB=2^40
//
// An error is returned when the input is empty, does not start with digits,
// uses an unknown suffix, or when the resulting byte count does not fit in an
// int64.
func parseByteSize(s string) (int64, error) {
	const maxInt64 = 1<<63 - 1

	s = strings.TrimSpace(s)
	if s == "" {
		return 0, errors.New("empty size string")
	}

	// Split at the first non-digit byte: everything before it is the numeric
	// portion, everything after it is the suffix.
	idx := 0
	for idx < len(s) && unicode.IsDigit(rune(s[idx])) {
		idx++
	}
	numStr := s[:idx]
	suffixStr := strings.ToUpper(strings.TrimSpace(s[idx:]))

	// A sign is never consumed by the digit scan, so a successfully parsed
	// value is always non-negative.
	baseVal, err := strconv.ParseInt(numStr, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("invalid numeric portion %q: %w", numStr, err)
	}

	var factor int64
	switch suffixStr {
	case "", "B":
		factor = 1
	case "K", "KB":
		factor = 1000
	case "M", "MB":
		factor = 1000 * 1000
	case "G", "GB":
		factor = 1000 * 1000 * 1000
	case "T", "TB":
		factor = 1000 * 1000 * 1000 * 1000
	case "KIB":
		factor = 1 << 10
	case "MIB":
		factor = 1 << 20
	case "GIB":
		factor = 1 << 30
	case "TIB":
		factor = 1 << 40
	default:
		return 0, fmt.Errorf("unrecognized size suffix %q", suffixStr)
	}

	// Reject products that would wrap around instead of silently returning a
	// bogus (possibly negative) size.
	if baseVal > maxInt64/factor {
		return 0, fmt.Errorf("size %q overflows int64", s)
	}
	return baseVal * factor, nil
}
// main reads stdin and splits it into files of at most -size bytes inside
// -dir, named "<prefix>.NNNNN" with a five-digit index starting at 1.
//
// Next to every chunk it writes "<chunk>.shasum.txt" containing the chunk's
// SHA-1 digest in shasum check format. Finally it writes a "check.sh" script
// into -dir that verifies all chunks with `shasum -c`, and tries to mark it
// executable.
//
// Any failure is reported on stderr and ends the process with exit status 1.
func main() {
	// Command-line flags.
	sizeStr := flag.String("size", "", "Split size (e.g. 64MiB, 128KB, 1M, 65536)")
	dirFlag := flag.String("dir", ".", "Destination directory for output files")
	prefixFlag := flag.String("prefix", "outfile", "Prefix (base name) for split files")
	flag.Parse()

	chunkSize, err := parseByteSize(*sizeStr)
	if err != nil {
		fatalf("Error parsing -size: %v\n", err)
	}
	if chunkSize <= 0 {
		fatalf("Error: parsed size must be a positive integer.\n")
	}

	// Names of the generated .shasum.txt files, in chunk order.
	var sumFiles []string

	for chunkIndex := 1; ; chunkIndex++ {
		chunkName := fmt.Sprintf("%s.%05d", *prefixFlag, chunkIndex)
		chunkPath := filepath.Join(*dirFlag, chunkName)

		outFile, err := os.Create(chunkPath)
		if err != nil {
			fatalf("Failed to create file '%s': %v\n", chunkPath, err)
		}

		// Tee the data into the chunk file and the SHA-1 hasher in one pass.
		// io.CopyN returns nil only when exactly chunkSize bytes were copied
		// and io.EOF when stdin ended first, so a single call is sufficient.
		hasher := sha1.New()
		written, copyErr := io.CopyN(io.MultiWriter(outFile, hasher), os.Stdin, chunkSize)
		if copyErr != nil && copyErr != io.EOF {
			fatalf("Error reading stdin: %v\n", copyErr)
		}
		// A failed Close can mean buffered data never reached the disk.
		if err := outFile.Close(); err != nil {
			fatalf("Failed to close file '%s': %v\n", chunkPath, err)
		}

		if written == 0 {
			// Stdin was empty or ended exactly on a chunk boundary: drop the
			// empty placeholder file. Removal is best-effort.
			_ = os.Remove(chunkPath)
			break
		}

		// shasum check format is "<hex digest>  <name>": a separating space,
		// a mode character (space means text mode), then the file name.
		sumHex := hex.EncodeToString(hasher.Sum(nil))
		line := fmt.Sprintf("%s  %s\n", sumHex, chunkName)

		shasumPath := chunkPath + ".shasum.txt"
		sumFile, err := os.Create(shasumPath)
		if err != nil {
			fatalf("Failed to create shasum file '%s': %v\n", shasumPath, err)
		}
		if _, err := sumFile.WriteString(line); err != nil {
			fatalf("Failed to write to '%s': %v\n", shasumPath, err)
		}
		if err := sumFile.Close(); err != nil {
			fatalf("Failed to close '%s': %v\n", shasumPath, err)
		}

		// Remember this .shasum.txt file so check.sh can list it by name.
		sumFiles = append(sumFiles, filepath.Base(shasumPath))

		// A short chunk means stdin is exhausted.
		if written < chunkSize {
			break
		}
	}

	// Build check.sh. The script first changes into its own directory because
	// the checksum files are referenced by name relative to that directory.
	var script strings.Builder
	script.WriteString("#!/bin/sh\n\n")
	script.WriteString("cd \"$(dirname \"$0\")\" || exit 1\n")
	if len(sumFiles) == 0 {
		// A bare `shasum -c` would block waiting on stdin, so emit no check
		// command when nothing was written.
		script.WriteString("echo \"No chunks were written; nothing to check.\"\n")
	} else {
		script.WriteString("echo \"Checking all generated .shasum.txt files...\"\n")
		// Instead of wildcards, list all .shasum.txt files by (quoted) name.
		script.WriteString("shasum -c")
		for _, f := range sumFiles {
			script.WriteString(" " + shellQuote(f))
		}
		script.WriteString("\n")
	}

	checkScriptPath := filepath.Join(*dirFlag, "check.sh")
	checkScriptFile, err := os.Create(checkScriptPath)
	if err != nil {
		fatalf("Failed to create check.sh script: %v\n", err)
	}
	if _, err := checkScriptFile.WriteString(script.String()); err != nil {
		fatalf("Failed to write check.sh: %v\n", err)
	}
	if err := checkScriptFile.Close(); err != nil {
		fatalf("Failed to close check.sh: %v\n", err)
	}

	// Making the script executable is best-effort; it can still be run via
	// `sh check.sh` if this fails.
	_ = os.Chmod(checkScriptPath, 0755)
}

// fatalf writes a formatted message to stderr and exits with status 1.
func fatalf(format string, args ...interface{}) {
	fmt.Fprintf(os.Stderr, format, args...)
	os.Exit(1)
}

// shellQuote wraps s in single quotes for POSIX sh, escaping any embedded
// single quote, so the result is always treated as one literal word.
func shellQuote(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}