package blobgen

import (
	"crypto/sha256"
	"fmt"
	"hash"
	"io"
	"runtime"

	"filippo.io/age"
	"github.com/klauspost/compress/zstd"
)

// Writer is an io.Writer that hashes, compresses, and encrypts a stream.
//
// Each Write is fanned out to a SHA256 hasher and a zstd encoder; the encoder
// writes into an age encryption stream, which writes to the destination:
//
//	input -> tee(hasher, compressor -> encryptor -> destination)
//
// The hash therefore covers the uncompressed, unencrypted input.
type Writer struct {
	teeWriter        io.Writer      // fans input out to hasher and compressor
	compressor       *zstd.Encoder  // zstd stage; writes into encryptor
	encryptor        io.WriteCloser // age stage; writes to the destination
	hasher           hash.Hash      // SHA256 over the raw input
	compressionLevel int            // level that was passed to NewWriter
	bytesWritten     int64          // running total of byte counts reported by Write
}

// NewWriter builds a Writer that sends everything written to it through
// hashing, zstd compression, and age encryption before it reaches w.
//
// compressionLevel must pass validateCompressionLevel and is translated with
// zstd.EncoderLevelFromZstd. Every entry in recipients is parsed with
// age.ParseX25519Recipient. An error is returned when the level is rejected,
// when a recipient fails to parse, or when either the encryption stage or the
// compression stage cannot be created.
func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer, error) {
	if err := validateCompressionLevel(compressionLevel); err != nil {
		return nil, err
	}

	// Convert the textual recipients, stopping at the first one that does not
	// parse.
	parsed := make([]age.Recipient, 0, len(recipients))
	for i := range recipients {
		rcpt, err := age.ParseX25519Recipient(recipients[i])
		if err != nil {
			return nil, fmt.Errorf("parsing recipient %s: %w", recipients[i], err)
		}
		parsed = append(parsed, rcpt)
	}

	// The encryption stage is the one that writes to the destination.
	enc, err := age.Encrypt(w, parsed...)
	if err != nil {
		return nil, fmt.Errorf("creating encryption writer: %w", err)
	}

	// Encoder concurrency is the CPU count minus two, floored at one.
	workers := 1
	if spare := runtime.NumCPU() - 2; spare > 1 {
		workers = spare
	}

	// The compression stage writes into the encryption stage.
	zw, err := zstd.NewWriter(
		enc,
		zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)),
		zstd.WithEncoderConcurrency(workers),
	)
	if err != nil {
		// Best-effort cleanup: the construction error is the one reported, so
		// the result of closing the encryption stage is deliberately dropped.
		_ = enc.Close()
		return nil, fmt.Errorf("creating compression writer: %w", err)
	}

	// Input is duplicated to the hasher and the compressor.
	digest := sha256.New()
	return &Writer{
		teeWriter:        io.MultiWriter(digest, zw),
		compressor:       zw,
		encryptor:        enc,
		hasher:           digest,
		compressionLevel: compressionLevel,
	}, nil
}

// Write implements io.Writer. p is handed to the tee, so it is both hashed and
// fed to the compressor. The byte count reported by the tee is added to the
// BytesWritten total whether or not an error is returned alongside it.
func (w *Writer) Write(p []byte) (n int, err error) {
	written, werr := w.teeWriter.Write(p)
	w.bytesWritten += int64(written)
	return written, werr
}

// Close closes the compressor and then the encryptor, in that order, and
// returns the first error encountered wrapped with the name of the failing
// stage. If closing the compressor fails, the encryptor is not closed.
func (w *Writer) Close() error {
	err := w.compressor.Close()
	if err != nil {
		return fmt.Errorf("closing compressor: %w", err)
	}

	err = w.encryptor.Close()
	if err != nil {
		return fmt.Errorf("closing encryptor: %w", err)
	}

	return nil
}

// Sum256 returns SHA256(SHA256(input)), where input is the uncompressed data
// that has been passed to Write so far. The comments in this file describe the
// result as the blob ID.
//
// NOTE(review): the double hash was previously documented as stopping an
// attacker from confirming that known content exists by hashing it and looking
// for a matching blob filename. Anyone who holds the content can apply SHA256
// twice as well, so confirm what the second hash is meant to protect against.
func (w *Writer) Sum256() []byte {
	inner := w.hasher.Sum(nil)      // SHA256 of the plaintext
	outer := sha256.Sum256(inner)   // SHA256 of that digest
	return outer[:]
}

// BytesWritten returns the total of the byte counts reported by Write, i.e.
// the amount of uncompressed input accepted so far.
func (w *Writer) BytesWritten() int64 {
	return w.bytesWritten
}

// validateCompressionLevel returns nil when level is within the inclusive
// range 1 to 19 and a descriptive error otherwise.
func validateCompressionLevel(level int) error {
	if level >= 1 && level <= 19 {
		return nil
	}
	return fmt.Errorf("invalid compression level %d: must be between 1 and 19", level)
}