svi
/
kern


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
							package flate

import (
	"io"
	"math"
	"sync"

	"github.com/klauspost/compress/internal/le"
)

const (
	maxStatelessBlock = math.MaxInt16
	// dictionary will be taken from maxStatelessBlock, so limit it.
	maxStatelessDict = 8 << 10

	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)

type statelessWriter struct {
	dst    io.Writer
	closed bool
}

func (s *statelessWriter) Close() error {
	if s.closed {
		return nil
	}
	s.closed = true
	// Emit EOF block
	return StatelessDeflate(s.dst, nil, true, nil)
}

func (s *statelessWriter) Write(p []byte) (n int, err error) {
	err = StatelessDeflate(s.dst, p, false, nil)
	if err != nil {
		return 0, err
	}
	return len(p), nil
}

func (s *statelessWriter) Reset(w io.Writer) {
	s.dst = w
	s.closed = false
}

// NewStatelessWriter will do compression but without maintaining any state
// between Write calls.
// There will be no memory kept between Write calls,
// but compression and speed will be suboptimal.
// Because of this, the size of actual Write calls will affect output size.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	return &statelessWriter{dst: dst}
}

// bitWriterPool contains bit writers that can be reused.
var bitWriterPool = sync.Pool{
	New: func() any {
		return newHuffmanBitWriter(nil)
	},
}

// tokensPool contains tokens struct objects that can be reused
var tokensPool = sync.Pool{
	New: func() any {
		return &tokens{}
	},
}

// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When returning everything will be flushed.
// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy.
	var inDict []byte

	dst := tokensPool.Get().(*tokens)
	dst.Reset()
	defer func() {
		tokensPool.Put(dst)
	}()

	for len(in) > 0 {
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress
		if len(inDict) == 0 {
			statelessEnc(dst, todo, int16(len(dict)))
		} else {
			statelessEnc(dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If we removed less than 1/16th, huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}

func hashSL(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> slTableShift
}

func load3216(b []byte, i int16) uint32 {
	return le.Load32(b, i)
}

func load6416(b []byte, i int16) uint64 {
	return le.Load64(b, i)
}

func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := range startAt {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}