2016-11-29 19:26:36 +03:00
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
"io"
)
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
2019-03-27 14:15:23 +03:00
// The dst and src must not overlap. It is valid to pass a nil dst.
2016-11-29 19:26:36 +03:00
func Encode ( dst , src [ ] byte ) [ ] byte {
if n := MaxEncodedLen ( len ( src ) ) ; n < 0 {
panic ( ErrTooLarge )
} else if len ( dst ) < n {
dst = make ( [ ] byte , n )
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary . PutUvarint ( dst , uint64 ( len ( src ) ) )
for len ( src ) > 0 {
p := src
src = nil
if len ( p ) > maxBlockSize {
p , src = p [ : maxBlockSize ] , p [ maxBlockSize : ]
}
2019-03-27 14:15:23 +03:00
if len ( p ) < minNonLiteralBlockSize {
d += emitLiteral ( dst [ d : ] , p )
} else {
d += encodeBlock ( dst [ d : ] , p )
}
2016-11-29 19:26:36 +03:00
}
return dst [ : d ]
}
2019-03-27 14:15:23 +03:00
// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. Its value is 15 (one less than a 16-byte
// register).
//
// On some architectures, this margin lets us implement a fast path for
// emitLiteral, where the copy of short (<= 16 byte) literals can be
// implemented as a single load to and store from a 16-byte register. That
// literal's actual length can be as short as 1 byte, so this can copy up to
// 15 bytes too much, but that's OK as subsequent iterations of the encoding
// loop will fix up the copy overrun, and this inputMargin ensures that we
// don't overrun the dst and src buffers.
const inputMargin = 16 - 1
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// could be encoded with a copy tag. This is the minimum with respect to the
// algorithm used by encodeBlock, not a minimum enforced by the file format.
2016-11-29 19:26:36 +03:00
//
2019-03-27 14:15:23 +03:00
// The encoded output must start with at least a 1 byte literal, as there are
// no previous bytes to copy. A minimal (1 byte) copy after that, generated
// from an emitCopy call in encodeBlock's main loop, would require at least
// another inputMargin bytes, for the reason above: we want any emitLiteral
// calls inside encodeBlock's main loop to use the fast path if possible, which
// requires being able to overrun by inputMargin bytes. Thus,
// minNonLiteralBlockSize equals 1 + 1 + inputMargin, which is 17 bytes.
//
// The C++ code doesn't use this exact threshold, but it could, as discussed at
// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
// optimization. It should not affect the encoded form. This is tested by
// TestSameEncodingAsCppShortCopies.
const minNonLiteralBlockSize = 1 + 1 + inputMargin
2016-11-29 19:26:36 +03:00
// MaxEncodedLen reports an upper bound on the size of the snappy block
// produced by encoding srcLen bytes of input.
//
// The result is negative when srcLen is too large to encode (or is itself
// negative).
func MaxEncodedLen(srcLen int) int {
	// Both the input length and the computed bound must fit in 32 bits.
	const limit = 0xffffffff

	size := uint64(srcLen)
	if size > limit {
		return -1
	}

	// Compressed data can be defined as:
	//    compressed := item* literal*
	//    item       := literal* copy
	//
	// A trailing run of literals expands by at most 62/60: a literal of
	// length 60 costs one tag byte plus one extra length byte.
	//
	// Items are harder to bound. A copy of 4 bytes is only emitted when its
	// offset is < 65536 (a special check in the encoder), so it costs 3 bytes
	// and such an item is no worse than the 62/60 literal blowup.
	//
	// A copy of 5 bytes with a large enough offset costs 5 bytes to encode.
	// The worst case is then a one-byte literal followed by a five-byte
	// copy: 6 bytes of input become 7 bytes of "compressed" output.
	//
	// That 7/6 factor dominates, giving the estimate below.
	bound := size + size/6 + 32
	if bound <= limit {
		return int(bound)
	}
	return -1
}
// errClosed is stored as a Writer's sticky error by Close when no earlier
// error occurred, so that later Write and Flush calls on that Writer fail
// with it.
var errClosed = errors . New ( "snappy: Writer is closed" )
// NewWriter creates a Writer that compresses everything written to it and
// forwards the result to w.
//
// The returned Writer performs no input buffering, so there is no need to
// Flush or Close it.
//
// Deprecated: the Writer returned is not suitable for many small writes, only
// for few large writes. Use NewBufferedWriter instead, which is efficient
// regardless of the frequency and shape of the writes, and remember to Close
// that Writer when done.
func NewWriter(w io.Writer) *Writer {
	// ibuf is deliberately left nil: that is what marks the Writer as
	// unbuffered.
	out := new(Writer)
	out.w = w
	out.obuf = make([]byte, obufLen)
	return out
}
// NewBufferedWriter creates a Writer that compresses to w using the framing
// format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
//
// The returned Writer buffers its input. Callers must Close it to be sure
// that every byte has reached the underlying io.Writer, and may call Flush
// any number of times before that.
func NewBufferedWriter(w io.Writer) *Writer {
	out := new(Writer)
	out.w = w
	// A non-nil, empty ibuf with room for one block switches on buffering.
	out.ibuf = make([]byte, 0, maxBlockSize)
	out.obuf = make([]byte, obufLen)
	return out
}
2019-03-27 14:15:23 +03:00
// Writer is an io.Writer that can write Snappy-compressed bytes.
2016-11-29 19:26:36 +03:00
type Writer struct {
// w is the underlying io.Writer that receives the framed output.
w io . Writer
// err is the sticky error state. Once it is non-nil, Write and Flush
// return it without doing further work. It is set when a write to w
// fails, and set to errClosed by a Close that found no earlier error.
err error
// ibuf is a buffer for the incoming (uncompressed) bytes.
//
// Its use is optional. For backwards compatibility, Writers created by the
// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
// therefore do not need to be Flush'ed or Close'd. Writers created by
// NewBufferedWriter have an ibuf with capacity maxBlockSize.
ibuf [ ] byte
// obuf is a buffer for the outgoing (compressed) bytes. It also holds
// the stream header and the per-chunk header that precede the chunk body.
obuf [ ] byte
// wroteStreamHeader is whether we have written the stream header.
wroteStreamHeader bool
}
// Reset drops all of the Writer's state and redirects its output to writer,
// so that an existing Writer can be reused instead of allocating a new one.
//
// Any pending error is cleared, buffered input is discarded without being
// written, and the stream header will be written again on the next write.
func (w *Writer) Reset(writer io.Writer) {
	w.w, w.err, w.wroteStreamHeader = writer, nil, false
	// Keep a nil ibuf nil so an unbuffered Writer stays unbuffered.
	if w.ibuf != nil {
		w.ibuf = w.ibuf[:0]
	}
}
// Write satisfies the io.Writer interface.
//
// On an unbuffered Writer (ibuf == nil) p is compressed and forwarded
// immediately. On a buffered Writer p is accumulated in ibuf, which is
// flushed whenever it fills up. nRet is the number of bytes of p consumed.
func (w *Writer) Write(p []byte) (nRet int, errRet error) {
	if w.ibuf == nil {
		// Do not buffer incoming bytes. This does not perform or compress
		// well if the caller of Writer.Write writes many small slices. This
		// behavior is therefore deprecated, but still supported for
		// backwards compatibility with code that doesn't explicitly Flush or
		// Close.
		return w.write(p)
	}

	// The remainder of this method is based on bufio.Writer.Write from the
	// standard library.
	for w.err == nil {
		free := cap(w.ibuf) - len(w.ibuf)
		if len(p) <= free {
			// Whatever is left of p fits in the buffer.
			break
		}
		var consumed int
		if len(w.ibuf) > 0 {
			// Top up the buffer, then flush it as one unit. Any failure is
			// recorded in w.err and ends the loop.
			consumed = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
			w.ibuf = w.ibuf[:len(w.ibuf)+consumed]
			w.Flush()
		} else {
			// Large write, empty buffer: write directly from p to avoid a
			// copy. Any failure is recorded in w.err and ends the loop.
			consumed, _ = w.write(p)
		}
		nRet += consumed
		p = p[consumed:]
	}
	if w.err != nil {
		return nRet, w.err
	}

	// Stash the tail of p for a later Flush or Close.
	tail := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
	w.ibuf = w.ibuf[:len(w.ibuf)+tail]
	return nRet + tail, nil
}
// write compresses p and sends it to the underlying writer as a sequence of
// framed chunks, each covering at most maxBlockSize bytes of input. The
// stream header is sent ahead of the first chunk.
//
// nRet counts the input bytes whose chunks were fully written. The first
// failure of the underlying writer is stored in w.err and returned; if w.err
// is already set, write returns it without doing anything.
func (w *Writer) write(p []byte) (nRet int, errRet error) {
	if w.err != nil {
		return 0, w.err
	}
	for len(p) > 0 {
		// obuf is laid out as: stream header, chunk header, chunk body.
		// The stream header is only included in the very first write.
		start := len(magicChunk)
		if !w.wroteStreamHeader {
			w.wroteStreamHeader = true
			copy(w.obuf, magicChunk)
			start = 0
		}

		// Peel off the next block of input.
		block := p
		p = nil
		if len(block) > maxBlockSize {
			block, p = block[:maxBlockSize], block[maxBlockSize:]
		}
		checksum := crc(block)

		// Compress the buffer, discarding the result if the improvement
		// isn't at least 12.5%.
		compressed := Encode(w.obuf[obufHeaderLen:], block)
		chunkType := uint8(chunkTypeCompressedData)
		chunkLen := 4 + len(compressed)
		end := obufHeaderLen + len(compressed)
		storeRaw := len(compressed) >= len(block)-len(block)/8
		if storeRaw {
			// Only the headers come from obuf; the body is sent straight
			// from block below.
			chunkType = chunkTypeUncompressedData
			chunkLen = 4 + len(block)
			end = obufHeaderLen
		}

		// Fill in the per-chunk header that comes before the body: the
		// chunk type, the chunk length in 3 bytes and the checksum in 4
		// bytes, least significant byte first.
		hdr := len(magicChunk)
		w.obuf[hdr+0] = chunkType
		w.obuf[hdr+1] = uint8(chunkLen >> 0)
		w.obuf[hdr+2] = uint8(chunkLen >> 8)
		w.obuf[hdr+3] = uint8(chunkLen >> 16)
		w.obuf[hdr+4] = uint8(checksum >> 0)
		w.obuf[hdr+5] = uint8(checksum >> 8)
		w.obuf[hdr+6] = uint8(checksum >> 16)
		w.obuf[hdr+7] = uint8(checksum >> 24)

		_, err := w.w.Write(w.obuf[start:end])
		if err == nil && storeRaw {
			_, err = w.w.Write(block)
		}
		if err != nil {
			w.err = err
			return nRet, err
		}
		nRet += len(block)
	}
	return nRet, nil
}
// Flush writes any buffered input to the underlying io.Writer and empties
// the buffer. It returns the Writer's sticky error, if any.
func (w *Writer) Flush() error {
	if w.err == nil && len(w.ibuf) > 0 {
		// A failure here is recorded in w.err, which is returned below.
		w.write(w.ibuf)
		w.ibuf = w.ibuf[:0]
	}
	return w.err
}
// Close calls Flush and then closes the Writer.
//
// It returns the error left behind by Flush, if any. Otherwise the Writer is
// marked closed so that later operations fail with errClosed.
func (w *Writer) Close() error {
	w.Flush()
	if w.err != nil {
		return w.err
	}
	w.err = errClosed
	return nil
}