reader.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. /*
  5. Package zlib implements reading and writing of zlib format compressed data,
  6. as specified in RFC 1950.
  7. The implementation provides filters that uncompress during reading
  8. and compress during writing. For example, to write compressed data
  9. to a buffer:
  10. var b bytes.Buffer
  11. w := zlib.NewWriter(&b)
  12. w.Write([]byte("hello, world\n"))
  13. w.Close()
  14. and to read that data back:
  15. r, err := zlib.NewReader(&b)
  16. io.Copy(os.Stdout, r)
  17. r.Close()
  18. */
  19. package zlib
  20. import (
  21. "bufio"
  22. "compress/zlib"
  23. "encoding/binary"
  24. "hash"
  25. "hash/adler32"
  26. "io"
  27. "github.com/klauspost/compress/flate"
  28. )
  // zlibDeflate is the only compression method accepted in the low nibble of
  // the first header byte.
  const zlibDeflate = 8

  // zlibMaxWindow is the largest value accepted in the high nibble of the
  // first header byte.
  const zlibMaxWindow = 7
  // ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
  // It is the same value as the standard library's zlib.ErrChecksum.
  var ErrChecksum = zlib.ErrChecksum

  // ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
  // It is the same value as the standard library's zlib.ErrDictionary.
  var ErrDictionary = zlib.ErrDictionary

  // ErrHeader is returned when reading ZLIB data that has an invalid header.
  // It is the same value as the standard library's zlib.ErrHeader.
  var ErrHeader = zlib.ErrHeader
  41. type reader struct {
  42. r flate.Reader
  43. decompressor io.ReadCloser
  44. digest hash.Hash32
  45. err error
  46. scratch [4]byte
  47. }
  // Resetter is implemented by the ReadClosers that [NewReader] and
  // [NewReaderDict] return. It lets a caller point an existing ReadCloser at a
  // new underlying Reader, so one can be reused rather than allocated afresh.
  type Resetter interface {
  	// Reset drops any buffered data and leaves the Resetter in the state it
  	// would have if it had just been created for r with the given dictionary.
  	Reset(r io.Reader, dict []byte) error
  }
  56. // NewReader creates a new ReadCloser.
  57. // Reads from the returned ReadCloser read and decompress data from r.
  58. // If r does not implement [io.ByteReader], the decompressor may read more
  59. // data than necessary from r.
  60. // It is the caller's responsibility to call Close on the ReadCloser when done.
  61. //
  62. // The [io.ReadCloser] returned by NewReader also implements [Resetter].
  63. func NewReader(r io.Reader) (io.ReadCloser, error) {
  64. return NewReaderDict(r, nil)
  65. }
  66. // NewReaderDict is like [NewReader] but uses a preset dictionary.
  67. // NewReaderDict ignores the dictionary if the compressed data does not refer to it.
  68. // If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary].
  69. //
  70. // The ReadCloser returned by NewReaderDict also implements [Resetter].
  71. func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
  72. z := new(reader)
  73. err := z.Reset(r, dict)
  74. if err != nil {
  75. return nil, err
  76. }
  77. return z, nil
  78. }
  79. func (z *reader) Read(p []byte) (int, error) {
  80. if z.err != nil {
  81. return 0, z.err
  82. }
  83. var n int
  84. n, z.err = z.decompressor.Read(p)
  85. z.digest.Write(p[0:n])
  86. if z.err != io.EOF {
  87. // In the normal case we return here.
  88. return n, z.err
  89. }
  90. // Finished file; check checksum.
  91. if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
  92. if err == io.EOF {
  93. err = io.ErrUnexpectedEOF
  94. }
  95. z.err = err
  96. return n, z.err
  97. }
  98. // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
  99. checksum := binary.BigEndian.Uint32(z.scratch[:4])
  100. if checksum != z.digest.Sum32() {
  101. z.err = ErrChecksum
  102. return n, z.err
  103. }
  104. return n, io.EOF
  105. }
  106. // Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader].
  107. // In order for the ZLIB checksum to be verified, the reader must be
  108. // fully consumed until the [io.EOF].
  109. func (z *reader) Close() error {
  110. if z.err != nil && z.err != io.EOF {
  111. return z.err
  112. }
  113. z.err = z.decompressor.Close()
  114. return z.err
  115. }
  116. func (z *reader) Reset(r io.Reader, dict []byte) error {
  117. *z = reader{decompressor: z.decompressor}
  118. if fr, ok := r.(flate.Reader); ok {
  119. z.r = fr
  120. } else {
  121. z.r = bufio.NewReader(r)
  122. }
  123. // Read the header (RFC 1950 section 2.2.).
  124. _, z.err = io.ReadFull(z.r, z.scratch[0:2])
  125. if z.err != nil {
  126. if z.err == io.EOF {
  127. z.err = io.ErrUnexpectedEOF
  128. }
  129. return z.err
  130. }
  131. h := binary.BigEndian.Uint16(z.scratch[:2])
  132. if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) {
  133. z.err = ErrHeader
  134. return z.err
  135. }
  136. haveDict := z.scratch[1]&0x20 != 0
  137. if haveDict {
  138. _, z.err = io.ReadFull(z.r, z.scratch[0:4])
  139. if z.err != nil {
  140. if z.err == io.EOF {
  141. z.err = io.ErrUnexpectedEOF
  142. }
  143. return z.err
  144. }
  145. checksum := binary.BigEndian.Uint32(z.scratch[:4])
  146. if checksum != adler32.Checksum(dict) {
  147. z.err = ErrDictionary
  148. return z.err
  149. }
  150. }
  151. if z.decompressor == nil {
  152. if haveDict {
  153. z.decompressor = flate.NewReaderDict(z.r, dict)
  154. } else {
  155. z.decompressor = flate.NewReader(z.r)
  156. }
  157. } else {
  158. z.decompressor.(flate.Resetter).Reset(z.r, dict)
  159. }
  160. if z.digest != nil {
  161. z.digest.Reset()
  162. } else {
  163. z.digest = adler32.New()
  164. }
  165. return nil
  166. }