inflate.go 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package flate implements the DEFLATE compressed data format, described in
  5. // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
  6. // formats.
  7. package flate
  8. import (
  9. "bufio"
  10. "compress/flate"
  11. "fmt"
  12. "io"
  13. "math/bits"
  14. "sync"
  15. )
  16. const (
  17. maxCodeLen = 16 // max length of Huffman code
  18. maxCodeLenMask = 15 // mask for max length of Huffman code
  19. // The next three numbers come from the RFC section 3.2.7, with the
  20. // additional proviso in section 3.2.5 which implies that distance codes
  21. // 30 and 31 should never occur in compressed data.
  22. maxNumLit = 286
  23. maxNumDist = 30
  24. numCodes = 19 // number of codes in Huffman meta-code
  25. debugDecode = false
  26. )
  27. // Value of length - 3 and extra bits.
  28. type lengthExtra struct {
  29. length, extra uint8
  30. }
  31. var decCodeToLen = [32]lengthExtra{{length: 0x0, extra: 0x0}, {length: 0x1, extra: 0x0}, {length: 0x2, extra: 0x0}, {length: 0x3, extra: 0x0}, {length: 0x4, extra: 0x0}, {length: 0x5, extra: 0x0}, {length: 0x6, extra: 0x0}, {length: 0x7, extra: 0x0}, {length: 0x8, extra: 0x1}, {length: 0xa, extra: 0x1}, {length: 0xc, extra: 0x1}, {length: 0xe, extra: 0x1}, {length: 0x10, extra: 0x2}, {length: 0x14, extra: 0x2}, {length: 0x18, extra: 0x2}, {length: 0x1c, extra: 0x2}, {length: 0x20, extra: 0x3}, {length: 0x28, extra: 0x3}, {length: 0x30, extra: 0x3}, {length: 0x38, extra: 0x3}, {length: 0x40, extra: 0x4}, {length: 0x50, extra: 0x4}, {length: 0x60, extra: 0x4}, {length: 0x70, extra: 0x4}, {length: 0x80, extra: 0x5}, {length: 0xa0, extra: 0x5}, {length: 0xc0, extra: 0x5}, {length: 0xe0, extra: 0x5}, {length: 0xff, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}, {length: 0x0, extra: 0x0}}
  32. var bitMask32 = [32]uint32{
  33. 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
  34. 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
  35. 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
  36. 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
  37. } // up to 32 bits
  38. // Initialize the fixedHuffmanDecoder only once upon first use.
  39. var fixedOnce sync.Once
  40. var fixedHuffmanDecoder huffmanDecoder
  41. // A CorruptInputError reports the presence of corrupt input at a given offset.
  42. type CorruptInputError = flate.CorruptInputError
  43. // An InternalError reports an error in the flate code itself.
  44. type InternalError string
  45. func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
  46. // A ReadError reports an error encountered while reading input.
  47. //
  48. // Deprecated: No longer returned.
  49. type ReadError = flate.ReadError
  50. // A WriteError reports an error encountered while writing output.
  51. //
  52. // Deprecated: No longer returned.
  53. type WriteError = flate.WriteError
  54. // Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
  55. // to switch to a new underlying Reader. This permits reusing a ReadCloser
  56. // instead of allocating a new one.
  57. type Resetter interface {
  58. // Reset discards any buffered data and resets the Resetter as if it was
  59. // newly initialized with the given reader.
  60. Reset(r io.Reader, dict []byte) error
  61. }
  62. // The data structure for decoding Huffman tables is based on that of
  63. // zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
  64. // For codes smaller than the table width, there are multiple entries
  65. // (each combination of trailing bits has the same value). For codes
  66. // larger than the table width, the table contains a link to an overflow
  67. // table. The width of each entry in the link table is the maximum code
  68. // size minus the chunk width.
  69. //
  70. // Note that you can do a lookup in the table even without all bits
  71. // filled. Since the extra bits are zero, and the DEFLATE Huffman codes
  72. // have the property that shorter codes come before longer ones, the
  73. // bit length estimate in the result is a lower bound on the actual
  74. // number of bits.
  75. //
  76. // See the following:
  77. // http://www.gzip.org/algorithm.txt
  78. // chunk & 15 is number of bits
  79. // chunk >> 4 is value, including table link
  80. const (
  81. huffmanChunkBits = 9
  82. huffmanNumChunks = 1 << huffmanChunkBits
  83. huffmanCountMask = 15
  84. huffmanValueShift = 4
  85. )
  86. type huffmanDecoder struct {
  87. maxRead int // the maximum number of bits we can read and not overread
  88. chunks *[huffmanNumChunks]uint16 // chunks as described above
  89. links [][]uint16 // overflow links
  90. linkMask uint32 // mask the width of the link table
  91. }
  92. // Initialize Huffman decoding tables from array of code lengths.
  93. // Following this function, h is guaranteed to be initialized into a complete
  94. // tree (i.e., neither over-subscribed nor under-subscribed). The exception is a
  95. // degenerate case where the tree has only a single symbol with length 1. Empty
  96. // trees are permitted.
  97. func (h *huffmanDecoder) init(lengths []int) bool {
  98. // Sanity enables additional runtime tests during Huffman
  99. // table construction. It's intended to be used during
  100. // development to supplement the currently ad-hoc unit tests.
  101. const sanity = false
  102. if h.chunks == nil {
  103. h.chunks = new([huffmanNumChunks]uint16)
  104. }
  105. if h.maxRead != 0 {
  106. *h = huffmanDecoder{chunks: h.chunks, links: h.links}
  107. }
  108. // Count number of codes of each length,
  109. // compute maxRead and max length.
  110. var count [maxCodeLen]int
  111. var min, max int
  112. for _, n := range lengths {
  113. if n == 0 {
  114. continue
  115. }
  116. if min == 0 || n < min {
  117. min = n
  118. }
  119. if n > max {
  120. max = n
  121. }
  122. count[n&maxCodeLenMask]++
  123. }
  124. // Empty tree. The decompressor.huffSym function will fail later if the tree
  125. // is used. Technically, an empty tree is only valid for the HDIST tree and
  126. // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree
  127. // is guaranteed to fail since it will attempt to use the tree to decode the
  128. // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is
  129. // guaranteed to fail later since the compressed data section must be
  130. // composed of at least one symbol (the end-of-block marker).
  131. if max == 0 {
  132. return true
  133. }
  134. code := 0
  135. var nextcode [maxCodeLen]int
  136. for i := min; i <= max; i++ {
  137. code <<= 1
  138. nextcode[i&maxCodeLenMask] = code
  139. code += count[i&maxCodeLenMask]
  140. }
  141. // Check that the coding is complete (i.e., that we've
  142. // assigned all 2-to-the-max possible bit sequences).
  143. // Exception: To be compatible with zlib, we also need to
  144. // accept degenerate single-code codings. See also
  145. // TestDegenerateHuffmanCoding.
  146. if code != 1<<uint(max) && !(code == 1 && max == 1) {
  147. if debugDecode {
  148. fmt.Println("coding failed, code, max:", code, max, code == 1<<uint(max), code == 1 && max == 1, "(one should be true)")
  149. }
  150. return false
  151. }
  152. h.maxRead = min
  153. chunks := h.chunks[:]
  154. for i := range chunks {
  155. chunks[i] = 0
  156. }
  157. if max > huffmanChunkBits {
  158. numLinks := 1 << (uint(max) - huffmanChunkBits)
  159. h.linkMask = uint32(numLinks - 1)
  160. // create link tables
  161. link := nextcode[huffmanChunkBits+1] >> 1
  162. if cap(h.links) < huffmanNumChunks-link {
  163. h.links = make([][]uint16, huffmanNumChunks-link)
  164. } else {
  165. h.links = h.links[:huffmanNumChunks-link]
  166. }
  167. for j := uint(link); j < huffmanNumChunks; j++ {
  168. reverse := int(bits.Reverse16(uint16(j)))
  169. reverse >>= uint(16 - huffmanChunkBits)
  170. off := j - uint(link)
  171. if sanity && h.chunks[reverse] != 0 {
  172. panic("impossible: overwriting existing chunk")
  173. }
  174. h.chunks[reverse] = uint16(off<<huffmanValueShift | (huffmanChunkBits + 1))
  175. if cap(h.links[off]) < numLinks {
  176. h.links[off] = make([]uint16, numLinks)
  177. } else {
  178. h.links[off] = h.links[off][:numLinks]
  179. }
  180. }
  181. } else {
  182. h.links = h.links[:0]
  183. }
  184. for i, n := range lengths {
  185. if n == 0 {
  186. continue
  187. }
  188. code := nextcode[n]
  189. nextcode[n]++
  190. chunk := uint16(i<<huffmanValueShift | n)
  191. reverse := int(bits.Reverse16(uint16(code)))
  192. reverse >>= uint(16 - n)
  193. if n <= huffmanChunkBits {
  194. for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
  195. // We should never need to overwrite
  196. // an existing chunk. Also, 0 is
  197. // never a valid chunk, because the
  198. // lower 4 "count" bits should be
  199. // between 1 and 15.
  200. if sanity && h.chunks[off] != 0 {
  201. panic("impossible: overwriting existing chunk")
  202. }
  203. h.chunks[off] = chunk
  204. }
  205. } else {
  206. j := reverse & (huffmanNumChunks - 1)
  207. if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
  208. // Longer codes should have been
  209. // associated with a link table above.
  210. panic("impossible: not an indirect chunk")
  211. }
  212. value := h.chunks[j] >> huffmanValueShift
  213. linktab := h.links[value]
  214. reverse >>= huffmanChunkBits
  215. for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
  216. if sanity && linktab[off] != 0 {
  217. panic("impossible: overwriting existing chunk")
  218. }
  219. linktab[off] = chunk
  220. }
  221. }
  222. }
  223. if sanity {
  224. // Above we've sanity checked that we never overwrote
  225. // an existing entry. Here we additionally check that
  226. // we filled the tables completely.
  227. for i, chunk := range h.chunks {
  228. if chunk == 0 {
  229. // As an exception, in the degenerate
  230. // single-code case, we allow odd
  231. // chunks to be missing.
  232. if code == 1 && i%2 == 1 {
  233. continue
  234. }
  235. panic("impossible: missing chunk")
  236. }
  237. }
  238. for _, linktab := range h.links {
  239. for _, chunk := range linktab {
  240. if chunk == 0 {
  241. panic("impossible: missing chunk")
  242. }
  243. }
  244. }
  245. }
  246. return true
  247. }
  248. // Reader is the actual read interface needed by NewReader.
  249. // If the passed in io.Reader does not also have ReadByte,
  250. // the NewReader will introduce its own buffering.
  251. type Reader interface {
  252. io.Reader
  253. io.ByteReader
  254. }
  255. type step uint8
  256. const (
  257. copyData step = iota + 1
  258. nextBlock
  259. huffmanBytesBuffer
  260. huffmanBytesReader
  261. huffmanBufioReader
  262. huffmanStringsReader
  263. huffmanGenericReader
  264. )
  265. // flushMode tells decompressor when to return data
  266. type flushMode uint8
  267. const (
  268. syncFlush flushMode = iota // return data after sync flush block
  269. partialFlush // return data after each block
  270. )
  271. // Decompress state.
  272. type decompressor struct {
  273. // Input source.
  274. r Reader
  275. roffset int64
  276. // Huffman decoders for literal/length, distance.
  277. h1, h2 huffmanDecoder
  278. // Length arrays used to define Huffman codes.
  279. bits *[maxNumLit + maxNumDist]int
  280. codebits *[numCodes]int
  281. // Output history, buffer.
  282. dict dictDecoder
  283. // Next step in the decompression,
  284. // and decompression state.
  285. step step
  286. stepState int
  287. err error
  288. toRead []byte
  289. hl, hd *huffmanDecoder
  290. copyLen int
  291. copyDist int
  292. // Temporary buffer (avoids repeated allocation).
  293. buf [4]byte
  294. // Input bits, in top of b.
  295. b uint32
  296. nb uint
  297. final bool
  298. flushMode flushMode
  299. }
  300. func (f *decompressor) nextBlock() {
  301. for f.nb < 1+2 {
  302. if f.err = f.moreBits(); f.err != nil {
  303. return
  304. }
  305. }
  306. f.final = f.b&1 == 1
  307. f.b >>= 1
  308. typ := f.b & 3
  309. f.b >>= 2
  310. f.nb -= 1 + 2
  311. switch typ {
  312. case 0:
  313. f.dataBlock()
  314. if debugDecode {
  315. fmt.Println("stored block")
  316. }
  317. case 1:
  318. // compressed, fixed Huffman tables
  319. f.hl = &fixedHuffmanDecoder
  320. f.hd = nil
  321. f.huffmanBlockDecoder()
  322. if debugDecode {
  323. fmt.Println("predefinied huffman block")
  324. }
  325. case 2:
  326. // compressed, dynamic Huffman tables
  327. if f.err = f.readHuffman(); f.err != nil {
  328. break
  329. }
  330. f.hl = &f.h1
  331. f.hd = &f.h2
  332. f.huffmanBlockDecoder()
  333. if debugDecode {
  334. fmt.Println("dynamic huffman block")
  335. }
  336. default:
  337. // 3 is reserved.
  338. if debugDecode {
  339. fmt.Println("reserved data block encountered")
  340. }
  341. f.err = CorruptInputError(f.roffset)
  342. }
  343. }
  344. func (f *decompressor) Read(b []byte) (int, error) {
  345. for {
  346. if len(f.toRead) > 0 {
  347. n := copy(b, f.toRead)
  348. f.toRead = f.toRead[n:]
  349. if len(f.toRead) == 0 {
  350. return n, f.err
  351. }
  352. return n, nil
  353. }
  354. if f.err != nil {
  355. return 0, f.err
  356. }
  357. f.doStep()
  358. if f.err != nil && len(f.toRead) == 0 {
  359. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  360. }
  361. }
  362. }
  363. // WriteTo implements the io.WriteTo interface for io.Copy and friends.
  364. func (f *decompressor) WriteTo(w io.Writer) (int64, error) {
  365. total := int64(0)
  366. flushed := false
  367. for {
  368. if len(f.toRead) > 0 {
  369. n, err := w.Write(f.toRead)
  370. total += int64(n)
  371. if err != nil {
  372. f.err = err
  373. return total, err
  374. }
  375. if n != len(f.toRead) {
  376. return total, io.ErrShortWrite
  377. }
  378. f.toRead = f.toRead[:0]
  379. }
  380. if f.err != nil && flushed {
  381. if f.err == io.EOF {
  382. return total, nil
  383. }
  384. return total, f.err
  385. }
  386. if f.err == nil {
  387. f.doStep()
  388. }
  389. if len(f.toRead) == 0 && f.err != nil && !flushed {
  390. f.toRead = f.dict.readFlush() // Flush what's left in case of error
  391. flushed = true
  392. }
  393. }
  394. }
  395. func (f *decompressor) Close() error {
  396. if f.err == io.EOF {
  397. return nil
  398. }
  399. return f.err
  400. }
  401. // RFC 1951 section 3.2.7.
  402. // Compression with dynamic Huffman codes
  403. var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
  404. func (f *decompressor) readHuffman() error {
  405. // HLIT[5], HDIST[5], HCLEN[4].
  406. for f.nb < 5+5+4 {
  407. if err := f.moreBits(); err != nil {
  408. return err
  409. }
  410. }
  411. nlit := int(f.b&0x1F) + 257
  412. if nlit > maxNumLit {
  413. if debugDecode {
  414. fmt.Println("nlit > maxNumLit", nlit)
  415. }
  416. return CorruptInputError(f.roffset)
  417. }
  418. f.b >>= 5
  419. ndist := int(f.b&0x1F) + 1
  420. if ndist > maxNumDist {
  421. if debugDecode {
  422. fmt.Println("ndist > maxNumDist", ndist)
  423. }
  424. return CorruptInputError(f.roffset)
  425. }
  426. f.b >>= 5
  427. nclen := int(f.b&0xF) + 4
  428. // numCodes is 19, so nclen is always valid.
  429. f.b >>= 4
  430. f.nb -= 5 + 5 + 4
  431. // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
  432. for i := 0; i < nclen; i++ {
  433. for f.nb < 3 {
  434. if err := f.moreBits(); err != nil {
  435. return err
  436. }
  437. }
  438. f.codebits[codeOrder[i]] = int(f.b & 0x7)
  439. f.b >>= 3
  440. f.nb -= 3
  441. }
  442. for i := nclen; i < len(codeOrder); i++ {
  443. f.codebits[codeOrder[i]] = 0
  444. }
  445. if !f.h1.init(f.codebits[0:]) {
  446. if debugDecode {
  447. fmt.Println("init codebits failed")
  448. }
  449. return CorruptInputError(f.roffset)
  450. }
  451. // HLIT + 257 code lengths, HDIST + 1 code lengths,
  452. // using the code length Huffman code.
  453. for i, n := 0, nlit+ndist; i < n; {
  454. x, err := f.huffSym(&f.h1)
  455. if err != nil {
  456. return err
  457. }
  458. if x < 16 {
  459. // Actual length.
  460. f.bits[i] = x
  461. i++
  462. continue
  463. }
  464. // Repeat previous length or zero.
  465. var rep int
  466. var nb uint
  467. var b int
  468. switch x {
  469. default:
  470. return InternalError("unexpected length code")
  471. case 16:
  472. rep = 3
  473. nb = 2
  474. if i == 0 {
  475. if debugDecode {
  476. fmt.Println("i==0")
  477. }
  478. return CorruptInputError(f.roffset)
  479. }
  480. b = f.bits[i-1]
  481. case 17:
  482. rep = 3
  483. nb = 3
  484. b = 0
  485. case 18:
  486. rep = 11
  487. nb = 7
  488. b = 0
  489. }
  490. for f.nb < nb {
  491. if err := f.moreBits(); err != nil {
  492. if debugDecode {
  493. fmt.Println("morebits:", err)
  494. }
  495. return err
  496. }
  497. }
  498. rep += int(f.b & uint32(1<<(nb&regSizeMaskUint32)-1))
  499. f.b >>= nb & regSizeMaskUint32
  500. f.nb -= nb
  501. if i+rep > n {
  502. if debugDecode {
  503. fmt.Println("i+rep > n", i, rep, n)
  504. }
  505. return CorruptInputError(f.roffset)
  506. }
  507. for j := 0; j < rep; j++ {
  508. f.bits[i] = b
  509. i++
  510. }
  511. }
  512. if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
  513. if debugDecode {
  514. fmt.Println("init2 failed")
  515. }
  516. return CorruptInputError(f.roffset)
  517. }
  518. // As an optimization, we can initialize the maxRead bits to read at a time
  519. // for the HLIT tree to the length of the EOB marker since we know that
  520. // every block must terminate with one. This preserves the property that
  521. // we never read any extra bytes after the end of the DEFLATE stream.
  522. if f.h1.maxRead < f.bits[endBlockMarker] {
  523. f.h1.maxRead = f.bits[endBlockMarker]
  524. }
  525. if !f.final {
  526. // If not the final block, the smallest block possible is
  527. // a predefined table, BTYPE=01, with a single EOB marker.
  528. // This will take up 3 + 7 bits.
  529. f.h1.maxRead += 10
  530. }
  531. return nil
  532. }
  533. // Copy a single uncompressed data block from input to output.
  534. func (f *decompressor) dataBlock() {
  535. // Uncompressed.
  536. // Discard current half-byte.
  537. left := (f.nb) & 7
  538. f.nb -= left
  539. f.b >>= left
  540. offBytes := f.nb >> 3
  541. // Unfilled values will be overwritten.
  542. f.buf[0] = uint8(f.b)
  543. f.buf[1] = uint8(f.b >> 8)
  544. f.buf[2] = uint8(f.b >> 16)
  545. f.buf[3] = uint8(f.b >> 24)
  546. f.roffset += int64(offBytes)
  547. f.nb, f.b = 0, 0
  548. // Length then ones-complement of length.
  549. nr, err := io.ReadFull(f.r, f.buf[offBytes:4])
  550. f.roffset += int64(nr)
  551. if err != nil {
  552. f.err = noEOF(err)
  553. return
  554. }
  555. n := uint16(f.buf[0]) | uint16(f.buf[1])<<8
  556. nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8
  557. if nn != ^n {
  558. if debugDecode {
  559. ncomp := ^n
  560. fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp)
  561. }
  562. f.err = CorruptInputError(f.roffset)
  563. return
  564. }
  565. if n == 0 {
  566. if f.flushMode == syncFlush {
  567. f.toRead = f.dict.readFlush()
  568. }
  569. f.finishBlock()
  570. return
  571. }
  572. f.copyLen = int(n)
  573. f.copyData()
  574. }
  575. // copyData copies f.copyLen bytes from the underlying reader into f.hist.
  576. // It pauses for reads when f.hist is full.
  577. func (f *decompressor) copyData() {
  578. buf := f.dict.writeSlice()
  579. if len(buf) > f.copyLen {
  580. buf = buf[:f.copyLen]
  581. }
  582. cnt, err := io.ReadFull(f.r, buf)
  583. f.roffset += int64(cnt)
  584. f.copyLen -= cnt
  585. f.dict.writeMark(cnt)
  586. if err != nil {
  587. f.err = noEOF(err)
  588. return
  589. }
  590. if f.dict.availWrite() == 0 || f.copyLen > 0 {
  591. f.toRead = f.dict.readFlush()
  592. f.step = copyData
  593. return
  594. }
  595. f.finishBlock()
  596. }
  597. func (f *decompressor) finishBlock() {
  598. if f.final {
  599. if f.dict.availRead() > 0 {
  600. f.toRead = f.dict.readFlush()
  601. }
  602. f.err = io.EOF
  603. } else if f.flushMode == partialFlush && f.dict.availRead() > 0 {
  604. f.toRead = f.dict.readFlush()
  605. }
  606. f.step = nextBlock
  607. }
  608. func (f *decompressor) doStep() {
  609. switch f.step {
  610. case copyData:
  611. f.copyData()
  612. case nextBlock:
  613. f.nextBlock()
  614. case huffmanBytesBuffer:
  615. f.huffmanBytesBuffer()
  616. case huffmanBytesReader:
  617. f.huffmanBytesReader()
  618. case huffmanBufioReader:
  619. f.huffmanBufioReader()
  620. case huffmanStringsReader:
  621. f.huffmanStringsReader()
  622. case huffmanGenericReader:
  623. f.huffmanGenericReader()
  624. default:
  625. panic("BUG: unexpected step state")
  626. }
  627. }
  628. // noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF.
  629. func noEOF(e error) error {
  630. if e == io.EOF {
  631. return io.ErrUnexpectedEOF
  632. }
  633. return e
  634. }
  635. func (f *decompressor) moreBits() error {
  636. c, err := f.r.ReadByte()
  637. if err != nil {
  638. return noEOF(err)
  639. }
  640. f.roffset++
  641. f.b |= uint32(c) << (f.nb & regSizeMaskUint32)
  642. f.nb += 8
  643. return nil
  644. }
  645. // Read the next Huffman-encoded symbol from f according to h.
  646. func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
  647. // Since a huffmanDecoder can be empty or be composed of a degenerate tree
  648. // with single element, huffSym must error on these two edge cases. In both
  649. // cases, the chunks slice will be 0 for the invalid sequence, leading it
  650. // satisfy the n == 0 check below.
  651. n := uint(h.maxRead)
  652. // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
  653. // but is smart enough to keep local variables in registers, so use nb and b,
  654. // inline call to moreBits and reassign b,nb back to f on return.
  655. nb, b := f.nb, f.b
  656. for {
  657. for nb < n {
  658. c, err := f.r.ReadByte()
  659. if err != nil {
  660. f.b = b
  661. f.nb = nb
  662. return 0, noEOF(err)
  663. }
  664. f.roffset++
  665. b |= uint32(c) << (nb & regSizeMaskUint32)
  666. nb += 8
  667. }
  668. chunk := h.chunks[b&(huffmanNumChunks-1)]
  669. n = uint(chunk & huffmanCountMask)
  670. if n > huffmanChunkBits {
  671. chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask]
  672. n = uint(chunk & huffmanCountMask)
  673. }
  674. if n <= nb {
  675. if n == 0 {
  676. f.b = b
  677. f.nb = nb
  678. if debugDecode {
  679. fmt.Println("huffsym: n==0")
  680. }
  681. f.err = CorruptInputError(f.roffset)
  682. return 0, f.err
  683. }
  684. f.b = b >> (n & regSizeMaskUint32)
  685. f.nb = nb - n
  686. return int(chunk >> huffmanValueShift), nil
  687. }
  688. }
  689. }
  690. func makeReader(r io.Reader) Reader {
  691. if rr, ok := r.(Reader); ok {
  692. return rr
  693. }
  694. return bufio.NewReader(r)
  695. }
  696. func fixedHuffmanDecoderInit() {
  697. fixedOnce.Do(func() {
  698. // These come from the RFC section 3.2.6.
  699. var bits [288]int
  700. for i := 0; i < 144; i++ {
  701. bits[i] = 8
  702. }
  703. for i := 144; i < 256; i++ {
  704. bits[i] = 9
  705. }
  706. for i := 256; i < 280; i++ {
  707. bits[i] = 7
  708. }
  709. for i := 280; i < 288; i++ {
  710. bits[i] = 8
  711. }
  712. fixedHuffmanDecoder.init(bits[:])
  713. })
  714. }
  715. func (f *decompressor) Reset(r io.Reader, dict []byte) error {
  716. *f = decompressor{
  717. r: makeReader(r),
  718. bits: f.bits,
  719. codebits: f.codebits,
  720. h1: f.h1,
  721. h2: f.h2,
  722. dict: f.dict,
  723. step: nextBlock,
  724. }
  725. f.dict.init(maxMatchOffset, dict)
  726. return nil
  727. }
  728. type ReaderOpt func(*decompressor)
  729. // WithPartialBlock tells decompressor to return after each block,
  730. // so it can read data written with partial flush
  731. func WithPartialBlock() ReaderOpt {
  732. return func(f *decompressor) {
  733. f.flushMode = partialFlush
  734. }
  735. }
  736. // WithDict initializes the reader with a preset dictionary
  737. func WithDict(dict []byte) ReaderOpt {
  738. return func(f *decompressor) {
  739. f.dict.init(maxMatchOffset, dict)
  740. }
  741. }
  742. // NewReaderOpts returns new reader with provided options
  743. func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser {
  744. fixedHuffmanDecoderInit()
  745. var f decompressor
  746. f.r = makeReader(r)
  747. f.bits = new([maxNumLit + maxNumDist]int)
  748. f.codebits = new([numCodes]int)
  749. f.step = nextBlock
  750. f.dict.init(maxMatchOffset, nil)
  751. for _, opt := range opts {
  752. opt(&f)
  753. }
  754. return &f
  755. }
  756. // NewReader returns a new ReadCloser that can be used
  757. // to read the uncompressed version of r.
  758. // If r does not also implement io.ByteReader,
  759. // the decompressor may read more data than necessary from r.
  760. // It is the caller's responsibility to call Close on the ReadCloser
  761. // when finished reading.
  762. //
  763. // The ReadCloser returned by NewReader also implements Resetter.
  764. func NewReader(r io.Reader) io.ReadCloser {
  765. return NewReaderOpts(r)
  766. }
  767. // NewReaderDict is like NewReader but initializes the reader
  768. // with a preset dictionary. The returned Reader behaves as if
  769. // the uncompressed data stream started with the given dictionary,
  770. // which has already been read. NewReaderDict is typically used
  771. // to read data compressed by NewWriterDict.
  772. //
  773. // The ReadCloser returned by NewReader also implements Resetter.
  774. func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
  775. return NewReaderOpts(r, WithDict(dict))
  776. }