Source file src/compress/flate/dict_decoder.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package flate
     6  
     7  // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
     8  // LZ77 decompresses data through sequences of two forms of commands:
     9  //
    10  //   - Literal insertions: Runs of one or more symbols are inserted into the data
    11  //     stream as is. This is accomplished through the writeByte method for a
    12  //     single symbol, or combinations of writeSlice/writeMark for multiple symbols.
    13  //     Any valid stream must start with a literal insertion if no preset dictionary
    14  //     is used.
    15  //
    16  //   - Backward copies: Runs of one or more symbols are copied from previously
    17  //     emitted data. Backward copies come as the tuple (dist, length) where dist
    18  //     determines how far back in the stream to copy from and length determines how
    19  //     many bytes to copy. Note that it is valid for the length to be greater than
    20  //     the distance. Since LZ77 uses forward copies, that situation is used to
    21  //     perform a form of run-length encoding on repeated runs of symbols.
    22  //     The writeCopy and tryWriteCopy are used to implement this command.
    23  //
    24  // For performance reasons, this implementation performs little to no sanity
    25  // checks about the arguments. As such, the invariants documented for each
    26  // method call must be respected.
    27  type dictDecoder struct {
    28  	hist []byte // Sliding window history
    29  
    30  	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
    31  	wrPos int  // Current output position in buffer
    32  	rdPos int  // Have emitted hist[:rdPos] already
    33  	full  bool // Has a full window length been written yet?
    34  }
    35  
    36  // init initializes dictDecoder to have a sliding window dictionary of the given
    37  // size. If a preset dict is provided, it will initialize the dictionary with
    38  // the contents of dict.
    39  func (dd *dictDecoder) init(size int, dict []byte) {
    40  	*dd = dictDecoder{hist: dd.hist}
    41  
    42  	if cap(dd.hist) < size {
    43  		dd.hist = make([]byte, size)
    44  	}
    45  	dd.hist = dd.hist[:size]
    46  
    47  	if len(dict) > len(dd.hist) {
    48  		dict = dict[len(dict)-len(dd.hist):]
    49  	}
    50  	dd.wrPos = copy(dd.hist, dict)
    51  	if dd.wrPos == len(dd.hist) {
    52  		dd.wrPos = 0
    53  		dd.full = true
    54  	}
    55  	dd.rdPos = dd.wrPos
    56  }
    57  
    58  // histSize reports the total amount of historical data in the dictionary.
    59  func (dd *dictDecoder) histSize() int {
    60  	if dd.full {
    61  		return len(dd.hist)
    62  	}
    63  	return dd.wrPos
    64  }
    65  
    66  // availRead reports the number of bytes that can be flushed by readFlush.
    67  func (dd *dictDecoder) availRead() int {
    68  	return dd.wrPos - dd.rdPos
    69  }
    70  
    71  // availWrite reports the available amount of output buffer space.
    72  func (dd *dictDecoder) availWrite() int {
    73  	return len(dd.hist) - dd.wrPos
    74  }
    75  
    76  // writeSlice returns a slice of the available buffer to write data to.
    77  //
    78  // This invariant will be kept: len(s) <= availWrite()
    79  func (dd *dictDecoder) writeSlice() []byte {
    80  	return dd.hist[dd.wrPos:]
    81  }
    82  
    83  // writeMark advances the writer pointer by cnt.
    84  //
    85  // This invariant must be kept: 0 <= cnt <= availWrite()
    86  func (dd *dictDecoder) writeMark(cnt int) {
    87  	dd.wrPos += cnt
    88  }
    89  
    90  // writeByte writes a single byte to the dictionary.
    91  //
    92  // This invariant must be kept: 0 < availWrite()
    93  func (dd *dictDecoder) writeByte(c byte) {
    94  	dd.hist[dd.wrPos] = c
    95  	dd.wrPos++
    96  }
    97  
    98  // writeCopy copies a string at a given (dist, length) to the output.
    99  // This returns the number of bytes copied and may be less than the requested
   100  // length if the available space in the output buffer is too small.
   101  //
   102  // This invariant must be kept: 0 < dist <= histSize()
   103  func (dd *dictDecoder) writeCopy(dist, length int) int {
   104  	dstBase := dd.wrPos
   105  	dstPos := dstBase
   106  	srcPos := dstPos - dist
   107  	endPos := dstPos + length
   108  	if endPos > len(dd.hist) {
   109  		endPos = len(dd.hist)
   110  	}
   111  
   112  	// Copy non-overlapping section after destination position.
   113  	//
   114  	// This section is non-overlapping in that the copy length for this section
   115  	// is always less than or equal to the backwards distance. This can occur
   116  	// if a distance refers to data that wraps-around in the buffer.
   117  	// Thus, a backwards copy is performed here; that is, the exact bytes in
   118  	// the source prior to the copy is placed in the destination.
   119  	if srcPos < 0 {
   120  		srcPos += len(dd.hist)
   121  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
   122  		srcPos = 0
   123  	}
   124  
   125  	// Copy possibly overlapping section before destination position.
   126  	//
   127  	// This section can overlap if the copy length for this section is larger
   128  	// than the backwards distance. This is allowed by LZ77 so that repeated
   129  	// strings can be succinctly represented using (dist, length) pairs.
   130  	// Thus, a forwards copy is performed here; that is, the bytes copied is
   131  	// possibly dependent on the resulting bytes in the destination as the copy
   132  	// progresses along. This is functionally equivalent to the following:
   133  	//
   134  	//	for i := 0; i < endPos-dstPos; i++ {
   135  	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
   136  	//	}
   137  	//	dstPos = endPos
   138  	//
   139  	for dstPos < endPos {
   140  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
   141  	}
   142  
   143  	dd.wrPos = dstPos
   144  	return dstPos - dstBase
   145  }
   146  
   147  // tryWriteCopy tries to copy a string at a given (distance, length) to the
   148  // output. This specialized version is optimized for short distances.
   149  //
   150  // This method is designed to be inlined for performance reasons.
   151  //
   152  // This invariant must be kept: 0 < dist <= histSize()
   153  func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
   154  	dstPos := dd.wrPos
   155  	endPos := dstPos + length
   156  	if dstPos < dist || endPos > len(dd.hist) {
   157  		return 0
   158  	}
   159  	dstBase := dstPos
   160  	srcPos := dstPos - dist
   161  
   162  	// Copy possibly overlapping section before destination position.
   163  	for dstPos < endPos {
   164  		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
   165  	}
   166  
   167  	dd.wrPos = dstPos
   168  	return dstPos - dstBase
   169  }
   170  
   171  // readFlush returns a slice of the historical buffer that is ready to be
   172  // emitted to the user. The data returned by readFlush must be fully consumed
   173  // before calling any other dictDecoder methods.
   174  func (dd *dictDecoder) readFlush() []byte {
   175  	toRead := dd.hist[dd.rdPos:dd.wrPos]
   176  	dd.rdPos = dd.wrPos
   177  	if dd.wrPos == len(dd.hist) {
   178  		dd.wrPos, dd.rdPos = 0, 0
   179  		dd.full = true
   180  	}
   181  	return toRead
   182  }
   183  

View as plain text