@@ -24,17 +24,20 @@ import (
2424)
2525
// Tunable parameters of the encoder and the values derived from them.
const (
	idxsz = 11 // Size of buffer indexes in bits, typically 10..13 bits.
	lensz = 4  // Size of lookahead indexes in bits, typically 4..5 bits.

	charsz   = 8   // Size of encoded chars in bits.
	bytemask = 128 // Mask with only the highest bit of a byte set. Used to iterate through the bits of a char.

	threshold = 1 // If match length > threshold then output a token (idx, len), otherwise output one char.

	bufsz     = 1 << idxsz       // Buffer size.
	looksz    = (1 << lensz) + 1 // Lookahead buffer size.
	historysz = bufsz - looksz   // History buffer size.

	charStartBit  = true  // Indicates next bits encode a char.
	tokenStartBit = false // Indicates next bits encode a token.
)
3942
4043func min (x , y int ) int {
@@ -44,6 +47,10 @@ func min(x, y int) int {
4447 return y
4548}
4649
50+ // findLargestMatch looks for the largest sequence of characters (from current to current+ahead)
51+ // contained in the history of the buffer.
52+ // It returns the index of the found match, if any, and its length.
53+ // The index is relative to the current position. If idx 0 is returned then no match has been found.
4754func findLargestMatch (buf []byte , current , size int ) (idx , len int ) {
4855 idx = 0
4956 len = 1
@@ -67,6 +74,8 @@ func findLargestMatch(buf []byte, current, size int) (idx, len int) {
6774 return
6875}
6976
77+ // Encode takes a slice of bytes, compresses it using the lzss compression algorithm
78+ // and returns the result in a new bytes buffer.
7079func Encode (data []byte ) []byte {
7180 // buffer is made up of two parts: the first is for already processed data (history); the second is for new data
7281 buffer := make ([]byte , bufsz * 2 )
@@ -104,6 +113,9 @@ func Encode(data []byte) []byte {
104113 return out .bytes ()
105114}
106115
116+ // filler abstracts the process of consuming an input buffer
117+ // using its bytes to fill another buffer.
118+ // It is used to facilitate the handling of the input buffer in the Encode function.
107119type filler struct {
108120 src []byte
109121 idx int
@@ -115,13 +127,21 @@ func newFiller(src []byte) *filler {
115127 }
116128}
117129
130+ // fill tries to fill all the dst buffer with bytes read from src.
131+ // It returns the number of bytes moved from src to dst.
132+ // The src buffer offset is then incremented so that all the content of src
133+ // can be consumed in small chunks.
118134func (f * filler ) fill (dst []byte ) int {
119135 n := min (len (f .src )- f .idx , len (dst ))
120136 copy (dst , f .src [f .idx :f .idx + n ])
121137 f .idx += n
122138 return n
123139}
124140
141+ // result is responsible for storing the actual result of the encoding.
142+ // It knows how to store characters and tokens in the resulting buffer.
143+ // It must be flushed at the end of the encoding in order to store the
144+ // remaining bits of bitBuffer.
125145type result struct {
126146 bitBuffer int
127147 bitMask int
@@ -131,11 +151,12 @@ type result struct {
131151func newResult () * result {
132152 return & result {
133153 bitBuffer : 0 ,
134- bitMask : 128 ,
135- out : bytes .NewBufferString ( "" ) ,
154+ bitMask : bytemask ,
155+ out : & bytes.Buffer {} ,
136156 }
137157}
138158
159+ // addChar stores a char in the out buffer.
139160func (r * result ) addChar (c byte ) {
140161 i := int (c )
141162 r .putbit (charStartBit )
@@ -145,6 +166,7 @@ func (r *result) addChar(c byte) {
145166 }
146167}
147168
169+ // addToken stores a token in the out buffer.
148170func (r * result ) addToken (idx , len int ) {
149171 // Adjust idx and len to fit idxsz and lensz bits respectively
150172 idx &= (bufsz - 1 )
@@ -163,11 +185,13 @@ func (r *result) addToken(idx, len int) {
163185}
164186
165187func (r * result ) flush () {
166- if r .bitMask != 128 {
188+ if r .bitMask != bytemask {
167189 r .out .WriteByte (byte (r .bitBuffer ))
168190 }
169191}
170192
193+ // putbit puts the passed bit (true -> 1; false -> 0) in the bitBuffer.
194+ // When bitBuffer contains an entire byte it's written to the out buffer.
171195func (r * result ) putbit (b bool ) {
172196 if b {
173197 r .bitBuffer |= r .bitMask
@@ -176,7 +200,7 @@ func (r *result) putbit(b bool) {
176200 if r .bitMask == 0 {
177201 r .out .WriteByte (byte (r .bitBuffer ))
178202 r .bitBuffer = 0
179- r .bitMask = 128
203+ r .bitMask = bytemask
180204 }
181205}
182206
0 commit comments