diff options
Diffstat (limited to 'libgo/go/compress/flate')
-rw-r--r-- | libgo/go/compress/flate/deflate.go | 521 | ||||
-rw-r--r-- | libgo/go/compress/flate/deflate_test.go | 389 | ||||
-rw-r--r-- | libgo/go/compress/flate/flate_test.go | 139 | ||||
-rw-r--r-- | libgo/go/compress/flate/huffman_bit_writer.go | 506 | ||||
-rw-r--r-- | libgo/go/compress/flate/huffman_code.go | 373 | ||||
-rw-r--r-- | libgo/go/compress/flate/inflate.go | 620 | ||||
-rw-r--r-- | libgo/go/compress/flate/reverse_bits.go | 48 | ||||
-rw-r--r-- | libgo/go/compress/flate/token.go | 103 | ||||
-rw-r--r-- | libgo/go/compress/flate/util.go | 72 |
9 files changed, 2771 insertions, 0 deletions
diff --git a/libgo/go/compress/flate/deflate.go b/libgo/go/compress/flate/deflate.go new file mode 100644 index 000000000..591b35c44 --- /dev/null +++ b/libgo/go/compress/flate/deflate.go @@ -0,0 +1,521 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" + "math" + "os" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + fastCompression = 3 + BestCompression = 9 + DefaultCompression = -1 + logMaxOffsetSize = 15 // Standard DEFLATE + wideLogMaxOffsetSize = 22 // Wide DEFLATE + minMatchLength = 3 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sence + + // The maximum number of tokens we put into a single flat block, just too + // stop things from getting too large. + maxFlateBlockTokens = 1 << 14 + maxStoreBlockSize = 65535 + hashBits = 15 + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength +) + +type syncPipeReader struct { + *io.PipeReader + closeChan chan bool +} + +func (sr *syncPipeReader) CloseWithError(err os.Error) os.Error { + retErr := sr.PipeReader.CloseWithError(err) + sr.closeChan <- true // finish writer close + return retErr +} + +type syncPipeWriter struct { + *io.PipeWriter + closeChan chan bool +} + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing int +} + +var levels = []compressionLevel{ + {}, // 0 + // For levels 1-3 we don't bother trying with lazy matches + {3, 0, 8, 4, 4}, + {3, 0, 16, 8, 5}, + {3, 0, 32, 32, 6}, + // Levels 4-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {4, 4, 16, 16, math.MaxInt32}, + {8, 16, 32, 32, math.MaxInt32}, + {8, 16, 128, 128, math.MaxInt32}, + {8, 32, 128, 256, math.MaxInt32}, + {32, 128, 258, 1024, math.MaxInt32}, + {32, 258, 258, 4096, math.MaxInt32}, +} + +func (sw *syncPipeWriter) Close() os.Error { + err := sw.PipeWriter.Close() + <-sw.closeChan // wait for reader close + return err +} + +func syncPipe() (*syncPipeReader, *syncPipeWriter) { + r, w := io.Pipe() + sr := &syncPipeReader{r, make(chan bool, 1)} + sw := &syncPipeWriter{w, sr.closeChan} + return sr, sw +} + +type compressor struct { + level int + logWindowSize uint + w *huffmanBitWriter + r io.Reader + // (1 << logWindowSize) - 1. + windowMask int + + eof bool // has eof been reached on input? + sync bool // writer wants to flush + syncChan chan os.Error + + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + hashHead []int + + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + hashPrev []int + + // If we find a match of length >= niceMatch, then we don't bother searching + // any further. + niceMatch int + + // If we find a match of length >= goodMatch, we only do a half-hearted + // effort at doing lazy matching starting at the next character + goodMatch int + + // The maximum number of chains we look at when finding a match + maxChainLength int + + // The sliding window we use for matching + window []byte + + // The index just past the last valid character + windowEnd int + + // index in "window" at which current block starts + blockStart int +} + +func (d *compressor) flush() os.Error { + d.w.flush() + return d.w.err +} + +func (d *compressor) fillWindow(index int) (int, os.Error) { + if d.sync { + return index, nil + } + wSize := d.windowMask + 1 + if index >= wSize+wSize-(minMatchLength+maxMatchLength) { + // shift the window by wSize + copy(d.window, d.window[wSize:2*wSize]) + index -= wSize + d.windowEnd -= wSize + if d.blockStart >= wSize { + d.blockStart -= wSize + } else { + d.blockStart = math.MaxInt32 + } + for i, h := range d.hashHead { + d.hashHead[i] = max(h-wSize, -1) + } + for i, h := range d.hashPrev { + d.hashPrev[i] = max(h-wSize, -1) + } + } + count, err := d.r.Read(d.window[d.windowEnd:]) + d.windowEnd += count + if count == 0 && err == nil { + d.sync = true + } + if err == os.EOF { + d.eof = true + err = nil + } + return index, err +} + +func (d *compressor) writeBlock(tokens []token, index int, eof bool) os.Error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + d.w.writeBlock(tokens, eof, window) + return d.w.err + } + return nil +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { + win := d.window[0 : pos+min(maxMatchLength, lookahead)] + + // We quit when we get a match that's at least nice long + nice := min(d.niceMatch, len(win)-pos) + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.maxChainLength + length = prevLength + if length >= d.goodMatch { + tries >>= 2 + } + + w0 := win[pos] + w1 := win[pos+1] + wEnd := win[pos+length] + minIndex := pos - (d.windowMask + 1) + + for i := prevHead; tries > 0; tries-- { + if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] { + // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches + + n := 3 + for pos+n < len(win) && win[i+n] == win[pos+n] { + n++ + } + if n > length && (n > 3 || pos-i <= 4096) { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i == minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + if i = d.hashPrev[i&d.windowMask]; i < minIndex || i < 0 { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) os.Error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +func (d *compressor) storedDeflate() os.Error { + buf := make([]byte, maxStoreBlockSize) + for { + n, err := d.r.Read(buf) + if n == 0 && err == nil { + d.sync = true + } + if n > 0 || d.sync { + if err := d.writeStoredBlock(buf[0:n]); err != nil { + return err + } + if d.sync { + d.syncChan <- nil + d.sync = false + } + } + if err != nil { + if err == os.EOF { + break + } + return err + } + } + return nil +} + +func (d *compressor) doDeflate() (err os.Error) { + // init + d.windowMask = 1<<d.logWindowSize - 1 + d.hashHead = make([]int, hashSize) + d.hashPrev = make([]int, 1<<d.logWindowSize) + d.window = make([]byte, 2<<d.logWindowSize) + fillInts(d.hashHead, -1) + tokens := make([]token, maxFlateBlockTokens, maxFlateBlockTokens+1) + l := levels[d.level] + d.goodMatch = l.good + d.niceMatch = l.nice + d.maxChainLength = l.chain + lazyMatch := l.lazy + length := minMatchLength - 1 + offset := 0 + byteAvailable := false + isFastDeflate := l.fastSkipHashing != 0 + index := 0 + // run + if index, err = d.fillWindow(index); err != nil { + return + } + maxOffset := d.windowMask + 1 // (1 << logWindowSize); + // only need to change when you refill the window + windowEnd := d.windowEnd + maxInsertIndex := windowEnd - (minMatchLength - 1) + ti := 0 + + hash := int(0) + if index < maxInsertIndex { + hash = int(d.window[index])<<hashShift + int(d.window[index+1]) + } + chainHead := -1 +Loop: + for { + if index > windowEnd { + panic("index > windowEnd") + } + lookahead := windowEnd - index + if lookahead < minMatchLength+maxMatchLength { + if index, err = d.fillWindow(index); err != nil { + return + } + windowEnd = d.windowEnd + if index > windowEnd { + panic("index > windowEnd") + } + maxInsertIndex = windowEnd - (minMatchLength - 1) + lookahead = windowEnd - index + if lookahead == 0 { + // Flush current output block if any. + if byteAvailable { + // There is still one pending token that needs to be flushed + tokens[ti] = literalToken(uint32(d.window[index-1]) & 0xFF) + ti++ + byteAvailable = false + } + if ti > 0 { + if err = d.writeBlock(tokens[0:ti], index, false); err != nil { + return + } + ti = 0 + } + if d.sync { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.syncChan <- d.w.err + d.sync = false + } + + // If this was only a sync (not at EOF) keep going. + if !d.eof { + continue + } + break Loop + } + } + if index < maxInsertIndex { + // Update the hash + hash = (hash<<hashShift + int(d.window[index+2])) & hashMask + chainHead = d.hashHead[hash] + d.hashPrev[index&d.windowMask] = chainHead + d.hashHead[hash] = index + } + prevLength := length + prevOffset := offset + minIndex := max(index-maxOffset, 0) + length = minMatchLength - 1 + offset = 0 + + if chainHead >= minIndex && + (isFastDeflate && lookahead > minMatchLength-1 || + !isFastDeflate && lookahead > prevLength && prevLength < lazyMatch) { + if newLength, newOffset, ok := d.findMatch(index, chainHead, minMatchLength-1, lookahead); ok { + length = newLength + offset = newOffset + } + } + if isFastDeflate && length >= minMatchLength || + !isFastDeflate && prevLength >= minMatchLength && length <= prevLength { + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + if isFastDeflate { + tokens[ti] = matchToken(uint32(length-minMatchLength), uint32(offset-minOffsetSize)) + } else { + tokens[ti] = matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)) + } + ti++ + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + if length <= l.fastSkipHashing { + var newIndex int + if isFastDeflate { + newIndex = index + length + } else { + newIndex = prevLength - 1 + } + for index++; index < newIndex; index++ { + if index < maxInsertIndex { + hash = (hash<<hashShift + int(d.window[index+2])) & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + d.hashPrev[index&d.windowMask] = d.hashHead[hash] + // Set the head of the hash chain to us. + d.hashHead[hash] = index + } + } + if !isFastDeflate { + byteAvailable = false + length = minMatchLength - 1 + } + } else { + // For matches this long, we don't bother inserting each individual + // item into the table. + index += length + hash = (int(d.window[index])<<hashShift + int(d.window[index+1])) + } + if ti == maxFlateBlockTokens { + // The block includes the current character + if err = d.writeBlock(tokens, index, false); err != nil { + return + } + ti = 0 + } + } else { + if isFastDeflate || byteAvailable { + i := index - 1 + if isFastDeflate { + i = index + } + tokens[ti] = literalToken(uint32(d.window[i]) & 0xFF) + ti++ + if ti == maxFlateBlockTokens { + if err = d.writeBlock(tokens, i+1, false); err != nil { + return + } + ti = 0 + } + } + index++ + if !isFastDeflate { + byteAvailable = true + } + } + } + return +} + +func (d *compressor) compress(r io.Reader, w io.Writer, level int, logWindowSize uint) (err os.Error) { + d.r = r + d.w = newHuffmanBitWriter(w) + d.level = level + d.logWindowSize = logWindowSize + + switch { + case level == NoCompression: + err = d.storedDeflate() + case level == DefaultCompression: + d.level = 6 + fallthrough + case 1 <= level && level <= 9: + err = d.doDeflate() + default: + return WrongValueError{"level", 0, 9, int32(level)} + } + + if d.sync { + d.syncChan <- err + d.sync = false + } + if err != nil { + return err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + return d.flush() +} + +// NewWriter returns a new Writer compressing +// data at the given level. Following zlib, levels +// range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any +// compression; it only adds the necessary DEFLATE framing. +func NewWriter(w io.Writer, level int) *Writer { + const logWindowSize = logMaxOffsetSize + var d compressor + d.syncChan = make(chan os.Error, 1) + pr, pw := syncPipe() + go func() { + err := d.compress(pr, w, level, logWindowSize) + pr.CloseWithError(err) + }() + return &Writer{pw, &d} +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + w *syncPipeWriter + d *compressor +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err os.Error) { + if len(data) == 0 { + // no point, and nil interferes with sync + return + } + return w.w.Write(data) +} + +// Flush flushes any pending compressed data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() os.Error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + if w.d.sync { + panic("compress/flate: double Flush") + } + _, err := w.w.Write(nil) + err1 := <-w.d.syncChan + if err == nil { + err = err1 + } + return err +} + +// Close flushes and closes the writer. +func (w *Writer) Close() os.Error { + return w.w.Close() +} diff --git a/libgo/go/compress/flate/deflate_test.go b/libgo/go/compress/flate/deflate_test.go new file mode 100644 index 000000000..3db955609 --- /dev/null +++ b/libgo/go/compress/flate/deflate_test.go @@ -0,0 +1,389 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "sync" + "testing" +) + +type deflateTest struct { + in []byte + level int + out []byte +} + +type deflateInflateTest struct { + in []byte +} + +type reverseBitsTest struct { + in uint16 + bitCount uint8 + out uint16 +} + +var deflateTests = []*deflateTest{ + &deflateTest{[]byte{}, 0, []byte{1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, -1, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, DefaultCompression, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, 4, []byte{18, 4, 4, 0, 0, 255, 255}}, + + &deflateTest{[]byte{0x11}, 0, []byte{0, 1, 0, 254, 255, 17, 1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x12}, 0, []byte{0, 2, 0, 253, 255, 17, 18, 1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 0, + []byte{0, 8, 0, 247, 255, 17, 17, 17, 17, 17, 17, 17, 17, 1, 0, 0, 255, 255}, + }, + &deflateTest{[]byte{}, 1, []byte{1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, 1, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x12}, 1, []byte{18, 20, 2, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 1, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}}, + &deflateTest{[]byte{}, 9, []byte{1, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11}, 9, []byte{18, 4, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x12}, 9, []byte{18, 20, 2, 4, 0, 0, 255, 255}}, + &deflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}, 9, []byte{18, 132, 2, 64, 0, 0, 0, 255, 255}}, +} + +var deflateInflateTests = []*deflateInflateTest{ + &deflateInflateTest{[]byte{}}, + &deflateInflateTest{[]byte{0x11}}, + &deflateInflateTest{[]byte{0x11, 0x12}}, + &deflateInflateTest{[]byte{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11}}, + &deflateInflateTest{[]byte{0x11, 0x10, 0x13, 0x41, 0x21, 0x21, 0x41, 0x13, 0x87, 0x78, 0x13}}, + &deflateInflateTest{getLargeDataChunk()}, +} + +var reverseBitsTests = []*reverseBitsTest{ + &reverseBitsTest{1, 1, 1}, + &reverseBitsTest{1, 2, 2}, + &reverseBitsTest{1, 3, 4}, + &reverseBitsTest{1, 4, 8}, + &reverseBitsTest{1, 5, 16}, + &reverseBitsTest{17, 5, 17}, + &reverseBitsTest{257, 9, 257}, + &reverseBitsTest{29, 5, 23}, +} + +func getLargeDataChunk() []byte { + result := make([]byte, 100000) + for i := range result { + result[i] = byte(int64(i) * int64(i) & 0xFF) + } + return result +} + +func TestDeflate(t *testing.T) { + for _, h := range deflateTests { + buffer := bytes.NewBuffer(nil) + w := NewWriter(buffer, h.level) + w.Write(h.in) + w.Close() + if bytes.Compare(buffer.Bytes(), h.out) != 0 { + t.Errorf("buffer is wrong; level = %v, buffer.Bytes() = %v, expected output = %v", + h.level, buffer.Bytes(), h.out) + } + } +} + +type syncBuffer struct { + buf bytes.Buffer + mu sync.RWMutex + closed bool + ready chan bool +} + +func newSyncBuffer() *syncBuffer { + return &syncBuffer{ready: make(chan bool, 1)} +} + +func (b *syncBuffer) Read(p []byte) (n int, err os.Error) { + for { + b.mu.RLock() + n, err = b.buf.Read(p) + b.mu.RUnlock() + if n > 0 || b.closed { + return + } + <-b.ready + } + panic("unreachable") +} + +func (b *syncBuffer) Write(p []byte) (n int, err os.Error) { + n, err = b.buf.Write(p) + _ = b.ready <- true + return +} + +func (b *syncBuffer) WriteMode() { + b.mu.Lock() +} + +func (b *syncBuffer) ReadMode() { + b.mu.Unlock() + _ = b.ready <- true +} + +func (b *syncBuffer) Close() os.Error { + b.closed = true + _ = b.ready <- true + return nil +} + +func testSync(t *testing.T, level int, input []byte, name string) { + if len(input) == 0 { + return + } + + t.Logf("--testSync %d, %d, %s", level, len(input), name) + buf := newSyncBuffer() + buf1 := new(bytes.Buffer) + buf.WriteMode() + w := NewWriter(io.MultiWriter(buf, buf1), level) + r := NewReader(buf) + + // Write half the input and read back. + for i := 0; i < 2; i++ { + var lo, hi int + if i == 0 { + lo, hi = 0, (len(input)+1)/2 + } else { + lo, hi = (len(input)+1)/2, len(input) + } + t.Logf("#%d: write %d-%d", i, lo, hi) + if _, err := w.Write(input[lo:hi]); err != nil { + t.Errorf("testSync: write: %v", err) + return + } + if i == 0 { + if err := w.Flush(); err != nil { + t.Errorf("testSync: flush: %v", err) + return + } + } else { + if err := w.Close(); err != nil { + t.Errorf("testSync: close: %v", err) + } + } + buf.ReadMode() + out := make([]byte, hi-lo+1) + m, err := io.ReadAtLeast(r, out, hi-lo) + t.Logf("#%d: read %d", i, m) + if m != hi-lo || err != nil { + t.Errorf("testSync/%d (%d, %d, %s): read %d: %d, %v (%d left)", i, level, len(input), name, hi-lo, m, err, buf.buf.Len()) + return + } + if !bytes.Equal(input[lo:hi], out[:hi-lo]) { + t.Errorf("testSync/%d: read wrong bytes: %x vs %x", i, input[lo:hi], out[:hi-lo]) + return + } + if i == 0 && buf.buf.Len() != 0 { + t.Errorf("testSync/%d (%d, %d, %s): extra data after %d", i, level, len(input), name, hi-lo) + } + buf.WriteMode() + } + buf.ReadMode() + out := make([]byte, 10) + if n, err := r.Read(out); n > 0 || err != os.EOF { + t.Errorf("testSync (%d, %d, %s): final Read: %d, %v (hex: %x)", level, len(input), name, n, err, out[0:n]) + } + if buf.buf.Len() != 0 { + t.Errorf("testSync (%d, %d, %s): extra data at end", level, len(input), name) + } + r.Close() + + // stream should work for ordinary reader too + r = NewReader(buf1) + out, err := ioutil.ReadAll(r) + if err != nil { + t.Errorf("testSync: read: %s", err) + return + } + r.Close() + if !bytes.Equal(input, out) { + t.Errorf("testSync: decompress(compress(data)) != data: level=%d input=%s", level, name) + } +} + + +func testToFromWithLevel(t *testing.T, level int, input []byte, name string) os.Error { + buffer := bytes.NewBuffer(nil) + w := NewWriter(buffer, level) + w.Write(input) + w.Close() + r := NewReader(buffer) + out, err := ioutil.ReadAll(r) + if err != nil { + t.Errorf("read: %s", err) + return err + } + r.Close() + if !bytes.Equal(input, out) { + t.Errorf("decompress(compress(data)) != data: level=%d input=%s", level, name) + } + + testSync(t, level, input, name) + return nil +} + +func testToFrom(t *testing.T, input []byte, name string) { + for i := 0; i < 10; i++ { + testToFromWithLevel(t, i, input, name) + } +} + +func TestDeflateInflate(t *testing.T) { + for i, h := range deflateInflateTests { + testToFrom(t, h.in, fmt.Sprintf("#%d", i)) + } +} + +func TestReverseBits(t *testing.T) { + for _, h := range reverseBitsTests { + if v := reverseBits(h.in, h.bitCount); v != h.out { + t.Errorf("reverseBits(%v,%v) = %v, want %v", + h.in, h.bitCount, v, h.out) + } + } +} + +func TestDeflateInflateString(t *testing.T) { + gold := bytes.NewBufferString(getEdata()).Bytes() + testToFromWithLevel(t, 1, gold, "2.718281828...") +} + +func getEdata() string { + return "2.718281828459045235360287471352662497757247093699959574966967627724076630353547" + + "59457138217852516642742746639193200305992181741359662904357290033429526059563073" + + "81323286279434907632338298807531952510190115738341879307021540891499348841675092" + + "44761460668082264800168477411853742345442437107539077744992069551702761838606261" + + "33138458300075204493382656029760673711320070932870912744374704723069697720931014" + + "16928368190255151086574637721112523897844250569536967707854499699679468644549059" + + "87931636889230098793127736178215424999229576351482208269895193668033182528869398" + + "49646510582093923982948879332036250944311730123819706841614039701983767932068328" + + "23764648042953118023287825098194558153017567173613320698112509961818815930416903" + + "51598888519345807273866738589422879228499892086805825749279610484198444363463244" + + "96848756023362482704197862320900216099023530436994184914631409343173814364054625" + + "31520961836908887070167683964243781405927145635490613031072085103837505101157477" + + "04171898610687396965521267154688957035035402123407849819334321068170121005627880" + + "23519303322474501585390473041995777709350366041699732972508868769664035557071622" + + "68447162560798826517871341951246652010305921236677194325278675398558944896970964" + + "09754591856956380236370162112047742722836489613422516445078182442352948636372141" + + "74023889344124796357437026375529444833799801612549227850925778256209262264832627" + + "79333865664816277251640191059004916449982893150566047258027786318641551956532442" + + "58698294695930801915298721172556347546396447910145904090586298496791287406870504" + + "89585867174798546677575732056812884592054133405392200011378630094556068816674001" + + "69842055804033637953764520304024322566135278369511778838638744396625322498506549" + + "95886234281899707733276171783928034946501434558897071942586398772754710962953741" + + "52111513683506275260232648472870392076431005958411661205452970302364725492966693" + + "81151373227536450988890313602057248176585118063036442812314965507047510254465011" + + "72721155519486685080036853228183152196003735625279449515828418829478761085263981" + + "39559900673764829224437528718462457803619298197139914756448826260390338144182326" + + "25150974827987779964373089970388867782271383605772978824125611907176639465070633" + + "04527954661855096666185664709711344474016070462621568071748187784437143698821855" + + "96709591025968620023537185887485696522000503117343920732113908032936344797273559" + + "55277349071783793421637012050054513263835440001863239914907054797780566978533580" + + "48966906295119432473099587655236812859041383241160722602998330535370876138939639" + + "17795745401613722361878936526053815584158718692553860616477983402543512843961294" + + "60352913325942794904337299085731580290958631382683291477116396337092400316894586" + + "36060645845925126994655724839186564209752685082307544254599376917041977780085362" + + "73094171016343490769642372229435236612557250881477922315197477806056967253801718" + + "07763603462459278778465850656050780844211529697521890874019660906651803516501792" + + "50461950136658543663271254963990854914420001457476081930221206602433009641270489" + + "43903971771951806990869986066365832322787093765022601492910115171776359446020232" + + "49300280401867723910288097866605651183260043688508817157238669842242201024950551" + + "88169480322100251542649463981287367765892768816359831247788652014117411091360116" + + "49950766290779436460058519419985601626479076153210387275571269925182756879893027" + + "61761146162549356495903798045838182323368612016243736569846703785853305275833337" + + "93990752166069238053369887956513728559388349989470741618155012539706464817194670" + + "83481972144888987906765037959036696724949925452790337296361626589760394985767413" + + "97359441023744329709355477982629614591442936451428617158587339746791897571211956" + + "18738578364475844842355558105002561149239151889309946342841393608038309166281881" + + "15037152849670597416256282360921680751501777253874025642534708790891372917228286" + + "11515915683725241630772254406337875931059826760944203261924285317018781772960235" + + "41306067213604600038966109364709514141718577701418060644363681546444005331608778" + + "31431744408119494229755993140118886833148328027065538330046932901157441475631399" + + "97221703804617092894579096271662260740718749975359212756084414737823303270330168" + + "23719364800217328573493594756433412994302485023573221459784328264142168487872167" + + "33670106150942434569844018733128101079451272237378861260581656680537143961278887" + + "32527373890392890506865324138062796025930387727697783792868409325365880733988457" + + "21874602100531148335132385004782716937621800490479559795929059165547050577751430" + + "81751126989851884087185640260353055837378324229241856256442550226721559802740126" + + "17971928047139600689163828665277009752767069777036439260224372841840883251848770" + + "47263844037953016690546593746161932384036389313136432713768884102681121989127522" + + "30562567562547017250863497653672886059667527408686274079128565769963137897530346" + + "60616669804218267724560530660773899624218340859882071864682623215080288286359746" + + "83965435885668550377313129658797581050121491620765676995065971534476347032085321" + + "56036748286083786568030730626576334697742956346437167093971930608769634953288468" + + "33613038829431040800296873869117066666146800015121143442256023874474325250769387" + + "07777519329994213727721125884360871583483562696166198057252661220679754062106208" + + "06498829184543953015299820925030054982570433905535701686531205264956148572492573" + + "86206917403695213533732531666345466588597286659451136441370331393672118569553952" + + "10845840724432383558606310680696492485123263269951460359603729725319836842336390" + + "46321367101161928217111502828016044880588023820319814930963695967358327420249882" + + "45684941273860566491352526706046234450549227581151709314921879592718001940968866" + + "98683703730220047531433818109270803001720593553052070070607223399946399057131158" + + "70996357773590271962850611465148375262095653467132900259943976631145459026858989" + + "79115837093419370441155121920117164880566945938131183843765620627846310490346293" + + "95002945834116482411496975832601180073169943739350696629571241027323913874175492" + + "30718624545432220395527352952402459038057445028922468862853365422138157221311632" + + "88112052146489805180092024719391710555390113943316681515828843687606961102505171" + + "00739276238555338627255353883096067164466237092264680967125406186950214317621166" + + "81400975952814939072226011126811531083873176173232352636058381731510345957365382" + + "23534992935822836851007810884634349983518404451704270189381994243410090575376257" + + "76757111809008816418331920196262341628816652137471732547772778348877436651882875" + + "21566857195063719365653903894493664217640031215278702223664636357555035655769488" + + "86549500270853923617105502131147413744106134445544192101336172996285694899193369" + + "18472947858072915608851039678195942983318648075608367955149663644896559294818785" + + "17840387733262470519450504198477420141839477312028158868457072905440575106012852" + + "58056594703046836344592652552137008068752009593453607316226118728173928074623094" + + "68536782310609792159936001994623799343421068781349734695924646975250624695861690" + + "91785739765951993929939955675427146549104568607020990126068187049841780791739240" + + "71945996323060254707901774527513186809982284730860766536866855516467702911336827" + + "56310722334672611370549079536583453863719623585631261838715677411873852772292259" + + "47433737856955384562468010139057278710165129666367644518724656537304024436841408" + + "14488732957847348490003019477888020460324660842875351848364959195082888323206522" + + "12810419044804724794929134228495197002260131043006241071797150279343326340799596" + + "05314460532304885289729176598760166678119379323724538572096075822771784833616135" + + "82612896226118129455927462767137794487586753657544861407611931125958512655759734" + + "57301533364263076798544338576171533346232527057200530398828949903425956623297578" + + "24887350292591668258944568946559926584547626945287805165017206747854178879822768" + + "06536650641910973434528878338621726156269582654478205672987756426325321594294418" + + "03994321700009054265076309558846589517170914760743713689331946909098190450129030" + + "70995662266203031826493657336984195557769637876249188528656866076005660256054457" + + "11337286840205574416030837052312242587223438854123179481388550075689381124935386" + + "31863528708379984569261998179452336408742959118074745341955142035172618420084550" + + "91708456823682008977394558426792142734775608796442792027083121501564063413416171" + + "66448069815483764491573900121217041547872591998943825364950514771379399147205219" + + "52907939613762110723849429061635760459623125350606853765142311534966568371511660" + + "42207963944666211632551577290709784731562782775987881364919512574833287937715714" + + "59091064841642678309949723674420175862269402159407924480541255360431317992696739" + + "15754241929660731239376354213923061787675395871143610408940996608947141834069836" + + "29936753626215452472984642137528910798843813060955526227208375186298370667872244" + + "30195793793786072107254277289071732854874374355781966511716618330881129120245204" + + "04868220007234403502544820283425418788465360259150644527165770004452109773558589" + + "76226554849416217149895323834216001140629507184904277892585527430352213968356790" + + "18076406042138307308774460170842688272261177180842664333651780002171903449234264" + + "26629226145600433738386833555534345300426481847398921562708609565062934040526494" + + "32442614456659212912256488935696550091543064261342526684725949143142393988454324" + + "86327461842846655985332312210466259890141712103446084271616619001257195870793217" + + "56969854401339762209674945418540711844643394699016269835160784892451405894094639" + + "52678073545797003070511636825194877011897640028276484141605872061841852971891540" + + "19688253289309149665345753571427318482016384644832499037886069008072709327673127" + + "58196656394114896171683298045513972950668760474091542042842999354102582911350224" + + "16907694316685742425225090269390348148564513030699251995904363840284292674125734" + + "22447765584177886171737265462085498294498946787350929581652632072258992368768457" + + "01782303809656788311228930580914057261086588484587310165815116753332767488701482" + + "91674197015125597825727074064318086014281490241467804723275976842696339357735429" + + "30186739439716388611764209004068663398856841681003872389214483176070116684503887" + + "21236436704331409115573328018297798873659091665961240202177855885487617616198937" + + "07943800566633648843650891448055710397652146960276625835990519870423001794655367" + + "9" +} diff --git a/libgo/go/compress/flate/flate_test.go b/libgo/go/compress/flate/flate_test.go new file mode 100644 index 000000000..bfd3b8381 --- /dev/null +++ b/libgo/go/compress/flate/flate_test.go @@ -0,0 +1,139 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This test tests some internals of the flate package. +// The tests in package compress/gzip serve as the +// end-to-end test of the decompressor. + +package flate + +import ( + "bytes" + "reflect" + "testing" +) + +// The Huffman code lengths used by the fixed-format Huffman blocks. +var fixedHuffmanBits = [...]int{ + // 0-143 length 8 + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + + // 144-255 length 9 + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + + // 256-279 length 7 + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, + + // 280-287 length 8 + 8, 8, 8, 8, 8, 8, 8, 8, +} + +type InitDecoderTest struct { + in []int + out huffmanDecoder + ok bool +} + +var initDecoderTests = []*InitDecoderTest{ + // Example from Connell 1973, + &InitDecoderTest{ + []int{3, 5, 2, 4, 3, 5, 5, 4, 4, 3, 4, 5}, + huffmanDecoder{ + 2, 5, + [maxCodeLen + 1]int{2: 0, 4, 13, 31}, + [maxCodeLen + 1]int{2: 0, 1, 6, 20}, + // Paper used different code assignment: + // 2, 9, 4, 0, 10, 8, 3, 7, 1, 5, 11, 6 + // Reordered here so that codes of same length + // are assigned to increasing numbers. + []int{2, 0, 4, 9, 3, 7, 8, 10, 1, 5, 6, 11}, + }, + true, + }, + + // Example from RFC 1951 section 3.2.2 + &InitDecoderTest{ + []int{2, 1, 3, 3}, + huffmanDecoder{ + 1, 3, + [maxCodeLen + 1]int{1: 0, 2, 7}, + [maxCodeLen + 1]int{1: 0, 1, 4}, + []int{1, 0, 2, 3}, + }, + true, + }, + + // Second example from RFC 1951 section 3.2.2 + &InitDecoderTest{ + []int{3, 3, 3, 3, 3, 2, 4, 4}, + huffmanDecoder{ + 2, 4, + [maxCodeLen + 1]int{2: 0, 6, 15}, + [maxCodeLen + 1]int{2: 0, 1, 8}, + []int{5, 0, 1, 2, 3, 4, 6, 7}, + }, + true, + }, + + // Static Huffman codes (RFC 1951 section 3.2.6) + &InitDecoderTest{ + fixedHuffmanBits[0:], + fixedHuffmanDecoder, + true, + }, + + // Illegal input. + &InitDecoderTest{ + []int{}, + huffmanDecoder{}, + false, + }, + + // Illegal input. + &InitDecoderTest{ + []int{0, 0, 0, 0, 0, 0, 0}, + huffmanDecoder{}, + false, + }, +} + +func TestInitDecoder(t *testing.T) { + for i, tt := range initDecoderTests { + var h huffmanDecoder + if h.init(tt.in) != tt.ok { + t.Errorf("test %d: init = %v", i, !tt.ok) + continue + } + if !reflect.DeepEqual(&h, &tt.out) { + t.Errorf("test %d:\nhave %v\nwant %v", i, h, tt.out) + } + } +} + +func TestUncompressedSource(t *testing.T) { + decoder := NewReader(bytes.NewBuffer([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11})) + output := make([]byte, 1) + n, error := decoder.Read(output) + if n != 1 || error != nil { + t.Fatalf("decoder.Read() = %d, %v, want 1, nil", n, error) + } + if output[0] != 0x11 { + t.Errorf("output[0] = %x, want 0x11", output[0]) + } +} diff --git a/libgo/go/compress/flate/huffman_bit_writer.go b/libgo/go/compress/flate/huffman_bit_writer.go new file mode 100644 index 000000000..abff82dd6 --- /dev/null +++ b/libgo/go/compress/flate/huffman_bit_writer.go @@ -0,0 +1,506 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" + "math" + "os" + "strconv" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The largest offset code in the extensions. + extendedOffsetCodeCount = 42 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 +) + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = []int8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = []uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// offset code word extra bits. +var offsetExtraBits = []int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, +} + +var offsetBase = []uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, + 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, + 0x100000, 0x180000, 0x200000, 0x300000, +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + w io.Writer + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint32 + nbits uint32 + bytes [64]byte + nbytes int + literalFreq []int32 + offsetFreq []int32 + codegen []uint8 + codegenFreq []int32 + literalEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err os.Error +} + +type WrongValueError struct { + name string + from int32 + to int32 + value int32 +} + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + w: w, + literalFreq: make([]int32, maxLit), + offsetFreq: make([]int32, extendedOffsetCodeCount), + codegen: make([]uint8, maxLit+extendedOffsetCodeCount+1), + codegenFreq: make([]int32, codegenCodeCount), + literalEncoding: newHuffmanEncoder(maxLit), + offsetEncoding: newHuffmanEncoder(extendedOffsetCodeCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + } +} + +func (err WrongValueError) String() string { + return "huffmanBitWriter: " + err.name + " should belong to [" + strconv.Itoa64(int64(err.from)) + ";" + + strconv.Itoa64(int64(err.to)) + "] but actual value is " + strconv.Itoa64(int64(err.value)) +} + +func (w *huffmanBitWriter) flushBits() { + if w.err != nil { + w.nbits = 0 + return + } + bits := w.bits + w.bits >>= 16 + w.nbits -= 16 + n := w.nbytes + w.bytes[n] = byte(bits) + w.bytes[n+1] = byte(bits >> 8) + if n += 2; n >= len(w.bytes) { + _, w.err = w.w.Write(w.bytes[0:]) + n = 0 + } + w.nbytes = n +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + n := w.nbytes + if w.nbits > 8 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if w.nbits > 0 { + w.bytes[n] = byte(w.bits) + w.nbits = 0 + n++ + } + w.bits = 0 + _, w.err = w.w.Write(w.bytes[0:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) writeBits(b, nb int32) { + w.bits |= uint32(b) << w.nbits + if w.nbits += uint32(nb); w.nbits >= 16 { + w.flushBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits == 8 { + w.bytes[n] = byte(w.bits) + w.nbits = 0 + n++ + } + if w.nbits != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + if n != 0 { + _, w.err = w.w.Write(w.bytes[0:n]) + if w.err != nil { + return + } + } + w.nbytes = 0 + _, w.err = w.w.Write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifiying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int) { + fillInt32s(w.codegenFreq, 0) + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + copyUint8s(codegen[0:numLiterals], w.literalEncoding.codeBits) + copyUint8s(codegen[numLiterals:numLiterals+numOffsets], w.offsetEncoding.codeBits) + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := min(count, 6) + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := min(count, 138) + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) writeCode(code *huffmanEncoder, literal uint32) { + if w.err != nil { + return + } + w.writeBits(int32(code.code[literal]), int32(code.codeBits[literal])) +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens Tne number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + if numOffsets > offsetCodeCount { + // Extended version of decompressor + w.writeBits(int32(offsetCodeCount+((numOffsets-(1+offsetCodeCount))>>3)), 5) + w.writeBits(int32((numOffsets-(1+offsetCodeCount))&0x7), 3) + } else { + w.writeBits(int32(numOffsets-1), 5) + } + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := w.codegenEncoding.codeBits[codegenOrder[i]] + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord int = int(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + // The low byte contains the actual code to generate. + w.writeCode(w.codegenEncoding, uint32(codeWord)) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + break + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + break + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + break + } + } +} + +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { + if w.err != nil { + return + } + fillInt32s(w.literalFreq, 0) + fillInt32s(w.offsetFreq, 0) + + n := len(tokens) + tokens = tokens[0 : n+1] + tokens[n] = endBlockMarker + + totalLength := -1 // Subtract 1 for endBlock. + for _, t := range tokens { + switch t.typ() { + case literalType: + w.literalFreq[t.literal()]++ + totalLength++ + break + case matchType: + length := t.length() + offset := t.offset() + totalLength += int(length + 3) + w.literalFreq[lengthCodesStart+lengthCode(length)]++ + w.offsetFreq[offsetCode(offset)]++ + break + } + } + w.literalEncoding.generate(w.literalFreq, 15) + w.offsetEncoding.generate(w.offsetFreq, 15) + + // get the number of literals + numLiterals := len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets := len(w.offsetFreq) + for numOffsets > 1 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + storedBytes := 0 + if input != nil { + storedBytes = len(input) + } + var extraBits int64 + var storedSize int64 + if storedBytes <= maxStoreBlockSize && input != nil { + storedSize = int64((storedBytes + 5) * 8) + // We only bother calculating the costs of the extra bits required by + // the length of offset fields (which will be the same for both fixed + // and dynamic encoding), if we need to compare those two encodings + // against stored encoding. + for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ { + // First eight length codes have extra size = 0. + extraBits += int64(w.literalFreq[lengthCode]) * int64(lengthExtraBits[lengthCode-lengthCodesStart]) + } + for offsetCode := 4; offsetCode < numOffsets; offsetCode++ { + // First four offset codes have extra size = 0. + extraBits += int64(w.offsetFreq[offsetCode]) * int64(offsetExtraBits[offsetCode]) + } + } else { + storedSize = math.MaxInt32 + } + + // Figure out which generates smaller code, fixed Huffman, dynamic + // Huffman, or just storing the data. + var fixedSize int64 = math.MaxInt64 + if numOffsets <= offsetCodeCount { + fixedSize = int64(3) + + fixedLiteralEncoding.bitLength(w.literalFreq) + + fixedOffsetEncoding.bitLength(w.offsetFreq) + + extraBits + } + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets) + w.codegenEncoding.generate(w.codegenFreq, 7) + numCodegens := len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + extensionSummand := 0 + if numOffsets > offsetCodeCount { + extensionSummand = 3 + } + dynamicHeader := int64(3+5+5+4+(3*numCodegens)) + + // Following line is an extension. + int64(extensionSummand) + + w.codegenEncoding.bitLength(w.codegenFreq) + + int64(extraBits) + + int64(w.codegenFreq[16]*2) + + int64(w.codegenFreq[17]*3) + + int64(w.codegenFreq[18]*7) + dynamicSize := dynamicHeader + + w.literalEncoding.bitLength(w.literalFreq) + + w.offsetEncoding.bitLength(w.offsetFreq) + + if storedSize < fixedSize && storedSize < dynamicSize { + w.writeStoredHeader(storedBytes, eof) + w.writeBytes(input[0:storedBytes]) + return + } + var literalEncoding *huffmanEncoder + var offsetEncoding *huffmanEncoder + + if fixedSize <= dynamicSize { + w.writeFixedHeader(eof) + literalEncoding = fixedLiteralEncoding + offsetEncoding = fixedOffsetEncoding + } else { + // Write the header. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Write the tokens. + for _, t := range tokens { + switch t.typ() { + case literalType: + w.writeCode(literalEncoding, t.literal()) + break + case matchType: + // Write the length + length := t.length() + lengthCode := lengthCode(length) + w.writeCode(literalEncoding, lengthCode+lengthCodesStart) + extraLengthBits := int32(lengthExtraBits[lengthCode]) + if extraLengthBits > 0 { + extraLength := int32(length - lengthBase[lengthCode]) + w.writeBits(extraLength, extraLengthBits) + } + // Write the offset + offset := t.offset() + offsetCode := offsetCode(offset) + w.writeCode(offsetEncoding, offsetCode) + extraOffsetBits := int32(offsetExtraBits[offsetCode]) + if extraOffsetBits > 0 { + extraOffset := int32(offset - offsetBase[offsetCode]) + w.writeBits(extraOffset, extraOffsetBits) + } + break + default: + panic("unknown token type: " + string(t)) + } + } +} diff --git a/libgo/go/compress/flate/huffman_code.go b/libgo/go/compress/flate/huffman_code.go new file mode 100644 index 000000000..6be605f0a --- /dev/null +++ b/libgo/go/compress/flate/huffman_code.go @@ -0,0 +1,373 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "sort" +) + +type huffmanEncoder struct { + codeBits []uint8 + code []uint16 +} + +type literalNode struct { + literal uint16 + freq int32 +} + +type chain struct { + // The sum of the leaves in this tree + freq int32 + + // The number of literals to the left of this item at this level + leafCount int32 + + // The right child of this chain in the previous level. + up *chain +} + +type levelInfo struct { + // Our level. for better printing + level int32 + + // The most recent chain generated for this level + lastChain *chain + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 + + // The levelInfo for level+1 + up *levelInfo + + // The levelInfo for level-1 + down *levelInfo +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxInt32} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + return &huffmanEncoder{make([]uint8, size), make([]uint16, size)} +} + +// Generates a HuffmanCode corresponding to the fixed literal table +func generateFixedLiteralEncoding() *huffmanEncoder { + h := newHuffmanEncoder(maxLit) + codeBits := h.codeBits + code := h.code + var ch uint16 + for ch = 0; ch < maxLit; ch++ { + var bits uint16 + var size uint8 + switch { + case ch < 144: + // size 8, 000110000 .. 10111111 + bits = ch + 48 + size = 8 + break + case ch < 256: + // size 9, 110010000 .. 111111111 + bits = ch + 400 - 144 + size = 9 + break + case ch < 280: + // size 7, 0000000 .. 0010111 + bits = ch - 256 + size = 7 + break + default: + // size 8, 11000000 .. 11000111 + bits = ch + 192 - 280 + size = 8 + } + codeBits[ch] = size + code[ch] = reverseBits(bits, size) + } + return h +} + +func generateFixedOffsetEncoding() *huffmanEncoder { + h := newHuffmanEncoder(30) + codeBits := h.codeBits + code := h.code + for ch := uint16(0); ch < 30; ch++ { + codeBits[ch] = 5 + code[ch] = reverseBits(ch, 5) + } + return h +} + +var fixedLiteralEncoding *huffmanEncoder = generateFixedLiteralEncoding() +var fixedOffsetEncoding *huffmanEncoder = generateFixedOffsetEncoding() + +func (h *huffmanEncoder) bitLength(freq []int32) int64 { + var total int64 + for i, f := range freq { + if f != 0 { + total += int64(f) * int64(h.codeBits[i]) + } + } + return total +} + +// Generate elements in the chain using an iterative algorithm. +func (h *huffmanEncoder) generateChains(top *levelInfo, list []literalNode) { + n := len(list) + list = list[0 : n+1] + list[n] = maxNode() + + l := top + for { + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To m sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.lastChain = nil + l.needed = 0 + l = l.up + l.nextPairFreq = math.MaxInt32 + continue + } + + prevFreq := l.lastChain.freq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := l.lastChain.leafCount + 1 + l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up} + l.nextCharFreq = list[n].freq + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain} + l.down.needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + up := l.up + if up == nil { + // All done! + return + } + up.nextPairFreq = prevFreq + l.lastChain.freq + l = up + } else { + // If we stole from below, move down temporarily to replenish it. + for l.down.needed > 0 { + l = l.down + } + } + } +} + +// Return the number of literals assigned to each bit size in the Huffman encoding +// +// This method is only called when list.length >= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + maxBits = minInt32(maxBits, n-1) + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + top := &levelInfo{needed: 0} + chain2 := &chain{list[1].freq, 2, new(chain)} + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + top = &levelInfo{ + level: level, + lastChain: chain2, + nextCharFreq: list[2].freq, + nextPairFreq: list[0].freq + list[1].freq, + down: top, + } + top.down.up = top + if level == 1 { + top.nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + top.needed = 2*n - 4 + + l := top + for { + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To m sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.lastChain = nil + l.needed = 0 + l = l.up + l.nextPairFreq = math.MaxInt32 + continue + } + + prevFreq := l.lastChain.freq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := l.lastChain.leafCount + 1 + l.lastChain = &chain{l.nextCharFreq, n, l.lastChain.up} + l.nextCharFreq = list[n].freq + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastChain = &chain{l.nextPairFreq, l.lastChain.leafCount, l.down.lastChain} + l.down.needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + up := l.up + if up == nil { + // All done! + break + } + up.nextPairFreq = prevFreq + l.lastChain.freq + l = up + } else { + // If we stole from below, move down temporarily to replenish it. + for l.down.needed > 0 { + l = l.down + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if top.lastChain.leafCount != n { + panic("top.lastChain.leafCount != n") + } + + bitCount := make([]int32, maxBits+1) + bits := 1 + for chain := top.lastChain; chain.up != nil; chain = chain.up { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = chain.leafCount - chain.up.leafCount + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + sortByLiteral(chunk) + for _, node := range chunk { + h.codeBits[node.literal] = uint8(n) + h.code[node.literal] = reverseBits(code, uint8(n)) + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []int32, maxBits int32) { + list := make([]literalNode, len(freq)+1) + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + h.codeBits[i] = 0 + } + } + // If freq[] is shorter than codeBits[], fill rest of codeBits[] with zeros + h.codeBits = h.codeBits[0:len(freq)] + list = list[0:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codeBits[node.literal] = 1 + h.code[node.literal] = uint16(i) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +type literalNodeSorter struct { + a []literalNode + less func(i, j int) bool +} + +func (s literalNodeSorter) Len() int { return len(s.a) } + +func (s literalNodeSorter) Less(i, j int) bool { + return s.less(i, j) +} + +func (s literalNodeSorter) Swap(i, j int) { s.a[i], s.a[j] = s.a[j], s.a[i] } + +func sortByFreq(a []literalNode) { + s := &literalNodeSorter{a, func(i, j int) bool { return a[i].freq < a[j].freq }} + sort.Sort(s) +} + +func sortByLiteral(a []literalNode) { + s := &literalNodeSorter{a, func(i, j int) bool { return a[i].literal < a[j].literal }} + sort.Sort(s) +} diff --git a/libgo/go/compress/flate/inflate.go b/libgo/go/compress/flate/inflate.go new file mode 100644 index 000000000..7dc8cf93b --- /dev/null +++ b/libgo/go/compress/flate/inflate.go @@ -0,0 +1,620 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The flate package implements the DEFLATE compressed data +// format, described in RFC 1951. The gzip and zlib packages +// implement access to DEFLATE-based file formats. +package flate + +import ( + "bufio" + "io" + "os" + "strconv" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxHist = 32768 // max history required + maxLit = 286 + maxDist = 32 + numCodes = 19 // number of codes in Huffman meta-code +) + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) String() string { + return "flate: corrupt input before offset " + strconv.Itoa64(int64(e)) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) String() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +type ReadError struct { + Offset int64 // byte offset where error occurred + Error os.Error // error returned by underlying Read +} + +func (e *ReadError) String() string { + return "flate: read error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String() +} + +// A WriteError reports an error encountered while writing output. +type WriteError struct { + Offset int64 // byte offset where error occurred + Error os.Error // error returned by underlying Write +} + +func (e *WriteError) String() string { + return "flate: write error at offset " + strconv.Itoa64(e.Offset) + ": " + e.Error.String() +} + +// Huffman decoder is based on +// J. Brian Connell, ``A Huffman-Shannon-Fano Code,'' +// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047. +type huffmanDecoder struct { + // min, max code length + min, max int + + // limit[i] = largest code word of length i + // Given code v of length n, + // need more bits if v > limit[n]. + limit [maxCodeLen + 1]int + + // base[i] = smallest code word of length i - seq number + base [maxCodeLen + 1]int + + // codes[seq number] = output code. + // Given code v of length n, value is + // codes[v - base[n]]. + codes []int +} + +// Initialize Huffman decoding tables from array of code lengths. +func (h *huffmanDecoder) init(bits []int) bool { + // TODO(rsc): Return false sometimes. + + // Count number of codes of each length, + // compute min and max length. + var count [maxCodeLen + 1]int + var min, max int + for _, n := range bits { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n]++ + } + if max == 0 { + return false + } + + h.min = min + h.max = max + + // For each code range, compute + // nextcode (first code of that length), + // limit (last code of that length), and + // base (offset from first code to sequence number). + code := 0 + seq := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + n := count[i] + nextcode[i] = code + h.base[i] = code - seq + code += n + seq += n + h.limit[i] = code - 1 + code <<= 1 + } + + // Make array mapping sequence numbers to codes. + if len(h.codes) < len(bits) { + h.codes = make([]int, len(bits)) + } + for i, n := range bits { + if n == 0 { + continue + } + code := nextcode[n] + nextcode[n]++ + seq := code - h.base[n] + h.codes[seq] = i + } + return true +} + +// Hard-coded Huffman tables for DEFLATE algorithm. +// See RFC 1951, section 3.2.6. +var fixedHuffmanDecoder = huffmanDecoder{ + 7, 9, + [maxCodeLen + 1]int{7: 23, 199, 511}, + [maxCodeLen + 1]int{7: 0, 24, 224}, + []int{ + // length 7: 256-279 + 256, 257, 258, 259, 260, 261, 262, + 263, 264, 265, 266, 267, 268, 269, + 270, 271, 272, 273, 274, 275, 276, + 277, 278, 279, + + // length 8: 0-143 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, + 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, + 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, 100, + 101, 102, 103, 104, 105, 106, 107, 108, + 109, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, 123, 124, + 125, 126, 127, 128, 129, 130, 131, 132, + 133, 134, 135, 136, 137, 138, 139, 140, + 141, 142, 143, + + // length 8: 280-287 + 280, 281, 282, 283, 284, 285, 286, 287, + + // length 9: 144-255 + 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, + }, +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + ReadByte() (c byte, err os.Error) +} + +// Decompress state. +type decompressor struct { + // Input/output sources. + r Reader + w io.Writer + roffset int64 + woffset int64 + + // Input bits, in top of b. + b uint32 + nb uint + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits [maxLit + maxDist]int + codebits [numCodes]int + + // Output history, buffer. + hist [maxHist]byte + hp int // current output position in buffer + hw int // have written hist[0:hw] already + hfull bool // buffer has filled at least once + + // Temporary buffer (avoids repeated allocation). + buf [4]byte +} + +func (f *decompressor) inflate() (err os.Error) { + final := false + for err == nil && !final { + for f.nb < 1+2 { + if err = f.moreBits(); err != nil { + return + } + } + final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + err = f.dataBlock() + case 1: + // compressed, fixed Huffman tables + err = f.decodeBlock(&fixedHuffmanDecoder, nil) + case 2: + // compressed, dynamic Huffman tables + if err = f.readHuffman(); err == nil { + err = f.decodeBlock(&f.h1, &f.h2) + } + default: + // 3 is reserved. + err = CorruptInputError(f.roffset) + } + } + return +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() os.Error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + return err + } + } + rep += int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + if i+rep > n { + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + return CorruptInputError(f.roffset) + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) decodeBlock(hl, hd *huffmanDecoder) os.Error { + for { + v, err := f.huffSym(hl) + if err != nil { + return err + } + var n uint // number of bits extra + var length int + switch { + case v < 256: + f.hist[f.hp] = byte(v) + f.hp++ + if f.hp == len(f.hist) { + if err = f.flush(); err != nil { + return err + } + } + continue + case v == 256: + return nil + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + default: + length = 258 + n = 0 + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + return err + } + } + length += int(f.b & uint32(1<<n-1)) + f.b >>= n + f.nb -= n + } + + var dist int + if hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + return err + } + } + dist = int(reverseByte[(f.b&0x1F)<<3]) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(hd); err != nil { + return err + } + } + + switch { + case dist < 4: + dist++ + case dist >= 30: + return CorruptInputError(f.roffset) + default: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + return err + } + } + extra |= int(f.b & uint32(1<<nb-1)) + f.b >>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + } + + // Copy history[-dist:-dist+length] into output. + if dist > len(f.hist) { + return InternalError("bad history distance") + } + + // No check on length; encoding can be prescient. + if !f.hfull && dist > f.hp { + return CorruptInputError(f.roffset) + } + + p := f.hp - dist + if p < 0 { + p += len(f.hist) + } + for i := 0; i < length; i++ { + f.hist[f.hp] = f.hist[p] + f.hp++ + p++ + if f.hp == len(f.hist) { + if err = f.flush(); err != nil { + return err + } + } + if p == len(f.hist) { + p = 0 + } + } + } + panic("unreached") +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() os.Error { + // Uncompressed. + // Discard current half-byte. + f.nb = 0 + f.b = 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[0:4]) + f.roffset += int64(nr) + if err != nil { + return &ReadError{f.roffset, err} + } + n := int(f.buf[0]) | int(f.buf[1])<<8 + nn := int(f.buf[2]) | int(f.buf[3])<<8 + if uint16(nn) != uint16(^n) { + return CorruptInputError(f.roffset) + } + + if n == 0 { + // 0-length block means sync + return f.flush() + } + + // Read len bytes into history, + // writing as history fills. + for n > 0 { + m := len(f.hist) - f.hp + if m > n { + m = n + } + m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) + f.roffset += int64(m) + if err != nil { + return &ReadError{f.roffset, err} + } + n -= m + f.hp += m + if f.hp == len(f.hist) { + if err = f.flush(); err != nil { + return err + } + } + } + return nil +} + +func (f *decompressor) moreBits() os.Error { + c, err := f.r.ReadByte() + if err != nil { + if err == os.EOF { + err = io.ErrUnexpectedEOF + } + return err + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, os.Error) { + for n := uint(h.min); n <= uint(h.max); n++ { + lim := h.limit[n] + if lim == -1 { + continue + } + for f.nb < n { + if err := f.moreBits(); err != nil { + return 0, err + } + } + v := int(f.b & uint32(1<<n-1)) + v <<= 16 - n + v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits + if v <= lim { + f.b >>= n + f.nb -= n + return h.codes[v-h.base[n]], nil + } + } + return 0, CorruptInputError(f.roffset) +} + +// Flush any buffered output to the underlying writer. +func (f *decompressor) flush() os.Error { + if f.hw == f.hp { + return nil + } + n, err := f.w.Write(f.hist[f.hw:f.hp]) + if n != f.hp-f.hw && err == nil { + err = io.ErrShortWrite + } + if err != nil { + return &WriteError{f.woffset, err} + } + f.woffset += int64(f.hp - f.hw) + f.hw = f.hp + if f.hp == len(f.hist) { + f.hp = 0 + f.hw = 0 + f.hfull = true + } + return nil +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +// decompress reads DEFLATE-compressed data from r and writes +// the uncompressed data to w. +func (f *decompressor) decompress(r io.Reader, w io.Writer) os.Error { + f.r = makeReader(r) + f.w = w + f.woffset = 0 + if err := f.inflate(); err != nil { + return err + } + if err := f.flush(); err != nil { + return err + } + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. It is the caller's +// responsibility to call Close on the ReadCloser when +// finished reading. +func NewReader(r io.Reader) io.ReadCloser { + var f decompressor + pr, pw := io.Pipe() + go func() { pw.CloseWithError(f.decompress(r, pw)) }() + return pr +} diff --git a/libgo/go/compress/flate/reverse_bits.go b/libgo/go/compress/flate/reverse_bits.go new file mode 100644 index 000000000..c1a02720d --- /dev/null +++ b/libgo/go/compress/flate/reverse_bits.go @@ -0,0 +1,48 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +var reverseByte = [256]byte{ + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +} + +func reverseUint16(v uint16) uint16 { + return uint16(reverseByte[v>>8]) | uint16(reverseByte[v&0xFF])<<8 +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return reverseUint16(number << uint8(16-bitLength)) +} diff --git a/libgo/go/compress/flate/token.go b/libgo/go/compress/flate/token.go new file mode 100644 index 000000000..38aea5fa6 --- /dev/null +++ b/libgo/go/compress/flate/token.go @@ -0,0 +1,103 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +const ( + // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused + // 8 bits: xlength = length - MIN_MATCH_LENGTH + // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal + lengthShift = 22 + offsetMask = 1<<lengthShift - 1 + typeMask = 3 << 30 + literalType = 0 << 30 + matchType = 1 << 30 +) + +// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH) +// is lengthCodes[length - MIN_MATCH_LENGTH] +var lengthCodes = [...]uint32{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, + 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, + 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, + 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, + 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, + 27, 27, 27, 27, 27, 28, +} + +var offsetCodes = [...]uint32{ + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +} + +type token uint32 + +// Convert a literal into a literal token. +func literalToken(literal uint32) token { return token(literalType + literal) } + +// Convert a < xlength, xoffset > pair into a match token. +func matchToken(xlength uint32, xoffset uint32) token { + return token(matchType + xlength<<lengthShift + xoffset) +} + +// Returns the type of a token +func (t token) typ() uint32 { return uint32(t) & typeMask } + +// Returns the literal of a literal token +func (t token) literal() uint32 { return uint32(t - literalType) } + +// Returns the extra offset of a match token +func (t token) offset() uint32 { return uint32(t) & offsetMask } + +func (t token) length() uint32 { return uint32((t - matchType) >> lengthShift) } + +func lengthCode(len uint32) uint32 { return lengthCodes[len] } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + const n = uint32(len(offsetCodes)) + switch { + case off < n: + return offsetCodes[off] + case off>>7 < n: + return offsetCodes[off>>7] + 14 + default: + return offsetCodes[off>>14] + 28 + } + panic("unreachable") +} diff --git a/libgo/go/compress/flate/util.go b/libgo/go/compress/flate/util.go new file mode 100644 index 000000000..aca5c78b2 --- /dev/null +++ b/libgo/go/compress/flate/util.go @@ -0,0 +1,72 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +func min(left int, right int) int { + if left < right { + return left + } + return right +} + +func minInt32(left int32, right int32) int32 { + if left < right { + return left + } + return right +} + +func max(left int, right int) int { + if left > right { + return left + } + return right +} + +func fillInts(a []int, value int) { + for i := range a { + a[i] = value + } +} + +func fillInt32s(a []int32, value int32) { + for i := range a { + a[i] = value + } +} + +func fillBytes(a []byte, value byte) { + for i := range a { + a[i] = value + } +} + +func fillInt8s(a []int8, value int8) { + for i := range a { + a[i] = value + } +} + +func fillUint8s(a []uint8, value uint8) { + for i := range a { + a[i] = value + } +} + +func copyInt8s(dst []int8, src []int8) int { + cnt := min(len(dst), len(src)) + for i := 0; i < cnt; i++ { + dst[i] = src[i] + } + return cnt +} + +func copyUint8s(dst []uint8, src []uint8) int { + cnt := min(len(dst), len(src)) + for i := 0; i < cnt; i++ { + dst[i] = src[i] + } + return cnt +} |