001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.ByteArrayOutputStream;
022import java.io.IOException;
023import java.io.OutputStream;
024
025import org.apache.commons.compress.compressors.CompressorOutputStream;
026import org.apache.commons.compress.utils.ByteUtils;
027
028/**
029 * CompressorOutputStream for the LZ4 frame format.
030 *
031 * <p>Based on the "spec" in the version "1.5.1 (31/03/2015)"</p>
032 *
033 * @see <a href="http://lz4.github.io/lz4/lz4_Frame_format.html">LZ4 Frame Format Description</a>
034 * @since 1.14
035 * @NotThreadSafe
036 */
037public class FramedLZ4CompressorOutputStream extends CompressorOutputStream {
038
039    /**
040     * The block sizes supported by the format.
041     */
042    public enum BlockSize {
043        /** Block size of 64K */
044        K64(64 * 1024, 4),
045        /** Block size of 256K */
046        K256(256 * 1024, 5),
047        /** Block size of 1M */
048        M1(1024 * 1024, 6),
049        /** Block size of 4M */
050        M4(4096 * 1024, 7);
051
052        private final int size, index;
053        BlockSize(final int size, final int index) {
054            this.size = size;
055            this.index = index;
056        }
057        int getIndex() {
058            return index;
059        }
060        int getSize() {
061            return size;
062        }
063    }
064
065    /**
066     * Parameters of the LZ4 frame format.
067     */
068    public static class Parameters {
069        /**
070         * The default parameters of 4M block size, enabled content
071         * checksum, disabled block checksums and independent blocks.
072         *
073         * <p>This matches the defaults of the lz4 command line utility.</p>
074         */
075        public static final Parameters DEFAULT = new Parameters(BlockSize.M4, true, false, false);
076        private final BlockSize blockSize;
077        private final boolean withContentChecksum, withBlockChecksum, withBlockDependency;
078
079        private final org.apache.commons.compress.compressors.lz77support.Parameters lz77params;
080
081        /**
082         * Sets up custom a custom block size for the LZ4 stream but
083         * otherwise uses the defaults of enabled content checksum,
084         * disabled block checksums and independent blocks.
085         * @param blockSize the size of a single block.
086         */
087        public Parameters(final BlockSize blockSize) {
088            this(blockSize, true, false, false);
089        }
090        /**
091         * Sets up custom parameters for the LZ4 stream.
092         * @param blockSize the size of a single block.
093         * @param withContentChecksum whether to write a content checksum
094         * @param withBlockChecksum whether to write a block checksum.
095         * Note that block checksums are not supported by the lz4
096         * command line utility
097         * @param withBlockDependency whether a block may depend on
098         * the content of a previous block. Enabling this may improve
099         * compression ratio but makes it impossible to decompress the
100         * output in parallel.
101         */
102        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
103            final boolean withBlockDependency) {
104            this(blockSize, withContentChecksum, withBlockChecksum, withBlockDependency,
105                 BlockLZ4CompressorOutputStream.createParameterBuilder().build());
106        }
107        /**
108         * Sets up custom parameters for the LZ4 stream.
109         * @param blockSize the size of a single block.
110         * @param withContentChecksum whether to write a content checksum
111         * @param withBlockChecksum whether to write a block checksum.
112         * Note that block checksums are not supported by the lz4
113         * command line utility
114         * @param withBlockDependency whether a block may depend on
115         * the content of a previous block. Enabling this may improve
116         * compression ratio but makes it impossible to decompress the
117         * output in parallel.
118         * @param lz77params parameters used to fine-tune compression,
119         * in particular to balance compression ratio vs compression
120         * speed.
121         */
122        public Parameters(final BlockSize blockSize, final boolean withContentChecksum, final boolean withBlockChecksum,
123                final boolean withBlockDependency,
124                final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
125            this.blockSize = blockSize;
126            this.withContentChecksum = withContentChecksum;
127            this.withBlockChecksum = withBlockChecksum;
128            this.withBlockDependency = withBlockDependency;
129            this.lz77params = lz77params;
130        }
131
132        /**
133         * Sets up custom a custom block size for the LZ4 stream but
134         * otherwise uses the defaults of enabled content checksum,
135         * disabled block checksums and independent blocks.
136         * @param blockSize the size of a single block.
137         * @param lz77params parameters used to fine-tune compression,
138         * in particular to balance compression ratio vs compression
139         * speed.
140         */
141        public Parameters(final BlockSize blockSize,
142            final org.apache.commons.compress.compressors.lz77support.Parameters lz77params) {
143            this(blockSize, true, false, false, lz77params);
144        }
145
146        @Override
147        public String toString() {
148            return "LZ4 Parameters with BlockSize " + blockSize + ", withContentChecksum " + withContentChecksum
149                + ", withBlockChecksum " + withBlockChecksum + ", withBlockDependency " + withBlockDependency;
150        }
151    }
152
153    private static final byte[] END_MARK = new byte[4];
154    // used in one-arg write method
155    private final byte[] oneByte = new byte[1];
156    private final byte[] blockData;
157    private final OutputStream out;
158    private final Parameters params;
159
160    private boolean finished;
161
162    // used for frame header checksum and content checksum, if requested
163    private final XXHash32 contentHash = new XXHash32();
164    // used for block checksum, if requested
165    private final XXHash32 blockHash;
166
167    // only created if the config requires block dependency
168    private final byte[] blockDependencyBuffer;
169
170    private int collectedBlockDependencyBytes;
171    private int currentIndex;
172
173    /**
174     * Constructs a new output stream that compresses data using the
175     * LZ4 frame format using the default block size of 4MB.
176     * @param out the OutputStream to which to write the compressed data
177     * @throws IOException if writing the signature fails
178     */
179    public FramedLZ4CompressorOutputStream(final OutputStream out) throws IOException {
180        this(out, Parameters.DEFAULT);
181    }
182
183    /**
184     * Constructs a new output stream that compresses data using the
185     * LZ4 frame format using the given block size.
186     * @param out the OutputStream to which to write the compressed data
187     * @param params the parameters to use
188     * @throws IOException if writing the signature fails
189     */
190    public FramedLZ4CompressorOutputStream(final OutputStream out, final Parameters params) throws IOException {
191        this.params = params;
192        blockData = new byte[params.blockSize.getSize()];
193        this.out = out;
194        blockHash = params.withBlockChecksum ? new XXHash32() : null;
195        out.write(FramedLZ4CompressorInputStream.LZ4_SIGNATURE);
196        writeFrameDescriptor();
197        blockDependencyBuffer = params.withBlockDependency
198            ? new byte[BlockLZ4CompressorInputStream.WINDOW_SIZE]
199            : null;
200    }
201
202    private void appendToBlockDependencyBuffer(final byte[] b, final int off, int len) {
203        len = Math.min(len, blockDependencyBuffer.length);
204        if (len > 0) {
205            final int keep = blockDependencyBuffer.length - len;
206            if (keep > 0) {
207                // move last keep bytes towards the start of the buffer
208                System.arraycopy(blockDependencyBuffer, len, blockDependencyBuffer, 0, keep);
209            }
210            // append new data
211            System.arraycopy(b, off, blockDependencyBuffer, keep, len);
212            collectedBlockDependencyBytes = Math.min(collectedBlockDependencyBytes + len,
213                blockDependencyBuffer.length);
214        }
215    }
216
217    @Override
218    public void close() throws IOException {
219        try {
220            finish();
221        } finally {
222            out.close();
223        }
224    }
225
226    /**
227     * Compresses all blockDataRemaining data and writes it to the stream,
228 doesn't close the underlying stream.
229     * @throws IOException if an error occurs
230     */
231    public void finish() throws IOException {
232        if (!finished) {
233            flushBlock();
234            writeTrailer();
235            finished = true;
236        }
237    }
238
239    private void flushBlock() throws IOException {
240        if (currentIndex == 0) {
241            return;
242        }
243        final boolean withBlockDependency = params.withBlockDependency;
244        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
245        try (BlockLZ4CompressorOutputStream o = new BlockLZ4CompressorOutputStream(baos, params.lz77params)) {
246            if (withBlockDependency) {
247                o.prefill(blockDependencyBuffer, blockDependencyBuffer.length - collectedBlockDependencyBytes,
248                    collectedBlockDependencyBytes);
249            }
250            o.write(blockData, 0, currentIndex);
251        }
252        if (withBlockDependency) {
253            appendToBlockDependencyBuffer(blockData, 0, currentIndex);
254        }
255        final byte[] b = baos.toByteArray();
256        if (b.length > currentIndex) { // compression increased size, maybe beyond blocksize
257            ByteUtils.toLittleEndian(out, currentIndex | FramedLZ4CompressorInputStream.UNCOMPRESSED_FLAG_MASK,
258                4);
259            out.write(blockData, 0, currentIndex);
260            if (params.withBlockChecksum) {
261                blockHash.update(blockData, 0, currentIndex);
262            }
263        } else {
264            ByteUtils.toLittleEndian(out, b.length, 4);
265            out.write(b);
266            if (params.withBlockChecksum) {
267                blockHash.update(b, 0, b.length);
268            }
269        }
270        if (params.withBlockChecksum) {
271            ByteUtils.toLittleEndian(out, blockHash.getValue(), 4);
272            blockHash.reset();
273        }
274        currentIndex = 0;
275    }
276
277    @Override
278    public void write(final byte[] data, int off, int len) throws IOException {
279        if (params.withContentChecksum) {
280            contentHash.update(data, off, len);
281        }
282        int blockDataRemaining = blockData.length - currentIndex;
283        while (len > 0) {
284            int copyLen = Math.min(len, blockDataRemaining);
285            System.arraycopy(data, off, blockData, currentIndex, copyLen);
286            off += copyLen;
287            blockDataRemaining -= copyLen;
288            len -= copyLen;
289            currentIndex += copyLen;
290            if (blockDataRemaining == 0) {
291                flushBlock();
292                blockDataRemaining = blockData.length;
293            }
294        }
295    }
296
297    @Override
298    public void write(final int b) throws IOException {
299        oneByte[0] = (byte) (b & 0xff);
300        write(oneByte);
301    }
302
303    private void writeFrameDescriptor() throws IOException {
304        int flags = FramedLZ4CompressorInputStream.SUPPORTED_VERSION;
305        if (!params.withBlockDependency) {
306            flags |= FramedLZ4CompressorInputStream.BLOCK_INDEPENDENCE_MASK;
307        }
308        if (params.withContentChecksum) {
309            flags |= FramedLZ4CompressorInputStream.CONTENT_CHECKSUM_MASK;
310        }
311        if (params.withBlockChecksum) {
312            flags |= FramedLZ4CompressorInputStream.BLOCK_CHECKSUM_MASK;
313        }
314        out.write(flags);
315        contentHash.update(flags);
316        final int bd = params.blockSize.getIndex() << 4 & FramedLZ4CompressorInputStream.BLOCK_MAX_SIZE_MASK;
317        out.write(bd);
318        contentHash.update(bd);
319        out.write((int) (contentHash.getValue() >> 8 & 0xff));
320        contentHash.reset();
321    }
322
323    private void writeTrailer() throws IOException {
324        out.write(END_MARK);
325        if (params.withContentChecksum) {
326            ByteUtils.toLittleEndian(out, contentHash.getValue(), 4);
327        }
328    }
329
330}
331