001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.compress.compressors.lz4;
020
021import java.io.IOException;
022import java.io.InputStream;
023
024import org.apache.commons.compress.compressors.lz77support.AbstractLZ77CompressorInputStream;
025import org.apache.commons.compress.utils.ByteUtils;
026
027/**
028 * CompressorInputStream for the LZ4 block format.
029 *
030 * @see <a href="http://lz4.github.io/lz4/lz4_Block_format.html">LZ4 Block Format Description</a>
031 * @since 1.14
032 * @NotThreadSafe
033 */
034public class BlockLZ4CompressorInputStream extends AbstractLZ77CompressorInputStream {
035
036    private enum State {
037        NO_BLOCK, IN_LITERAL, LOOKING_FOR_BACK_REFERENCE, IN_BACK_REFERENCE, EOF
038    }
039    static final int WINDOW_SIZE = 1 << 16;
040    static final int SIZE_BITS = 4;
041    static final int BACK_REFERENCE_SIZE_MASK = (1 << SIZE_BITS) - 1;
042
043    static final int LITERAL_SIZE_MASK = BACK_REFERENCE_SIZE_MASK << SIZE_BITS;
044
045    /** Back-Reference-size part of the block starting byte. */
046    private int nextBackReferenceSize;
047
048    /** Current state of the stream */
049    private State state = State.NO_BLOCK;
050
051    /**
052     * Creates a new LZ4 input stream.
053     *
054     * @param is
055     *            An InputStream to read compressed data from
056     */
057    public BlockLZ4CompressorInputStream(final InputStream is) {
058        super(is, WINDOW_SIZE);
059    }
060
061    /**
062     * @return false if there is no more back-reference - this means this is the
063     * last block of the stream.
064     */
065    private boolean initializeBackReference() throws IOException {
066        int backReferenceOffset;
067        try {
068            backReferenceOffset = (int) ByteUtils.fromLittleEndian(supplier, 2);
069        } catch (final IOException ex) {
070            if (nextBackReferenceSize == 0) { // the last block has no back-reference
071                return false;
072            }
073            throw ex;
074        }
075        long backReferenceSize = nextBackReferenceSize;
076        if (nextBackReferenceSize == BACK_REFERENCE_SIZE_MASK) {
077            backReferenceSize += readSizeBytes();
078        }
079        // minimal match length 4 is encoded as 0
080        if (backReferenceSize < 0) {
081            throw new IOException("Illegal block with a negative match length found");
082        }
083        try {
084            startBackReference(backReferenceOffset, backReferenceSize + 4);
085        } catch (final IllegalArgumentException ex) {
086            throw new IOException("Illegal block with bad offset found", ex);
087        }
088        state = State.IN_BACK_REFERENCE;
089        return true;
090    }
091
092    /**
093     * {@inheritDoc}
094     */
095    @Override
096    public int read(final byte[] b, final int off, final int len) throws IOException {
097        if (len == 0) {
098            return 0;
099        }
100        switch (state) {
101        case EOF:
102            return -1;
103        case NO_BLOCK: // NOSONAR - fallthrough intended
104            readSizes();
105            /*FALLTHROUGH*/
106        case IN_LITERAL:
107            final int litLen = readLiteral(b, off, len);
108            if (!hasMoreDataInBlock()) {
109                state = State.LOOKING_FOR_BACK_REFERENCE;
110            }
111            return litLen > 0 ? litLen : read(b, off, len);
112        case LOOKING_FOR_BACK_REFERENCE: // NOSONAR - fallthrough intended
113            if (!initializeBackReference()) {
114                state = State.EOF;
115                return -1;
116            }
117            /*FALLTHROUGH*/
118        case IN_BACK_REFERENCE:
119            final int backReferenceLen = readBackReference(b, off, len);
120            if (!hasMoreDataInBlock()) {
121                state = State.NO_BLOCK;
122            }
123            return backReferenceLen > 0 ? backReferenceLen : read(b, off, len);
124        default:
125            throw new IOException("Unknown stream state " + state);
126        }
127    }
128
129    private long readSizeBytes() throws IOException {
130        long accum = 0;
131        int nextByte;
132        do {
133            nextByte = readOneByte();
134            if (nextByte == -1) {
135                throw new IOException("Premature end of stream while parsing length");
136            }
137            accum += nextByte;
138        } while (nextByte == 255);
139        return accum;
140    }
141
142    private void readSizes() throws IOException {
143        final int nextBlock = readOneByte();
144        if (nextBlock == -1) {
145            throw new IOException("Premature end of stream while looking for next block");
146        }
147        nextBackReferenceSize = nextBlock & BACK_REFERENCE_SIZE_MASK;
148        long literalSizePart = (nextBlock & LITERAL_SIZE_MASK) >> SIZE_BITS;
149        if (literalSizePart == BACK_REFERENCE_SIZE_MASK) {
150            literalSizePart += readSizeBytes();
151        }
152        if (literalSizePart < 0) {
153            throw new IOException("Illegal block with a negative literal size found");
154        }
155        startLiteral(literalSizePart);
156        state = State.IN_LITERAL;
157    }
158}