source: josm/trunk/src/org/tukaani/xz/LZMA2InputStream.java@ 14015

Last change on this file since 14015 was 13350, checked in by stoecker, 7 years ago

see #15816 - add XZ support

File size: 13.5 KB
Line 
1/*
2 * LZMA2InputStream
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11package org.tukaani.xz;
12
13import java.io.InputStream;
14import java.io.DataInputStream;
15import java.io.IOException;
16import org.tukaani.xz.lz.LZDecoder;
17import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
18import org.tukaani.xz.lzma.LZMADecoder;
19
20/**
21 * Decompresses a raw LZMA2 stream (no XZ headers).
22 */
23public class LZMA2InputStream extends InputStream {
24 /**
25 * Smallest valid LZMA2 dictionary size.
26 * <p>
27 * Very tiny dictionaries would be a performance problem, so
28 * the minimum is 4 KiB.
29 */
30 public static final int DICT_SIZE_MIN = 4096;
31
32 /**
33 * Largest dictionary size supported by this implementation.
34 * <p>
35 * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
36 * This implementation supports only 16 bytes less than 2 GiB for raw
37 * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
38 * limitation is due to Java using signed 32-bit integers for array
39 * indexing. The limitation shouldn't matter much in practice since so
40 * huge dictionaries are not normally used.
41 */
42 public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
43
44 private static final int COMPRESSED_SIZE_MAX = 1 << 16;
45
46 private final ArrayCache arrayCache;
47 private DataInputStream in;
48
49 private LZDecoder lz;
50 private RangeDecoderFromBuffer rc;
51 private LZMADecoder lzma;
52
53 private int uncompressedSize = 0;
54 private boolean isLZMAChunk = false;
55
56 private boolean needDictReset = true;
57 private boolean needProps = true;
58 private boolean endReached = false;
59
60 private IOException exception = null;
61
62 private final byte[] tempBuf = new byte[1];
63
64 /**
65 * Gets approximate decompressor memory requirements as kibibytes for
66 * the given dictionary size.
67 *
68 * @param dictSize LZMA2 dictionary size as bytes, must be
69 * in the range [<code>DICT_SIZE_MIN</code>,
70 * <code>DICT_SIZE_MAX</code>]
71 *
72 * @return approximate memory requirements as kibibytes (KiB)
73 */
74 public static int getMemoryUsage(int dictSize) {
75 // The base state is around 30-40 KiB (probabilities etc.),
76 // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
77 // and LZ decoder needs a dictionary buffer.
78 return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
79 }
80
81 private static int getDictSize(int dictSize) {
82 if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
83 throw new IllegalArgumentException(
84 "Unsupported dictionary size " + dictSize);
85
86 // Round dictionary size upward to a multiple of 16. This way LZMA
87 // can use LZDecoder.getPos() for calculating LZMA's posMask.
88 // Note that this check is needed only for raw LZMA2 streams; it is
89 // redundant with .xz.
90 return (dictSize + 15) & ~15;
91 }
92
93 /**
94 * Creates a new input stream that decompresses raw LZMA2 data
95 * from <code>in</code>.
96 * <p>
97 * The caller needs to know the dictionary size used when compressing;
98 * the dictionary size isn't stored as part of a raw LZMA2 stream.
99 * <p>
100 * Specifying a too small dictionary size will prevent decompressing
101 * the stream. Specifying a too big dictionary is waste of memory but
102 * decompression will work.
103 * <p>
104 * There is no need to specify a dictionary bigger than
105 * the uncompressed size of the data even if a bigger dictionary
106 * was used when compressing. If you know the uncompressed size
107 * of the data, this might allow saving some memory.
108 *
109 * @param in input stream from which LZMA2-compressed
110 * data is read
111 *
112 * @param dictSize LZMA2 dictionary size as bytes, must be
113 * in the range [<code>DICT_SIZE_MIN</code>,
114 * <code>DICT_SIZE_MAX</code>]
115 */
116 public LZMA2InputStream(InputStream in, int dictSize) {
117 this(in, dictSize, null);
118 }
119
120 /**
121 * Creates a new LZMA2 decompressor using a preset dictionary.
122 * <p>
123 * This is like <code>LZMA2InputStream(InputStream, int)</code> except
124 * that the dictionary may be initialized using a preset dictionary.
125 * If a preset dictionary was used when compressing the data, the
126 * same preset dictionary must be provided when decompressing.
127 *
128 * @param in input stream from which LZMA2-compressed
129 * data is read
130 *
131 * @param dictSize LZMA2 dictionary size as bytes, must be
132 * in the range [<code>DICT_SIZE_MIN</code>,
133 * <code>DICT_SIZE_MAX</code>]
134 *
135 * @param presetDict preset dictionary or <code>null</code>
136 * to use no preset dictionary
137 */
138 public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
139 this(in, dictSize, presetDict, ArrayCache.getDefaultCache());
140 }
141
142 /**
143 * Creates a new LZMA2 decompressor using a preset dictionary
144 * and array cache.
145 * <p>
146 * This is like <code>LZMA2InputStream(InputStream, int, byte[])</code>
147 * except that this also takes the <code>arrayCache</code> argument.
148 *
149 * @param in input stream from which LZMA2-compressed
150 * data is read
151 *
152 * @param dictSize LZMA2 dictionary size as bytes, must be
153 * in the range [<code>DICT_SIZE_MIN</code>,
154 * <code>DICT_SIZE_MAX</code>]
155 *
156 * @param presetDict preset dictionary or <code>null</code>
157 * to use no preset dictionary
158 *
159 * @param arrayCache cache to be used for allocating large arrays
160 *
161 * @since 1.7
162 */
163 LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict,
164 ArrayCache arrayCache) {
165 // Check for null because otherwise null isn't detect
166 // in this constructor.
167 if (in == null)
168 throw new NullPointerException();
169
170 this.arrayCache = arrayCache;
171 this.in = new DataInputStream(in);
172 this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache);
173 this.lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);
174
175 if (presetDict != null && presetDict.length > 0)
176 needDictReset = false;
177 }
178
179 /**
180 * Decompresses the next byte from this input stream.
181 * <p>
182 * Reading lots of data with <code>read()</code> from this input stream
183 * may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
184 * if you need to read lots of data one byte at a time.
185 *
186 * @return the next decompressed byte, or <code>-1</code>
187 * to indicate the end of the compressed stream
188 *
189 * @throws CorruptedInputException
190 *
191 * @throws XZIOException if the stream has been closed
192 *
193 * @throws EOFException
194 * compressed input is truncated or corrupt
195 *
196 * @throws IOException may be thrown by <code>in</code>
197 */
198 public int read() throws IOException {
199 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
200 }
201
202 /**
203 * Decompresses into an array of bytes.
204 * <p>
205 * If <code>len</code> is zero, no bytes are read and <code>0</code>
206 * is returned. Otherwise this will block until <code>len</code>
207 * bytes have been decompressed, the end of the LZMA2 stream is reached,
208 * or an exception is thrown.
209 *
210 * @param buf target buffer for uncompressed data
211 * @param off start offset in <code>buf</code>
212 * @param len maximum number of uncompressed bytes to read
213 *
214 * @return number of bytes read, or <code>-1</code> to indicate
215 * the end of the compressed stream
216 *
217 * @throws CorruptedInputException
218 *
219 * @throws XZIOException if the stream has been closed
220 *
221 * @throws EOFException
222 * compressed input is truncated or corrupt
223 *
224 * @throws IOException may be thrown by <code>in</code>
225 */
226 public int read(byte[] buf, int off, int len) throws IOException {
227 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
228 throw new IndexOutOfBoundsException();
229
230 if (len == 0)
231 return 0;
232
233 if (in == null)
234 throw new XZIOException("Stream closed");
235
236 if (exception != null)
237 throw exception;
238
239 if (endReached)
240 return -1;
241
242 try {
243 int size = 0;
244
245 while (len > 0) {
246 if (uncompressedSize == 0) {
247 decodeChunkHeader();
248 if (endReached)
249 return size == 0 ? -1 : size;
250 }
251
252 int copySizeMax = Math.min(uncompressedSize, len);
253
254 if (!isLZMAChunk) {
255 lz.copyUncompressed(in, copySizeMax);
256 } else {
257 lz.setLimit(copySizeMax);
258 lzma.decode();
259 }
260
261 int copiedSize = lz.flush(buf, off);
262 off += copiedSize;
263 len -= copiedSize;
264 size += copiedSize;
265 uncompressedSize -= copiedSize;
266
267 if (uncompressedSize == 0)
268 if (!rc.isFinished() || lz.hasPending())
269 throw new CorruptedInputException();
270 }
271
272 return size;
273
274 } catch (IOException e) {
275 exception = e;
276 throw e;
277 }
278 }
279
280 private void decodeChunkHeader() throws IOException {
281 int control = in.readUnsignedByte();
282
283 if (control == 0x00) {
284 endReached = true;
285 putArraysToCache();
286 return;
287 }
288
289 if (control >= 0xE0 || control == 0x01) {
290 needProps = true;
291 needDictReset = false;
292 lz.reset();
293 } else if (needDictReset) {
294 throw new CorruptedInputException();
295 }
296
297 if (control >= 0x80) {
298 isLZMAChunk = true;
299
300 uncompressedSize = (control & 0x1F) << 16;
301 uncompressedSize += in.readUnsignedShort() + 1;
302
303 int compressedSize = in.readUnsignedShort() + 1;
304
305 if (control >= 0xC0) {
306 needProps = false;
307 decodeProps();
308
309 } else if (needProps) {
310 throw new CorruptedInputException();
311
312 } else if (control >= 0xA0) {
313 lzma.reset();
314 }
315
316 rc.prepareInputBuffer(in, compressedSize);
317
318 } else if (control > 0x02) {
319 throw new CorruptedInputException();
320
321 } else {
322 isLZMAChunk = false;
323 uncompressedSize = in.readUnsignedShort() + 1;
324 }
325 }
326
327 private void decodeProps() throws IOException {
328 int props = in.readUnsignedByte();
329
330 if (props > (4 * 5 + 4) * 9 + 8)
331 throw new CorruptedInputException();
332
333 int pb = props / (9 * 5);
334 props -= pb * 9 * 5;
335 int lp = props / 9;
336 int lc = props - lp * 9;
337
338 if (lc + lp > 4)
339 throw new CorruptedInputException();
340
341 lzma = new LZMADecoder(lz, rc, lc, lp, pb);
342 }
343
344 /**
345 * Returns the number of uncompressed bytes that can be read
346 * without blocking. The value is returned with an assumption
347 * that the compressed input data will be valid. If the compressed
348 * data is corrupt, <code>CorruptedInputException</code> may get
349 * thrown before the number of bytes claimed to be available have
350 * been read from this input stream.
351 * <p>
352 * In LZMA2InputStream, the return value will be non-zero when the
353 * decompressor is in the middle of an LZMA2 chunk. The return value
354 * will then be the number of uncompressed bytes remaining from that
355 * chunk. The return value can also be non-zero in the middle of
356 * an uncompressed chunk, but then the return value depends also on
357 * the <code>available()</code> method of the underlying InputStream.
358 *
359 * @return the number of uncompressed bytes that can be read
360 * without blocking
361 */
362 public int available() throws IOException {
363 if (in == null)
364 throw new XZIOException("Stream closed");
365
366 if (exception != null)
367 throw exception;
368
369 return isLZMAChunk ? uncompressedSize
370 : Math.min(uncompressedSize, in.available());
371 }
372
373 private void putArraysToCache() {
374 if (lz != null) {
375 lz.putArraysToCache(arrayCache);
376 lz = null;
377
378 rc.putArraysToCache(arrayCache);
379 rc = null;
380 }
381 }
382
383 /**
384 * Closes the stream and calls <code>in.close()</code>.
385 * If the stream was already closed, this does nothing.
386 *
387 * @throws IOException if thrown by <code>in.close()</code>
388 */
389 public void close() throws IOException {
390 if (in != null) {
391 putArraysToCache();
392
393 try {
394 in.close();
395 } finally {
396 in = null;
397 }
398 }
399 }
400}
Note: See TracBrowser for help on using the repository browser.