source: josm/trunk/src/org/tukaani/xz/XZInputStream.java@ 13350

Last change on this file since 13350 was 13350, checked in by stoecker, 6 years ago

see #15816 - add XZ support

File size: 19.7 KB
Line 
1/*
2 * XZInputStream
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10package org.tukaani.xz;
11
12import java.io.InputStream;
13import java.io.DataInputStream;
14import java.io.IOException;
15import java.io.EOFException;
16import org.tukaani.xz.common.DecoderUtil;
17
18/**
19 * Decompresses a .xz file in streamed mode (no seeking).
20 * <p>
21 * Use this to decompress regular standalone .xz files. This reads from
22 * its input stream until the end of the input or until an error occurs.
23 * This supports decompressing concatenated .xz files.
24 *
25 * <h4>Typical use cases</h4>
26 * <p>
27 * Getting an input stream to decompress a .xz file:
28 * <p><blockquote><pre>
29 * InputStream infile = new FileInputStream("foo.xz");
30 * XZInputStream inxz = new XZInputStream(infile);
31 * </pre></blockquote>
32 * <p>
33 * It's important to keep in mind that decompressor memory usage depends
34 * on the settings used to compress the file. The worst-case memory usage
35 * of XZInputStream is currently 1.5&nbsp;GiB. Still, very few files will
36 * require more than about 65&nbsp;MiB because that's how much decompressing
37 * a file created with the highest preset level will need, and only a few
38 * people use settings other than the predefined presets.
39 * <p>
40 * It is possible to specify a memory usage limit for
41 * <code>XZInputStream</code>. If decompression requires more memory than
42 * the specified limit, MemoryLimitException will be thrown when reading
43 * from the stream. For example, the following sets the memory usage limit
44 * to 100&nbsp;MiB:
45 * <p><blockquote><pre>
46 * InputStream infile = new FileInputStream("foo.xz");
47 * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
48 * </pre></blockquote>
49 *
50 * <h4>When uncompressed size is known beforehand</h4>
51 * <p>
52 * If you are decompressing complete files and your application knows
53 * exactly how much uncompressed data there should be, it is good to try
54 * reading one more byte by calling <code>read()</code> and checking
55 * that it returns <code>-1</code>. This way the decompressor will parse the
56 * file footers and verify the integrity checks, giving the caller more
57 * confidence that the uncompressed data is valid. (This advice seems to
58 * apply to
59 * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
60 *
61 * @see SingleXZInputStream
62 */
63public class XZInputStream extends InputStream {
64 private final ArrayCache arrayCache;
65
66 private final int memoryLimit;
67 private InputStream in;
68 private SingleXZInputStream xzIn;
69 private final boolean verifyCheck;
70 private boolean endReached = false;
71 private IOException exception = null;
72
73 private final byte[] tempBuf = new byte[1];
74
75 /**
76 * Creates a new XZ decompressor without a memory usage limit.
77 * <p>
78 * This constructor reads and parses the XZ Stream Header (12 bytes)
79 * from <code>in</code>. The header of the first Block is not read
80 * until <code>read</code> is called.
81 *
82 * @param in input stream from which XZ-compressed
83 * data is read
84 *
85 * @throws XZFormatException
86 * input is not in the XZ format
87 *
88 * @throws CorruptedInputException
89 * XZ header CRC32 doesn't match
90 *
91 * @throws UnsupportedOptionsException
92 * XZ header is valid but specifies options
93 * not supported by this implementation
94 *
95 * @throws EOFException
96 * less than 12 bytes of input was available
97 * from <code>in</code>
98 *
99 * @throws IOException may be thrown by <code>in</code>
100 */
101 public XZInputStream(InputStream in) throws IOException {
102 this(in, -1);
103 }
104
105 /**
106 * Creates a new XZ decompressor without a memory usage limit.
107 * <p>
108 * This is identical to <code>XZInputStream(InputStream)</code>
109 * except that this takes also the <code>arrayCache</code> argument.
110 *
111 * @param in input stream from which XZ-compressed
112 * data is read
113 *
114 * @param arrayCache cache to be used for allocating large arrays
115 *
116 * @throws XZFormatException
117 * input is not in the XZ format
118 *
119 * @throws CorruptedInputException
120 * XZ header CRC32 doesn't match
121 *
122 * @throws UnsupportedOptionsException
123 * XZ header is valid but specifies options
124 * not supported by this implementation
125 *
126 * @throws EOFException
127 * less than 12 bytes of input was available
128 * from <code>in</code>
129 *
130 * @throws IOException may be thrown by <code>in</code>
131 *
132 * @since 1.7
133 */
134 public XZInputStream(InputStream in, ArrayCache arrayCache)
135 throws IOException {
136 this(in, -1, arrayCache);
137 }
138
139 /**
140 * Creates a new XZ decompressor with an optional memory usage limit.
141 * <p>
142 * This is identical to <code>XZInputStream(InputStream)</code> except
143 * that this takes also the <code>memoryLimit</code> argument.
144 *
145 * @param in input stream from which XZ-compressed
146 * data is read
147 *
148 * @param memoryLimit memory usage limit in kibibytes (KiB)
149 * or <code>-1</code> to impose no
150 * memory usage limit
151 *
152 * @throws XZFormatException
153 * input is not in the XZ format
154 *
155 * @throws CorruptedInputException
156 * XZ header CRC32 doesn't match
157 *
158 * @throws UnsupportedOptionsException
159 * XZ header is valid but specifies options
160 * not supported by this implementation
161 *
162 * @throws EOFException
163 * less than 12 bytes of input was available
164 * from <code>in</code>
165 *
166 * @throws IOException may be thrown by <code>in</code>
167 */
168 public XZInputStream(InputStream in, int memoryLimit) throws IOException {
169 this(in, memoryLimit, true);
170 }
171
172 /**
173 * Creates a new XZ decompressor with an optional memory usage limit.
174 * <p>
175 * This is identical to <code>XZInputStream(InputStream)</code> except
176 * that this takes also the <code>memoryLimit</code> and
177 * <code>arrayCache</code> arguments.
178 *
179 * @param in input stream from which XZ-compressed
180 * data is read
181 *
182 * @param memoryLimit memory usage limit in kibibytes (KiB)
183 * or <code>-1</code> to impose no
184 * memory usage limit
185 *
186 * @param arrayCache cache to be used for allocating large arrays
187 *
188 * @throws XZFormatException
189 * input is not in the XZ format
190 *
191 * @throws CorruptedInputException
192 * XZ header CRC32 doesn't match
193 *
194 * @throws UnsupportedOptionsException
195 * XZ header is valid but specifies options
196 * not supported by this implementation
197 *
198 * @throws EOFException
199 * less than 12 bytes of input was available
200 * from <code>in</code>
201 *
202 * @throws IOException may be thrown by <code>in</code>
203 *
204 * @since 1.7
205 */
206 public XZInputStream(InputStream in, int memoryLimit,
207 ArrayCache arrayCache) throws IOException {
208 this(in, memoryLimit, true, arrayCache);
209 }
210
211 /**
212 * Creates a new XZ decompressor with an optional memory usage limit
213 * and ability to disable verification of integrity checks.
214 * <p>
215 * This is identical to <code>XZInputStream(InputStream,int)</code> except
216 * that this takes also the <code>verifyCheck</code> argument.
217 * <p>
218 * Note that integrity check verification should almost never be disabled.
219 * Possible reasons to disable integrity check verification:
220 * <ul>
221 * <li>Trying to recover data from a corrupt .xz file.</li>
222 * <li>Speeding up decompression. This matters mostly with SHA-256
223 * or with files that have compressed extremely well. It's recommended
224 * that integrity checking isn't disabled for performance reasons
225 * unless the file integrity is verified externally in some other
226 * way.</li>
227 * </ul>
228 * <p>
229 * <code>verifyCheck</code> only affects the integrity check of
230 * the actual compressed data. The CRC32 fields in the headers
231 * are always verified.
232 *
233 * @param in input stream from which XZ-compressed
234 * data is read
235 *
236 * @param memoryLimit memory usage limit in kibibytes (KiB)
237 * or <code>-1</code> to impose no
238 * memory usage limit
239 *
240 * @param verifyCheck if <code>true</code>, the integrity checks
241 * will be verified; this should almost never
242 * be set to <code>false</code>
243 *
244 * @throws XZFormatException
245 * input is not in the XZ format
246 *
247 * @throws CorruptedInputException
248 * XZ header CRC32 doesn't match
249 *
250 * @throws UnsupportedOptionsException
251 * XZ header is valid but specifies options
252 * not supported by this implementation
253 *
254 * @throws EOFException
255 * less than 12 bytes of input was available
256 * from <code>in</code>
257 *
258 * @throws IOException may be thrown by <code>in</code>
259 *
260 * @since 1.6
261 */
262 public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
263 throws IOException {
264 this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
265 }
266
267 /**
268 * Creates a new XZ decompressor with an optional memory usage limit
269 * and ability to disable verification of integrity checks.
270 * <p>
271 * This is identical to <code>XZInputStream(InputStream,int,boolean)</code>
272 * except that this takes also the <code>arrayCache</code> argument.
273 *
274 * @param in input stream from which XZ-compressed
275 * data is read
276 *
277 * @param memoryLimit memory usage limit in kibibytes (KiB)
278 * or <code>-1</code> to impose no
279 * memory usage limit
280 *
281 * @param verifyCheck if <code>true</code>, the integrity checks
282 * will be verified; this should almost never
283 * be set to <code>false</code>
284 *
285 * @param arrayCache cache to be used for allocating large arrays
286 *
287 * @throws XZFormatException
288 * input is not in the XZ format
289 *
290 * @throws CorruptedInputException
291 * XZ header CRC32 doesn't match
292 *
293 * @throws UnsupportedOptionsException
294 * XZ header is valid but specifies options
295 * not supported by this implementation
296 *
297 * @throws EOFException
298 * less than 12 bytes of input was available
299 * from <code>in</code>
300 *
301 * @throws IOException may be thrown by <code>in</code>
302 *
303 * @since 1.7
304 */
305 public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
306 ArrayCache arrayCache) throws IOException {
307 this.arrayCache = arrayCache;
308 this.in = in;
309 this.memoryLimit = memoryLimit;
310 this.verifyCheck = verifyCheck;
311 this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck,
312 arrayCache);
313 }
314
315 /**
316 * Decompresses the next byte from this input stream.
317 * <p>
318 * Reading lots of data with <code>read()</code> from this input stream
319 * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
320 * if you need to read lots of data one byte at a time.
321 *
322 * @return the next decompressed byte, or <code>-1</code>
323 * to indicate the end of the compressed stream
324 *
325 * @throws CorruptedInputException
326 * @throws UnsupportedOptionsException
327 * @throws MemoryLimitException
328 *
329 * @throws XZIOException if the stream has been closed
330 *
331 * @throws EOFException
332 * compressed input is truncated or corrupt
333 *
334 * @throws IOException may be thrown by <code>in</code>
335 */
336 public int read() throws IOException {
337 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
338 }
339
340 /**
341 * Decompresses into an array of bytes.
342 * <p>
343 * If <code>len</code> is zero, no bytes are read and <code>0</code>
344 * is returned. Otherwise this will try to decompress <code>len</code>
345 * bytes of uncompressed data. Less than <code>len</code> bytes may
346 * be read only in the following situations:
347 * <ul>
348 * <li>The end of the compressed data was reached successfully.</li>
349 * <li>An error is detected after at least one but less <code>len</code>
350 * bytes have already been successfully decompressed.
351 * The next call with non-zero <code>len</code> will immediately
352 * throw the pending exception.</li>
353 * <li>An exception is thrown.</li>
354 * </ul>
355 *
356 * @param buf target buffer for uncompressed data
357 * @param off start offset in <code>buf</code>
358 * @param len maximum number of uncompressed bytes to read
359 *
360 * @return number of bytes read, or <code>-1</code> to indicate
361 * the end of the compressed stream
362 *
363 * @throws CorruptedInputException
364 * @throws UnsupportedOptionsException
365 * @throws MemoryLimitException
366 *
367 * @throws XZIOException if the stream has been closed
368 *
369 * @throws EOFException
370 * compressed input is truncated or corrupt
371 *
372 * @throws IOException may be thrown by <code>in</code>
373 */
374 public int read(byte[] buf, int off, int len) throws IOException {
375 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
376 throw new IndexOutOfBoundsException();
377
378 if (len == 0)
379 return 0;
380
381 if (in == null)
382 throw new XZIOException("Stream closed");
383
384 if (exception != null)
385 throw exception;
386
387 if (endReached)
388 return -1;
389
390 int size = 0;
391
392 try {
393 while (len > 0) {
394 if (xzIn == null) {
395 prepareNextStream();
396 if (endReached)
397 return size == 0 ? -1 : size;
398 }
399
400 int ret = xzIn.read(buf, off, len);
401
402 if (ret > 0) {
403 size += ret;
404 off += ret;
405 len -= ret;
406 } else if (ret == -1) {
407 xzIn = null;
408 }
409 }
410 } catch (IOException e) {
411 exception = e;
412 if (size == 0)
413 throw e;
414 }
415
416 return size;
417 }
418
419 private void prepareNextStream() throws IOException {
420 DataInputStream inData = new DataInputStream(in);
421 byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
422
423 // The size of Stream Padding must be a multiple of four bytes,
424 // all bytes zero.
425 do {
426 // First try to read one byte to see if we have reached the end
427 // of the file.
428 int ret = inData.read(buf, 0, 1);
429 if (ret == -1) {
430 endReached = true;
431 return;
432 }
433
434 // Since we got one byte of input, there must be at least
435 // three more available in a valid file.
436 inData.readFully(buf, 1, 3);
437
438 } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);
439
440 // Not all bytes are zero. In a valid Stream it indicates the
441 // beginning of the next Stream. Read the rest of the Stream Header
442 // and initialize the XZ decoder.
443 inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);
444
445 try {
446 xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf,
447 arrayCache);
448 } catch (XZFormatException e) {
449 // Since this isn't the first .xz Stream, it is more
450 // logical to tell that the data is corrupt.
451 throw new CorruptedInputException(
452 "Garbage after a valid XZ Stream");
453 }
454 }
455
456 /**
457 * Returns the number of uncompressed bytes that can be read
458 * without blocking. The value is returned with an assumption
459 * that the compressed input data will be valid. If the compressed
460 * data is corrupt, <code>CorruptedInputException</code> may get
461 * thrown before the number of bytes claimed to be available have
462 * been read from this input stream.
463 *
464 * @return the number of uncompressed bytes that can be read
465 * without blocking
466 */
467 public int available() throws IOException {
468 if (in == null)
469 throw new XZIOException("Stream closed");
470
471 if (exception != null)
472 throw exception;
473
474 return xzIn == null ? 0 : xzIn.available();
475 }
476
477 /**
478 * Closes the stream and calls <code>in.close()</code>.
479 * If the stream was already closed, this does nothing.
480 * <p>
481 * This is equivalent to <code>close(true)</code>.
482 *
483 * @throws IOException if thrown by <code>in.close()</code>
484 */
485 public void close() throws IOException {
486 close(true);
487 }
488
489 /**
490 * Closes the stream and optionally calls <code>in.close()</code>.
491 * If the stream was already closed, this does nothing.
492 * If <code>close(false)</code> has been called, a further
493 * call of <code>close(true)</code> does nothing (it doesn't call
494 * <code>in.close()</code>).
495 * <p>
496 * If you don't want to close the underlying <code>InputStream</code>,
497 * there is usually no need to worry about closing this stream either;
498 * it's fine to do nothing and let the garbage collector handle it.
499 * However, if you are using {@link ArrayCache}, <code>close(false)</code>
500 * can be useful to put the allocated arrays back to the cache without
501 * closing the underlying <code>InputStream</code>.
502 * <p>
503 * Note that if you successfully reach the end of the stream
504 * (<code>read</code> returns <code>-1</code>), the arrays are
505 * automatically put back to the cache by that <code>read</code> call. In
506 * this situation <code>close(false)</code> is redundant (but harmless).
507 *
508 * @throws IOException if thrown by <code>in.close()</code>
509 *
510 * @since 1.7
511 */
512 public void close(boolean closeInput) throws IOException {
513 if (in != null) {
514 if (xzIn != null) {
515 xzIn.close(false);
516 xzIn = null;
517 }
518
519 try {
520 if (closeInput)
521 in.close();
522 } finally {
523 in = null;
524 }
525 }
526 }
527}
Note: See TracBrowser for help on using the repository browser.