Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: josm/trunk/src/org/tukaani/xz/XZInputStream.java@ 13350

Last change on this file since 13350 was 13350, checked in by stoecker, 6 years ago
see #15816 - add XZ support
File size: 19.7 KB

Line
1	/*
2	* XZInputStream
3	*
4	* Author: Lasse Collin <lasse.collin@tukaani.org>
5	*
6	* This file has been put into the public domain.
7	* You can do whatever you want with this file.
8	*/
9
10	package org.tukaani.xz;
11
12	import java.io.InputStream;
13	import java.io.DataInputStream;
14	import java.io.IOException;
15	import java.io.EOFException;
16	import org.tukaani.xz.common.DecoderUtil;
17
18	/**
19	* Decompresses a .xz file in streamed mode (no seeking).
20	* <p>
21	* Use this to decompress regular standalone .xz files. This reads from
22	* its input stream until the end of the input or until an error occurs.
23	* This supports decompressing concatenated .xz files.
24	*
25	* <h4>Typical use cases</h4>
26	* <p>
27	* Getting an input stream to decompress a .xz file:
28	* <p><blockquote><pre>
29	* InputStream infile = new FileInputStream("foo.xz");
30	* XZInputStream inxz = new XZInputStream(infile);
31	* </pre></blockquote>
32	* <p>
33	* It's important to keep in mind that decompressor memory usage depends
34	* on the settings used to compress the file. The worst-case memory usage
35	* of XZInputStream is currently 1.5 GiB. Still, very few files will
36	* require more than about 65 MiB because that's how much decompressing
37	* a file created with the highest preset level will need, and only a few
38	* people use settings other than the predefined presets.
39	* <p>
40	* It is possible to specify a memory usage limit for
41	* <code>XZInputStream</code>. If decompression requires more memory than
42	* the specified limit, MemoryLimitException will be thrown when reading
43	* from the stream. For example, the following sets the memory usage limit
44	* to 100 MiB:
45	* <p><blockquote><pre>
46	* InputStream infile = new FileInputStream("foo.xz");
47	* XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
48	* </pre></blockquote>
49	*
50	* <h4>When uncompressed size is known beforehand</h4>
51	* <p>
52	* If you are decompressing complete files and your application knows
53	* exactly how much uncompressed data there should be, it is good to try
54	* reading one more byte by calling <code>read()</code> and checking
55	* that it returns <code>-1</code>. This way the decompressor will parse the
56	* file footers and verify the integrity checks, giving the caller more
57	* confidence that the uncompressed data is valid. (This advice seems to
58	* apply to
59	* {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
60	*
61	* @see SingleXZInputStream
62	*/
63	public class XZInputStream extends InputStream {
64	private final ArrayCache arrayCache;
65
66	private final int memoryLimit;
67	private InputStream in;
68	private SingleXZInputStream xzIn;
69	private final boolean verifyCheck;
70	private boolean endReached = false;
71	private IOException exception = null;
72
73	private final byte[] tempBuf = new byte[1];
74
75	/**
76	* Creates a new XZ decompressor without a memory usage limit.
77	* <p>
78	* This constructor reads and parses the XZ Stream Header (12 bytes)
79	* from <code>in</code>. The header of the first Block is not read
80	* until <code>read</code> is called.
81	*
82	* @param in input stream from which XZ-compressed
83	* data is read
84	*
85	* @throws XZFormatException
86	* input is not in the XZ format
87	*
88	* @throws CorruptedInputException
89	* XZ header CRC32 doesn't match
90	*
91	* @throws UnsupportedOptionsException
92	* XZ header is valid but specifies options
93	* not supported by this implementation
94	*
95	* @throws EOFException
96	* less than 12 bytes of input was available
97	* from <code>in</code>
98	*
99	* @throws IOException may be thrown by <code>in</code>
100	*/
101	public XZInputStream(InputStream in) throws IOException {
102	this(in, -1);
103	}
104
105	/**
106	* Creates a new XZ decompressor without a memory usage limit.
107	* <p>
108	* This is identical to <code>XZInputStream(InputStream)</code>
109	* except that this takes also the <code>arrayCache</code> argument.
110	*
111	* @param in input stream from which XZ-compressed
112	* data is read
113	*
114	* @param arrayCache cache to be used for allocating large arrays
115	*
116	* @throws XZFormatException
117	* input is not in the XZ format
118	*
119	* @throws CorruptedInputException
120	* XZ header CRC32 doesn't match
121	*
122	* @throws UnsupportedOptionsException
123	* XZ header is valid but specifies options
124	* not supported by this implementation
125	*
126	* @throws EOFException
127	* less than 12 bytes of input was available
128	* from <code>in</code>
129	*
130	* @throws IOException may be thrown by <code>in</code>
131	*
132	* @since 1.7
133	*/
134	public XZInputStream(InputStream in, ArrayCache arrayCache)
135	throws IOException {
136	this(in, -1, arrayCache);
137	}
138
139	/**
140	* Creates a new XZ decompressor with an optional memory usage limit.
141	* <p>
142	* This is identical to <code>XZInputStream(InputStream)</code> except
143	* that this takes also the <code>memoryLimit</code> argument.
144	*
145	* @param in input stream from which XZ-compressed
146	* data is read
147	*
148	* @param memoryLimit memory usage limit in kibibytes (KiB)
149	* or <code>-1</code> to impose no
150	* memory usage limit
151	*
152	* @throws XZFormatException
153	* input is not in the XZ format
154	*
155	* @throws CorruptedInputException
156	* XZ header CRC32 doesn't match
157	*
158	* @throws UnsupportedOptionsException
159	* XZ header is valid but specifies options
160	* not supported by this implementation
161	*
162	* @throws EOFException
163	* less than 12 bytes of input was available
164	* from <code>in</code>
165	*
166	* @throws IOException may be thrown by <code>in</code>
167	*/
168	public XZInputStream(InputStream in, int memoryLimit) throws IOException {
169	this(in, memoryLimit, true);
170	}
171
172	/**
173	* Creates a new XZ decompressor with an optional memory usage limit.
174	* <p>
175	* This is identical to <code>XZInputStream(InputStream)</code> except
176	* that this takes also the <code>memoryLimit</code> and
177	* <code>arrayCache</code> arguments.
178	*
179	* @param in input stream from which XZ-compressed
180	* data is read
181	*
182	* @param memoryLimit memory usage limit in kibibytes (KiB)
183	* or <code>-1</code> to impose no
184	* memory usage limit
185	*
186	* @param arrayCache cache to be used for allocating large arrays
187	*
188	* @throws XZFormatException
189	* input is not in the XZ format
190	*
191	* @throws CorruptedInputException
192	* XZ header CRC32 doesn't match
193	*
194	* @throws UnsupportedOptionsException
195	* XZ header is valid but specifies options
196	* not supported by this implementation
197	*
198	* @throws EOFException
199	* less than 12 bytes of input was available
200	* from <code>in</code>
201	*
202	* @throws IOException may be thrown by <code>in</code>
203	*
204	* @since 1.7
205	*/
206	public XZInputStream(InputStream in, int memoryLimit,
207	ArrayCache arrayCache) throws IOException {
208	this(in, memoryLimit, true, arrayCache);
209	}
210
211	/**
212	* Creates a new XZ decompressor with an optional memory usage limit
213	* and ability to disable verification of integrity checks.
214	* <p>
215	* This is identical to <code>XZInputStream(InputStream,int)</code> except
216	* that this takes also the <code>verifyCheck</code> argument.
217	* <p>
218	* Note that integrity check verification should almost never be disabled.
219	* Possible reasons to disable integrity check verification:
220	* <ul>
221	* <li>Trying to recover data from a corrupt .xz file.</li>
222	* <li>Speeding up decompression. This matters mostly with SHA-256
223	* or with files that have compressed extremely well. It's recommended
224	* that integrity checking isn't disabled for performance reasons
225	* unless the file integrity is verified externally in some other
226	* way.</li>
227	* </ul>
228	* <p>
229	* <code>verifyCheck</code> only affects the integrity check of
230	* the actual compressed data. The CRC32 fields in the headers
231	* are always verified.
232	*
233	* @param in input stream from which XZ-compressed
234	* data is read
235	*
236	* @param memoryLimit memory usage limit in kibibytes (KiB)
237	* or <code>-1</code> to impose no
238	* memory usage limit
239	*
240	* @param verifyCheck if <code>true</code>, the integrity checks
241	* will be verified; this should almost never
242	* be set to <code>false</code>
243	*
244	* @throws XZFormatException
245	* input is not in the XZ format
246	*
247	* @throws CorruptedInputException
248	* XZ header CRC32 doesn't match
249	*
250	* @throws UnsupportedOptionsException
251	* XZ header is valid but specifies options
252	* not supported by this implementation
253	*
254	* @throws EOFException
255	* less than 12 bytes of input was available
256	* from <code>in</code>
257	*
258	* @throws IOException may be thrown by <code>in</code>
259	*
260	* @since 1.6
261	*/
262	public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
263	throws IOException {
264	this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
265	}
266
267	/**
268	* Creates a new XZ decompressor with an optional memory usage limit
269	* and ability to disable verification of integrity checks.
270	* <p>
271	* This is identical to <code>XZInputStream(InputStream,int,boolean)</code>
272	* except that this takes also the <code>arrayCache</code> argument.
273	*
274	* @param in input stream from which XZ-compressed
275	* data is read
276	*
277	* @param memoryLimit memory usage limit in kibibytes (KiB)
278	* or <code>-1</code> to impose no
279	* memory usage limit
280	*
281	* @param verifyCheck if <code>true</code>, the integrity checks
282	* will be verified; this should almost never
283	* be set to <code>false</code>
284	*
285	* @param arrayCache cache to be used for allocating large arrays
286	*
287	* @throws XZFormatException
288	* input is not in the XZ format
289	*
290	* @throws CorruptedInputException
291	* XZ header CRC32 doesn't match
292	*
293	* @throws UnsupportedOptionsException
294	* XZ header is valid but specifies options
295	* not supported by this implementation
296	*
297	* @throws EOFException
298	* less than 12 bytes of input was available
299	* from <code>in</code>
300	*
301	* @throws IOException may be thrown by <code>in</code>
302	*
303	* @since 1.7
304	*/
305	public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
306	ArrayCache arrayCache) throws IOException {
307	this.arrayCache = arrayCache;
308	this.in = in;
309	this.memoryLimit = memoryLimit;
310	this.verifyCheck = verifyCheck;
311	this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck,
312	arrayCache);
313	}
314
315	/**
316	* Decompresses the next byte from this input stream.
317	* <p>
318	* Reading lots of data with <code>read()</code> from this input stream
319	* may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
320	* if you need to read lots of data one byte at a time.
321	*
322	* @return the next decompressed byte, or <code>-1</code>
323	* to indicate the end of the compressed stream
324	*
325	* @throws CorruptedInputException
326	* @throws UnsupportedOptionsException
327	* @throws MemoryLimitException
328	*
329	* @throws XZIOException if the stream has been closed
330	*
331	* @throws EOFException
332	* compressed input is truncated or corrupt
333	*
334	* @throws IOException may be thrown by <code>in</code>
335	*/
336	public int read() throws IOException {
337	return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
338	}
339
340	/**
341	* Decompresses into an array of bytes.
342	* <p>
343	* If <code>len</code> is zero, no bytes are read and <code>0</code>
344	* is returned. Otherwise this will try to decompress <code>len</code>
345	* bytes of uncompressed data. Less than <code>len</code> bytes may
346	* be read only in the following situations:
347	* <ul>
348	* <li>The end of the compressed data was reached successfully.</li>
349	* <li>An error is detected after at least one but less <code>len</code>
350	* bytes have already been successfully decompressed.
351	* The next call with non-zero <code>len</code> will immediately
352	* throw the pending exception.</li>
353	* <li>An exception is thrown.</li>
354	* </ul>
355	*
356	* @param buf target buffer for uncompressed data
357	* @param off start offset in <code>buf</code>
358	* @param len maximum number of uncompressed bytes to read
359	*
360	* @return number of bytes read, or <code>-1</code> to indicate
361	* the end of the compressed stream
362	*
363	* @throws CorruptedInputException
364	* @throws UnsupportedOptionsException
365	* @throws MemoryLimitException
366	*
367	* @throws XZIOException if the stream has been closed
368	*
369	* @throws EOFException
370	* compressed input is truncated or corrupt
371	*
372	* @throws IOException may be thrown by <code>in</code>
373	*/
374	public int read(byte[] buf, int off, int len) throws IOException {
375	if (off < 0 \|\| len < 0 \|\| off + len < 0 \|\| off + len > buf.length)
376	throw new IndexOutOfBoundsException();
377
378	if (len == 0)
379	return 0;
380
381	if (in == null)
382	throw new XZIOException("Stream closed");
383
384	if (exception != null)
385	throw exception;
386
387	if (endReached)
388	return -1;
389
390	int size = 0;
391
392	try {
393	while (len > 0) {
394	if (xzIn == null) {
395	prepareNextStream();
396	if (endReached)
397	return size == 0 ? -1 : size;
398	}
399
400	int ret = xzIn.read(buf, off, len);
401
402	if (ret > 0) {
403	size += ret;
404	off += ret;
405	len -= ret;
406	} else if (ret == -1) {
407	xzIn = null;
408	}
409	}
410	} catch (IOException e) {
411	exception = e;
412	if (size == 0)
413	throw e;
414	}
415
416	return size;
417	}
418
419	private void prepareNextStream() throws IOException {
420	DataInputStream inData = new DataInputStream(in);
421	byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
422
423	// The size of Stream Padding must be a multiple of four bytes,
424	// all bytes zero.
425	do {
426	// First try to read one byte to see if we have reached the end
427	// of the file.
428	int ret = inData.read(buf, 0, 1);
429	if (ret == -1) {
430	endReached = true;
431	return;
432	}
433
434	// Since we got one byte of input, there must be at least
435	// three more available in a valid file.
436	inData.readFully(buf, 1, 3);
437
438	} while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);
439
440	// Not all bytes are zero. In a valid Stream it indicates the
441	// beginning of the next Stream. Read the rest of the Stream Header
442	// and initialize the XZ decoder.
443	inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);
444
445	try {
446	xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf,
447	arrayCache);
448	} catch (XZFormatException e) {
449	// Since this isn't the first .xz Stream, it is more
450	// logical to tell that the data is corrupt.
451	throw new CorruptedInputException(
452	"Garbage after a valid XZ Stream");
453	}
454	}
455
456	/**
457	* Returns the number of uncompressed bytes that can be read
458	* without blocking. The value is returned with an assumption
459	* that the compressed input data will be valid. If the compressed
460	* data is corrupt, <code>CorruptedInputException</code> may get
461	* thrown before the number of bytes claimed to be available have
462	* been read from this input stream.
463	*
464	* @return the number of uncompressed bytes that can be read
465	* without blocking
466	*/
467	public int available() throws IOException {
468	if (in == null)
469	throw new XZIOException("Stream closed");
470
471	if (exception != null)
472	throw exception;
473
474	return xzIn == null ? 0 : xzIn.available();
475	}
476
477	/**
478	* Closes the stream and calls <code>in.close()</code>.
479	* If the stream was already closed, this does nothing.
480	* <p>
481	* This is equivalent to <code>close(true)</code>.
482	*
483	* @throws IOException if thrown by <code>in.close()</code>
484	*/
485	public void close() throws IOException {
486	close(true);
487	}
488
489	/**
490	* Closes the stream and optionally calls <code>in.close()</code>.
491	* If the stream was already closed, this does nothing.
492	* If <code>close(false)</code> has been called, a further
493	* call of <code>close(true)</code> does nothing (it doesn't call
494	* <code>in.close()</code>).
495	* <p>
496	* If you don't want to close the underlying <code>InputStream</code>,
497	* there is usually no need to worry about closing this stream either;
498	* it's fine to do nothing and let the garbage collector handle it.
499	* However, if you are using {@link ArrayCache}, <code>close(false)</code>
500	* can be useful to put the allocated arrays back to the cache without
501	* closing the underlying <code>InputStream</code>.
502	* <p>
503	* Note that if you successfully reach the end of the stream
504	* (<code>read</code> returns <code>-1</code>), the arrays are
505	* automatically put back to the cache by that <code>read</code> call. In
506	* this situation <code>close(false)</code> is redundant (but harmless).
507	*
508	* @throws IOException if thrown by <code>in.close()</code>
509	*
510	* @since 1.7
511	*/
512	public void close(boolean closeInput) throws IOException {
513	if (in != null) {
514	if (xzIn != null) {
515	xzIn.close(false);
516	xzIn = null;
517	}
518
519	try {
520	if (closeInput)
521	in.close();
522	} finally {
523	in = null;
524	}
525	}
526	}
527	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: