Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: josm/trunk/src/org/tukaani/xz/SeekableXZInputStream.java@ 13350

Last change on this file since 13350 was 13350, checked in by stoecker, 6 years ago
see #15816 - add XZ support
File size: 41.8 KB

Line
1	/*
2	* SeekableXZInputStream
3	*
4	* Author: Lasse Collin <lasse.collin@tukaani.org>
5	*
6	* This file has been put into the public domain.
7	* You can do whatever you want with this file.
8	*/
9
10	package org.tukaani.xz;
11
12	import java.util.Arrays;
13	import java.util.ArrayList;
14	import java.io.DataInputStream;
15	import java.io.IOException;
16	import java.io.EOFException;
17	import org.tukaani.xz.common.DecoderUtil;
18	import org.tukaani.xz.common.StreamFlags;
19	import org.tukaani.xz.check.Check;
20	import org.tukaani.xz.index.IndexDecoder;
21	import org.tukaani.xz.index.BlockInfo;
22
23	/**
24	* Decompresses a .xz file in random access mode.
25	* This supports decompressing concatenated .xz files.
26	* <p>
27	* Each .xz file consists of one or more Streams. Each Stream consists of zero
28	* or more Blocks. Each Stream contains an Index of the Stream's Blocks.
29	* The Indexes from all Streams are loaded in RAM by a constructor of this
30	* class. A typical .xz file has only one Stream, and parsing its Index will
31	* need only three or four seeks.
32	* <p>
33	* To make random access possible, the data in a .xz file must be split
34	* into multiple Blocks of reasonable size. Decompression can only start at
35	* a Block boundary. When seeking to an uncompressed position that is not at
36	* a Block boundary, decompression starts at the beginning of the Block and
37	* throws away data until the target position is reached. Thus, smaller Blocks
38	* mean faster seeks to arbitrary uncompressed positions. On the other hand,
39	* smaller Blocks mean worse compression. So one has to make a compromise
40	* between random access speed and compression ratio.
41	* <p>
42	* Implementation note: This class uses linear search to locate the correct
43	* Stream from the data structures in RAM. It was the simplest to implement
44	* and should be fine as long as there aren't too many Streams. The correct
45	* Block inside a Stream is located using binary search and thus is fast
46	* even with a huge number of Blocks.
47	*
48	* <h4>Memory usage</h4>
49	* <p>
50	* The amount of memory needed for the Indexes is taken into account when
51	* checking the memory usage limit. Each Stream is calculated to need at
52	* least 1 KiB of memory and each Block 16 bytes of memory, rounded up
53	* to the next kibibyte. So unless the file has a huge number of Streams or
54	* Blocks, these don't take significant amount of memory.
55	*
56	* <h4>Creating random-accessible .xz files</h4>
57	* <p>
58	* When using {@link XZOutputStream}, a new Block can be started by calling
59	* its {@link XZOutputStream#endBlock() endBlock} method. If you know
60	* that the decompressor will only need to seek to certain uncompressed
61	* positions, it can be a good idea to start a new Block at (some of) these
62	* positions (and only at these positions to get better compression ratio).
63	* <p>
64	* liblzma in XZ Utils supports starting a new Block with
65	* <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded
66	* compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha
67	* also added the option <code>--block-size=SIZE</code> to the xz command
68	* line tool. XZ Utils 5.1.2alpha added a partial implementation of
69	* <code>--block-list=SIZES</code> which allows specifying sizes of
70	* individual Blocks.
71	*
72	* @see SeekableFileInputStream
73	* @see XZInputStream
74	* @see XZOutputStream
75	*/
76	public class SeekableXZInputStream extends SeekableInputStream {
77	/**
78	* Cache for big arrays.
79	*/
80	private final ArrayCache arrayCache;
81
82	/**
83	* The input stream containing XZ compressed data.
84	*/
85	private SeekableInputStream in;
86
87	/**
88	* Memory usage limit after the memory usage of the IndexDecoders has
89	* been subtracted.
90	*/
91	private final int memoryLimit;
92
93	/**
94	* Memory usage of the IndexDecoders.
95	* <code>memoryLimit + indexMemoryUsage</code> equals the original
96	* memory usage limit that was passed to the constructor.
97	*/
98	private int indexMemoryUsage = 0;
99
100	/**
101	* List of IndexDecoders, one for each Stream in the file.
102	* The list is in reverse order: The first element is
103	* the last Stream in the file.
104	*/
105	private final ArrayList<IndexDecoder> streams
106	= new ArrayList<IndexDecoder>();
107
108	/**
109	* Bitmask of all Check IDs seen.
110	*/
111	private int checkTypes = 0;
112
113	/**
114	* Uncompressed size of the file (all Streams).
115	*/
116	private long uncompressedSize = 0;
117
118	/**
119	* Uncompressed size of the largest XZ Block in the file.
120	*/
121	private long largestBlockSize = 0;
122
123	/**
124	* Number of XZ Blocks in the file.
125	*/
126	private int blockCount = 0;
127
128	/**
129	* Size and position information about the current Block.
130	* If there are no Blocks, all values will be <code>-1</code>.
131	*/
132	private final BlockInfo curBlockInfo;
133
134	/**
135	* Temporary (and cached) information about the Block whose information
136	* is queried via <code>getBlockPos</code> and related functions.
137	*/
138	private final BlockInfo queriedBlockInfo;
139
140	/**
141	* Integrity Check in the current XZ Stream. The constructor leaves
142	* this to point to the Check of the first Stream.
143	*/
144	private Check check;
145
146	/**
147	* Flag indicating if the integrity checks will be verified.
148	*/
149	private final boolean verifyCheck;
150
151	/**
152	* Decoder of the current XZ Block, if any.
153	*/
154	private BlockInputStream blockDecoder = null;
155
156	/**
157	* Current uncompressed position.
158	*/
159	private long curPos = 0;
160
161	/**
162	* Target position for seeking.
163	*/
164	private long seekPos;
165
166	/**
167	* True when <code>seek(long)</code> has been called but the actual
168	* seeking hasn't been done yet.
169	*/
170	private boolean seekNeeded = false;
171
172	/**
173	* True when end of the file was reached. This can be cleared by
174	* calling <code>seek(long)</code>.
175	*/
176	private boolean endReached = false;
177
178	/**
179	* Pending exception from an earlier error.
180	*/
181	private IOException exception = null;
182
183	/**
184	* Temporary buffer for read(). This avoids reallocating memory
185	* on every read() call.
186	*/
187	private final byte[] tempBuf = new byte[1];
188
189	/**
190	* Creates a new seekable XZ decompressor without a memory usage limit.
191	*
192	* @param in seekable input stream containing one or more
193	* XZ Streams; the whole input stream is used
194	*
195	* @throws XZFormatException
196	* input is not in the XZ format
197	*
198	* @throws CorruptedInputException
199	* XZ data is corrupt or truncated
200	*
201	* @throws UnsupportedOptionsException
202	* XZ headers seem valid but they specify
203	* options not supported by this implementation
204	*
205	* @throws EOFException
206	* less than 6 bytes of input was available
207	* from <code>in</code>, or (unlikely) the size
208	* of the underlying stream got smaller while
209	* this was reading from it
210	*
211	* @throws IOException may be thrown by <code>in</code>
212	*/
public SeekableXZInputStream(SeekableInputStream in) throws IOException {
    // A memory limit of -1 means "no limit"; the (in, memoryLimit)
    // overload does the rest.
    this(in, -1);
}
217
218	/**
219	* Creates a new seekable XZ decompressor without a memory usage limit.
220	* <p>
221	* This is identical to
222	* <code>SeekableXZInputStream(SeekableInputStream)</code> except that
223	* this also takes the <code>arrayCache</code> argument.
224	*
225	* @param in seekable input stream containing one or more
226	* XZ Streams; the whole input stream is used
227	*
228	* @param arrayCache cache to be used for allocating large arrays
229	*
230	* @throws XZFormatException
231	* input is not in the XZ format
232	*
233	* @throws CorruptedInputException
234	* XZ data is corrupt or truncated
235	*
236	* @throws UnsupportedOptionsException
237	* XZ headers seem valid but they specify
238	* options not supported by this implementation
239	*
240	* @throws EOFException
241	* less than 6 bytes of input was available
242	* from <code>in</code>, or (unlikely) the size
243	* of the underlying stream got smaller while
244	* this was reading from it
245	*
246	* @throws IOException may be thrown by <code>in</code>
247	*
248	* @since 1.7
249	*/
public SeekableXZInputStream(SeekableInputStream in, ArrayCache arrayCache)
        throws IOException {
    // A memory limit of -1 means "no limit"; forward the caller's cache.
    this(in, -1, arrayCache);
}
254
255	/**
256	* Creates a new seekable XZ decompressor with an optional
257	* memory usage limit.
258	*
259	* @param in seekable input stream containing one or more
260	* XZ Streams; the whole input stream is used
261	*
262	* @param memoryLimit memory usage limit in kibibytes (KiB)
263	* or <code>-1</code> to impose no
264	* memory usage limit
265	*
266	* @throws XZFormatException
267	* input is not in the XZ format
268	*
269	* @throws CorruptedInputException
270	* XZ data is corrupt or truncated
271	*
272	* @throws UnsupportedOptionsException
273	* XZ headers seem valid but they specify
274	* options not supported by this implementation
275	*
276	* @throws MemoryLimitException
277	* decoded XZ Indexes would need more memory
278	* than allowed by the memory usage limit
279	*
280	* @throws EOFException
281	* less than 6 bytes of input was available
282	* from <code>in</code>, or (unlikely) the size
283	* of the underlying stream got smaller while
284	* this was reading from it
285	*
286	* @throws IOException may be thrown by <code>in</code>
287	*/
public SeekableXZInputStream(SeekableInputStream in, int memoryLimit)
        throws IOException {
    // Integrity check verification is enabled by default.
    this(in, memoryLimit, true);
}
292
293	/**
294	* Creates a new seekable XZ decompressor with an optional
295	* memory usage limit.
296	* <p>
297	* This is identical to
298	* <code>SeekableXZInputStream(SeekableInputStream,int)</code>
299	* except that this also takes the <code>arrayCache</code> argument.
300	*
301	* @param in seekable input stream containing one or more
302	* XZ Streams; the whole input stream is used
303	*
304	* @param memoryLimit memory usage limit in kibibytes (KiB)
305	* or <code>-1</code> to impose no
306	* memory usage limit
307	*
308	* @param arrayCache cache to be used for allocating large arrays
309	*
310	* @throws XZFormatException
311	* input is not in the XZ format
312	*
313	* @throws CorruptedInputException
314	* XZ data is corrupt or truncated
315	*
316	* @throws UnsupportedOptionsException
317	* XZ headers seem valid but they specify
318	* options not supported by this implementation
319	*
320	* @throws MemoryLimitException
321	* decoded XZ Indexes would need more memory
322	* than allowed by the memory usage limit
323	*
324	* @throws EOFException
325	* less than 6 bytes of input was available
326	* from <code>in</code>, or (unlikely) the size
327	* of the underlying stream got smaller while
328	* this was reading from it
329	*
330	* @throws IOException may be thrown by <code>in</code>
331	*
332	* @since 1.7
333	*/
public SeekableXZInputStream(SeekableInputStream in,
                             int memoryLimit,
                             ArrayCache arrayCache) throws IOException {
    // Integrity check verification is enabled by default.
    this(in, memoryLimit, true, arrayCache);
}
339
340	/**
341	* Creates a new seekable XZ decompressor with an optional
342	* memory usage limit and ability to disable verification
343	* of integrity checks.
344	* <p>
345	* Note that integrity check verification should almost never be disabled.
346	* Possible reasons to disable integrity check verification:
347	* <ul>
348	* <li>Trying to recover data from a corrupt .xz file.</li>
349	* <li>Speeding up decompression. This matters mostly with SHA-256
350	* or with files that have compressed extremely well. It's recommended
351	* that integrity checking isn't disabled for performance reasons
352	* unless the file integrity is verified externally in some other
353	* way.</li>
354	* </ul>
355	* <p>
356	* <code>verifyCheck</code> only affects the integrity check of
357	* the actual compressed data. The CRC32 fields in the headers
358	* are always verified.
359	*
360	* @param in seekable input stream containing one or more
361	* XZ Streams; the whole input stream is used
362	*
363	* @param memoryLimit memory usage limit in kibibytes (KiB)
364	* or <code>-1</code> to impose no
365	* memory usage limit
366	*
367	* @param verifyCheck if <code>true</code>, the integrity checks
368	* will be verified; this should almost never
369	* be set to <code>false</code>
370	*
371	* @throws XZFormatException
372	* input is not in the XZ format
373	*
374	* @throws CorruptedInputException
375	* XZ data is corrupt or truncated
376	*
377	* @throws UnsupportedOptionsException
378	* XZ headers seem valid but they specify
379	* options not supported by this implementation
380	*
381	* @throws MemoryLimitException
382	* decoded XZ Indexes would need more memory
383	* than allowed by the memory usage limit
384	*
385	* @throws EOFException
386	* less than 6 bytes of input was available
387	* from <code>in</code>, or (unlikely) the size
388	* of the underlying stream got smaller while
389	* this was reading from it
390	*
391	* @throws IOException may be thrown by <code>in</code>
392	*
393	* @since 1.6
394	*/
public SeekableXZInputStream(SeekableInputStream in,
                             int memoryLimit,
                             boolean verifyCheck) throws IOException {
    // No cache was supplied, so use the one from ArrayCache.getDefaultCache().
    this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
}
400
401	/**
402	* Creates a new seekable XZ decompressor with an optional
403	* memory usage limit and ability to disable verification
404	* of integrity checks.
405	* <p>
406	* This is identical to
407	* <code>SeekableXZInputStream(SeekableInputStream,int,boolean)</code>
408	* except that this also takes the <code>arrayCache</code> argument.
409	*
410	* @param in seekable input stream containing one or more
411	* XZ Streams; the whole input stream is used
412	*
413	* @param memoryLimit memory usage limit in kibibytes (KiB)
414	* or <code>-1</code> to impose no
415	* memory usage limit
416	*
417	* @param verifyCheck if <code>true</code>, the integrity checks
418	* will be verified; this should almost never
419	* be set to <code>false</code>
420	*
421	* @param arrayCache cache to be used for allocating large arrays
422	*
423	* @throws XZFormatException
424	* input is not in the XZ format
425	*
426	* @throws CorruptedInputException
427	* XZ data is corrupt or truncated
428	*
429	* @throws UnsupportedOptionsException
430	* XZ headers seem valid but they specify
431	* options not supported by this implementation
432	*
433	* @throws MemoryLimitException
434	* decoded XZ Indexes would need more memory
435	* than allowed by the memory usage limit
436	*
437	* @throws EOFException
438	* less than 6 bytes of input was available
439	* from <code>in</code>, or (unlikely) the size
440	* of the underlying stream got smaller while
441	* this was reading from it
442	*
443	* @throws IOException may be thrown by <code>in</code>
444	*
445	* @since 1.7
446	*/
447	public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
448	boolean verifyCheck, ArrayCache arrayCache)
449	throws IOException {
450	this.arrayCache = arrayCache;
451	this.verifyCheck = verifyCheck;
452	this.in = in;
453	DataInputStream inData = new DataInputStream(in);
454
455	// Check the magic bytes in the beginning of the file.
456	{
457	in.seek(0);
458	byte[] buf = new byte[XZ.HEADER_MAGIC.length];
459	inData.readFully(buf);
460	if (!Arrays.equals(buf, XZ.HEADER_MAGIC))
461	throw new XZFormatException();
462	}
463
464	// Get the file size and verify that it is a multiple of 4 bytes.
465	long pos = in.length();
466	if ((pos & 3) != 0)
467	throw new CorruptedInputException(
468	"XZ file size is not a multiple of 4 bytes");
469
470	// Parse the headers starting from the end of the file.
471	byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
472	long streamPadding = 0;
473
474	while (pos > 0) {
475	if (pos < DecoderUtil.STREAM_HEADER_SIZE)
476	throw new CorruptedInputException();
477
478	// Read the potential Stream Footer.
479	in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE);
480	inData.readFully(buf);
481
482	// Skip Stream Padding four bytes at a time.
483	// Skipping more at once would be faster,
484	// but usually there isn't much Stream Padding.
485	if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00
486	&& buf[11] == 0x00) {
487	streamPadding += 4;
488	pos -= 4;
489	continue;
490	}
491
492	// It's not Stream Padding. Update pos.
493	pos -= DecoderUtil.STREAM_HEADER_SIZE;
494
495	// Decode the Stream Footer and check if Backward Size
496	// looks reasonable.
497	StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf);
498	if (streamFooter.backwardSize >= pos)
499	throw new CorruptedInputException(
500	"Backward Size in XZ Stream Footer is too big");
501
502	// Check that the Check ID is supported. Store it in case this
503	// is the first Stream in the file.
504	check = Check.getInstance(streamFooter.checkType);
505
506	// Remember which Check IDs have been seen.
507	checkTypes \|= 1 << streamFooter.checkType;
508
509	// Seek to the beginning of the Index.
510	in.seek(pos - streamFooter.backwardSize);
511
512	// Decode the Index field.
513	IndexDecoder index;
514	try {
515	index = new IndexDecoder(in, streamFooter, streamPadding,
516	memoryLimit);
517	} catch (MemoryLimitException e) {
518	// IndexDecoder doesn't know how much memory we had
519	// already needed so we need to recreate the exception.
520	assert memoryLimit >= 0;
521	throw new MemoryLimitException(
522	e.getMemoryNeeded() + indexMemoryUsage,
523	memoryLimit + indexMemoryUsage);
524	}
525
526	// Update the memory usage and limit counters.
527	indexMemoryUsage += index.getMemoryUsage();
528	if (memoryLimit >= 0) {
529	memoryLimit -= index.getMemoryUsage();
530	assert memoryLimit >= 0;
531	}
532
533	// Remember the uncompressed size of the largest Block.
534	if (largestBlockSize < index.getLargestBlockSize())
535	largestBlockSize = index.getLargestBlockSize();
536
537	// Calculate the offset to the beginning of this XZ Stream and
538	// check that it looks sane.
539	long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE;
540	if (pos < off)
541	throw new CorruptedInputException("XZ Index indicates "
542	+ "too big compressed size for the XZ Stream");
543
544	// Seek to the beginning of this Stream.
545	pos -= off;
546	in.seek(pos);
547
548	// Decode the Stream Header.
549	inData.readFully(buf);
550	StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf);
551
552	// Verify that the Stream Header matches the Stream Footer.
553	if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter))
554	throw new CorruptedInputException(
555	"XZ Stream Footer does not match Stream Header");
556
557	// Update the total uncompressed size of the file and check that
558	// it doesn't overflow.
559	uncompressedSize += index.getUncompressedSize();
560	if (uncompressedSize < 0)
561	throw new UnsupportedOptionsException("XZ file is too big");
562
563	// Update the Block count and check that it fits into an int.
564	blockCount += index.getRecordCount();
565	if (blockCount < 0)
566	throw new UnsupportedOptionsException(
567	"XZ file has over " + Integer.MAX_VALUE + " Blocks");
568
569	// Add this Stream to the list of Streams.
570	streams.add(index);
571
572	// Reset to be ready to parse the next Stream.
573	streamPadding = 0;
574	}
575
576	assert pos == 0;
577
578	// Save it now that indexMemoryUsage has been substracted from it.
579	this.memoryLimit = memoryLimit;
580
581	// Store the relative offsets of the Streams. This way we don't
582	// need to recalculate them in this class when seeking; the
583	// IndexDecoder instances will handle them.
584	IndexDecoder prev = streams.get(streams.size() - 1);
585	for (int i = streams.size() - 2; i >= 0; --i) {
586	IndexDecoder cur = streams.get(i);
587	cur.setOffsets(prev);
588	prev = cur;
589	}
590
591	// Initialize curBlockInfo to point to the first Stream.
592	// The blockNumber will be left to -1 so that .hasNext()
593	// and .setNext() work to get the first Block when starting
594	// to decompress from the beginning of the file.
595	IndexDecoder first = streams.get(streams.size() - 1);
596	curBlockInfo = new BlockInfo(first);
597
598	// queriedBlockInfo needs to be allocated too. The Stream used for
599	// initialization doesn't matter though.
600	queriedBlockInfo = new BlockInfo(first);
601	}
602
603	/**
604	* Gets the types of integrity checks used in the .xz file.
605	* Multiple checks are possible only if there are multiple
606	* concatenated XZ Streams.
607	* <p>
608	* The returned value has a bit set for every check type that is present.
609	* For example, if CRC64 and SHA-256 were used, the return value is
610	* <code>(1 << XZ.CHECK_CRC64)
611	* | (1 << XZ.CHECK_SHA256)</code>.
612	*/
613	public int getCheckTypes() {
614	return checkTypes;
615	}
616
617	/**
618	* Gets the amount of memory in kibibytes (KiB) used by
619	* the data structures needed to locate the XZ Blocks.
620	* This is usually useless information but since it is calculated
621	* for memory usage limit anyway, it is nice to make it available too.
622	*/
623	public int getIndexMemoryUsage() {
624	return indexMemoryUsage;
625	}
626
627	/**
628	* Gets the uncompressed size of the largest XZ Block in bytes.
629	* This can be useful if you want to check that the file doesn't
630	* have huge XZ Blocks which could make seeking to arbitrary offsets
631	* very slow. Note that huge Blocks don't automatically mean that
632	* seeking would be slow, for example, seeking to the beginning of
633	* any Block is always fast.
634	*/
635	public long getLargestBlockSize() {
636	return largestBlockSize;
637	}
638
639	/**
640	* Gets the number of Streams in the .xz file.
641	*
642	* @since 1.3
643	*/
644	public int getStreamCount() {
645	return streams.size();
646	}
647
648	/**
649	* Gets the number of Blocks in the .xz file.
650	*
651	* @since 1.3
652	*/
653	public int getBlockCount() {
654	return blockCount;
655	}
656
657	/**
658	* Gets the uncompressed start position of the given Block.
659	*
660	* @throws IndexOutOfBoundsException if
661	* <code>blockNumber < 0</code> or
662	* <code>blockNumber >= getBlockCount()</code>.
663	*
664	* @since 1.3
665	*/
666	public long getBlockPos(int blockNumber) {
667	locateBlockByNumber(queriedBlockInfo, blockNumber);
668	return queriedBlockInfo.uncompressedOffset;
669	}
670
671	/**
672	* Gets the uncompressed size of the given Block.
673	*
674	* @throws IndexOutOfBoundsException if
675	* <code>blockNumber < 0</code> or
676	* <code>blockNumber >= getBlockCount()</code>.
677	*
678	* @since 1.3
679	*/
680	public long getBlockSize(int blockNumber) {
681	locateBlockByNumber(queriedBlockInfo, blockNumber);
682	return queriedBlockInfo.uncompressedSize;
683	}
684
685	/**
686	* Gets the position where the given compressed Block starts in
687	* the underlying .xz file.
688	* This information is rarely useful to the users of this class.
689	*
690	* @throws IndexOutOfBoundsException if
691	* <code>blockNumber < 0</code> or
692	* <code>blockNumber >= getBlockCount()</code>.
693	*
694	* @since 1.3
695	*/
696	public long getBlockCompPos(int blockNumber) {
697	locateBlockByNumber(queriedBlockInfo, blockNumber);
698	return queriedBlockInfo.compressedOffset;
699	}
700
701	/**
702	* Gets the compressed size of the given Block.
703	* This together with the uncompressed size can be used to calculate
704	* the compression ratio of the specific Block.
705	*
706	* @throws IndexOutOfBoundsException if
707	* <code>blockNumber < 0</code> or
708	* <code>blockNumber >= getBlockCount()</code>.
709	*
710	* @since 1.3
711	*/
712	public long getBlockCompSize(int blockNumber) {
713	locateBlockByNumber(queriedBlockInfo, blockNumber);
714	return (queriedBlockInfo.unpaddedSize + 3) & ~3;
715	}
716
717	/**
718	* Gets integrity check type (Check ID) of the given Block.
719	*
720	* @throws IndexOutOfBoundsException if
721	* <code>blockNumber < 0</code> or
722	* <code>blockNumber >= getBlockCount()</code>.
723	*
724	* @see #getCheckTypes()
725	*
726	* @since 1.3
727	*/
728	public int getBlockCheckType(int blockNumber) {
729	locateBlockByNumber(queriedBlockInfo, blockNumber);
730	return queriedBlockInfo.getCheckType();
731	}
732
733	/**
734	* Gets the number of the Block that contains the byte at the given
735	* uncompressed position.
736	*
737	* @throws IndexOutOfBoundsException if
738	* <code>pos < 0</code> or
739	* <code>pos >= length()</code>.
740	*
741	* @since 1.3
742	*/
743	public int getBlockNumber(long pos) {
744	locateBlockByPos(queriedBlockInfo, pos);
745	return queriedBlockInfo.blockNumber;
746	}
747
748	/**
749	* Decompresses the next byte from this input stream.
750	*
751	* @return the next decompressed byte, or <code>-1</code>
752	* to indicate the end of the compressed stream
753	*
754	* @throws CorruptedInputException
755	* @throws UnsupportedOptionsException
756	* @throws MemoryLimitException
757	*
758	* @throws XZIOException if the stream has been closed
759	*
760	* @throws IOException may be thrown by <code>in</code>
761	*/
762	public int read() throws IOException {
763	return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
764	}
765
766	/**
767	* Decompresses into an array of bytes.
768	* <p>
769	* If <code>len</code> is zero, no bytes are read and <code>0</code>
770	* is returned. Otherwise this will try to decompress <code>len</code>
771	* bytes of uncompressed data. Less than <code>len</code> bytes may
772	* be read only in the following situations:
773	* <ul>
774	* <li>The end of the compressed data was reached successfully.</li>
775	* <li>An error is detected after at least one but less than
776	* <code>len</code> bytes have already been successfully
777	* decompressed. The next call with non-zero <code>len</code>
778	* will immediately throw the pending exception.</li>
779	* <li>An exception is thrown.</li>
780	* </ul>
781	*
782	* @param buf target buffer for uncompressed data
783	* @param off start offset in <code>buf</code>
784	* @param len maximum number of uncompressed bytes to read
785	*
786	* @return number of bytes read, or <code>-1</code> to indicate
787	* the end of the compressed stream
788	*
789	* @throws CorruptedInputException
790	* @throws UnsupportedOptionsException
791	* @throws MemoryLimitException
792	*
793	* @throws XZIOException if the stream has been closed
794	*
795	* @throws IOException may be thrown by <code>in</code>
796	*/
797	public int read(byte[] buf, int off, int len) throws IOException {
798	if (off < 0 \|\| len < 0 \|\| off + len < 0 \|\| off + len > buf.length)
799	throw new IndexOutOfBoundsException();
800
801	if (len == 0)
802	return 0;
803
804	if (in == null)
805	throw new XZIOException("Stream closed");
806
807	if (exception != null)
808	throw exception;
809
810	int size = 0;
811
812	try {
813	if (seekNeeded)
814	seek();
815
816	if (endReached)
817	return -1;
818
819	while (len > 0) {
820	if (blockDecoder == null) {
821	seek();
822	if (endReached)
823	break;
824	}
825
826	int ret = blockDecoder.read(buf, off, len);
827
828	if (ret > 0) {
829	curPos += ret;
830	size += ret;
831	off += ret;
832	len -= ret;
833	} else if (ret == -1) {
834	blockDecoder = null;
835	}
836	}
837	} catch (IOException e) {
838	// We know that the file isn't simply truncated because we could
839	// parse the Indexes in the constructor. So convert EOFException
840	// to CorruptedInputException.
841	if (e instanceof EOFException)
842	e = new CorruptedInputException();
843
844	exception = e;
845	if (size == 0)
846	throw e;
847	}
848
849	return size;
850	}
851
852	/**
853	* Returns the number of uncompressed bytes that can be read
854	* without blocking. The value is returned with an assumption
855	* that the compressed input data will be valid. If the compressed
856	* data is corrupt, <code>CorruptedInputException</code> may get
857	* thrown before the number of bytes claimed to be available have
858	* been read from this input stream.
859	*
860	* @return the number of uncompressed bytes that can be read
861	* without blocking
862	*/
863	public int available() throws IOException {
864	if (in == null)
865	throw new XZIOException("Stream closed");
866
867	if (exception != null)
868	throw exception;
869
870	if (endReached \|\| seekNeeded \|\| blockDecoder == null)
871	return 0;
872
873	return blockDecoder.available();
874	}
875
876	/**
877	* Closes the stream and calls <code>in.close()</code>.
878	* If the stream was already closed, this does nothing.
879	* <p>
880	* This is equivalent to <code>close(true)</code>.
881	*
882	* @throws IOException if thrown by <code>in.close()</code>
883	*/
884	public void close() throws IOException {
885	close(true);
886	}
887
888	/**
889	* Closes the stream and optionally calls <code>in.close()</code>.
890	* If the stream was already closed, this does nothing.
891	* If <code>close(false)</code> has been called, a further
892	* call of <code>close(true)</code> does nothing (it doesn't call
893	* <code>in.close()</code>).
894	* <p>
895	* If you don't want to close the underlying <code>InputStream</code>,
896	* there is usually no need to worry about closing this stream either;
897	* it's fine to do nothing and let the garbage collector handle it.
898	* However, if you are using {@link ArrayCache}, <code>close(false)</code>
899	* can be useful to put the allocated arrays back to the cache without
900	* closing the underlying <code>InputStream</code>.
901	* <p>
902	* Note that if you successfully reach the end of the stream
903	* (<code>read</code> returns <code>-1</code>), the arrays are
904	* automatically put back to the cache by that <code>read</code> call. In
905	* this situation <code>close(false)</code> is redundant (but harmless).
906	*
907	* @throws IOException if thrown by <code>in.close()</code>
908	*
909	* @since 1.7
910	*/
911	public void close(boolean closeInput) throws IOException {
912	if (in != null) {
913	if (blockDecoder != null) {
914	blockDecoder.close();
915	blockDecoder = null;
916	}
917
918	try {
919	if (closeInput)
920	in.close();
921	} finally {
922	in = null;
923	}
924	}
925	}
926
927	/**
928	* Gets the uncompressed size of this input stream. If there are multiple
929	* XZ Streams, the total uncompressed size of all XZ Streams is returned.
930	*/
931	public long length() {
932	return uncompressedSize;
933	}
934
935	/**
936	* Gets the current uncompressed position in this input stream.
937	*
938	* @throws XZIOException if the stream has been closed
939	*/
940	public long position() throws IOException {
941	if (in == null)
942	throw new XZIOException("Stream closed");
943
944	return seekNeeded ? seekPos : curPos;
945	}
946
947	/**
948	* Seeks to the specified absolute uncompressed position in the stream.
949	* This only stores the new position, so this function itself is always
950	* very fast. The actual seek is done when <code>read</code> is called
951	* to read at least one byte.
952	* <p>
953	* Seeking past the end of the stream is possible. In that case
954	* <code>read</code> will return <code>-1</code> to indicate
955	* the end of the stream.
956	*
957	* @param pos new uncompressed read position
958	*
959	* @throws XZIOException
960	* if <code>pos</code> is negative, or
961	* if stream has been closed
962	*/
963	public void seek(long pos) throws IOException {
964	if (in == null)
965	throw new XZIOException("Stream closed");
966
967	if (pos < 0)
968	throw new XZIOException("Negative seek position: " + pos);
969
970	seekPos = pos;
971	seekNeeded = true;
972	}
973
974	/**
975	* Seeks to the beginning of the given XZ Block.
976	*
977	* @throws XZIOException
978	* if <code>blockNumber < 0</code> or
979	* <code>blockNumber >= getBlockCount()</code>,
980	* or if stream has been closed
981	*
982	* @since 1.3
983	*/
984	public void seekToBlock(int blockNumber) throws IOException {
985	if (in == null)
986	throw new XZIOException("Stream closed");
987
988	if (blockNumber < 0 \|\| blockNumber >= blockCount)
989	throw new XZIOException("Invalid XZ Block number: " + blockNumber);
990
991	// This is a bit silly implementation. Here we locate the uncompressed
992	// offset of the specified Block, then when doing the actual seek in
993	// seek(), we need to find the Block number based on seekPos.
994	seekPos = getBlockPos(blockNumber);
995	seekNeeded = true;
996	}
997
998	/**
999	* Does the actual seeking. This is also called when <code>read</code>
1000	* needs a new Block to decode.
1001	*/
1002	private void seek() throws IOException {
1003	// If seek(long) wasn't called, we simply need to get the next Block
1004	// from the same Stream. If there are no more Blocks in this Stream,
1005	// then we behave as if seek(long) had been called.
1006	if (!seekNeeded) {
1007	if (curBlockInfo.hasNext()) {
1008	curBlockInfo.setNext();
1009	initBlockDecoder();
1010	return;
1011	}
1012
1013	seekPos = curPos;
1014	}
1015
1016	seekNeeded = false;
1017
1018	// Check if we are seeking to or past the end of the file.
1019	if (seekPos >= uncompressedSize) {
1020	curPos = seekPos;
1021
1022	if (blockDecoder != null) {
1023	blockDecoder.close();
1024	blockDecoder = null;
1025	}
1026
1027	endReached = true;
1028	return;
1029	}
1030
1031	endReached = false;
1032
1033	// Locate the Block that contains the uncompressed target position.
1034	locateBlockByPos(curBlockInfo, seekPos);
1035
1036	// Seek in the underlying stream and create a new Block decoder
1037	// only if really needed. We can skip it if the current position
1038	// is already in the correct Block and the target position hasn't
1039	// been decompressed yet.
1040	//
1041	// NOTE: If curPos points to the beginning of this Block, it's
1042	// because it was left there after decompressing an earlier Block.
1043	// In that case, decoding of the current Block hasn't been started
1044	// yet. (Decoding of a Block won't be started until at least one
1045	// byte will also be read from it.)
1046	if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
1047	// Seek to the beginning of the Block.
1048	in.seek(curBlockInfo.compressedOffset);
1049
1050	// Since it is possible that this Block is from a different
1051	// Stream than the previous Block, initialize a new Check.
1052	check = Check.getInstance(curBlockInfo.getCheckType());
1053
1054	// Create a new Block decoder.
1055	initBlockDecoder();
1056	curPos = curBlockInfo.uncompressedOffset;
1057	}
1058
1059	// If the target wasn't at a Block boundary, decompress and throw
1060	// away data to reach the target position.
1061	if (seekPos > curPos) {
1062	// NOTE: The "if" below is there just in case. In this situation,
1063	// blockDecoder.skip will always skip the requested amount
1064	// or throw an exception.
1065	long skipAmount = seekPos - curPos;
1066	if (blockDecoder.skip(skipAmount) != skipAmount)
1067	throw new CorruptedInputException();
1068
1069	curPos = seekPos;
1070	}
1071	}
1072
1073	/**
1074	* Locates the Block that contains the given uncompressed position.
1075	*/
1076	private void locateBlockByPos(BlockInfo info, long pos) {
1077	if (pos < 0 \|\| pos >= uncompressedSize)
1078	throw new IndexOutOfBoundsException(
1079	"Invalid uncompressed position: " + pos);
1080
1081	// Locate the Stream that contains the target position.
1082	IndexDecoder index;
1083	for (int i = 0; ; ++i) {
1084	index = streams.get(i);
1085	if (index.hasUncompressedOffset(pos))
1086	break;
1087	}
1088
1089	// Locate the Block from the Stream that contains the target position.
1090	index.locateBlock(info, pos);
1091
1092	assert (info.compressedOffset & 3) == 0;
1093	assert info.uncompressedSize > 0;
1094	assert pos >= info.uncompressedOffset;
1095	assert pos < info.uncompressedOffset + info.uncompressedSize;
1096	}
1097
1098	/**
1099	* Locates the given Block and stores information about it
1100	* to <code>info</code>.
1101	*/
1102	private void locateBlockByNumber(BlockInfo info, int blockNumber) {
1103	// Validate.
1104	if (blockNumber < 0 \|\| blockNumber >= blockCount)
1105	throw new IndexOutOfBoundsException(
1106	"Invalid XZ Block number: " + blockNumber);
1107
1108	// Skip the search if info already points to the correct Block.
1109	if (info.blockNumber == blockNumber)
1110	return;
1111
1112	// Search the Stream that contains the given Block and then
1113	// search the Block from that Stream.
1114	for (int i = 0; ; ++i) {
1115	IndexDecoder index = streams.get(i);
1116	if (index.hasRecord(blockNumber)) {
1117	index.setBlockInfo(info, blockNumber);
1118	return;
1119	}
1120	}
1121	}
1122
1123	/**
1124	* Initializes a new BlockInputStream. This is a helper function for
1125	* <code>seek()</code>.
1126	*/
1127	private void initBlockDecoder() throws IOException {
1128	try {
1129	// Set it to null first so that GC can collect it if memory
1130	// runs tight when initializing a new BlockInputStream.
1131	if (blockDecoder != null) {
1132	blockDecoder.close();
1133	blockDecoder = null;
1134	}
1135
1136	blockDecoder = new BlockInputStream(
1137	in, check, verifyCheck, memoryLimit,
1138	curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize,
1139	arrayCache);
1140	} catch (MemoryLimitException e) {
1141	// BlockInputStream doesn't know how much memory we had
1142	// already needed so we need to recreate the exception.
1143	assert memoryLimit >= 0;
1144	throw new MemoryLimitException(
1145	e.getMemoryNeeded() + indexMemoryUsage,
1146	memoryLimit + indexMemoryUsage);
1147	} catch (IndexIndicatorException e) {
1148	// It cannot be Index so the file must be corrupt.
1149	throw new CorruptedInputException();
1150	}
1151	}
1152	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: