source: josm/trunk/src/org/tukaani/xz/SeekableXZInputStream.java@ 13350

Last change on this file since 13350 was 13350, checked in by stoecker, 6 years ago

see #15816 - add XZ support

File size: 41.8 KB
Line 
1/*
2 * SeekableXZInputStream
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10package org.tukaani.xz;
11
12import java.util.Arrays;
13import java.util.ArrayList;
14import java.io.DataInputStream;
15import java.io.IOException;
16import java.io.EOFException;
17import org.tukaani.xz.common.DecoderUtil;
18import org.tukaani.xz.common.StreamFlags;
19import org.tukaani.xz.check.Check;
20import org.tukaani.xz.index.IndexDecoder;
21import org.tukaani.xz.index.BlockInfo;
22
23/**
24 * Decompresses a .xz file in random access mode.
25 * This supports decompressing concatenated .xz files.
26 * <p>
 * Each .xz file consists of one or more Streams. Each Stream consists of
 * zero or more Blocks. Each Stream contains an Index of the Stream's Blocks.
 * The Indexes from all Streams are loaded in RAM by a constructor of this
 * class. A typical .xz file has only one Stream, and parsing its Index will
 * need only three or four seeks.
32 * <p>
 * To make random access possible, the data in a .xz file must be split
 * into multiple Blocks of reasonable size. Decompression can only start at
 * a Block boundary. When seeking to an uncompressed position that is not at
 * a Block boundary, decompression starts at the beginning of the Block and
 * throws away data until the target position is reached. Thus, smaller Blocks
 * mean faster seeks to arbitrary uncompressed positions. On the other hand,
 * smaller Blocks mean worse compression. So one has to make a compromise
 * between random access speed and compression ratio.
41 * <p>
42 * Implementation note: This class uses linear search to locate the correct
43 * Stream from the data structures in RAM. It was the simplest to implement
44 * and should be fine as long as there aren't too many Streams. The correct
45 * Block inside a Stream is located using binary search and thus is fast
46 * even with a huge number of Blocks.
47 *
48 * <h4>Memory usage</h4>
49 * <p>
50 * The amount of memory needed for the Indexes is taken into account when
51 * checking the memory usage limit. Each Stream is calculated to need at
52 * least 1&nbsp;KiB of memory and each Block 16 bytes of memory, rounded up
53 * to the next kibibyte. So unless the file has a huge number of Streams or
54 * Blocks, these don't take significant amount of memory.
55 *
56 * <h4>Creating random-accessible .xz files</h4>
57 * <p>
58 * When using {@link XZOutputStream}, a new Block can be started by calling
59 * its {@link XZOutputStream#endBlock() endBlock} method. If you know
60 * that the decompressor will only need to seek to certain uncompressed
61 * positions, it can be a good idea to start a new Block at (some of) these
62 * positions (and only at these positions to get better compression ratio).
63 * <p>
64 * liblzma in XZ Utils supports starting a new Block with
65 * <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded
66 * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha
67 * also added the option <code>--block-size=SIZE</code> to the xz command
68 * line tool. XZ Utils 5.1.2alpha added a partial implementation of
69 * <code>--block-list=SIZES</code> which allows specifying sizes of
70 * individual Blocks.
71 *
72 * @see SeekableFileInputStream
73 * @see XZInputStream
74 * @see XZOutputStream
75 */
76public class SeekableXZInputStream extends SeekableInputStream {
77 /**
78 * Cache for big arrays.
79 */
80 private final ArrayCache arrayCache;
81
82 /**
83 * The input stream containing XZ compressed data.
84 */
85 private SeekableInputStream in;
86
87 /**
88 * Memory usage limit after the memory usage of the IndexDecoders have
89 * been substracted.
90 */
91 private final int memoryLimit;
92
93 /**
94 * Memory usage of the IndexDecoders.
95 * <code>memoryLimit + indexMemoryUsage</code> equals the original
96 * memory usage limit that was passed to the constructor.
97 */
98 private int indexMemoryUsage = 0;
99
100 /**
101 * List of IndexDecoders, one for each Stream in the file.
102 * The list is in reverse order: The first element is
103 * the last Stream in the file.
104 */
105 private final ArrayList<IndexDecoder> streams
106 = new ArrayList<IndexDecoder>();
107
108 /**
109 * Bitmask of all Check IDs seen.
110 */
111 private int checkTypes = 0;
112
113 /**
114 * Uncompressed size of the file (all Streams).
115 */
116 private long uncompressedSize = 0;
117
118 /**
119 * Uncompressed size of the largest XZ Block in the file.
120 */
121 private long largestBlockSize = 0;
122
123 /**
124 * Number of XZ Blocks in the file.
125 */
126 private int blockCount = 0;
127
128 /**
129 * Size and position information about the current Block.
130 * If there are no Blocks, all values will be <code>-1</code>.
131 */
132 private final BlockInfo curBlockInfo;
133
134 /**
135 * Temporary (and cached) information about the Block whose information
136 * is queried via <code>getBlockPos</code> and related functions.
137 */
138 private final BlockInfo queriedBlockInfo;
139
140 /**
141 * Integrity Check in the current XZ Stream. The constructor leaves
142 * this to point to the Check of the first Stream.
143 */
144 private Check check;
145
146 /**
147 * Flag indicating if the integrity checks will be verified.
148 */
149 private final boolean verifyCheck;
150
151 /**
152 * Decoder of the current XZ Block, if any.
153 */
154 private BlockInputStream blockDecoder = null;
155
156 /**
157 * Current uncompressed position.
158 */
159 private long curPos = 0;
160
161 /**
162 * Target position for seeking.
163 */
164 private long seekPos;
165
166 /**
167 * True when <code>seek(long)</code> has been called but the actual
168 * seeking hasn't been done yet.
169 */
170 private boolean seekNeeded = false;
171
172 /**
173 * True when end of the file was reached. This can be cleared by
174 * calling <code>seek(long)</code>.
175 */
176 private boolean endReached = false;
177
178 /**
179 * Pending exception from an earlier error.
180 */
181 private IOException exception = null;
182
183 /**
184 * Temporary buffer for read(). This avoids reallocating memory
185 * on every read() call.
186 */
187 private final byte[] tempBuf = new byte[1];
188
/**
 * Creates a new seekable XZ decompressor that has no memory usage limit.
 *
 * @param       in          seekable input stream containing one or more
 *                          XZ Streams; the whole input stream is used
 *
 * @throws      XZFormatException
 *                          input is not in the XZ format
 *
 * @throws      CorruptedInputException
 *                          XZ data is corrupt or truncated
 *
 * @throws      UnsupportedOptionsException
 *                          XZ headers seem valid but they specify
 *                          options not supported by this implementation
 *
 * @throws      EOFException
 *                          less than 6 bytes of input was available
 *                          from <code>in</code>, or (unlikely) the size
 *                          of the underlying stream got smaller while
 *                          this was reading from it
 *
 * @throws      IOException may be thrown by <code>in</code>
 */
public SeekableXZInputStream(SeekableInputStream in) throws IOException {
    // A limit of -1 means that memory usage is not limited.
    this(in, -1);
}
217
/**
 * Creates a new seekable XZ decompressor that has no memory usage limit
 * and that allocates its large arrays from the given cache.
 * <p>
 * Apart from the additional <code>arrayCache</code> argument this is
 * identical to <code>SeekableXZInputStream(SeekableInputStream)</code>.
 *
 * @param       in          seekable input stream containing one or more
 *                          XZ Streams; the whole input stream is used
 *
 * @param       arrayCache  cache to be used for allocating large arrays
 *
 * @throws      XZFormatException
 *                          input is not in the XZ format
 *
 * @throws      CorruptedInputException
 *                          XZ data is corrupt or truncated
 *
 * @throws      UnsupportedOptionsException
 *                          XZ headers seem valid but they specify
 *                          options not supported by this implementation
 *
 * @throws      EOFException
 *                          less than 6 bytes of input was available
 *                          from <code>in</code>, or (unlikely) the size
 *                          of the underlying stream got smaller while
 *                          this was reading from it
 *
 * @throws      IOException may be thrown by <code>in</code>
 *
 * @since 1.7
 */
public SeekableXZInputStream(SeekableInputStream in, ArrayCache arrayCache)
        throws IOException {
    // A limit of -1 means that memory usage is not limited.
    this(in, -1, arrayCache);
}
254
/**
 * Creates a new seekable XZ decompressor with an optional
 * memory usage limit. Integrity checks are verified.
 *
 * @param       in          seekable input stream containing one or more
 *                          XZ Streams; the whole input stream is used
 *
 * @param       memoryLimit memory usage limit in kibibytes (KiB)
 *                          or <code>-1</code> to impose no
 *                          memory usage limit
 *
 * @throws      XZFormatException
 *                          input is not in the XZ format
 *
 * @throws      CorruptedInputException
 *                          XZ data is corrupt or truncated
 *
 * @throws      UnsupportedOptionsException
 *                          XZ headers seem valid but they specify
 *                          options not supported by this implementation
 *
 * @throws      MemoryLimitException
 *                          decoded XZ Indexes would need more memory
 *                          than allowed by the memory usage limit
 *
 * @throws      EOFException
 *                          less than 6 bytes of input was available
 *                          from <code>in</code>, or (unlikely) the size
 *                          of the underlying stream got smaller while
 *                          this was reading from it
 *
 * @throws      IOException may be thrown by <code>in</code>
 */
public SeekableXZInputStream(SeekableInputStream in, int memoryLimit)
        throws IOException {
    // Verification of the integrity checks is enabled.
    this(in, memoryLimit, true);
}
292
/**
 * Creates a new seekable XZ decompressor with an optional
 * memory usage limit, allocating large arrays from the given cache.
 * Integrity checks are verified.
 * <p>
 * Apart from the additional <code>arrayCache</code> argument this is
 * identical to
 * <code>SeekableXZInputStream(SeekableInputStream,int)</code>.
 *
 * @param       in          seekable input stream containing one or more
 *                          XZ Streams; the whole input stream is used
 *
 * @param       memoryLimit memory usage limit in kibibytes (KiB)
 *                          or <code>-1</code> to impose no
 *                          memory usage limit
 *
 * @param       arrayCache  cache to be used for allocating large arrays
 *
 * @throws      XZFormatException
 *                          input is not in the XZ format
 *
 * @throws      CorruptedInputException
 *                          XZ data is corrupt or truncated
 *
 * @throws      UnsupportedOptionsException
 *                          XZ headers seem valid but they specify
 *                          options not supported by this implementation
 *
 * @throws      MemoryLimitException
 *                          decoded XZ Indexes would need more memory
 *                          than allowed by the memory usage limit
 *
 * @throws      EOFException
 *                          less than 6 bytes of input was available
 *                          from <code>in</code>, or (unlikely) the size
 *                          of the underlying stream got smaller while
 *                          this was reading from it
 *
 * @throws      IOException may be thrown by <code>in</code>
 *
 * @since 1.7
 */
public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
                             ArrayCache arrayCache) throws IOException {
    // Verification of the integrity checks is enabled.
    this(in, memoryLimit, true, arrayCache);
}
339
/**
 * Creates a new seekable XZ decompressor with an optional
 * memory usage limit and the ability to disable verification
 * of integrity checks. Large arrays are allocated from the cache
 * returned by <code>ArrayCache.getDefaultCache()</code>.
 * <p>
 * Note that integrity check verification should almost never be disabled.
 * Possible reasons to disable integrity check verification:
 * <ul>
 *   <li>Trying to recover data from a corrupt .xz file.</li>
 *   <li>Speeding up decompression. This matters mostly with SHA-256
 *   or with files that have compressed extremely well. It's recommended
 *   that integrity checking isn't disabled for performance reasons
 *   unless the file integrity is verified externally in some other
 *   way.</li>
 * </ul>
 * <p>
 * <code>verifyCheck</code> only affects the integrity check of
 * the actual compressed data. The CRC32 fields in the headers
 * are always verified.
 *
 * @param       in          seekable input stream containing one or more
 *                          XZ Streams; the whole input stream is used
 *
 * @param       memoryLimit memory usage limit in kibibytes (KiB)
 *                          or <code>-1</code> to impose no
 *                          memory usage limit
 *
 * @param       verifyCheck if <code>true</code>, the integrity checks
 *                          will be verified; this should almost never
 *                          be set to <code>false</code>
 *
 * @throws      XZFormatException
 *                          input is not in the XZ format
 *
 * @throws      CorruptedInputException
 *                          XZ data is corrupt or truncated
 *
 * @throws      UnsupportedOptionsException
 *                          XZ headers seem valid but they specify
 *                          options not supported by this implementation
 *
 * @throws      MemoryLimitException
 *                          decoded XZ Indexes would need more memory
 *                          than allowed by the memory usage limit
 *
 * @throws      EOFException
 *                          less than 6 bytes of input was available
 *                          from <code>in</code>, or (unlikely) the size
 *                          of the underlying stream got smaller while
 *                          this was reading from it
 *
 * @throws      IOException may be thrown by <code>in</code>
 *
 * @since 1.6
 */
public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
                             boolean verifyCheck) throws IOException {
    this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
}
400
401 /**
402 * Creates a new seekable XZ decomporessor with an optional
403 * memory usage limit and ability to disable verification
404 * of integrity checks.
405 * <p>
406 * This is identical to
407 * <code>SeekableXZInputStream(SeekableInputStream,int,boolean)</code>
408 * except that this also takes the <code>arrayCache</code> argument.
409 *
410 * @param in seekable input stream containing one or more
411 * XZ Streams; the whole input stream is used
412 *
413 * @param memoryLimit memory usage limit in kibibytes (KiB)
414 * or <code>-1</code> to impose no
415 * memory usage limit
416 *
417 * @param verifyCheck if <code>true</code>, the integrity checks
418 * will be verified; this should almost never
419 * be set to <code>false</code>
420 *
421 * @param arrayCache cache to be used for allocating large arrays
422 *
423 * @throws XZFormatException
424 * input is not in the XZ format
425 *
426 * @throws CorruptedInputException
427 * XZ data is corrupt or truncated
428 *
429 * @throws UnsupportedOptionsException
430 * XZ headers seem valid but they specify
431 * options not supported by this implementation
432 *
433 * @throws MemoryLimitException
434 * decoded XZ Indexes would need more memory
435 * than allowed by the memory usage limit
436 *
437 * @throws EOFException
438 * less than 6 bytes of input was available
439 * from <code>in</code>, or (unlikely) the size
440 * of the underlying stream got smaller while
441 * this was reading from it
442 *
443 * @throws IOException may be thrown by <code>in</code>
444 *
445 * @since 1.7
446 */
447 public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
448 boolean verifyCheck, ArrayCache arrayCache)
449 throws IOException {
450 this.arrayCache = arrayCache;
451 this.verifyCheck = verifyCheck;
452 this.in = in;
453 DataInputStream inData = new DataInputStream(in);
454
455 // Check the magic bytes in the beginning of the file.
456 {
457 in.seek(0);
458 byte[] buf = new byte[XZ.HEADER_MAGIC.length];
459 inData.readFully(buf);
460 if (!Arrays.equals(buf, XZ.HEADER_MAGIC))
461 throw new XZFormatException();
462 }
463
464 // Get the file size and verify that it is a multiple of 4 bytes.
465 long pos = in.length();
466 if ((pos & 3) != 0)
467 throw new CorruptedInputException(
468 "XZ file size is not a multiple of 4 bytes");
469
470 // Parse the headers starting from the end of the file.
471 byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
472 long streamPadding = 0;
473
474 while (pos > 0) {
475 if (pos < DecoderUtil.STREAM_HEADER_SIZE)
476 throw new CorruptedInputException();
477
478 // Read the potential Stream Footer.
479 in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE);
480 inData.readFully(buf);
481
482 // Skip Stream Padding four bytes at a time.
483 // Skipping more at once would be faster,
484 // but usually there isn't much Stream Padding.
485 if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00
486 && buf[11] == 0x00) {
487 streamPadding += 4;
488 pos -= 4;
489 continue;
490 }
491
492 // It's not Stream Padding. Update pos.
493 pos -= DecoderUtil.STREAM_HEADER_SIZE;
494
495 // Decode the Stream Footer and check if Backward Size
496 // looks reasonable.
497 StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf);
498 if (streamFooter.backwardSize >= pos)
499 throw new CorruptedInputException(
500 "Backward Size in XZ Stream Footer is too big");
501
502 // Check that the Check ID is supported. Store it in case this
503 // is the first Stream in the file.
504 check = Check.getInstance(streamFooter.checkType);
505
506 // Remember which Check IDs have been seen.
507 checkTypes |= 1 << streamFooter.checkType;
508
509 // Seek to the beginning of the Index.
510 in.seek(pos - streamFooter.backwardSize);
511
512 // Decode the Index field.
513 IndexDecoder index;
514 try {
515 index = new IndexDecoder(in, streamFooter, streamPadding,
516 memoryLimit);
517 } catch (MemoryLimitException e) {
518 // IndexDecoder doesn't know how much memory we had
519 // already needed so we need to recreate the exception.
520 assert memoryLimit >= 0;
521 throw new MemoryLimitException(
522 e.getMemoryNeeded() + indexMemoryUsage,
523 memoryLimit + indexMemoryUsage);
524 }
525
526 // Update the memory usage and limit counters.
527 indexMemoryUsage += index.getMemoryUsage();
528 if (memoryLimit >= 0) {
529 memoryLimit -= index.getMemoryUsage();
530 assert memoryLimit >= 0;
531 }
532
533 // Remember the uncompressed size of the largest Block.
534 if (largestBlockSize < index.getLargestBlockSize())
535 largestBlockSize = index.getLargestBlockSize();
536
537 // Calculate the offset to the beginning of this XZ Stream and
538 // check that it looks sane.
539 long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE;
540 if (pos < off)
541 throw new CorruptedInputException("XZ Index indicates "
542 + "too big compressed size for the XZ Stream");
543
544 // Seek to the beginning of this Stream.
545 pos -= off;
546 in.seek(pos);
547
548 // Decode the Stream Header.
549 inData.readFully(buf);
550 StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf);
551
552 // Verify that the Stream Header matches the Stream Footer.
553 if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter))
554 throw new CorruptedInputException(
555 "XZ Stream Footer does not match Stream Header");
556
557 // Update the total uncompressed size of the file and check that
558 // it doesn't overflow.
559 uncompressedSize += index.getUncompressedSize();
560 if (uncompressedSize < 0)
561 throw new UnsupportedOptionsException("XZ file is too big");
562
563 // Update the Block count and check that it fits into an int.
564 blockCount += index.getRecordCount();
565 if (blockCount < 0)
566 throw new UnsupportedOptionsException(
567 "XZ file has over " + Integer.MAX_VALUE + " Blocks");
568
569 // Add this Stream to the list of Streams.
570 streams.add(index);
571
572 // Reset to be ready to parse the next Stream.
573 streamPadding = 0;
574 }
575
576 assert pos == 0;
577
578 // Save it now that indexMemoryUsage has been substracted from it.
579 this.memoryLimit = memoryLimit;
580
581 // Store the relative offsets of the Streams. This way we don't
582 // need to recalculate them in this class when seeking; the
583 // IndexDecoder instances will handle them.
584 IndexDecoder prev = streams.get(streams.size() - 1);
585 for (int i = streams.size() - 2; i >= 0; --i) {
586 IndexDecoder cur = streams.get(i);
587 cur.setOffsets(prev);
588 prev = cur;
589 }
590
591 // Initialize curBlockInfo to point to the first Stream.
592 // The blockNumber will be left to -1 so that .hasNext()
593 // and .setNext() work to get the first Block when starting
594 // to decompress from the beginning of the file.
595 IndexDecoder first = streams.get(streams.size() - 1);
596 curBlockInfo = new BlockInfo(first);
597
598 // queriedBlockInfo needs to be allocated too. The Stream used for
599 // initialization doesn't matter though.
600 queriedBlockInfo = new BlockInfo(first);
601 }
602
603 /**
604 * Gets the types of integrity checks used in the .xz file.
605 * Multiple checks are possible only if there are multiple
606 * concatenated XZ Streams.
607 * <p>
608 * The returned value has a bit set for every check type that is present.
609 * For example, if CRC64 and SHA-256 were used, the return value is
610 * <code>(1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_CRC64)
611 * | (1&nbsp;&lt;&lt;&nbsp;XZ.CHECK_SHA256)</code>.
612 */
613 public int getCheckTypes() {
614 return checkTypes;
615 }
616
617 /**
618 * Gets the amount of memory in kibibytes (KiB) used by
619 * the data structures needed to locate the XZ Blocks.
620 * This is usually useless information but since it is calculated
621 * for memory usage limit anyway, it is nice to make it available to too.
622 */
623 public int getIndexMemoryUsage() {
624 return indexMemoryUsage;
625 }
626
627 /**
628 * Gets the uncompressed size of the largest XZ Block in bytes.
629 * This can be useful if you want to check that the file doesn't
630 * have huge XZ Blocks which could make seeking to arbitrary offsets
631 * very slow. Note that huge Blocks don't automatically mean that
632 * seeking would be slow, for example, seeking to the beginning of
633 * any Block is always fast.
634 */
635 public long getLargestBlockSize() {
636 return largestBlockSize;
637 }
638
639 /**
640 * Gets the number of Streams in the .xz file.
641 *
642 * @since 1.3
643 */
644 public int getStreamCount() {
645 return streams.size();
646 }
647
648 /**
649 * Gets the number of Blocks in the .xz file.
650 *
651 * @since 1.3
652 */
653 public int getBlockCount() {
654 return blockCount;
655 }
656
657 /**
658 * Gets the uncompressed start position of the given Block.
659 *
660 * @throws IndexOutOfBoundsException if
661 * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
662 * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
663 *
664 * @since 1.3
665 */
666 public long getBlockPos(int blockNumber) {
667 locateBlockByNumber(queriedBlockInfo, blockNumber);
668 return queriedBlockInfo.uncompressedOffset;
669 }
670
671 /**
672 * Gets the uncompressed size of the given Block.
673 *
674 * @throws IndexOutOfBoundsException if
675 * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
676 * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
677 *
678 * @since 1.3
679 */
680 public long getBlockSize(int blockNumber) {
681 locateBlockByNumber(queriedBlockInfo, blockNumber);
682 return queriedBlockInfo.uncompressedSize;
683 }
684
685 /**
686 * Gets the position where the given compressed Block starts in
687 * the underlying .xz file.
688 * This information is rarely useful to the users of this class.
689 *
690 * @throws IndexOutOfBoundsException if
691 * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
692 * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
693 *
694 * @since 1.3
695 */
696 public long getBlockCompPos(int blockNumber) {
697 locateBlockByNumber(queriedBlockInfo, blockNumber);
698 return queriedBlockInfo.compressedOffset;
699 }
700
701 /**
702 * Gets the compressed size of the given Block.
703 * This together with the uncompressed size can be used to calculate
704 * the compression ratio of the specific Block.
705 *
706 * @throws IndexOutOfBoundsException if
707 * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
708 * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
709 *
710 * @since 1.3
711 */
712 public long getBlockCompSize(int blockNumber) {
713 locateBlockByNumber(queriedBlockInfo, blockNumber);
714 return (queriedBlockInfo.unpaddedSize + 3) & ~3;
715 }
716
717 /**
718 * Gets integrity check type (Check ID) of the given Block.
719 *
720 * @throws IndexOutOfBoundsException if
721 * <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
722 * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>.
723 *
724 * @see #getCheckTypes()
725 *
726 * @since 1.3
727 */
728 public int getBlockCheckType(int blockNumber) {
729 locateBlockByNumber(queriedBlockInfo, blockNumber);
730 return queriedBlockInfo.getCheckType();
731 }
732
733 /**
734 * Gets the number of the Block that contains the byte at the given
735 * uncompressed position.
736 *
737 * @throws IndexOutOfBoundsException if
738 * <code>pos&nbsp;&lt;&nbsp;0</code> or
739 * <code>pos&nbsp;&gt;=&nbsp;length()</code>.
740 *
741 * @since 1.3
742 */
743 public int getBlockNumber(long pos) {
744 locateBlockByPos(queriedBlockInfo, pos);
745 return queriedBlockInfo.blockNumber;
746 }
747
748 /**
749 * Decompresses the next byte from this input stream.
750 *
751 * @return the next decompressed byte, or <code>-1</code>
752 * to indicate the end of the compressed stream
753 *
754 * @throws CorruptedInputException
755 * @throws UnsupportedOptionsException
756 * @throws MemoryLimitException
757 *
758 * @throws XZIOException if the stream has been closed
759 *
760 * @throws IOException may be thrown by <code>in</code>
761 */
762 public int read() throws IOException {
763 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
764 }
765
766 /**
767 * Decompresses into an array of bytes.
768 * <p>
769 * If <code>len</code> is zero, no bytes are read and <code>0</code>
770 * is returned. Otherwise this will try to decompress <code>len</code>
771 * bytes of uncompressed data. Less than <code>len</code> bytes may
772 * be read only in the following situations:
773 * <ul>
774 * <li>The end of the compressed data was reached successfully.</li>
775 * <li>An error is detected after at least one but less than
776 * <code>len</code> bytes have already been successfully
777 * decompressed. The next call with non-zero <code>len</code>
778 * will immediately throw the pending exception.</li>
779 * <li>An exception is thrown.</li>
780 * </ul>
781 *
782 * @param buf target buffer for uncompressed data
783 * @param off start offset in <code>buf</code>
784 * @param len maximum number of uncompressed bytes to read
785 *
786 * @return number of bytes read, or <code>-1</code> to indicate
787 * the end of the compressed stream
788 *
789 * @throws CorruptedInputException
790 * @throws UnsupportedOptionsException
791 * @throws MemoryLimitException
792 *
793 * @throws XZIOException if the stream has been closed
794 *
795 * @throws IOException may be thrown by <code>in</code>
796 */
797 public int read(byte[] buf, int off, int len) throws IOException {
798 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
799 throw new IndexOutOfBoundsException();
800
801 if (len == 0)
802 return 0;
803
804 if (in == null)
805 throw new XZIOException("Stream closed");
806
807 if (exception != null)
808 throw exception;
809
810 int size = 0;
811
812 try {
813 if (seekNeeded)
814 seek();
815
816 if (endReached)
817 return -1;
818
819 while (len > 0) {
820 if (blockDecoder == null) {
821 seek();
822 if (endReached)
823 break;
824 }
825
826 int ret = blockDecoder.read(buf, off, len);
827
828 if (ret > 0) {
829 curPos += ret;
830 size += ret;
831 off += ret;
832 len -= ret;
833 } else if (ret == -1) {
834 blockDecoder = null;
835 }
836 }
837 } catch (IOException e) {
838 // We know that the file isn't simply truncated because we could
839 // parse the Indexes in the constructor. So convert EOFException
840 // to CorruptedInputException.
841 if (e instanceof EOFException)
842 e = new CorruptedInputException();
843
844 exception = e;
845 if (size == 0)
846 throw e;
847 }
848
849 return size;
850 }
851
852 /**
853 * Returns the number of uncompressed bytes that can be read
854 * without blocking. The value is returned with an assumption
855 * that the compressed input data will be valid. If the compressed
856 * data is corrupt, <code>CorruptedInputException</code> may get
857 * thrown before the number of bytes claimed to be available have
858 * been read from this input stream.
859 *
860 * @return the number of uncompressed bytes that can be read
861 * without blocking
862 */
863 public int available() throws IOException {
864 if (in == null)
865 throw new XZIOException("Stream closed");
866
867 if (exception != null)
868 throw exception;
869
870 if (endReached || seekNeeded || blockDecoder == null)
871 return 0;
872
873 return blockDecoder.available();
874 }
875
876 /**
877 * Closes the stream and calls <code>in.close()</code>.
878 * If the stream was already closed, this does nothing.
879 * <p>
880 * This is equivalent to <code>close(true)</code>.
881 *
882 * @throws IOException if thrown by <code>in.close()</code>
883 */
884 public void close() throws IOException {
885 close(true);
886 }
887
888 /**
889 * Closes the stream and optionally calls <code>in.close()</code>.
890 * If the stream was already closed, this does nothing.
891 * If <code>close(false)</code> has been called, a further
892 * call of <code>close(true)</code> does nothing (it doesn't call
893 * <code>in.close()</code>).
894 * <p>
895 * If you don't want to close the underlying <code>InputStream</code>,
896 * there is usually no need to worry about closing this stream either;
897 * it's fine to do nothing and let the garbage collector handle it.
898 * However, if you are using {@link ArrayCache}, <code>close(false)</code>
899 * can be useful to put the allocated arrays back to the cache without
900 * closing the underlying <code>InputStream</code>.
901 * <p>
902 * Note that if you successfully reach the end of the stream
903 * (<code>read</code> returns <code>-1</code>), the arrays are
904 * automatically put back to the cache by that <code>read</code> call. In
905 * this situation <code>close(false)</code> is redundant (but harmless).
906 *
907 * @throws IOException if thrown by <code>in.close()</code>
908 *
909 * @since 1.7
910 */
911 public void close(boolean closeInput) throws IOException {
912 if (in != null) {
913 if (blockDecoder != null) {
914 blockDecoder.close();
915 blockDecoder = null;
916 }
917
918 try {
919 if (closeInput)
920 in.close();
921 } finally {
922 in = null;
923 }
924 }
925 }
926
927 /**
928 * Gets the uncompressed size of this input stream. If there are multiple
929 * XZ Streams, the total uncompressed size of all XZ Streams is returned.
930 */
931 public long length() {
932 return uncompressedSize;
933 }
934
935 /**
936 * Gets the current uncompressed position in this input stream.
937 *
938 * @throws XZIOException if the stream has been closed
939 */
940 public long position() throws IOException {
941 if (in == null)
942 throw new XZIOException("Stream closed");
943
944 return seekNeeded ? seekPos : curPos;
945 }
946
947 /**
948 * Seeks to the specified absolute uncompressed position in the stream.
949 * This only stores the new position, so this function itself is always
950 * very fast. The actual seek is done when <code>read</code> is called
951 * to read at least one byte.
952 * <p>
953 * Seeking past the end of the stream is possible. In that case
954 * <code>read</code> will return <code>-1</code> to indicate
955 * the end of the stream.
956 *
957 * @param pos new uncompressed read position
958 *
959 * @throws XZIOException
960 * if <code>pos</code> is negative, or
961 * if stream has been closed
962 */
963 public void seek(long pos) throws IOException {
964 if (in == null)
965 throw new XZIOException("Stream closed");
966
967 if (pos < 0)
968 throw new XZIOException("Negative seek position: " + pos);
969
970 seekPos = pos;
971 seekNeeded = true;
972 }
973
974 /**
975 * Seeks to the beginning of the given XZ Block.
976 *
977 * @throws XZIOException
978 * if <code>blockNumber&nbsp;&lt;&nbsp;0</code> or
979 * <code>blockNumber&nbsp;&gt;=&nbsp;getBlockCount()</code>,
980 * or if stream has been closed
981 *
982 * @since 1.3
983 */
984 public void seekToBlock(int blockNumber) throws IOException {
985 if (in == null)
986 throw new XZIOException("Stream closed");
987
988 if (blockNumber < 0 || blockNumber >= blockCount)
989 throw new XZIOException("Invalid XZ Block number: " + blockNumber);
990
991 // This is a bit silly implementation. Here we locate the uncompressed
992 // offset of the specified Block, then when doing the actual seek in
993 // seek(), we need to find the Block number based on seekPos.
994 seekPos = getBlockPos(blockNumber);
995 seekNeeded = true;
996 }
997
998 /**
999 * Does the actual seeking. This is also called when <code>read</code>
1000 * needs a new Block to decode.
1001 */
1002 private void seek() throws IOException {
1003 // If seek(long) wasn't called, we simply need to get the next Block
1004 // from the same Stream. If there are no more Blocks in this Stream,
1005 // then we behave as if seek(long) had been called.
1006 if (!seekNeeded) {
1007 if (curBlockInfo.hasNext()) {
1008 curBlockInfo.setNext();
1009 initBlockDecoder();
1010 return;
1011 }
1012
1013 seekPos = curPos;
1014 }
1015
1016 seekNeeded = false;
1017
1018 // Check if we are seeking to or past the end of the file.
1019 if (seekPos >= uncompressedSize) {
1020 curPos = seekPos;
1021
1022 if (blockDecoder != null) {
1023 blockDecoder.close();
1024 blockDecoder = null;
1025 }
1026
1027 endReached = true;
1028 return;
1029 }
1030
1031 endReached = false;
1032
1033 // Locate the Block that contains the uncompressed target position.
1034 locateBlockByPos(curBlockInfo, seekPos);
1035
1036 // Seek in the underlying stream and create a new Block decoder
1037 // only if really needed. We can skip it if the current position
1038 // is already in the correct Block and the target position hasn't
1039 // been decompressed yet.
1040 //
1041 // NOTE: If curPos points to the beginning of this Block, it's
1042 // because it was left there after decompressing an earlier Block.
1043 // In that case, decoding of the current Block hasn't been started
1044 // yet. (Decoding of a Block won't be started until at least one
1045 // byte will also be read from it.)
1046 if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
1047 // Seek to the beginning of the Block.
1048 in.seek(curBlockInfo.compressedOffset);
1049
1050 // Since it is possible that this Block is from a different
1051 // Stream than the previous Block, initialize a new Check.
1052 check = Check.getInstance(curBlockInfo.getCheckType());
1053
1054 // Create a new Block decoder.
1055 initBlockDecoder();
1056 curPos = curBlockInfo.uncompressedOffset;
1057 }
1058
1059 // If the target wasn't at a Block boundary, decompress and throw
1060 // away data to reach the target position.
1061 if (seekPos > curPos) {
1062 // NOTE: The "if" below is there just in case. In this situation,
1063 // blockDecoder.skip will always skip the requested amount
1064 // or throw an exception.
1065 long skipAmount = seekPos - curPos;
1066 if (blockDecoder.skip(skipAmount) != skipAmount)
1067 throw new CorruptedInputException();
1068
1069 curPos = seekPos;
1070 }
1071 }
1072
1073 /**
1074 * Locates the Block that contains the given uncompressed position.
1075 */
1076 private void locateBlockByPos(BlockInfo info, long pos) {
1077 if (pos < 0 || pos >= uncompressedSize)
1078 throw new IndexOutOfBoundsException(
1079 "Invalid uncompressed position: " + pos);
1080
1081 // Locate the Stream that contains the target position.
1082 IndexDecoder index;
1083 for (int i = 0; ; ++i) {
1084 index = streams.get(i);
1085 if (index.hasUncompressedOffset(pos))
1086 break;
1087 }
1088
1089 // Locate the Block from the Stream that contains the target position.
1090 index.locateBlock(info, pos);
1091
1092 assert (info.compressedOffset & 3) == 0;
1093 assert info.uncompressedSize > 0;
1094 assert pos >= info.uncompressedOffset;
1095 assert pos < info.uncompressedOffset + info.uncompressedSize;
1096 }
1097
1098 /**
1099 * Locates the given Block and stores information about it
1100 * to <code>info</code>.
1101 */
1102 private void locateBlockByNumber(BlockInfo info, int blockNumber) {
1103 // Validate.
1104 if (blockNumber < 0 || blockNumber >= blockCount)
1105 throw new IndexOutOfBoundsException(
1106 "Invalid XZ Block number: " + blockNumber);
1107
1108 // Skip the search if info already points to the correct Block.
1109 if (info.blockNumber == blockNumber)
1110 return;
1111
1112 // Search the Stream that contains the given Block and then
1113 // search the Block from that Stream.
1114 for (int i = 0; ; ++i) {
1115 IndexDecoder index = streams.get(i);
1116 if (index.hasRecord(blockNumber)) {
1117 index.setBlockInfo(info, blockNumber);
1118 return;
1119 }
1120 }
1121 }
1122
1123 /**
1124 * Initializes a new BlockInputStream. This is a helper function for
1125 * <code>seek()</code>.
1126 */
1127 private void initBlockDecoder() throws IOException {
1128 try {
1129 // Set it to null first so that GC can collect it if memory
1130 // runs tight when initializing a new BlockInputStream.
1131 if (blockDecoder != null) {
1132 blockDecoder.close();
1133 blockDecoder = null;
1134 }
1135
1136 blockDecoder = new BlockInputStream(
1137 in, check, verifyCheck, memoryLimit,
1138 curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize,
1139 arrayCache);
1140 } catch (MemoryLimitException e) {
1141 // BlockInputStream doesn't know how much memory we had
1142 // already needed so we need to recreate the exception.
1143 assert memoryLimit >= 0;
1144 throw new MemoryLimitException(
1145 e.getMemoryNeeded() + indexMemoryUsage,
1146 memoryLimit + indexMemoryUsage);
1147 } catch (IndexIndicatorException e) {
1148 // It cannot be Index so the file must be corrupt.
1149 throw new CorruptedInputException();
1150 }
1151 }
1152}
Note: See TracBrowser for help on using the repository browser.