1 | /*
|
---|
2 | * SeekableXZInputStream
|
---|
3 | *
|
---|
4 | * Author: Lasse Collin <lasse.collin@tukaani.org>
|
---|
5 | *
|
---|
6 | * This file has been put into the public domain.
|
---|
7 | * You can do whatever you want with this file.
|
---|
8 | */
|
---|
9 |
|
---|
10 | package org.tukaani.xz;
|
---|
11 |
|
---|
12 | import java.util.Arrays;
|
---|
13 | import java.util.ArrayList;
|
---|
14 | import java.io.DataInputStream;
|
---|
15 | import java.io.IOException;
|
---|
16 | import java.io.EOFException;
|
---|
17 | import org.tukaani.xz.common.DecoderUtil;
|
---|
18 | import org.tukaani.xz.common.StreamFlags;
|
---|
19 | import org.tukaani.xz.check.Check;
|
---|
20 | import org.tukaani.xz.index.IndexDecoder;
|
---|
21 | import org.tukaani.xz.index.BlockInfo;
|
---|
22 |
|
---|
23 | /**
|
---|
24 | * Decompresses a .xz file in random access mode.
|
---|
25 | * This supports decompressing concatenated .xz files.
|
---|
26 | * <p>
|
---|
27 | * Each .xz file consists of one or more Streams. Each Stream consists of zero
|
---|
28 | * or more Blocks. Each Stream contains an Index of Streams' Blocks.
|
---|
29 | * The Indexes from all Streams are loaded in RAM by a constructor of this
|
---|
30 | * class. A typical .xz file has only one Stream, and parsing its Index will
|
---|
31 | * need only three or four seeks.
|
---|
32 | * <p>
|
---|
33 | * To make random access possible, the data in a .xz file must be split
|
---|
34 | * into multiple Blocks of reasonable size. Decompression can only start at
|
---|
35 | * a Block boundary. When seeking to an uncompressed position that is not at
|
---|
36 | * a Block boundary, decompression starts at the beginning of the Block and
|
---|
37 | * throws away data until the target position is reached. Thus, smaller Blocks
|
---|
38 | * mean faster seeks to arbitrary uncompressed positions. On the other hand,
|
---|
39 | * smaller Blocks mean worse compression. So one has to make a compromise
|
---|
40 | * between random access speed and compression ratio.
|
---|
41 | * <p>
|
---|
42 | * Implementation note: This class uses linear search to locate the correct
|
---|
43 | * Stream from the data structures in RAM. It was the simplest to implement
|
---|
44 | * and should be fine as long as there aren't too many Streams. The correct
|
---|
45 | * Block inside a Stream is located using binary search and thus is fast
|
---|
46 | * even with a huge number of Blocks.
|
---|
47 | *
|
---|
48 | * <h4>Memory usage</h4>
|
---|
49 | * <p>
|
---|
50 | * The amount of memory needed for the Indexes is taken into account when
|
---|
51 | * checking the memory usage limit. Each Stream is calculated to need at
|
---|
52 | * least 1 KiB of memory and each Block 16 bytes of memory, rounded up
|
---|
53 | * to the next kibibyte. So unless the file has a huge number of Streams or
|
---|
54 | * Blocks, these don't take significant amount of memory.
|
---|
55 | *
|
---|
56 | * <h4>Creating random-accessible .xz files</h4>
|
---|
57 | * <p>
|
---|
58 | * When using {@link XZOutputStream}, a new Block can be started by calling
|
---|
59 | * its {@link XZOutputStream#endBlock() endBlock} method. If you know
|
---|
60 | * that the decompressor will only need to seek to certain uncompressed
|
---|
61 | * positions, it can be a good idea to start a new Block at (some of) these
|
---|
62 | * positions (and only at these positions to get better compression ratio).
|
---|
63 | * <p>
|
---|
64 | * liblzma in XZ Utils supports starting a new Block with
|
---|
65 | * <code>LZMA_FULL_FLUSH</code>. XZ Utils 5.1.1alpha added threaded
|
---|
66 | * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha
|
---|
67 | * also added the option <code>--block-size=SIZE</code> to the xz command
|
---|
68 | * line tool. XZ Utils 5.1.2alpha added a partial implementation of
|
---|
69 | * <code>--block-list=SIZES</code> which allows specifying sizes of
|
---|
70 | * individual Blocks.
|
---|
71 | *
|
---|
72 | * @see SeekableFileInputStream
|
---|
73 | * @see XZInputStream
|
---|
74 | * @see XZOutputStream
|
---|
75 | */
|
---|
76 | public class SeekableXZInputStream extends SeekableInputStream {
|
---|
77 | /**
|
---|
78 | * Cache for big arrays.
|
---|
79 | */
|
---|
80 | private final ArrayCache arrayCache;
|
---|
81 |
|
---|
82 | /**
|
---|
83 | * The input stream containing XZ compressed data.
|
---|
84 | */
|
---|
85 | private SeekableInputStream in;
|
---|
86 |
|
---|
87 | /**
|
---|
88 | * Memory usage limit after the memory usage of the IndexDecoders have
|
---|
89 | * been subtracted.
|
---|
90 | */
|
---|
91 | private final int memoryLimit;
|
---|
92 |
|
---|
93 | /**
|
---|
94 | * Memory usage of the IndexDecoders.
|
---|
95 | * <code>memoryLimit + indexMemoryUsage</code> equals the original
|
---|
96 | * memory usage limit that was passed to the constructor.
|
---|
97 | */
|
---|
98 | private int indexMemoryUsage = 0;
|
---|
99 |
|
---|
100 | /**
|
---|
101 | * List of IndexDecoders, one for each Stream in the file.
|
---|
102 | * The list is in reverse order: The first element is
|
---|
103 | * the last Stream in the file.
|
---|
104 | */
|
---|
105 | private final ArrayList<IndexDecoder> streams
|
---|
106 | = new ArrayList<IndexDecoder>();
|
---|
107 |
|
---|
108 | /**
|
---|
109 | * Bitmask of all Check IDs seen.
|
---|
110 | */
|
---|
111 | private int checkTypes = 0;
|
---|
112 |
|
---|
113 | /**
|
---|
114 | * Uncompressed size of the file (all Streams).
|
---|
115 | */
|
---|
116 | private long uncompressedSize = 0;
|
---|
117 |
|
---|
118 | /**
|
---|
119 | * Uncompressed size of the largest XZ Block in the file.
|
---|
120 | */
|
---|
121 | private long largestBlockSize = 0;
|
---|
122 |
|
---|
123 | /**
|
---|
124 | * Number of XZ Blocks in the file.
|
---|
125 | */
|
---|
126 | private int blockCount = 0;
|
---|
127 |
|
---|
128 | /**
|
---|
129 | * Size and position information about the current Block.
|
---|
130 | * If there are no Blocks, all values will be <code>-1</code>.
|
---|
131 | */
|
---|
132 | private final BlockInfo curBlockInfo;
|
---|
133 |
|
---|
134 | /**
|
---|
135 | * Temporary (and cached) information about the Block whose information
|
---|
136 | * is queried via <code>getBlockPos</code> and related functions.
|
---|
137 | */
|
---|
138 | private final BlockInfo queriedBlockInfo;
|
---|
139 |
|
---|
140 | /**
|
---|
141 | * Integrity Check in the current XZ Stream. The constructor leaves
|
---|
142 | * this to point to the Check of the first Stream.
|
---|
143 | */
|
---|
144 | private Check check;
|
---|
145 |
|
---|
146 | /**
|
---|
147 | * Flag indicating if the integrity checks will be verified.
|
---|
148 | */
|
---|
149 | private final boolean verifyCheck;
|
---|
150 |
|
---|
151 | /**
|
---|
152 | * Decoder of the current XZ Block, if any.
|
---|
153 | */
|
---|
154 | private BlockInputStream blockDecoder = null;
|
---|
155 |
|
---|
156 | /**
|
---|
157 | * Current uncompressed position.
|
---|
158 | */
|
---|
159 | private long curPos = 0;
|
---|
160 |
|
---|
161 | /**
|
---|
162 | * Target position for seeking.
|
---|
163 | */
|
---|
164 | private long seekPos;
|
---|
165 |
|
---|
166 | /**
|
---|
167 | * True when <code>seek(long)</code> has been called but the actual
|
---|
168 | * seeking hasn't been done yet.
|
---|
169 | */
|
---|
170 | private boolean seekNeeded = false;
|
---|
171 |
|
---|
172 | /**
|
---|
173 | * True when end of the file was reached. This can be cleared by
|
---|
174 | * calling <code>seek(long)</code>.
|
---|
175 | */
|
---|
176 | private boolean endReached = false;
|
---|
177 |
|
---|
178 | /**
|
---|
179 | * Pending exception from an earlier error.
|
---|
180 | */
|
---|
181 | private IOException exception = null;
|
---|
182 |
|
---|
183 | /**
|
---|
184 | * Temporary buffer for read(). This avoids reallocating memory
|
---|
185 | * on every read() call.
|
---|
186 | */
|
---|
187 | private final byte[] tempBuf = new byte[1];
|
---|
188 |
|
---|
189 | /**
|
---|
190 | * Creates a new seekable XZ decompressor without a memory usage limit.
|
---|
191 | *
|
---|
192 | * @param in seekable input stream containing one or more
|
---|
193 | * XZ Streams; the whole input stream is used
|
---|
194 | *
|
---|
195 | * @throws XZFormatException
|
---|
196 | * input is not in the XZ format
|
---|
197 | *
|
---|
198 | * @throws CorruptedInputException
|
---|
199 | * XZ data is corrupt or truncated
|
---|
200 | *
|
---|
201 | * @throws UnsupportedOptionsException
|
---|
202 | * XZ headers seem valid but they specify
|
---|
203 | * options not supported by this implementation
|
---|
204 | *
|
---|
205 | * @throws EOFException
|
---|
206 | * less than 6 bytes of input was available
|
---|
207 | * from <code>in</code>, or (unlikely) the size
|
---|
208 | * of the underlying stream got smaller while
|
---|
209 | * this was reading from it
|
---|
210 | *
|
---|
211 | * @throws IOException may be thrown by <code>in</code>
|
---|
212 | */
|
---|
public SeekableXZInputStream(SeekableInputStream in) throws IOException {
    // Delegate with -1, which stands for "no memory usage limit".
    this(in, -1);
}
|
---|
217 |
|
---|
218 | /**
|
---|
219 | * Creates a new seekable XZ decompressor without a memory usage limit.
|
---|
220 | * <p>
|
---|
221 | * This is identical to
|
---|
222 | * <code>SeekableXZInputStream(SeekableInputStream)</code> except that
|
---|
223 | * this also takes the <code>arrayCache</code> argument.
|
---|
224 | *
|
---|
225 | * @param in seekable input stream containing one or more
|
---|
226 | * XZ Streams; the whole input stream is used
|
---|
227 | *
|
---|
228 | * @param arrayCache cache to be used for allocating large arrays
|
---|
229 | *
|
---|
230 | * @throws XZFormatException
|
---|
231 | * input is not in the XZ format
|
---|
232 | *
|
---|
233 | * @throws CorruptedInputException
|
---|
234 | * XZ data is corrupt or truncated
|
---|
235 | *
|
---|
236 | * @throws UnsupportedOptionsException
|
---|
237 | * XZ headers seem valid but they specify
|
---|
238 | * options not supported by this implementation
|
---|
239 | *
|
---|
240 | * @throws EOFException
|
---|
241 | * less than 6 bytes of input was available
|
---|
242 | * from <code>in</code>, or (unlikely) the size
|
---|
243 | * of the underlying stream got smaller while
|
---|
244 | * this was reading from it
|
---|
245 | *
|
---|
246 | * @throws IOException may be thrown by <code>in</code>
|
---|
247 | *
|
---|
248 | * @since 1.7
|
---|
249 | */
|
---|
public SeekableXZInputStream(SeekableInputStream in, ArrayCache arrayCache)
        throws IOException {
    // Delegate with -1 ("no memory usage limit") and the caller's cache.
    this(in, -1, arrayCache);
}
|
---|
254 |
|
---|
255 | /**
|
---|
256 | * Creates a new seekable XZ decompressor with an optional
|
---|
257 | * memory usage limit.
|
---|
258 | *
|
---|
259 | * @param in seekable input stream containing one or more
|
---|
260 | * XZ Streams; the whole input stream is used
|
---|
261 | *
|
---|
262 | * @param memoryLimit memory usage limit in kibibytes (KiB)
|
---|
263 | * or <code>-1</code> to impose no
|
---|
264 | * memory usage limit
|
---|
265 | *
|
---|
266 | * @throws XZFormatException
|
---|
267 | * input is not in the XZ format
|
---|
268 | *
|
---|
269 | * @throws CorruptedInputException
|
---|
270 | * XZ data is corrupt or truncated
|
---|
271 | *
|
---|
272 | * @throws UnsupportedOptionsException
|
---|
273 | * XZ headers seem valid but they specify
|
---|
274 | * options not supported by this implementation
|
---|
275 | *
|
---|
276 | * @throws MemoryLimitException
|
---|
277 | * decoded XZ Indexes would need more memory
|
---|
278 | * than allowed by the memory usage limit
|
---|
279 | *
|
---|
280 | * @throws EOFException
|
---|
281 | * less than 6 bytes of input was available
|
---|
282 | * from <code>in</code>, or (unlikely) the size
|
---|
283 | * of the underlying stream got smaller while
|
---|
284 | * this was reading from it
|
---|
285 | *
|
---|
286 | * @throws IOException may be thrown by <code>in</code>
|
---|
287 | */
|
---|
public SeekableXZInputStream(SeekableInputStream in, int memoryLimit)
        throws IOException {
    // Integrity check verification is enabled when the caller
    // does not say otherwise.
    this(in, memoryLimit, true);
}
|
---|
292 |
|
---|
293 | /**
|
---|
294 | * Creates a new seekable XZ decompressor with an optional
|
---|
295 | * memory usage limit.
|
---|
296 | * <p>
|
---|
297 | * This is identical to
|
---|
298 | * <code>SeekableXZInputStream(SeekableInputStream,int)</code>
|
---|
299 | * except that this also takes the <code>arrayCache</code> argument.
|
---|
300 | *
|
---|
301 | * @param in seekable input stream containing one or more
|
---|
302 | * XZ Streams; the whole input stream is used
|
---|
303 | *
|
---|
304 | * @param memoryLimit memory usage limit in kibibytes (KiB)
|
---|
305 | * or <code>-1</code> to impose no
|
---|
306 | * memory usage limit
|
---|
307 | *
|
---|
308 | * @param arrayCache cache to be used for allocating large arrays
|
---|
309 | *
|
---|
310 | * @throws XZFormatException
|
---|
311 | * input is not in the XZ format
|
---|
312 | *
|
---|
313 | * @throws CorruptedInputException
|
---|
314 | * XZ data is corrupt or truncated
|
---|
315 | *
|
---|
316 | * @throws UnsupportedOptionsException
|
---|
317 | * XZ headers seem valid but they specify
|
---|
318 | * options not supported by this implementation
|
---|
319 | *
|
---|
320 | * @throws MemoryLimitException
|
---|
321 | * decoded XZ Indexes would need more memory
|
---|
322 | * than allowed by the memory usage limit
|
---|
323 | *
|
---|
324 | * @throws EOFException
|
---|
325 | * less than 6 bytes of input was available
|
---|
326 | * from <code>in</code>, or (unlikely) the size
|
---|
327 | * of the underlying stream got smaller while
|
---|
328 | * this was reading from it
|
---|
329 | *
|
---|
330 | * @throws IOException may be thrown by <code>in</code>
|
---|
331 | *
|
---|
332 | * @since 1.7
|
---|
333 | */
|
---|
public SeekableXZInputStream(SeekableInputStream in,
                             int memoryLimit,
                             ArrayCache arrayCache) throws IOException {
    // Integrity check verification is enabled when the caller
    // does not say otherwise.
    this(in, memoryLimit, true, arrayCache);
}
|
---|
339 |
|
---|
340 | /**
|
---|
341 | * Creates a new seekable XZ decompressor with an optional
|
---|
342 | * memory usage limit and ability to disable verification
|
---|
343 | * of integrity checks.
|
---|
344 | * <p>
|
---|
345 | * Note that integrity check verification should almost never be disabled.
|
---|
346 | * Possible reasons to disable integrity check verification:
|
---|
347 | * <ul>
|
---|
348 | * <li>Trying to recover data from a corrupt .xz file.</li>
|
---|
349 | * <li>Speeding up decompression. This matters mostly with SHA-256
|
---|
350 | * or with files that have compressed extremely well. It's recommended
|
---|
351 | * that integrity checking isn't disabled for performance reasons
|
---|
352 | * unless the file integrity is verified externally in some other
|
---|
353 | * way.</li>
|
---|
354 | * </ul>
|
---|
355 | * <p>
|
---|
356 | * <code>verifyCheck</code> only affects the integrity check of
|
---|
357 | * the actual compressed data. The CRC32 fields in the headers
|
---|
358 | * are always verified.
|
---|
359 | *
|
---|
360 | * @param in seekable input stream containing one or more
|
---|
361 | * XZ Streams; the whole input stream is used
|
---|
362 | *
|
---|
363 | * @param memoryLimit memory usage limit in kibibytes (KiB)
|
---|
364 | * or <code>-1</code> to impose no
|
---|
365 | * memory usage limit
|
---|
366 | *
|
---|
367 | * @param verifyCheck if <code>true</code>, the integrity checks
|
---|
368 | * will be verified; this should almost never
|
---|
369 | * be set to <code>false</code>
|
---|
370 | *
|
---|
371 | * @throws XZFormatException
|
---|
372 | * input is not in the XZ format
|
---|
373 | *
|
---|
374 | * @throws CorruptedInputException
|
---|
375 | * XZ data is corrupt or truncated
|
---|
376 | *
|
---|
377 | * @throws UnsupportedOptionsException
|
---|
378 | * XZ headers seem valid but they specify
|
---|
379 | * options not supported by this implementation
|
---|
380 | *
|
---|
381 | * @throws MemoryLimitException
|
---|
382 | * decoded XZ Indexes would need more memory
|
---|
383 | * than allowed by the memory usage limit
|
---|
384 | *
|
---|
385 | * @throws EOFException
|
---|
386 | * less than 6 bytes of input was available
|
---|
387 | * from <code>in</code>, or (unlikely) the size
|
---|
388 | * of the underlying stream got smaller while
|
---|
389 | * this was reading from it
|
---|
390 | *
|
---|
391 | * @throws IOException may be thrown by <code>in</code>
|
---|
392 | *
|
---|
393 | * @since 1.6
|
---|
394 | */
|
---|
public SeekableXZInputStream(SeekableInputStream in,
                             int memoryLimit,
                             boolean verifyCheck) throws IOException {
    // No cache was supplied, so use the default ArrayCache.
    this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
}
|
---|
400 |
|
---|
401 | /**
|
---|
402 | * Creates a new seekable XZ decompressor with an optional
|
---|
403 | * memory usage limit and ability to disable verification
|
---|
404 | * of integrity checks.
|
---|
405 | * <p>
|
---|
406 | * This is identical to
|
---|
407 | * <code>SeekableXZInputStream(SeekableInputStream,int,boolean)</code>
|
---|
408 | * except that this also takes the <code>arrayCache</code> argument.
|
---|
409 | *
|
---|
410 | * @param in seekable input stream containing one or more
|
---|
411 | * XZ Streams; the whole input stream is used
|
---|
412 | *
|
---|
413 | * @param memoryLimit memory usage limit in kibibytes (KiB)
|
---|
414 | * or <code>-1</code> to impose no
|
---|
415 | * memory usage limit
|
---|
416 | *
|
---|
417 | * @param verifyCheck if <code>true</code>, the integrity checks
|
---|
418 | * will be verified; this should almost never
|
---|
419 | * be set to <code>false</code>
|
---|
420 | *
|
---|
421 | * @param arrayCache cache to be used for allocating large arrays
|
---|
422 | *
|
---|
423 | * @throws XZFormatException
|
---|
424 | * input is not in the XZ format
|
---|
425 | *
|
---|
426 | * @throws CorruptedInputException
|
---|
427 | * XZ data is corrupt or truncated
|
---|
428 | *
|
---|
429 | * @throws UnsupportedOptionsException
|
---|
430 | * XZ headers seem valid but they specify
|
---|
431 | * options not supported by this implementation
|
---|
432 | *
|
---|
433 | * @throws MemoryLimitException
|
---|
434 | * decoded XZ Indexes would need more memory
|
---|
435 | * than allowed by the memory usage limit
|
---|
436 | *
|
---|
437 | * @throws EOFException
|
---|
438 | * less than 6 bytes of input was available
|
---|
439 | * from <code>in</code>, or (unlikely) the size
|
---|
440 | * of the underlying stream got smaller while
|
---|
441 | * this was reading from it
|
---|
442 | *
|
---|
443 | * @throws IOException may be thrown by <code>in</code>
|
---|
444 | *
|
---|
445 | * @since 1.7
|
---|
446 | */
|
---|
447 | public SeekableXZInputStream(SeekableInputStream in, int memoryLimit,
|
---|
448 | boolean verifyCheck, ArrayCache arrayCache)
|
---|
449 | throws IOException {
|
---|
450 | this.arrayCache = arrayCache;
|
---|
451 | this.verifyCheck = verifyCheck;
|
---|
452 | this.in = in;
|
---|
453 | DataInputStream inData = new DataInputStream(in);
|
---|
454 |
|
---|
455 | // Check the magic bytes in the beginning of the file.
|
---|
456 | {
|
---|
457 | in.seek(0);
|
---|
458 | byte[] buf = new byte[XZ.HEADER_MAGIC.length];
|
---|
459 | inData.readFully(buf);
|
---|
460 | if (!Arrays.equals(buf, XZ.HEADER_MAGIC))
|
---|
461 | throw new XZFormatException();
|
---|
462 | }
|
---|
463 |
|
---|
464 | // Get the file size and verify that it is a multiple of 4 bytes.
|
---|
465 | long pos = in.length();
|
---|
466 | if ((pos & 3) != 0)
|
---|
467 | throw new CorruptedInputException(
|
---|
468 | "XZ file size is not a multiple of 4 bytes");
|
---|
469 |
|
---|
470 | // Parse the headers starting from the end of the file.
|
---|
471 | byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
|
---|
472 | long streamPadding = 0;
|
---|
473 |
|
---|
474 | while (pos > 0) {
|
---|
475 | if (pos < DecoderUtil.STREAM_HEADER_SIZE)
|
---|
476 | throw new CorruptedInputException();
|
---|
477 |
|
---|
478 | // Read the potential Stream Footer.
|
---|
479 | in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE);
|
---|
480 | inData.readFully(buf);
|
---|
481 |
|
---|
482 | // Skip Stream Padding four bytes at a time.
|
---|
483 | // Skipping more at once would be faster,
|
---|
484 | // but usually there isn't much Stream Padding.
|
---|
485 | if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00
|
---|
486 | && buf[11] == 0x00) {
|
---|
487 | streamPadding += 4;
|
---|
488 | pos -= 4;
|
---|
489 | continue;
|
---|
490 | }
|
---|
491 |
|
---|
492 | // It's not Stream Padding. Update pos.
|
---|
493 | pos -= DecoderUtil.STREAM_HEADER_SIZE;
|
---|
494 |
|
---|
495 | // Decode the Stream Footer and check if Backward Size
|
---|
496 | // looks reasonable.
|
---|
497 | StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf);
|
---|
498 | if (streamFooter.backwardSize >= pos)
|
---|
499 | throw new CorruptedInputException(
|
---|
500 | "Backward Size in XZ Stream Footer is too big");
|
---|
501 |
|
---|
502 | // Check that the Check ID is supported. Store it in case this
|
---|
503 | // is the first Stream in the file.
|
---|
504 | check = Check.getInstance(streamFooter.checkType);
|
---|
505 |
|
---|
506 | // Remember which Check IDs have been seen.
|
---|
507 | checkTypes |= 1 << streamFooter.checkType;
|
---|
508 |
|
---|
509 | // Seek to the beginning of the Index.
|
---|
510 | in.seek(pos - streamFooter.backwardSize);
|
---|
511 |
|
---|
512 | // Decode the Index field.
|
---|
513 | IndexDecoder index;
|
---|
514 | try {
|
---|
515 | index = new IndexDecoder(in, streamFooter, streamPadding,
|
---|
516 | memoryLimit);
|
---|
517 | } catch (MemoryLimitException e) {
|
---|
518 | // IndexDecoder doesn't know how much memory we had
|
---|
519 | // already needed so we need to recreate the exception.
|
---|
520 | assert memoryLimit >= 0;
|
---|
521 | throw new MemoryLimitException(
|
---|
522 | e.getMemoryNeeded() + indexMemoryUsage,
|
---|
523 | memoryLimit + indexMemoryUsage);
|
---|
524 | }
|
---|
525 |
|
---|
526 | // Update the memory usage and limit counters.
|
---|
527 | indexMemoryUsage += index.getMemoryUsage();
|
---|
528 | if (memoryLimit >= 0) {
|
---|
529 | memoryLimit -= index.getMemoryUsage();
|
---|
530 | assert memoryLimit >= 0;
|
---|
531 | }
|
---|
532 |
|
---|
533 | // Remember the uncompressed size of the largest Block.
|
---|
534 | if (largestBlockSize < index.getLargestBlockSize())
|
---|
535 | largestBlockSize = index.getLargestBlockSize();
|
---|
536 |
|
---|
537 | // Calculate the offset to the beginning of this XZ Stream and
|
---|
538 | // check that it looks sane.
|
---|
539 | long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE;
|
---|
540 | if (pos < off)
|
---|
541 | throw new CorruptedInputException("XZ Index indicates "
|
---|
542 | + "too big compressed size for the XZ Stream");
|
---|
543 |
|
---|
544 | // Seek to the beginning of this Stream.
|
---|
545 | pos -= off;
|
---|
546 | in.seek(pos);
|
---|
547 |
|
---|
548 | // Decode the Stream Header.
|
---|
549 | inData.readFully(buf);
|
---|
550 | StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf);
|
---|
551 |
|
---|
552 | // Verify that the Stream Header matches the Stream Footer.
|
---|
553 | if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter))
|
---|
554 | throw new CorruptedInputException(
|
---|
555 | "XZ Stream Footer does not match Stream Header");
|
---|
556 |
|
---|
557 | // Update the total uncompressed size of the file and check that
|
---|
558 | // it doesn't overflow.
|
---|
559 | uncompressedSize += index.getUncompressedSize();
|
---|
560 | if (uncompressedSize < 0)
|
---|
561 | throw new UnsupportedOptionsException("XZ file is too big");
|
---|
562 |
|
---|
563 | // Update the Block count and check that it fits into an int.
|
---|
564 | blockCount += index.getRecordCount();
|
---|
565 | if (blockCount < 0)
|
---|
566 | throw new UnsupportedOptionsException(
|
---|
567 | "XZ file has over " + Integer.MAX_VALUE + " Blocks");
|
---|
568 |
|
---|
569 | // Add this Stream to the list of Streams.
|
---|
570 | streams.add(index);
|
---|
571 |
|
---|
572 | // Reset to be ready to parse the next Stream.
|
---|
573 | streamPadding = 0;
|
---|
574 | }
|
---|
575 |
|
---|
576 | assert pos == 0;
|
---|
577 |
|
---|
578 | // Save it now that indexMemoryUsage has been substracted from it.
|
---|
579 | this.memoryLimit = memoryLimit;
|
---|
580 |
|
---|
581 | // Store the relative offsets of the Streams. This way we don't
|
---|
582 | // need to recalculate them in this class when seeking; the
|
---|
583 | // IndexDecoder instances will handle them.
|
---|
584 | IndexDecoder prev = streams.get(streams.size() - 1);
|
---|
585 | for (int i = streams.size() - 2; i >= 0; --i) {
|
---|
586 | IndexDecoder cur = streams.get(i);
|
---|
587 | cur.setOffsets(prev);
|
---|
588 | prev = cur;
|
---|
589 | }
|
---|
590 |
|
---|
591 | // Initialize curBlockInfo to point to the first Stream.
|
---|
592 | // The blockNumber will be left to -1 so that .hasNext()
|
---|
593 | // and .setNext() work to get the first Block when starting
|
---|
594 | // to decompress from the beginning of the file.
|
---|
595 | IndexDecoder first = streams.get(streams.size() - 1);
|
---|
596 | curBlockInfo = new BlockInfo(first);
|
---|
597 |
|
---|
598 | // queriedBlockInfo needs to be allocated too. The Stream used for
|
---|
599 | // initialization doesn't matter though.
|
---|
600 | queriedBlockInfo = new BlockInfo(first);
|
---|
601 | }
|
---|
602 |
|
---|
603 | /**
|
---|
604 | * Gets the types of integrity checks used in the .xz file.
|
---|
605 | * Multiple checks are possible only if there are multiple
|
---|
606 | * concatenated XZ Streams.
|
---|
607 | * <p>
|
---|
608 | * The returned value has a bit set for every check type that is present.
|
---|
609 | * For example, if CRC64 and SHA-256 were used, the return value is
|
---|
610 | * <code>(1 &lt;&lt; XZ.CHECK_CRC64)
|
---|
611 | * | (1 &lt;&lt; XZ.CHECK_SHA256)</code>.
|
---|
612 | */
|
---|
613 | public int getCheckTypes() {
|
---|
614 | return checkTypes;
|
---|
615 | }
|
---|
616 |
|
---|
617 | /**
|
---|
618 | * Gets the amount of memory in kibibytes (KiB) used by
|
---|
619 | * the data structures needed to locate the XZ Blocks.
|
---|
620 | * This is usually useless information but since it is calculated
|
---|
621 | * for memory usage limit anyway, it is nice to make it available too.
|
---|
622 | */
|
---|
623 | public int getIndexMemoryUsage() {
|
---|
624 | return indexMemoryUsage;
|
---|
625 | }
|
---|
626 |
|
---|
627 | /**
|
---|
628 | * Gets the uncompressed size of the largest XZ Block in bytes.
|
---|
629 | * This can be useful if you want to check that the file doesn't
|
---|
630 | * have huge XZ Blocks which could make seeking to arbitrary offsets
|
---|
631 | * very slow. Note that huge Blocks don't automatically mean that
|
---|
632 | * seeking would be slow, for example, seeking to the beginning of
|
---|
633 | * any Block is always fast.
|
---|
634 | */
|
---|
635 | public long getLargestBlockSize() {
|
---|
636 | return largestBlockSize;
|
---|
637 | }
|
---|
638 |
|
---|
639 | /**
|
---|
640 | * Gets the number of Streams in the .xz file.
|
---|
641 | *
|
---|
642 | * @since 1.3
|
---|
643 | */
|
---|
644 | public int getStreamCount() {
|
---|
645 | return streams.size();
|
---|
646 | }
|
---|
647 |
|
---|
648 | /**
|
---|
649 | * Gets the number of Blocks in the .xz file.
|
---|
650 | *
|
---|
651 | * @since 1.3
|
---|
652 | */
|
---|
653 | public int getBlockCount() {
|
---|
654 | return blockCount;
|
---|
655 | }
|
---|
656 |
|
---|
657 | /**
|
---|
658 | * Gets the uncompressed start position of the given Block.
|
---|
659 | *
|
---|
660 | * @throws IndexOutOfBoundsException if
|
---|
661 | * <code>blockNumber &lt; 0</code> or
|
---|
662 | * <code>blockNumber &gt;= getBlockCount()</code>.
|
---|
663 | *
|
---|
664 | * @since 1.3
|
---|
665 | */
|
---|
666 | public long getBlockPos(int blockNumber) {
|
---|
667 | locateBlockByNumber(queriedBlockInfo, blockNumber);
|
---|
668 | return queriedBlockInfo.uncompressedOffset;
|
---|
669 | }
|
---|
670 |
|
---|
671 | /**
|
---|
672 | * Gets the uncompressed size of the given Block.
|
---|
673 | *
|
---|
674 | * @throws IndexOutOfBoundsException if
|
---|
675 | * <code>blockNumber &lt; 0</code> or
|
---|
676 | * <code>blockNumber &gt;= getBlockCount()</code>.
|
---|
677 | *
|
---|
678 | * @since 1.3
|
---|
679 | */
|
---|
680 | public long getBlockSize(int blockNumber) {
|
---|
681 | locateBlockByNumber(queriedBlockInfo, blockNumber);
|
---|
682 | return queriedBlockInfo.uncompressedSize;
|
---|
683 | }
|
---|
684 |
|
---|
685 | /**
|
---|
686 | * Gets the position where the given compressed Block starts in
|
---|
687 | * the underlying .xz file.
|
---|
688 | * This information is rarely useful to the users of this class.
|
---|
689 | *
|
---|
690 | * @throws IndexOutOfBoundsException if
|
---|
691 | * <code>blockNumber &lt; 0</code> or
|
---|
692 | * <code>blockNumber &gt;= getBlockCount()</code>.
|
---|
693 | *
|
---|
694 | * @since 1.3
|
---|
695 | */
|
---|
696 | public long getBlockCompPos(int blockNumber) {
|
---|
697 | locateBlockByNumber(queriedBlockInfo, blockNumber);
|
---|
698 | return queriedBlockInfo.compressedOffset;
|
---|
699 | }
|
---|
700 |
|
---|
701 | /**
|
---|
702 | * Gets the compressed size of the given Block.
|
---|
703 | * This together with the uncompressed size can be used to calculate
|
---|
704 | * the compression ratio of the specific Block.
|
---|
705 | *
|
---|
706 | * @throws IndexOutOfBoundsException if
|
---|
707 | * <code>blockNumber &lt; 0</code> or
|
---|
708 | * <code>blockNumber &gt;= getBlockCount()</code>.
|
---|
709 | *
|
---|
710 | * @since 1.3
|
---|
711 | */
|
---|
712 | public long getBlockCompSize(int blockNumber) {
|
---|
713 | locateBlockByNumber(queriedBlockInfo, blockNumber);
|
---|
714 | return (queriedBlockInfo.unpaddedSize + 3) & ~3;
|
---|
715 | }
|
---|
716 |
|
---|
717 | /**
|
---|
718 | * Gets integrity check type (Check ID) of the given Block.
|
---|
719 | *
|
---|
720 | * @throws IndexOutOfBoundsException if
|
---|
721 | * <code>blockNumber &lt; 0</code> or
|
---|
722 | * <code>blockNumber &gt;= getBlockCount()</code>.
|
---|
723 | *
|
---|
724 | * @see #getCheckTypes()
|
---|
725 | *
|
---|
726 | * @since 1.3
|
---|
727 | */
|
---|
728 | public int getBlockCheckType(int blockNumber) {
|
---|
729 | locateBlockByNumber(queriedBlockInfo, blockNumber);
|
---|
730 | return queriedBlockInfo.getCheckType();
|
---|
731 | }
|
---|
732 |
|
---|
733 | /**
|
---|
734 | * Gets the number of the Block that contains the byte at the given
|
---|
735 | * uncompressed position.
|
---|
736 | *
|
---|
737 | * @throws IndexOutOfBoundsException if
|
---|
738 | * <code>pos &lt; 0</code> or
|
---|
739 | * <code>pos &gt;= length()</code>.
|
---|
740 | *
|
---|
741 | * @since 1.3
|
---|
742 | */
|
---|
743 | public int getBlockNumber(long pos) {
|
---|
744 | locateBlockByPos(queriedBlockInfo, pos);
|
---|
745 | return queriedBlockInfo.blockNumber;
|
---|
746 | }
|
---|
747 |
|
---|
748 | /**
|
---|
749 | * Decompresses the next byte from this input stream.
|
---|
750 | *
|
---|
751 | * @return the next decompressed byte, or <code>-1</code>
|
---|
752 | * to indicate the end of the compressed stream
|
---|
753 | *
|
---|
754 | * @throws CorruptedInputException
|
---|
755 | * @throws UnsupportedOptionsException
|
---|
756 | * @throws MemoryLimitException
|
---|
757 | *
|
---|
758 | * @throws XZIOException if the stream has been closed
|
---|
759 | *
|
---|
760 | * @throws IOException may be thrown by <code>in</code>
|
---|
761 | */
|
---|
762 | public int read() throws IOException {
|
---|
763 | return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
|
---|
764 | }
|
---|
765 |
|
---|
766 | /**
|
---|
767 | * Decompresses into an array of bytes.
|
---|
768 | * <p>
|
---|
769 | * If <code>len</code> is zero, no bytes are read and <code>0</code>
|
---|
770 | * is returned. Otherwise this will try to decompress <code>len</code>
|
---|
771 | * bytes of uncompressed data. Less than <code>len</code> bytes may
|
---|
772 | * be read only in the following situations:
|
---|
773 | * <ul>
|
---|
774 | * <li>The end of the compressed data was reached successfully.</li>
|
---|
775 | * <li>An error is detected after at least one but less than
|
---|
776 | * <code>len</code> bytes have already been successfully
|
---|
777 | * decompressed. The next call with non-zero <code>len</code>
|
---|
778 | * will immediately throw the pending exception.</li>
|
---|
779 | * <li>An exception is thrown.</li>
|
---|
780 | * </ul>
|
---|
781 | *
|
---|
782 | * @param buf target buffer for uncompressed data
|
---|
783 | * @param off start offset in <code>buf</code>
|
---|
784 | * @param len maximum number of uncompressed bytes to read
|
---|
785 | *
|
---|
786 | * @return number of bytes read, or <code>-1</code> to indicate
|
---|
787 | * the end of the compressed stream
|
---|
788 | *
|
---|
789 | * @throws CorruptedInputException
|
---|
790 | * @throws UnsupportedOptionsException
|
---|
791 | * @throws MemoryLimitException
|
---|
792 | *
|
---|
793 | * @throws XZIOException if the stream has been closed
|
---|
794 | *
|
---|
795 | * @throws IOException may be thrown by <code>in</code>
|
---|
796 | */
|
---|
797 | public int read(byte[] buf, int off, int len) throws IOException {
|
---|
798 | if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
|
---|
799 | throw new IndexOutOfBoundsException();
|
---|
800 |
|
---|
801 | if (len == 0)
|
---|
802 | return 0;
|
---|
803 |
|
---|
804 | if (in == null)
|
---|
805 | throw new XZIOException("Stream closed");
|
---|
806 |
|
---|
807 | if (exception != null)
|
---|
808 | throw exception;
|
---|
809 |
|
---|
810 | int size = 0;
|
---|
811 |
|
---|
812 | try {
|
---|
813 | if (seekNeeded)
|
---|
814 | seek();
|
---|
815 |
|
---|
816 | if (endReached)
|
---|
817 | return -1;
|
---|
818 |
|
---|
819 | while (len > 0) {
|
---|
820 | if (blockDecoder == null) {
|
---|
821 | seek();
|
---|
822 | if (endReached)
|
---|
823 | break;
|
---|
824 | }
|
---|
825 |
|
---|
826 | int ret = blockDecoder.read(buf, off, len);
|
---|
827 |
|
---|
828 | if (ret > 0) {
|
---|
829 | curPos += ret;
|
---|
830 | size += ret;
|
---|
831 | off += ret;
|
---|
832 | len -= ret;
|
---|
833 | } else if (ret == -1) {
|
---|
834 | blockDecoder = null;
|
---|
835 | }
|
---|
836 | }
|
---|
837 | } catch (IOException e) {
|
---|
838 | // We know that the file isn't simply truncated because we could
|
---|
839 | // parse the Indexes in the constructor. So convert EOFException
|
---|
840 | // to CorruptedInputException.
|
---|
841 | if (e instanceof EOFException)
|
---|
842 | e = new CorruptedInputException();
|
---|
843 |
|
---|
844 | exception = e;
|
---|
845 | if (size == 0)
|
---|
846 | throw e;
|
---|
847 | }
|
---|
848 |
|
---|
849 | return size;
|
---|
850 | }
|
---|
851 |
|
---|
852 | /**
|
---|
853 | * Returns the number of uncompressed bytes that can be read
|
---|
854 | * without blocking. The value is returned with an assumption
|
---|
855 | * that the compressed input data will be valid. If the compressed
|
---|
856 | * data is corrupt, <code>CorruptedInputException</code> may get
|
---|
857 | * thrown before the number of bytes claimed to be available have
|
---|
858 | * been read from this input stream.
|
---|
859 | *
|
---|
860 | * @return the number of uncompressed bytes that can be read
|
---|
861 | * without blocking
|
---|
862 | */
|
---|
863 | public int available() throws IOException {
|
---|
864 | if (in == null)
|
---|
865 | throw new XZIOException("Stream closed");
|
---|
866 |
|
---|
867 | if (exception != null)
|
---|
868 | throw exception;
|
---|
869 |
|
---|
870 | if (endReached || seekNeeded || blockDecoder == null)
|
---|
871 | return 0;
|
---|
872 |
|
---|
873 | return blockDecoder.available();
|
---|
874 | }
|
---|
875 |
|
---|
876 | /**
|
---|
877 | * Closes the stream and calls <code>in.close()</code>.
|
---|
878 | * If the stream was already closed, this does nothing.
|
---|
879 | * <p>
|
---|
880 | * This is equivalent to <code>close(true)</code>.
|
---|
881 | *
|
---|
882 | * @throws IOException if thrown by <code>in.close()</code>
|
---|
883 | */
|
---|
884 | public void close() throws IOException {
|
---|
885 | close(true);
|
---|
886 | }
|
---|
887 |
|
---|
888 | /**
|
---|
889 | * Closes the stream and optionally calls <code>in.close()</code>.
|
---|
890 | * If the stream was already closed, this does nothing.
|
---|
891 | * If <code>close(false)</code> has been called, a further
|
---|
892 | * call of <code>close(true)</code> does nothing (it doesn't call
|
---|
893 | * <code>in.close()</code>).
|
---|
894 | * <p>
|
---|
895 | * If you don't want to close the underlying <code>InputStream</code>,
|
---|
896 | * there is usually no need to worry about closing this stream either;
|
---|
897 | * it's fine to do nothing and let the garbage collector handle it.
|
---|
898 | * However, if you are using {@link ArrayCache}, <code>close(false)</code>
|
---|
899 | * can be useful to put the allocated arrays back to the cache without
|
---|
900 | * closing the underlying <code>InputStream</code>.
|
---|
901 | * <p>
|
---|
902 | * Note that if you successfully reach the end of the stream
|
---|
903 | * (<code>read</code> returns <code>-1</code>), the arrays are
|
---|
904 | * automatically put back to the cache by that <code>read</code> call. In
|
---|
905 | * this situation <code>close(false)</code> is redundant (but harmless).
|
---|
906 | *
|
---|
907 | * @throws IOException if thrown by <code>in.close()</code>
|
---|
908 | *
|
---|
909 | * @since 1.7
|
---|
910 | */
|
---|
911 | public void close(boolean closeInput) throws IOException {
|
---|
912 | if (in != null) {
|
---|
913 | if (blockDecoder != null) {
|
---|
914 | blockDecoder.close();
|
---|
915 | blockDecoder = null;
|
---|
916 | }
|
---|
917 |
|
---|
918 | try {
|
---|
919 | if (closeInput)
|
---|
920 | in.close();
|
---|
921 | } finally {
|
---|
922 | in = null;
|
---|
923 | }
|
---|
924 | }
|
---|
925 | }
|
---|
926 |
|
---|
927 | /**
|
---|
928 | * Gets the uncompressed size of this input stream. If there are multiple
|
---|
929 | * XZ Streams, the total uncompressed size of all XZ Streams is returned.
|
---|
930 | */
|
---|
931 | public long length() {
|
---|
932 | return uncompressedSize;
|
---|
933 | }
|
---|
934 |
|
---|
935 | /**
|
---|
936 | * Gets the current uncompressed position in this input stream.
|
---|
937 | *
|
---|
938 | * @throws XZIOException if the stream has been closed
|
---|
939 | */
|
---|
940 | public long position() throws IOException {
|
---|
941 | if (in == null)
|
---|
942 | throw new XZIOException("Stream closed");
|
---|
943 |
|
---|
944 | return seekNeeded ? seekPos : curPos;
|
---|
945 | }
|
---|
946 |
|
---|
947 | /**
|
---|
948 | * Seeks to the specified absolute uncompressed position in the stream.
|
---|
949 | * This only stores the new position, so this function itself is always
|
---|
950 | * very fast. The actual seek is done when <code>read</code> is called
|
---|
951 | * to read at least one byte.
|
---|
952 | * <p>
|
---|
953 | * Seeking past the end of the stream is possible. In that case
|
---|
954 | * <code>read</code> will return <code>-1</code> to indicate
|
---|
955 | * the end of the stream.
|
---|
956 | *
|
---|
957 | * @param pos new uncompressed read position
|
---|
958 | *
|
---|
959 | * @throws XZIOException
|
---|
960 | * if <code>pos</code> is negative, or
|
---|
961 | * if stream has been closed
|
---|
962 | */
|
---|
963 | public void seek(long pos) throws IOException {
|
---|
964 | if (in == null)
|
---|
965 | throw new XZIOException("Stream closed");
|
---|
966 |
|
---|
967 | if (pos < 0)
|
---|
968 | throw new XZIOException("Negative seek position: " + pos);
|
---|
969 |
|
---|
970 | seekPos = pos;
|
---|
971 | seekNeeded = true;
|
---|
972 | }
|
---|
973 |
|
---|
974 | /**
|
---|
975 | * Seeks to the beginning of the given XZ Block.
|
---|
976 | *
|
---|
977 | * @throws XZIOException
|
---|
978 | * if <code>blockNumber &lt; 0</code> or
|
---|
979 | * <code>blockNumber &gt;= getBlockCount()</code>,
|
---|
980 | * or if stream has been closed
|
---|
981 | *
|
---|
982 | * @since 1.3
|
---|
983 | */
|
---|
984 | public void seekToBlock(int blockNumber) throws IOException {
|
---|
985 | if (in == null)
|
---|
986 | throw new XZIOException("Stream closed");
|
---|
987 |
|
---|
988 | if (blockNumber < 0 || blockNumber >= blockCount)
|
---|
989 | throw new XZIOException("Invalid XZ Block number: " + blockNumber);
|
---|
990 |
|
---|
991 | // This is a bit silly implementation. Here we locate the uncompressed
|
---|
992 | // offset of the specified Block, then when doing the actual seek in
|
---|
993 | // seek(), we need to find the Block number based on seekPos.
|
---|
994 | seekPos = getBlockPos(blockNumber);
|
---|
995 | seekNeeded = true;
|
---|
996 | }
|
---|
997 |
|
---|
998 | /**
|
---|
999 | * Does the actual seeking. This is also called when <code>read</code>
|
---|
1000 | * needs a new Block to decode.
|
---|
1001 | */
|
---|
1002 | private void seek() throws IOException {
|
---|
1003 | // If seek(long) wasn't called, we simply need to get the next Block
|
---|
1004 | // from the same Stream. If there are no more Blocks in this Stream,
|
---|
1005 | // then we behave as if seek(long) had been called.
|
---|
1006 | if (!seekNeeded) {
|
---|
1007 | if (curBlockInfo.hasNext()) {
|
---|
1008 | curBlockInfo.setNext();
|
---|
1009 | initBlockDecoder();
|
---|
1010 | return;
|
---|
1011 | }
|
---|
1012 |
|
---|
1013 | seekPos = curPos;
|
---|
1014 | }
|
---|
1015 |
|
---|
1016 | seekNeeded = false;
|
---|
1017 |
|
---|
1018 | // Check if we are seeking to or past the end of the file.
|
---|
1019 | if (seekPos >= uncompressedSize) {
|
---|
1020 | curPos = seekPos;
|
---|
1021 |
|
---|
1022 | if (blockDecoder != null) {
|
---|
1023 | blockDecoder.close();
|
---|
1024 | blockDecoder = null;
|
---|
1025 | }
|
---|
1026 |
|
---|
1027 | endReached = true;
|
---|
1028 | return;
|
---|
1029 | }
|
---|
1030 |
|
---|
1031 | endReached = false;
|
---|
1032 |
|
---|
1033 | // Locate the Block that contains the uncompressed target position.
|
---|
1034 | locateBlockByPos(curBlockInfo, seekPos);
|
---|
1035 |
|
---|
1036 | // Seek in the underlying stream and create a new Block decoder
|
---|
1037 | // only if really needed. We can skip it if the current position
|
---|
1038 | // is already in the correct Block and the target position hasn't
|
---|
1039 | // been decompressed yet.
|
---|
1040 | //
|
---|
1041 | // NOTE: If curPos points to the beginning of this Block, it's
|
---|
1042 | // because it was left there after decompressing an earlier Block.
|
---|
1043 | // In that case, decoding of the current Block hasn't been started
|
---|
1044 | // yet. (Decoding of a Block won't be started until at least one
|
---|
1045 | // byte will also be read from it.)
|
---|
1046 | if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) {
|
---|
1047 | // Seek to the beginning of the Block.
|
---|
1048 | in.seek(curBlockInfo.compressedOffset);
|
---|
1049 |
|
---|
1050 | // Since it is possible that this Block is from a different
|
---|
1051 | // Stream than the previous Block, initialize a new Check.
|
---|
1052 | check = Check.getInstance(curBlockInfo.getCheckType());
|
---|
1053 |
|
---|
1054 | // Create a new Block decoder.
|
---|
1055 | initBlockDecoder();
|
---|
1056 | curPos = curBlockInfo.uncompressedOffset;
|
---|
1057 | }
|
---|
1058 |
|
---|
1059 | // If the target wasn't at a Block boundary, decompress and throw
|
---|
1060 | // away data to reach the target position.
|
---|
1061 | if (seekPos > curPos) {
|
---|
1062 | // NOTE: The "if" below is there just in case. In this situation,
|
---|
1063 | // blockDecoder.skip will always skip the requested amount
|
---|
1064 | // or throw an exception.
|
---|
1065 | long skipAmount = seekPos - curPos;
|
---|
1066 | if (blockDecoder.skip(skipAmount) != skipAmount)
|
---|
1067 | throw new CorruptedInputException();
|
---|
1068 |
|
---|
1069 | curPos = seekPos;
|
---|
1070 | }
|
---|
1071 | }
|
---|
1072 |
|
---|
1073 | /**
|
---|
1074 | * Locates the Block that contains the given uncompressed position.
|
---|
1075 | */
|
---|
1076 | private void locateBlockByPos(BlockInfo info, long pos) {
|
---|
1077 | if (pos < 0 || pos >= uncompressedSize)
|
---|
1078 | throw new IndexOutOfBoundsException(
|
---|
1079 | "Invalid uncompressed position: " + pos);
|
---|
1080 |
|
---|
1081 | // Locate the Stream that contains the target position.
|
---|
1082 | IndexDecoder index;
|
---|
1083 | for (int i = 0; ; ++i) {
|
---|
1084 | index = streams.get(i);
|
---|
1085 | if (index.hasUncompressedOffset(pos))
|
---|
1086 | break;
|
---|
1087 | }
|
---|
1088 |
|
---|
1089 | // Locate the Block from the Stream that contains the target position.
|
---|
1090 | index.locateBlock(info, pos);
|
---|
1091 |
|
---|
1092 | assert (info.compressedOffset & 3) == 0;
|
---|
1093 | assert info.uncompressedSize > 0;
|
---|
1094 | assert pos >= info.uncompressedOffset;
|
---|
1095 | assert pos < info.uncompressedOffset + info.uncompressedSize;
|
---|
1096 | }
|
---|
1097 |
|
---|
1098 | /**
|
---|
1099 | * Locates the given Block and stores information about it
|
---|
1100 | * to <code>info</code>.
|
---|
1101 | */
|
---|
1102 | private void locateBlockByNumber(BlockInfo info, int blockNumber) {
|
---|
1103 | // Validate.
|
---|
1104 | if (blockNumber < 0 || blockNumber >= blockCount)
|
---|
1105 | throw new IndexOutOfBoundsException(
|
---|
1106 | "Invalid XZ Block number: " + blockNumber);
|
---|
1107 |
|
---|
1108 | // Skip the search if info already points to the correct Block.
|
---|
1109 | if (info.blockNumber == blockNumber)
|
---|
1110 | return;
|
---|
1111 |
|
---|
1112 | // Search the Stream that contains the given Block and then
|
---|
1113 | // search the Block from that Stream.
|
---|
1114 | for (int i = 0; ; ++i) {
|
---|
1115 | IndexDecoder index = streams.get(i);
|
---|
1116 | if (index.hasRecord(blockNumber)) {
|
---|
1117 | index.setBlockInfo(info, blockNumber);
|
---|
1118 | return;
|
---|
1119 | }
|
---|
1120 | }
|
---|
1121 | }
|
---|
1122 |
|
---|
1123 | /**
|
---|
1124 | * Initializes a new BlockInputStream. This is a helper function for
|
---|
1125 | * <code>seek()</code>.
|
---|
1126 | */
|
---|
1127 | private void initBlockDecoder() throws IOException {
|
---|
1128 | try {
|
---|
1129 | // Set it to null first so that GC can collect it if memory
|
---|
1130 | // runs tight when initializing a new BlockInputStream.
|
---|
1131 | if (blockDecoder != null) {
|
---|
1132 | blockDecoder.close();
|
---|
1133 | blockDecoder = null;
|
---|
1134 | }
|
---|
1135 |
|
---|
1136 | blockDecoder = new BlockInputStream(
|
---|
1137 | in, check, verifyCheck, memoryLimit,
|
---|
1138 | curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize,
|
---|
1139 | arrayCache);
|
---|
1140 | } catch (MemoryLimitException e) {
|
---|
1141 | // BlockInputStream doesn't know how much memory we had
|
---|
1142 | // already needed so we need to recreate the exception.
|
---|
1143 | assert memoryLimit >= 0;
|
---|
1144 | throw new MemoryLimitException(
|
---|
1145 | e.getMemoryNeeded() + indexMemoryUsage,
|
---|
1146 | memoryLimit + indexMemoryUsage);
|
---|
1147 | } catch (IndexIndicatorException e) {
|
---|
1148 | // It cannot be Index so the file must be corrupt.
|
---|
1149 | throw new CorruptedInputException();
|
---|
1150 | }
|
---|
1151 | }
|
---|
1152 | }
|
---|