Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: josm/trunk/src/org/tukaani/xz/LZMA2Options.java@ 13935

Last change on this file since 13935 was 13350, checked in by stoecker, 8 years ago
see #15816 - add XZ support
File size: 19.3 KB

Line
1	/*
2	* LZMA2Options
3	*
4	* Author: Lasse Collin <lasse.collin@tukaani.org>
5	*
6	* This file has been put into the public domain.
7	* You can do whatever you want with this file.
8	*/
9
10	package org.tukaani.xz;
11
12	import java.io.InputStream;
13	import java.io.IOException;
14	import org.tukaani.xz.lz.LZEncoder;
15	import org.tukaani.xz.lzma.LZMAEncoder;
16
17	/**
18	* LZMA2 compression options.
19	* <p>
20	* While this allows setting the LZMA2 compression options in detail,
21	* often you only need <code>LZMA2Options()</code> or
22	* <code>LZMA2Options(int)</code>.
23	*/
24	public class LZMA2Options extends FilterOptions {
25	/**
26	* Minimum valid compression preset level is 0.
27	*/
28	public static final int PRESET_MIN = 0;
29
30	/**
31	* Maximum valid compression preset level is 9.
32	*/
33	public static final int PRESET_MAX = 9;
34
35	/**
36	* Default compression preset level is 6.
37	*/
38	public static final int PRESET_DEFAULT = 6;
39
40	/**
41	* Minimum dictionary size is 4 KiB.
42	*/
43	public static final int DICT_SIZE_MIN = 4096;
44
45	/**
46	* Maximum dictionary size for compression is 768 MiB.
47	* <p>
48	* The decompressor supports bigger dictionaries, up to almost 2 GiB.
49	* With HC4 the encoder would support dictionaries bigger than 768 MiB.
50	* The 768 MiB limit comes from the current implementation of BT4 where
51	* we would otherwise hit the limits of signed ints in array indexing.
52	* <p>
53	* If you really need bigger dictionary for decompression,
54	* use {@link LZMA2InputStream} directly.
55	*/
56	public static final int DICT_SIZE_MAX = 768 << 20;
57
58	/**
59	* The default dictionary size is 8 MiB.
60	*/
61	public static final int DICT_SIZE_DEFAULT = 8 << 20;
62
63	/**
64	* Maximum value for lc + lp is 4.
65	*/
66	public static final int LC_LP_MAX = 4;
67
68	/**
69	* The default number of literal context bits is 3.
70	*/
71	public static final int LC_DEFAULT = 3;
72
73	/**
74	* The default number of literal position bits is 0.
75	*/
76	public static final int LP_DEFAULT = 0;
77
78	/**
79	* Maximum value for pb is 4.
80	*/
81	public static final int PB_MAX = 4;
82
83	/**
84	* The default number of position bits is 2.
85	*/
86	public static final int PB_DEFAULT = 2;
87
88	/**
89	* Compression mode: uncompressed.
90	* The data is wrapped into a LZMA2 stream without compression.
91	*/
92	public static final int MODE_UNCOMPRESSED = 0;
93
94	/**
95	* Compression mode: fast.
96	* This is usually combined with a hash chain match finder.
97	*/
98	public static final int MODE_FAST = LZMAEncoder.MODE_FAST;
99
100	/**
101	* Compression mode: normal.
102	* This is usually combined with a binary tree match finder.
103	*/
104	public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL;
105
106	/**
107	* Minimum value for <code>niceLen</code> is 8.
108	*/
109	public static final int NICE_LEN_MIN = 8;
110
111	/**
112	* Maximum value for <code>niceLen</code> is 273.
113	*/
114	public static final int NICE_LEN_MAX = 273;
115
116	/**
117	* Match finder: Hash Chain 2-3-4
118	*/
119	public static final int MF_HC4 = LZEncoder.MF_HC4;
120
121	/**
122	* Match finder: Binary tree 2-3-4
123	*/
124	public static final int MF_BT4 = LZEncoder.MF_BT4;
125
126	private static final int[] presetToDictSize = {
127	1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22,
128	1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26 };
129
130	private static final int[] presetToDepthLimit = { 4, 8, 24, 48 };
131
132	private int dictSize;
133	private byte[] presetDict = null;
134	private int lc;
135	private int lp;
136	private int pb;
137	private int mode;
138	private int niceLen;
139	private int mf;
140	private int depthLimit;
141
142	/**
143	* Creates new LZMA2 options and sets them to the default values.
144	* This is equivalent to <code>LZMA2Options(PRESET_DEFAULT)</code>.
145	*/
146	public LZMA2Options() {
147	try {
148	setPreset(PRESET_DEFAULT);
149	} catch (UnsupportedOptionsException e) {
150	assert false;
151	throw new RuntimeException();
152	}
153	}
154
155	/**
156	* Creates new LZMA2 options and sets them to the given preset.
157	*
158	* @throws UnsupportedOptionsException
159	* <code>preset</code> is not supported
160	*/
161	public LZMA2Options(int preset) throws UnsupportedOptionsException {
162	setPreset(preset);
163	}
164
165	/**
166	* Creates new LZMA2 options and sets them to the given custom values.
167	*
168	* @throws UnsupportedOptionsException
169	* unsupported options were specified
170	*/
171	public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode,
172	int niceLen, int mf, int depthLimit)
173	throws UnsupportedOptionsException {
174	setDictSize(dictSize);
175	setLcLp(lc, lp);
176	setPb(pb);
177	setMode(mode);
178	setNiceLen(niceLen);
179	setMatchFinder(mf);
180	setDepthLimit(depthLimit);
181	}
182
183	/**
184	* Sets the compression options to the given preset.
185	* <p>
186	* The presets 0-3 are fast presets with medium compression.
187	* The presets 4-6 are fairly slow presets with high compression.
188	* The default preset (<code>PRESET_DEFAULT</code>) is 6.
189	* <p>
190	* The presets 7-9 are like the preset 6 but use bigger dictionaries
191	* and have higher compressor and decompressor memory requirements.
192	* Unless the uncompressed size of the file exceeds 8 MiB,
193	* 16 MiB, or 32 MiB, it is waste of memory to use the
194	* presets 7, 8, or 9, respectively.
195	*
196	* @throws UnsupportedOptionsException
197	* <code>preset</code> is not supported
198	*/
199	public void setPreset(int preset) throws UnsupportedOptionsException {
200	if (preset < 0 \|\| preset > 9)
201	throw new UnsupportedOptionsException(
202	"Unsupported preset: " + preset);
203
204	lc = LC_DEFAULT;
205	lp = LP_DEFAULT;
206	pb = PB_DEFAULT;
207	dictSize = presetToDictSize[preset];
208
209	if (preset <= 3) {
210	mode = MODE_FAST;
211	mf = MF_HC4;
212	niceLen = preset <= 1 ? 128 : NICE_LEN_MAX;
213	depthLimit = presetToDepthLimit[preset];
214	} else {
215	mode = MODE_NORMAL;
216	mf = MF_BT4;
217	niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64;
218	depthLimit = 0;
219	}
220	}
221
222	/**
223	* Sets the dictionary size in bytes.
224	* <p>
225	* The dictionary (or history buffer) holds the most recently seen
226	* uncompressed data. Bigger dictionary usually means better compression.
227	* However, using a dictioanary bigger than the size of the uncompressed
228	* data is waste of memory.
229	* <p>
230	* Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid,
231	* but sizes of 2^n and 2^n + 2^(n-1) bytes are somewhat
232	* recommended.
233	*
234	* @throws UnsupportedOptionsException
235	* <code>dictSize</code> is not supported
236	*/
237	public void setDictSize(int dictSize) throws UnsupportedOptionsException {
238	if (dictSize < DICT_SIZE_MIN)
239	throw new UnsupportedOptionsException(
240	"LZMA2 dictionary size must be at least 4 KiB: "
241	+ dictSize + " B");
242
243	if (dictSize > DICT_SIZE_MAX)
244	throw new UnsupportedOptionsException(
245	"LZMA2 dictionary size must not exceed "
246	+ (DICT_SIZE_MAX >> 20) + " MiB: " + dictSize + " B");
247
248	this.dictSize = dictSize;
249	}
250
251	/**
252	* Gets the dictionary size in bytes.
253	*/
254	public int getDictSize() {
255	return dictSize;
256	}
257
258	/**
259	* Sets a preset dictionary. Use null to disable the use of
260	* a preset dictionary. By default there is no preset dictionary.
261	* <p>
262	* <b>The .xz format doesn't support a preset dictionary for now.
263	* Do not set a preset dictionary unless you use raw LZMA2.</b>
264	* <p>
265	* Preset dictionary can be useful when compressing many similar,
266	* relatively small chunks of data independently from each other.
267	* A preset dictionary should contain typical strings that occur in
268	* the files being compressed. The most probable strings should be
269	* near the end of the preset dictionary. The preset dictionary used
270	* for compression is also needed for decompression.
271	*/
272	public void setPresetDict(byte[] presetDict) {
273	this.presetDict = presetDict;
274	}
275
276	/**
277	* Gets the preset dictionary.
278	*/
279	public byte[] getPresetDict() {
280	return presetDict;
281	}
282
283	/**
284	* Sets the number of literal context bits and literal position bits.
285	* <p>
286	* The sum of <code>lc</code> and <code>lp</code> is limited to 4.
287	* Trying to exceed it will throw an exception. This function lets
288	* you change both at the same time.
289	*
290	* @throws UnsupportedOptionsException
291	* <code>lc</code> and <code>lp</code>
292	* are invalid
293	*/
294	public void setLcLp(int lc, int lp) throws UnsupportedOptionsException {
295	if (lc < 0 \|\| lp < 0 \|\| lc > LC_LP_MAX \|\| lp > LC_LP_MAX
296	\|\| lc + lp > LC_LP_MAX)
297	throw new UnsupportedOptionsException(
298	"lc + lp must not exceed " + LC_LP_MAX + ": "
299	+ lc + " + " + lp);
300
301	this.lc = lc;
302	this.lp = lp;
303	}
304
305	/**
306	* Sets the number of literal context bits.
307	* <p>
308	* All bytes that cannot be encoded as matches are encoded as literals.
309	* That is, literals are simply 8-bit bytes that are encoded one at
310	* a time.
311	* <p>
312	* The literal coding makes an assumption that the highest <code>lc</code>
313	* bits of the previous uncompressed byte correlate with the next byte.
314	* For example, in typical English text, an upper-case letter is often
315	* followed by a lower-case letter, and a lower-case letter is usually
316	* followed by another lower-case letter. In the US-ASCII character set,
317	* the highest three bits are 010 for upper-case letters and 011 for
318	* lower-case letters. When <code>lc</code> is at least 3, the literal
319	* coding can take advantage of this property in the uncompressed data.
320	* <p>
321	* The default value (3) is usually good. If you want maximum compression,
322	* try <code>setLc(4)</code>. Sometimes it helps a little, and sometimes it
323	* makes compression worse. If it makes it worse, test for example
324	* <code>setLc(2)</code> too.
325	*
326	* @throws UnsupportedOptionsException
327	* <code>lc</code> is invalid, or the sum
328	* of <code>lc</code> and <code>lp</code>
329	* exceed LC_LP_MAX
330	*/
331	public void setLc(int lc) throws UnsupportedOptionsException {
332	setLcLp(lc, lp);
333	}
334
335	/**
336	* Sets the number of literal position bits.
337	* <p>
338	* This affets what kind of alignment in the uncompressed data is
339	* assumed when encoding literals. See {@link #setPb(int) setPb} for
340	* more information about alignment.
341	*
342	* @throws UnsupportedOptionsException
343	* <code>lp</code> is invalid, or the sum
344	* of <code>lc</code> and <code>lp</code>
345	* exceed LC_LP_MAX
346	*/
347	public void setLp(int lp) throws UnsupportedOptionsException {
348	setLcLp(lc, lp);
349	}
350
351	/**
352	* Gets the number of literal context bits.
353	*/
354	public int getLc() {
355	return lc;
356	}
357
358	/**
359	* Gets the number of literal position bits.
360	*/
361	public int getLp() {
362	return lp;
363	}
364
365	/**
366	* Sets the number of position bits.
367	* <p>
368	* This affects what kind of alignment in the uncompressed data is
369	* assumed in general. The default (2) means four-byte alignment
370	* (2^<code>pb</code> = 2^2 = 4), which is often a good choice when
371	* there's no better guess.
372	* <p>
373	* When the alignment is known, setting the number of position bits
374	* accordingly may reduce the file size a little. For example with text
375	* files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using
376	* <code>setPb(0)</code> can improve compression slightly. For UTF-16
377	* text, <code>setPb(1)</code> is a good choice. If the alignment is
378	* an odd number like 3 bytes, <code>setPb(0)</code> might be the best
379	* choice.
380	* <p>
381	* Even though the assumed alignment can be adjusted with
382	* <code>setPb</code> and <code>setLp</code>, LZMA2 still slightly favors
383	* 16-byte alignment. It might be worth taking into account when designing
384	* file formats that are likely to be often compressed with LZMA2.
385	*
386	* @throws UnsupportedOptionsException
387	* <code>pb</code> is invalid
388	*/
389	public void setPb(int pb) throws UnsupportedOptionsException {
390	if (pb < 0 \|\| pb > PB_MAX)
391	throw new UnsupportedOptionsException(
392	"pb must not exceed " + PB_MAX + ": " + pb);
393
394	this.pb = pb;
395	}
396
397	/**
398	* Gets the number of position bits.
399	*/
400	public int getPb() {
401	return pb;
402	}
403
404	/**
405	* Sets the compression mode.
406	* <p>
407	* This specifies the method to analyze the data produced by
408	* a match finder. The default is <code>MODE_FAST</code> for presets
409	* 0-3 and <code>MODE_NORMAL</code> for presets 4-9.
410	* <p>
411	* Usually <code>MODE_FAST</code> is used with Hash Chain match finders
412	* and <code>MODE_NORMAL</code> with Binary Tree match finders. This is
413	* also what the presets do.
414	* <p>
415	* The special mode <code>MODE_UNCOMPRESSED</code> doesn't try to
416	* compress the data at all (and doesn't use a match finder) and will
417	* simply wrap it in uncompressed LZMA2 chunks.
418	*
419	* @throws UnsupportedOptionsException
420	* <code>mode</code> is not supported
421	*/
422	public void setMode(int mode) throws UnsupportedOptionsException {
423	if (mode < MODE_UNCOMPRESSED \|\| mode > MODE_NORMAL)
424	throw new UnsupportedOptionsException(
425	"Unsupported compression mode: " + mode);
426
427	this.mode = mode;
428	}
429
430	/**
431	* Gets the compression mode.
432	*/
433	public int getMode() {
434	return mode;
435	}
436
437	/**
438	* Sets the nice length of matches.
439	* Once a match of at least <code>niceLen</code> bytes is found,
440	* the algorithm stops looking for better matches. Higher values tend
441	* to give better compression at the expense of speed. The default
442	* depends on the preset.
443	*
444	* @throws UnsupportedOptionsException
445	* <code>niceLen</code> is invalid
446	*/
447	public void setNiceLen(int niceLen) throws UnsupportedOptionsException {
448	if (niceLen < NICE_LEN_MIN)
449	throw new UnsupportedOptionsException(
450	"Minimum nice length of matches is "
451	+ NICE_LEN_MIN + " bytes: " + niceLen);
452
453	if (niceLen > NICE_LEN_MAX)
454	throw new UnsupportedOptionsException(
455	"Maximum nice length of matches is " + NICE_LEN_MAX
456	+ ": " + niceLen);
457
458	this.niceLen = niceLen;
459	}
460
461	/**
462	* Gets the nice length of matches.
463	*/
464	public int getNiceLen() {
465	return niceLen;
466	}
467
468	/**
469	* Sets the match finder type.
470	* <p>
471	* Match finder has a major effect on compression speed, memory usage,
472	* and compression ratio. Usually Hash Chain match finders are faster
473	* than Binary Tree match finders. The default depends on the preset:
474	* 0-3 use <code>MF_HC4</code> and 4-9 use <code>MF_BT4</code>.
475	*
476	* @throws UnsupportedOptionsException
477	* <code>mf</code> is not supported
478	*/
479	public void setMatchFinder(int mf) throws UnsupportedOptionsException {
480	if (mf != MF_HC4 && mf != MF_BT4)
481	throw new UnsupportedOptionsException(
482	"Unsupported match finder: " + mf);
483
484	this.mf = mf;
485	}
486
487	/**
488	* Gets the match finder type.
489	*/
490	public int getMatchFinder() {
491	return mf;
492	}
493
494	/**
495	* Sets the match finder search depth limit.
496	* <p>
497	* The default is a special value of <code>0</code> which indicates that
498	* the depth limit should be automatically calculated by the selected
499	* match finder from the nice length of matches.
500	* <p>
501	* Reasonable depth limit for Hash Chain match finders is 4-100 and
502	* 16-1000 for Binary Tree match finders. Using very high values can
503	* make the compressor extremely slow with some files. Avoid settings
504	* higher than 1000 unless you are prepared to interrupt the compression
505	* in case it is taking far too long.
506	*
507	* @throws UnsupportedOptionsException
508	* <code>depthLimit</code> is invalid
509	*/
510	public void setDepthLimit(int depthLimit)
511	throws UnsupportedOptionsException {
512	if (depthLimit < 0)
513	throw new UnsupportedOptionsException(
514	"Depth limit cannot be negative: " + depthLimit);
515
516	this.depthLimit = depthLimit;
517	}
518
519	/**
520	* Gets the match finder search depth limit.
521	*/
522	public int getDepthLimit() {
523	return depthLimit;
524	}
525
526	public int getEncoderMemoryUsage() {
527	return (mode == MODE_UNCOMPRESSED)
528	? UncompressedLZMA2OutputStream.getMemoryUsage()
529	: LZMA2OutputStream.getMemoryUsage(this);
530	}
531
532	public FinishableOutputStream getOutputStream(FinishableOutputStream out,
533	ArrayCache arrayCache) {
534	if (mode == MODE_UNCOMPRESSED)
535	return new UncompressedLZMA2OutputStream(out, arrayCache);
536
537	return new LZMA2OutputStream(out, this, arrayCache);
538	}
539
540	/**
541	* Gets how much memory the LZMA2 decoder will need to decompress the data
542	* that was encoded with these options and stored in a .xz file.
543	* <p>
544	* The returned value may bigger than the value returned by a direct call
545	* to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary size
546	* is not 2^n or 2^n + 2^(n-1) bytes. This is because the .xz
547	* headers store the dictionary size in such a format and other values
548	* are rounded up to the next such value. Such rounding is harmess except
549	* it might waste some memory if an unsual dictionary size is used.
550	* <p>
551	* If you use raw LZMA2 streams and unusual dictioanary size, call
552	* {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder
553	* memory requirements.
554	*/
555	public int getDecoderMemoryUsage() {
556	// Round the dictionary size up to the next 2^n or 2^n + 2^(n-1).
557	int d = dictSize - 1;
558	d \|= d >>> 2;
559	d \|= d >>> 3;
560	d \|= d >>> 4;
561	d \|= d >>> 8;
562	d \|= d >>> 16;
563	return LZMA2InputStream.getMemoryUsage(d + 1);
564	}
565
566	public InputStream getInputStream(InputStream in, ArrayCache arrayCache)
567	throws IOException {
568	return new LZMA2InputStream(in, dictSize, presetDict, arrayCache);
569	}
570
571	FilterEncoder getFilterEncoder() {
572	return new LZMA2Encoder(this);
573	}
574
575	public Object clone() {
576	try {
577	return super.clone();
578	} catch (CloneNotSupportedException e) {
579	assert false;
580	throw new RuntimeException();
581	}
582	}
583	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: