source: josm/trunk/src/org/tukaani/xz/lz/BT4.java@ 13350

Last change on this file since 13350 was 13350, checked in by stoecker, 6 years ago

see #15816 - add XZ support

File size: 8.3 KB
Line 
1/*
2 * Binary Tree match finder with 2-, 3-, and 4-byte hashing
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11package org.tukaani.xz.lz;
12
13import org.tukaani.xz.ArrayCache;
14
15final class BT4 extends LZEncoder {
16 private final Hash234 hash;
17 private final int[] tree;
18 private final Matches matches;
19 private final int depthLimit;
20
21 private final int cyclicSize;
22 private int cyclicPos = -1;
23 private int lzPos;
24
25 static int getMemoryUsage(int dictSize) {
26 return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
27 }
28
29 BT4(int dictSize, int beforeSizeMin, int readAheadMax,
30 int niceLen, int matchLenMax, int depthLimit,
31 ArrayCache arrayCache) {
32 super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
33 arrayCache);
34
35 cyclicSize = dictSize + 1;
36 lzPos = cyclicSize;
37
38 hash = new Hash234(dictSize, arrayCache);
39 tree = arrayCache.getIntArray(cyclicSize * 2, false);
40
41 // Substracting 1 because the shortest match that this match
42 // finder can find is 2 bytes, so there's no need to reserve
43 // space for one-byte matches.
44 matches = new Matches(niceLen - 1);
45
46 this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
47 }
48
49 public void putArraysToCache(ArrayCache arrayCache) {
50 arrayCache.putArray(tree);
51 hash.putArraysToCache(arrayCache);
52 super.putArraysToCache(arrayCache);
53 }
54
55 private int movePos() {
56 int avail = movePos(niceLen, 4);
57
58 if (avail != 0) {
59 if (++lzPos == Integer.MAX_VALUE) {
60 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
61 hash.normalize(normalizationOffset);
62 normalize(tree, cyclicSize * 2, normalizationOffset);
63 lzPos -= normalizationOffset;
64 }
65
66 if (++cyclicPos == cyclicSize)
67 cyclicPos = 0;
68 }
69
70 return avail;
71 }
72
73 public Matches getMatches() {
74 matches.count = 0;
75
76 int matchLenLimit = matchLenMax;
77 int niceLenLimit = niceLen;
78 int avail = movePos();
79
80 if (avail < matchLenLimit) {
81 if (avail == 0)
82 return matches;
83
84 matchLenLimit = avail;
85 if (niceLenLimit > avail)
86 niceLenLimit = avail;
87 }
88
89 hash.calcHashes(buf, readPos);
90 int delta2 = lzPos - hash.getHash2Pos();
91 int delta3 = lzPos - hash.getHash3Pos();
92 int currentMatch = hash.getHash4Pos();
93 hash.updateTables(lzPos);
94
95 int lenBest = 0;
96
97 // See if the hash from the first two bytes found a match.
98 // The hashing algorithm guarantees that if the first byte
99 // matches, also the second byte does, so there's no need to
100 // test the second byte.
101 if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
102 lenBest = 2;
103 matches.len[0] = 2;
104 matches.dist[0] = delta2 - 1;
105 matches.count = 1;
106 }
107
108 // See if the hash from the first three bytes found a match that
109 // is different from the match possibly found by the two-byte hash.
110 // Also here the hashing algorithm guarantees that if the first byte
111 // matches, also the next two bytes do.
112 if (delta2 != delta3 && delta3 < cyclicSize
113 && buf[readPos - delta3] == buf[readPos]) {
114 lenBest = 3;
115 matches.dist[matches.count++] = delta3 - 1;
116 delta2 = delta3;
117 }
118
119 // If a match was found, see how long it is.
120 if (matches.count > 0) {
121 while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
122 == buf[readPos + lenBest])
123 ++lenBest;
124
125 matches.len[matches.count - 1] = lenBest;
126
127 // Return if it is long enough (niceLen or reached the end of
128 // the dictionary).
129 if (lenBest >= niceLenLimit) {
130 skip(niceLenLimit, currentMatch);
131 return matches;
132 }
133 }
134
135 // Long enough match wasn't found so easily. Look for better matches
136 // from the binary tree.
137 if (lenBest < 3)
138 lenBest = 3;
139
140 int depth = depthLimit;
141
142 int ptr0 = (cyclicPos << 1) + 1;
143 int ptr1 = cyclicPos << 1;
144 int len0 = 0;
145 int len1 = 0;
146
147 while (true) {
148 int delta = lzPos - currentMatch;
149
150 // Return if the search depth limit has been reached or
151 // if the distance of the potential match exceeds the
152 // dictionary size.
153 if (depth-- == 0 || delta >= cyclicSize) {
154 tree[ptr0] = 0;
155 tree[ptr1] = 0;
156 return matches;
157 }
158
159 int pair = (cyclicPos - delta
160 + (delta > cyclicPos ? cyclicSize : 0)) << 1;
161 int len = Math.min(len0, len1);
162
163 if (buf[readPos + len - delta] == buf[readPos + len]) {
164 while (++len < matchLenLimit)
165 if (buf[readPos + len - delta] != buf[readPos + len])
166 break;
167
168 if (len > lenBest) {
169 lenBest = len;
170 matches.len[matches.count] = len;
171 matches.dist[matches.count] = delta - 1;
172 ++matches.count;
173
174 if (len >= niceLenLimit) {
175 tree[ptr1] = tree[pair];
176 tree[ptr0] = tree[pair + 1];
177 return matches;
178 }
179 }
180 }
181
182 if ((buf[readPos + len - delta] & 0xFF)
183 < (buf[readPos + len] & 0xFF)) {
184 tree[ptr1] = currentMatch;
185 ptr1 = pair + 1;
186 currentMatch = tree[ptr1];
187 len1 = len;
188 } else {
189 tree[ptr0] = currentMatch;
190 ptr0 = pair;
191 currentMatch = tree[ptr0];
192 len0 = len;
193 }
194 }
195 }
196
197 private void skip(int niceLenLimit, int currentMatch) {
198 int depth = depthLimit;
199
200 int ptr0 = (cyclicPos << 1) + 1;
201 int ptr1 = cyclicPos << 1;
202 int len0 = 0;
203 int len1 = 0;
204
205 while (true) {
206 int delta = lzPos - currentMatch;
207
208 if (depth-- == 0 || delta >= cyclicSize) {
209 tree[ptr0] = 0;
210 tree[ptr1] = 0;
211 return;
212 }
213
214 int pair = (cyclicPos - delta
215 + (delta > cyclicPos ? cyclicSize : 0)) << 1;
216 int len = Math.min(len0, len1);
217
218 if (buf[readPos + len - delta] == buf[readPos + len]) {
219 // No need to look for longer matches than niceLenLimit
220 // because we only are updating the tree, not returning
221 // matches found to the caller.
222 do {
223 if (++len == niceLenLimit) {
224 tree[ptr1] = tree[pair];
225 tree[ptr0] = tree[pair + 1];
226 return;
227 }
228 } while (buf[readPos + len - delta] == buf[readPos + len]);
229 }
230
231 if ((buf[readPos + len - delta] & 0xFF)
232 < (buf[readPos + len] & 0xFF)) {
233 tree[ptr1] = currentMatch;
234 ptr1 = pair + 1;
235 currentMatch = tree[ptr1];
236 len1 = len;
237 } else {
238 tree[ptr0] = currentMatch;
239 ptr0 = pair;
240 currentMatch = tree[ptr0];
241 len0 = len;
242 }
243 }
244 }
245
246 public void skip(int len) {
247 while (len-- > 0) {
248 int niceLenLimit = niceLen;
249 int avail = movePos();
250
251 if (avail < niceLenLimit) {
252 if (avail == 0)
253 continue;
254
255 niceLenLimit = avail;
256 }
257
258 hash.calcHashes(buf, readPos);
259 int currentMatch = hash.getHash4Pos();
260 hash.updateTables(lzPos);
261
262 skip(niceLenLimit, currentMatch);
263 }
264 }
265}
Note: See TracBrowser for help on using the repository browser.