source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 8513

Last change on this file since 8513 was 8513, checked in by Don-vip, 9 years ago

checkstyle: blocks

File size: 35.3 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.15 2013/04/01 16:27:31 stuart
11 * Fix DiffPrint unified format with test cases.
12 * Begin porting some diff-2.7 features.
13 *
14 * Revision 1.14 2010/03/03 21:21:25 stuart
15 * Test new direct equivalence API
16 *
17 * Revision 1.13 2009/12/07 17:43:17 stuart
18 * Compute equivMax for int[] ctor
19 *
20 * Revision 1.12 2009/12/07 17:34:46 stuart
21 * Ctor with int[].
22 *
23 * Revision 1.11 2009/11/15 01:11:54 stuart
24 * Diff doesn't need to be generic
25 *
26 * Revision 1.10 2009/11/15 00:54:03 stuart
27 * Update to Java 5 containers
28 *
29 * Revision 1.7 2009/01/19 03:05:26 stuart
30 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31 *
32 * Revision 1.6 2003/03/06 22:51:32 stuart
33 * Convert to CVS
34 *
35 * Revision 1.5 2002/07/19 19:14:40 stuart
36 * fix reverseScript, make change ctor public, update docs
37 *
38 * Revision 1.4 2002/04/09 17:53:39 stuart
39 * More flexible interface for diff() function.
40 *
41 * Revision 1.3 2000/03/03 21:58:03 stuart
42 * move discard_confusing_lines and shift_boundaries to class file_data
43 *
44 * Revision 1.2 2000/03/02 16:37:38 stuart
45 * Add GPL and copyright
46 *
47 */
48
49import java.util.HashMap;
50import java.util.Map;
51
52/** A class to compare vectors of objects. The result of comparison
53 is a list of <code>change</code> objects which form an
54 edit script. The objects compared are traditionally lines
55 of text from two files. Comparison options such as "ignore
56 whitespace" are implemented by modifying the <code>equals</code>
57 and <code>hashcode</code> methods for the objects compared.
58<p>
59 The basic algorithm is described in: <br>
60 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61 Algorithmica Vol. 1 No. 2, 1986, p 251.
62<p>
63 This class outputs different results from GNU diff 1.15 on some
64 inputs. Our results are actually better (smaller change list, smaller
65 total size of changes), but it would be nice to know why. Perhaps
66 there is a memory overwrite bug in GNU diff 1.15.
67
68 @author Stuart D. Gathman, translated from GNU diff 1.15
69 Copyright (C) 2000 Business Management Systems, Inc.
70<p>
71 This program is free software; you can redistribute it and/or modify
72 it under the terms of the GNU General Public License as published by
73 the Free Software Foundation; either version 1, or (at your option)
74 any later version.
75<p>
76 This program is distributed in the hope that it will be useful,
77 but WITHOUT ANY WARRANTY; without even the implied warranty of
78 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79 GNU General Public License for more details.
80<p>
81 You should have received a copy of the <a href=COPYING.txt>
82 GNU General Public License</a>
83 along with this program; if not, write to the Free Software
84 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85
86 */
87
88public class Diff {
89
90 /** Prepare to find differences between two arrays. Each element of
91 the arrays is translated to an "equivalence number" based on
92 the result of <code>equals</code>. The original Object arrays
93 are no longer needed for computing the differences. They will
94 be needed again later to print the results of the comparison as
95 an edit script, if desired.
96 */
97 public Diff(Object[] a, Object[] b) {
98 Map<Object, Integer> h = new HashMap<>(a.length + b.length);
99 filevec = new FileData[] {new FileData(a, h), new FileData(b, h)};
100 }
101
102 /** 1 more than the maximum equivalence value used for this or its
103 sibling file. */
104 private int equivMax = 1;
105
106 /** When set to true, the comparison uses a heuristic to speed it up.
107 With this heuristic, for files with a constant small density
108 of changes, the algorithm is linear in the file size. */
109 public boolean heuristic = false;
110
111 /** When set to true, the algorithm returns a guarranteed minimal
112 set of changes. This makes things slower, sometimes much slower. */
113 public boolean noDiscards = false;
114
115 private int[] xvec, yvec; /* Vectors being compared. */
116 private int[] fdiag; /* Vector, indexed by diagonal, containing
117 the X coordinate of the point furthest
118 along the given diagonal in the forward
119 search of the edit matrix. */
120 private int[] bdiag; /* Vector, indexed by diagonal, containing
121 the X coordinate of the point furthest
122 along the given diagonal in the backward
123 search of the edit matrix. */
124 private int fdiagoff, bdiagoff;
125 private final FileData[] filevec;
126 private int cost;
127 /** Snakes bigger than this are considered "big". */
128 private static final int SNAKE_LIMIT = 20;
129
130 /** Find the midpoint of the shortest edit script for a specified
131 portion of the two files.
132
133 We scan from the beginnings of the files, and simultaneously from the ends,
134 doing a breadth-first search through the space of edit-sequence.
135 When the two searches meet, we have found the midpoint of the shortest
136 edit sequence.
137
138 The value returned is the number of the diagonal on which the midpoint lies.
139 The diagonal number equals the number of inserted lines minus the number
140 of deleted lines (counting only lines before the midpoint).
141 The edit cost is stored into COST; this is the total number of
142 lines inserted or deleted (counting only lines before the midpoint).
143
144 This function assumes that the first lines of the specified portions
145 of the two files do not match, and likewise that the last lines do not
146 match. The caller must trim matching lines from the beginning and end
147 of the portions it is going to specify.
148
149 Note that if we return the "wrong" diagonal value, or if
150 the value of bdiag at that diagonal is "wrong",
151 the worst this can do is cause suboptimal diff output.
152 It cannot cause incorrect diff output. */
153
154 private int diag(int xoff, int xlim, int yoff, int ylim) {
155 final int[] fd = fdiag; // Give the compiler a chance.
156 final int[] bd = bdiag; // Additional help for the compiler.
157 final int[] xv = xvec; // Still more help for the compiler.
158 final int[] yv = yvec; // And more and more . . .
159 final int dmin = xoff - ylim; // Minimum valid diagonal.
160 final int dmax = xlim - yoff; // Maximum valid diagonal.
161 final int fmid = xoff - yoff; // Center diagonal of top-down search.
162 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
163 int fmin = fmid, fmax = fmid; // Limits of top-down search.
164 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
165 /* True if southeast corner is on an odd
166 diagonal with respect to the northwest. */
167 final boolean odd = (fmid - bmid & 1) != 0;
168
169 fd[fdiagoff + fmid] = xoff;
170 bd[bdiagoff + bmid] = xlim;
171
172 for (int c = 1;; ++c) {
173 int d; /* Active diagonal. */
174 boolean big_snake = false;
175
176 /* Extend the top-down search by an edit step in each diagonal. */
177 if (fmin > dmin) {
178 fd[fdiagoff + --fmin - 1] = -1;
179 } else {
180 ++fmin;
181 }
182 if (fmax < dmax) {
183 fd[fdiagoff + ++fmax + 1] = -1;
184 } else {
185 --fmax;
186 }
187 for (d = fmax; d >= fmin; d -= 2) {
188 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
189
190 if (tlo >= thi) {
191 x = tlo + 1;
192 } else {
193 x = thi;
194 }
195 oldx = x;
196 y = x - d;
197 while (x < xlim && y < ylim && xv[x] == yv[y]) {
198 ++x; ++y;
199 }
200 if (x - oldx > SNAKE_LIMIT) {
201 big_snake = true;
202 }
203 fd[fdiagoff + d] = x;
204 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
205 cost = 2 * c - 1;
206 return d;
207 }
208 }
209
210 /* Similar extend the bottom-up search. */
211 if (bmin > dmin) {
212 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
213 } else {
214 ++bmin;
215 }
216 if (bmax < dmax) {
217 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
218 } else {
219 --bmax;
220 }
221 for (d = bmax; d >= bmin; d -= 2) {
222 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
223
224 if (tlo < thi) {
225 x = tlo;
226 } else {
227 x = thi - 1;
228 }
229 oldx = x;
230 y = x - d;
231 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
232 --x; --y;
233 }
234 if (oldx - x > SNAKE_LIMIT) {
235 big_snake = true;
236 }
237 bd[bdiagoff + d] = x;
238 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
239 cost = 2 * c;
240 return d;
241 }
242 }
243
244 /* Heuristic: check occasionally for a diagonal that has made
245 lots of progress compared with the edit distance.
246 If we have any such, find the one that has made the most
247 progress and return it as if it had succeeded.
248
249 With this heuristic, for files with a constant small density
250 of changes, the algorithm is linear in the file size. */
251
252 if (c > 200 && big_snake && heuristic) {
253 int best = 0;
254 int bestpos = -1;
255
256 for (d = fmax; d >= fmin; d -= 2) {
257 int dd = d - fmid;
258 int x = fd[fdiagoff + d];
259 int y = x - d;
260 int v = (x - xoff) * 2 - dd;
261 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
262 if (v > best
263 && xoff + SNAKE_LIMIT <= x && x < xlim
264 && yoff + SNAKE_LIMIT <= y && y < ylim) {
265 /* We have a good enough best diagonal.
266 now insist that it end with a significant snake. */
267 int k;
268
269 for (k = 1; xvec[x - k] == yvec[y - k]; k++) {
270 if (k == SNAKE_LIMIT) {
271 best = v;
272 bestpos = d;
273 break;
274 }
275 }
276 }
277 }
278 }
279 if (best > 0) {
280 cost = 2 * c - 1;
281 return bestpos;
282 }
283
284 best = 0;
285 for (d = bmax; d >= bmin; d -= 2) {
286 int dd = d - bmid;
287 int x = bd[bdiagoff + d];
288 int y = x - d;
289 int v = (xlim - x) * 2 + dd;
290 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
291 if (v > best
292 && xoff < x && x <= xlim - SNAKE_LIMIT
293 && yoff < y && y <= ylim - SNAKE_LIMIT) {
294 /* We have a good enough best diagonal.
295 now insist that it end with a significant snake. */
296 int k;
297
298 for (k = 0; xvec[x + k] == yvec[y + k]; k++) {
299 if (k == SNAKE_LIMIT) {
300 best = v;
301 bestpos = d;
302 break;
303 }
304 }
305 }
306 }
307 }
308 if (best > 0) {
309 cost = 2 * c - 1;
310 return bestpos;
311 }
312 }
313 }
314 }
315
316 /** Compare in detail contiguous subsequences of the two files
317 which are known, as a whole, to match each other.
318
319 The results are recorded in the vectors filevec[N].changed_flag, by
320 storing a 1 in the element for each line that is an insertion or deletion.
321
322 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
323
324 Note that XLIM, YLIM are exclusive bounds.
325 All line numbers are origin-0 and discarded lines are not counted. */
326
327 private void compareseq(int xoff, int xlim, int yoff, int ylim) {
328 /* Slide down the bottom initial diagonal. */
329 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
330 ++xoff; ++yoff;
331 }
332 /* Slide up the top initial diagonal. */
333 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
334 --xlim; --ylim;
335 }
336
337 /* Handle simple cases. */
338 if (xoff == xlim) {
339 while (yoff < ylim) {
340 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
341 }
342 } else if (yoff == ylim) {
343 while (xoff < xlim) {
344 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
345 }
346 } else {
347 /* Find a point of correspondence in the middle of the files. */
348
349 int d = diag(xoff, xlim, yoff, ylim);
350 int c = cost;
351 int b = bdiag[bdiagoff + d];
352
353 if (c == 1)
354 /* This should be impossible, because it implies that
355 one of the two subsequences is empty,
356 and that case was handled above without calling `diag'.
357 Let's verify that this is true. */
358 throw new IllegalArgumentException("Empty subsequence");
359 else {
360 /* Use that point to split this problem into two subproblems. */
361 compareseq(xoff, b, yoff, b - d);
362 /* This used to use f instead of b,
363 but that is incorrect!
364 It is not necessarily the case that diagonal d
365 has a snake from b to f. */
366 compareseq(b, xlim, b - d, ylim);
367 }
368 }
369 }
370
371 /** Discard lines from one file that have no matches in the other file.
372 */
373 private void discard_confusing_lines() {
374 filevec[0].discard_confusing_lines(filevec[1]);
375 filevec[1].discard_confusing_lines(filevec[0]);
376 }
377
378 private boolean inhibit = false;
379
380 /** Adjust inserts/deletes of blank lines to join changes
381 as much as possible.
382 */
383 private void shift_boundaries() {
384 if (inhibit)
385 return;
386 filevec[0].shift_boundaries(filevec[1]);
387 filevec[1].shift_boundaries(filevec[0]);
388 }
389
390 public interface ScriptBuilder {
391 /** Scan the tables of which lines are inserted and deleted,
392 producing an edit script.
393 @param changed0 true for lines in first file which do not match 2nd
394 @param len0 number of lines in first file
395 @param changed1 true for lines in 2nd file which do not match 1st
396 @param len1 number of lines in 2nd file
397 @return a linked list of changes - or null
398 */
399 Change build_script(
400 boolean[] changed0, int len0,
401 boolean[] changed1, int len1
402 );
403 }
404
405 /** Scan the tables of which lines are inserted and deleted,
406 producing an edit script in reverse order. */
407
408 static class ReverseScript implements ScriptBuilder {
409 @Override
410 public Change build_script(
411 final boolean[] changed0, int len0,
412 final boolean[] changed1, int len1) {
413 Change script = null;
414 int i0 = 0, i1 = 0;
415 while (i0 < len0 || i1 < len1) {
416 if (changed0[1+i0] || changed1[1+i1]) {
417 int line0 = i0, line1 = i1;
418
419 /* Find # lines changed here in each file. */
420 while (changed0[1+i0]) {
421 ++i0;
422 }
423 while (changed1[1+i1]) {
424 ++i1;
425 }
426
427 /* Record this change. */
428 script = new Change(line0, line1, i0 - line0, i1 - line1, script);
429 }
430
431 /* We have reached lines in the two files that match each other. */
432 i0++; i1++;
433 }
434
435 return script;
436 }
437 }
438
439 static class ForwardScript implements ScriptBuilder {
440 /** Scan the tables of which lines are inserted and deleted,
441 producing an edit script in forward order. */
442 @Override
443 public Change build_script(
444 final boolean[] changed0, int len0,
445 final boolean[] changed1, int len1) {
446 Change script = null;
447 int i0 = len0, i1 = len1;
448
449 while (i0 >= 0 || i1 >= 0) {
450 if (changed0[i0] || changed1[i1]) {
451 int line0 = i0, line1 = i1;
452
453 /* Find # lines changed here in each file. */
454 while (changed0[i0]) {
455 --i0;
456 }
457 while (changed1[i1]) {
458 --i1;
459 }
460
461 /* Record this change. */
462 script = new Change(i0, i1, line0 - i0, line1 - i1, script);
463 }
464
465 /* We have reached lines in the two files that match each other. */
466 i0--; i1--;
467 }
468
469 return script;
470 }
471 }
472
473 /** Standard ScriptBuilders. */
474 public static final ScriptBuilder
475 forwardScript = new ForwardScript(),
476 reverseScript = new ReverseScript();
477
478 /** Report the differences of two files. DEPTH is the current directory
479 depth. */
480 public final Change diff_2(final boolean reverse) {
481 return diff(reverse ? reverseScript : forwardScript);
482 }
483
484 /** Get the results of comparison as an edit script. The script
485 is described by a list of changes. The standard ScriptBuilder
486 implementations provide for forward and reverse edit scripts.
487 Alternate implementations could, for instance, list common elements
488 instead of differences.
489 @param bld an object to build the script from change flags
490 @return the head of a list of changes
491 */
492 public Change diff(final ScriptBuilder bld) {
493
494 /* Some lines are obviously insertions or deletions
495 because they don't match anything. Detect them now,
496 and avoid even thinking about them in the main comparison algorithm. */
497
498 discard_confusing_lines();
499
500 /* Now do the main comparison algorithm, considering just the
501 undiscarded lines. */
502
503 xvec = filevec[0].undiscarded;
504 yvec = filevec[1].undiscarded;
505
506 int diags =
507 filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
508 fdiag = new int[diags];
509 fdiagoff = filevec[1].nondiscardedLines + 1;
510 bdiag = new int[diags];
511 bdiagoff = filevec[1].nondiscardedLines + 1;
512
513 compareseq(0, filevec[0].nondiscardedLines,
514 0, filevec[1].nondiscardedLines);
515 fdiag = null;
516 bdiag = null;
517
518 /* Modify the results slightly to make them prettier
519 in cases where that can validly be done. */
520
521 shift_boundaries();
522
523 /* Get the results of comparison in the form of a chain
524 of `struct change's -- an edit script. */
525 return bld.build_script(
526 filevec[0].changedFlag,
527 filevec[0].bufferedLines,
528 filevec[1].changedFlag,
529 filevec[1].bufferedLines
530 );
531
532 }
533
534 /** The result of comparison is an "edit script": a chain of change objects.
535 Each change represents one place where some lines are deleted
536 and some are inserted.
537
538 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
539 DELETED is the number of lines deleted here from file 0.
540 INSERTED is the number of lines inserted here in file 1.
541
542 If DELETED is 0 then LINE0 is the number of the line before
543 which the insertion was done; vice versa for INSERTED and LINE1. */
544
545 public static class Change {
546 /** Previous or next edit command. */
547 public Change link;
548 /** # lines of file 1 changed here. */
549 public final int inserted;
550 /** # lines of file 0 changed here. */
551 public final int deleted;
552 /** Line number of 1st deleted line. */
553 public final int line0;
554 /** Line number of 1st inserted line. */
555 public final int line1;
556
557 /** Cons an additional entry onto the front of an edit script OLD.
558 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
559 DELETED is the number of lines deleted here from file 0.
560 INSERTED is the number of lines inserted here in file 1.
561
562 If DELETED is 0 then LINE0 is the number of the line before
563 which the insertion was done; vice versa for INSERTED and LINE1. */
564 public Change(int line0, int line1, int deleted, int inserted, Change old) {
565 this.line0 = line0;
566 this.line1 = line1;
567 this.inserted = inserted;
568 this.deleted = deleted;
569 this.link = old;
570 }
571
572 @Override
573 public String toString() {
574 String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1);
575 return (link != null) ? s = s + '\n' + link : s;
576 }
577 }
578
579 /** Data on one input file being compared.
580 */
581 class FileData {
582
583 /** Allocate changed array for the results of comparison. */
584 void clear() {
585 /* Allocate a flag for each line of each file, saying whether that line
586 is an insertion or deletion.
587 Allocate an extra element, always zero, at each end of each vector.
588 */
589 changedFlag = new boolean[bufferedLines + 2];
590 }
591
592 /** Return equiv_count[I] as the number of lines in this file
593 that fall in equivalence class I.
594 @return the array of equivalence class counts.
595 */
596 int[] equivCount() {
597 int[] equiv_count = new int[equivMax];
598 for (int i = 0; i < bufferedLines; ++i) {
599 ++equiv_count[equivs[i]];
600 }
601 return equiv_count;
602 }
603
604 /** Discard lines that have no matches in another file.
605
606 A line which is discarded will not be considered by the actual
607 comparison algorithm; it will be as if that line were not in the file.
608 The file's `realindexes' table maps virtual line numbers
609 (which don't count the discarded lines) into real line numbers;
610 this is how the actual comparison algorithm produces results
611 that are comprehensible when the discarded lines are counted.
612<p>
613 When we discard a line, we also mark it as a deletion or insertion
614 so that it will be printed in the output.
615 @param f the other file
616 */
617 void discard_confusing_lines(FileData f) {
618 clear();
619 /* Set up table of which lines are going to be discarded. */
620 final byte[] discarded = discardable(f.equivCount());
621
622 /* Don't really discard the provisional lines except when they occur
623 in a run of discardables, with nonprovisionals at the beginning
624 and end. */
625 filterDiscards(discarded);
626
627 /* Actually discard the lines. */
628 discard(discarded);
629 }
630
631 /**
632 * Mark to be discarded each line that matches no line of another file.
633 * If a line matches many lines, mark it as provisionally discardable.
634 * @param counts The count of each equivalence number for the other file.
635 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
636 * @see #equivCount()
637 */
638 private byte[] discardable(final int[] counts) {
639 final int end = bufferedLines;
640 final byte[] discards = new byte[end];
641 final int[] equivs = this.equivs;
642 int many = 5;
643 int tem = end / 64;
644
645 /* Multiply MANY by approximate square root of number of lines.
646 That is the threshold for provisionally discardable lines. */
647 while ((tem = tem >> 2) > 0) {
648 many *= 2;
649 }
650
651 for (int i = 0; i < end; i++) {
652 int nmatch;
653 if (equivs[i] == 0) {
654 continue;
655 }
656 nmatch = counts[equivs[i]];
657 if (nmatch == 0) {
658 discards[i] = 1;
659 } else if (nmatch > many) {
660 discards[i] = 2;
661 }
662 }
663 return discards;
664 }
665
666 /**
667 * Don't really discard the provisional lines except when they occur
668 * in a run of discardables, with nonprovisionals at the beginning
669 * and end.
670 */
671 private void filterDiscards(final byte[] discards) {
672 final int end = bufferedLines;
673
674 for (int i = 0; i < end; i++) {
675 /* Cancel provisional discards not in middle of run of discards. */
676 if (discards[i] == 2) {
677 discards[i] = 0;
678 } else if (discards[i] != 0) {
679 /* We have found a nonprovisional discard. */
680 int j;
681 int length;
682 int provisional = 0;
683
684 /* Find end of this run of discardable lines.
685 Count how many are provisionally discardable. */
686 for (j = i; j < end; j++) {
687 if (discards[j] == 0) {
688 break;
689 }
690 if (discards[j] == 2) {
691 ++provisional;
692 }
693 }
694
695 /* Cancel provisional discards at end, and shrink the run. */
696 while (j > i && discards[j - 1] == 2) {
697 discards[--j] = 0; --provisional;
698 }
699
700 /* Now we have the length of a run of discardable lines
701 whose first and last are not provisional. */
702 length = j - i;
703
704 /* If 1/4 of the lines in the run are provisional,
705 cancel discarding of all provisional lines in the run. */
706 if (provisional * 4 > length) {
707 while (j > i)
708 if (discards[--j] == 2) {
709 discards[j] = 0;
710 }
711 } else {
712 int consec;
713 int minimum = 1;
714 int tem = length / 4;
715
716 /* MINIMUM is approximate square root of LENGTH/4.
717 A subrun of two or more provisionals can stand
718 when LENGTH is at least 16.
719 A subrun of 4 or more can stand when LENGTH >= 64. */
720 while ((tem = tem >> 2) > 0) {
721 minimum *= 2;
722 }
723 minimum++;
724
725 /* Cancel any subrun of MINIMUM or more provisionals
726 within the larger run. */
727 for (j = 0, consec = 0; j < length; j++) {
728 if (discards[i + j] != 2) {
729 consec = 0;
730 } else if (minimum == ++consec) {
731 /* Back up to start of subrun, to cancel it all. */
732 j -= consec;
733 } else if (minimum < consec) {
734 discards[i + j] = 0;
735 }
736 }
737
738 /* Scan from beginning of run
739 until we find 3 or more nonprovisionals in a row
740 or until the first nonprovisional at least 8 lines in.
741 Until that point, cancel any provisionals. */
742 for (j = 0, consec = 0; j < length; j++) {
743 if (j >= 8 && discards[i + j] == 1) {
744 break;
745 }
746 if (discards[i + j] == 2) {
747 consec = 0; discards[i + j] = 0;
748 } else if (discards[i + j] == 0) {
749 consec = 0;
750 } else {
751 consec++;
752 }
753 if (consec == 3) {
754 break;
755 }
756 }
757
758 /* I advances to the last line of the run. */
759 i += length - 1;
760
761 /* Same thing, from end. */
762 for (j = 0, consec = 0; j < length; j++) {
763 if (j >= 8 && discards[i - j] == 1) {
764 break;
765 }
766 if (discards[i - j] == 2) {
767 consec = 0; discards[i - j] = 0;
768 } else if (discards[i - j] == 0) {
769 consec = 0;
770 } else {
771 consec++;
772 }
773 if (consec == 3) {
774 break;
775 }
776 }
777 }
778 }
779 }
780 }
781
782 /** Actually discard the lines.
783 @param discards flags lines to be discarded
784 */
785 private void discard(final byte[] discards) {
786 final int end = bufferedLines;
787 int j = 0;
788 for (int i = 0; i < end; ++i) {
789 if (noDiscards || discards[i] == 0) {
790 undiscarded[j] = equivs[i];
791 realindexes[j++] = i;
792 } else {
793 changedFlag[1+i] = true;
794 }
795 }
796 nondiscardedLines = j;
797 }
798
799 FileData(int length) {
800 bufferedLines = length;
801 equivs = new int[length];
802 undiscarded = new int[bufferedLines];
803 realindexes = new int[bufferedLines];
804 }
805
806 FileData(Object[] data, Map<Object, Integer> h) {
807 this(data.length);
808 // FIXME: diff 2.7 removes common prefix and common suffix
809 for (int i = 0; i < data.length; ++i) {
810 Integer ir = h.get(data[i]);
811 if (ir == null) {
812 h.put(data[i], equivs[i] = equivMax++);
813 } else {
814 equivs[i] = ir.intValue();
815 }
816 }
817 }
818
819 /** Adjust inserts/deletes of blank lines to join changes
820 as much as possible.
821
822 We do something when a run of changed lines include a blank
823 line at one end and have an excluded blank line at the other.
824 We are free to choose which blank line is included.
825 `compareseq' always chooses the one at the beginning,
826 but usually it is cleaner to consider the following blank line
827 to be the "change". The only exception is if the preceding blank line
828 would join this change to other changes.
829 @param f the file being compared against
830 */
831 void shift_boundaries(FileData f) {
832 final boolean[] changed = changedFlag;
833 final boolean[] other_changed = f.changedFlag;
834 int i = 0;
835 int j = 0;
836 int i_end = bufferedLines;
837 int preceding = -1;
838 int other_preceding = -1;
839
840 for (;;) {
841 int start, end, other_start;
842
843 /* Scan forwards to find beginning of another run of changes.
844 Also keep track of the corresponding point in the other file. */
845
846 while (i < i_end && !changed[1+i]) {
847 while (other_changed[1+j++]) {
848 /* Non-corresponding lines in the other file
849 will count as the preceding batch of changes. */
850 other_preceding = j;
851 }
852 i++;
853 }
854
855 if (i == i_end) {
856 break;
857 }
858
859 start = i;
860 other_start = j;
861
862 for (;;) {
863 /* Now find the end of this run of changes. */
864
865 while (i < i_end && changed[1+i]) {
866 i++;
867 }
868 end = i;
869
870 /* If the first changed line matches the following unchanged one,
871 and this run does not follow right after a previous run,
872 and there are no lines deleted from the other file here,
873 then classify the first changed line as unchanged
874 and the following line as changed in its place. */
875
876 /* You might ask, how could this run follow right after another?
877 Only because the previous run was shifted here. */
878
879 if (end != i_end && equivs[start] == equivs[end] && !other_changed[1+j]
880 && !((preceding >= 0 && start == preceding) || (other_preceding >= 0 && other_start == other_preceding))) {
881 changed[1+end++] = true;
882 changed[1+start++] = false;
883 ++i;
884 /* Since one line-that-matches is now before this run
885 instead of after, we must advance in the other file
886 to keep in synch. */
887 ++j;
888 } else {
889 break;
890 }
891 }
892
893 preceding = i;
894 other_preceding = j;
895 }
896 }
897
898 /** Number of elements (lines) in this file. */
899 private final int bufferedLines;
900
901 /** Vector, indexed by line number, containing an equivalence code for
902 each line. It is this vector that is actually compared with that
903 of another file to generate differences. */
904 private final int[] equivs;
905
906 /** Vector, like the previous one except that
907 the elements for discarded lines have been squeezed out. */
908 private final int[] undiscarded;
909
910 /** Vector mapping virtual line numbers (not counting discarded lines)
911 to real ones (counting those lines). Both are origin-0. */
912 private final int[] realindexes;
913
914 /** Total number of nondiscarded lines. */
915 private int nondiscardedLines;
916
917 /** Array, indexed by real origin-1 line number,
918 containing true for a line that is an insertion or a deletion.
919 The results of comparison are stored here. */
920 private boolean[] changedFlag;
921 }
922}
Note: See TracBrowser for help on using the repository browser.