source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 8510

Last change on this file since 8510 was 8510, checked in by Don-vip, 9 years ago

checkstyle: enable relevant whitespace checks and fix them

File size: 35.2 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.15 2013/04/01 16:27:31 stuart
11 * Fix DiffPrint unified format with test cases.
12 * Begin porting some diff-2.7 features.
13 *
14 * Revision 1.14 2010/03/03 21:21:25 stuart
15 * Test new direct equivalence API
16 *
17 * Revision 1.13 2009/12/07 17:43:17 stuart
18 * Compute equivMax for int[] ctor
19 *
20 * Revision 1.12 2009/12/07 17:34:46 stuart
21 * Ctor with int[].
22 *
23 * Revision 1.11 2009/11/15 01:11:54 stuart
24 * Diff doesn't need to be generic
25 *
26 * Revision 1.10 2009/11/15 00:54:03 stuart
27 * Update to Java 5 containers
28 *
29 * Revision 1.7 2009/01/19 03:05:26 stuart
30 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31 *
32 * Revision 1.6 2003/03/06 22:51:32 stuart
33 * Convert to CVS
34 *
35 * Revision 1.5 2002/07/19 19:14:40 stuart
36 * fix reverseScript, make change ctor public, update docs
37 *
38 * Revision 1.4 2002/04/09 17:53:39 stuart
39 * More flexible interface for diff() function.
40 *
41 * Revision 1.3 2000/03/03 21:58:03 stuart
42 * move discard_confusing_lines and shift_boundaries to class file_data
43 *
44 * Revision 1.2 2000/03/02 16:37:38 stuart
45 * Add GPL and copyright
46 *
47 */
48
49import java.util.HashMap;
50import java.util.Map;
51
52/** A class to compare vectors of objects. The result of comparison
53 is a list of <code>change</code> objects which form an
54 edit script. The objects compared are traditionally lines
55 of text from two files. Comparison options such as "ignore
56 whitespace" are implemented by modifying the <code>equals</code>
57 and <code>hashcode</code> methods for the objects compared.
58<p>
59 The basic algorithm is described in: <br>
60 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61 Algorithmica Vol. 1 No. 2, 1986, p 251.
62<p>
63 This class outputs different results from GNU diff 1.15 on some
64 inputs. Our results are actually better (smaller change list, smaller
65 total size of changes), but it would be nice to know why. Perhaps
66 there is a memory overwrite bug in GNU diff 1.15.
67
68 @author Stuart D. Gathman, translated from GNU diff 1.15
69 Copyright (C) 2000 Business Management Systems, Inc.
70<p>
71 This program is free software; you can redistribute it and/or modify
72 it under the terms of the GNU General Public License as published by
73 the Free Software Foundation; either version 1, or (at your option)
74 any later version.
75<p>
76 This program is distributed in the hope that it will be useful,
77 but WITHOUT ANY WARRANTY; without even the implied warranty of
78 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79 GNU General Public License for more details.
80<p>
81 You should have received a copy of the <a href=COPYING.txt>
82 GNU General Public License</a>
83 along with this program; if not, write to the Free Software
84 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85
86 */
87
88public class Diff {
89
90 /** Prepare to find differences between two arrays. Each element of
91 the arrays is translated to an "equivalence number" based on
92 the result of <code>equals</code>. The original Object arrays
93 are no longer needed for computing the differences. They will
94 be needed again later to print the results of the comparison as
95 an edit script, if desired.
96 */
97 public Diff(Object[] a, Object[] b) {
98 Map<Object, Integer> h = new HashMap<>(a.length + b.length);
99 filevec = new FileData[] {new FileData(a, h), new FileData(b, h)};
100 }
101
102 /** 1 more than the maximum equivalence value used for this or its
103 sibling file. */
104 private int equivMax = 1;
105
106 /** When set to true, the comparison uses a heuristic to speed it up.
107 With this heuristic, for files with a constant small density
108 of changes, the algorithm is linear in the file size. */
109 public boolean heuristic = false;
110
111 /** When set to true, the algorithm returns a guarranteed minimal
112 set of changes. This makes things slower, sometimes much slower. */
113 public boolean noDiscards = false;
114
115 private int[] xvec, yvec; /* Vectors being compared. */
116 private int[] fdiag; /* Vector, indexed by diagonal, containing
117 the X coordinate of the point furthest
118 along the given diagonal in the forward
119 search of the edit matrix. */
120 private int[] bdiag; /* Vector, indexed by diagonal, containing
121 the X coordinate of the point furthest
122 along the given diagonal in the backward
123 search of the edit matrix. */
124 private int fdiagoff, bdiagoff;
125 private final FileData[] filevec;
126 private int cost;
127 /** Snakes bigger than this are considered "big". */
128 private static final int SNAKE_LIMIT = 20;
129
130 /** Find the midpoint of the shortest edit script for a specified
131 portion of the two files.
132
133 We scan from the beginnings of the files, and simultaneously from the ends,
134 doing a breadth-first search through the space of edit-sequence.
135 When the two searches meet, we have found the midpoint of the shortest
136 edit sequence.
137
138 The value returned is the number of the diagonal on which the midpoint lies.
139 The diagonal number equals the number of inserted lines minus the number
140 of deleted lines (counting only lines before the midpoint).
141 The edit cost is stored into COST; this is the total number of
142 lines inserted or deleted (counting only lines before the midpoint).
143
144 This function assumes that the first lines of the specified portions
145 of the two files do not match, and likewise that the last lines do not
146 match. The caller must trim matching lines from the beginning and end
147 of the portions it is going to specify.
148
149 Note that if we return the "wrong" diagonal value, or if
150 the value of bdiag at that diagonal is "wrong",
151 the worst this can do is cause suboptimal diff output.
152 It cannot cause incorrect diff output. */
153
154 private int diag(int xoff, int xlim, int yoff, int ylim) {
155 final int[] fd = fdiag; // Give the compiler a chance.
156 final int[] bd = bdiag; // Additional help for the compiler.
157 final int[] xv = xvec; // Still more help for the compiler.
158 final int[] yv = yvec; // And more and more . . .
159 final int dmin = xoff - ylim; // Minimum valid diagonal.
160 final int dmax = xlim - yoff; // Maximum valid diagonal.
161 final int fmid = xoff - yoff; // Center diagonal of top-down search.
162 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
163 int fmin = fmid, fmax = fmid; // Limits of top-down search.
164 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
165 /* True if southeast corner is on an odd
166 diagonal with respect to the northwest. */
167 final boolean odd = (fmid - bmid & 1) != 0;
168
169 fd[fdiagoff + fmid] = xoff;
170 bd[bdiagoff + bmid] = xlim;
171
172 for (int c = 1;; ++c) {
173 int d; /* Active diagonal. */
174 boolean big_snake = false;
175
176 /* Extend the top-down search by an edit step in each diagonal. */
177 if (fmin > dmin) {
178 fd[fdiagoff + --fmin - 1] = -1;
179 } else {
180 ++fmin;
181 }
182 if (fmax < dmax) {
183 fd[fdiagoff + ++fmax + 1] = -1;
184 } else {
185 --fmax;
186 }
187 for (d = fmax; d >= fmin; d -= 2) {
188 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
189
190 if (tlo >= thi) {
191 x = tlo + 1;
192 } else {
193 x = thi;
194 }
195 oldx = x;
196 y = x - d;
197 while (x < xlim && y < ylim && xv[x] == yv[y]) {
198 ++x; ++y;
199 }
200 if (x - oldx > SNAKE_LIMIT) {
201 big_snake = true;
202 }
203 fd[fdiagoff + d] = x;
204 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
205 cost = 2 * c - 1;
206 return d;
207 }
208 }
209
210 /* Similar extend the bottom-up search. */
211 if (bmin > dmin) {
212 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
213 } else {
214 ++bmin;
215 }
216 if (bmax < dmax) {
217 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
218 } else {
219 --bmax;
220 }
221 for (d = bmax; d >= bmin; d -= 2) {
222 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
223
224 if (tlo < thi) {
225 x = tlo;
226 } else {
227 x = thi - 1;
228 }
229 oldx = x;
230 y = x - d;
231 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
232 --x; --y;
233 }
234 if (oldx - x > SNAKE_LIMIT) {
235 big_snake = true;
236 }
237 bd[bdiagoff + d] = x;
238 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
239 cost = 2 * c;
240 return d;
241 }
242 }
243
244 /* Heuristic: check occasionally for a diagonal that has made
245 lots of progress compared with the edit distance.
246 If we have any such, find the one that has made the most
247 progress and return it as if it had succeeded.
248
249 With this heuristic, for files with a constant small density
250 of changes, the algorithm is linear in the file size. */
251
252 if (c > 200 && big_snake && heuristic) {
253 int best = 0;
254 int bestpos = -1;
255
256 for (d = fmax; d >= fmin; d -= 2) {
257 int dd = d - fmid;
258 int x = fd[fdiagoff + d];
259 int y = x - d;
260 int v = (x - xoff) * 2 - dd;
261 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
262 if (v > best
263 && xoff + SNAKE_LIMIT <= x && x < xlim
264 && yoff + SNAKE_LIMIT <= y && y < ylim) {
265 /* We have a good enough best diagonal.
266 now insist that it end with a significant snake. */
267 int k;
268
269 for (k = 1; xvec[x - k] == yvec[y - k]; k++)
270 if (k == SNAKE_LIMIT) {
271 best = v;
272 bestpos = d;
273 break;
274 }
275 }
276 }
277 }
278 if (best > 0) {
279 cost = 2 * c - 1;
280 return bestpos;
281 }
282
283 best = 0;
284 for (d = bmax; d >= bmin; d -= 2) {
285 int dd = d - bmid;
286 int x = bd[bdiagoff + d];
287 int y = x - d;
288 int v = (xlim - x) * 2 + dd;
289 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
290 if (v > best
291 && xoff < x && x <= xlim - SNAKE_LIMIT
292 && yoff < y && y <= ylim - SNAKE_LIMIT) {
293 /* We have a good enough best diagonal.
294 now insist that it end with a significant snake. */
295 int k;
296
297 for (k = 0; xvec[x + k] == yvec[y + k]; k++)
298 if (k == SNAKE_LIMIT) {
299 best = v;
300 bestpos = d;
301 break;
302 }
303 }
304 }
305 }
306 if (best > 0) {
307 cost = 2 * c - 1;
308 return bestpos;
309 }
310 }
311 }
312 }
313
314 /** Compare in detail contiguous subsequences of the two files
315 which are known, as a whole, to match each other.
316
317 The results are recorded in the vectors filevec[N].changed_flag, by
318 storing a 1 in the element for each line that is an insertion or deletion.
319
320 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
321
322 Note that XLIM, YLIM are exclusive bounds.
323 All line numbers are origin-0 and discarded lines are not counted. */
324
325 private void compareseq(int xoff, int xlim, int yoff, int ylim) {
326 /* Slide down the bottom initial diagonal. */
327 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
328 ++xoff; ++yoff;
329 }
330 /* Slide up the top initial diagonal. */
331 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
332 --xlim; --ylim;
333 }
334
335 /* Handle simple cases. */
336 if (xoff == xlim) {
337 while (yoff < ylim) {
338 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
339 }
340 } else if (yoff == ylim) {
341 while (xoff < xlim) {
342 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
343 }
344 } else {
345 /* Find a point of correspondence in the middle of the files. */
346
347 int d = diag(xoff, xlim, yoff, ylim);
348 int c = cost;
349 int b = bdiag[bdiagoff + d];
350
351 if (c == 1)
352 /* This should be impossible, because it implies that
353 one of the two subsequences is empty,
354 and that case was handled above without calling `diag'.
355 Let's verify that this is true. */
356 throw new IllegalArgumentException("Empty subsequence");
357 else {
358 /* Use that point to split this problem into two subproblems. */
359 compareseq(xoff, b, yoff, b - d);
360 /* This used to use f instead of b,
361 but that is incorrect!
362 It is not necessarily the case that diagonal d
363 has a snake from b to f. */
364 compareseq(b, xlim, b - d, ylim);
365 }
366 }
367 }
368
369 /** Discard lines from one file that have no matches in the other file.
370 */
371 private void discard_confusing_lines() {
372 filevec[0].discard_confusing_lines(filevec[1]);
373 filevec[1].discard_confusing_lines(filevec[0]);
374 }
375
376 private boolean inhibit = false;
377
378 /** Adjust inserts/deletes of blank lines to join changes
379 as much as possible.
380 */
381 private void shift_boundaries() {
382 if (inhibit)
383 return;
384 filevec[0].shift_boundaries(filevec[1]);
385 filevec[1].shift_boundaries(filevec[0]);
386 }
387
388 public interface ScriptBuilder {
389 /** Scan the tables of which lines are inserted and deleted,
390 producing an edit script.
391 @param changed0 true for lines in first file which do not match 2nd
392 @param len0 number of lines in first file
393 @param changed1 true for lines in 2nd file which do not match 1st
394 @param len1 number of lines in 2nd file
395 @return a linked list of changes - or null
396 */
397 public Change build_script(
398 boolean[] changed0, int len0,
399 boolean[] changed1, int len1
400 );
401 }
402
403 /** Scan the tables of which lines are inserted and deleted,
404 producing an edit script in reverse order. */
405
406 static class ReverseScript implements ScriptBuilder {
407 @Override
408 public Change build_script(
409 final boolean[] changed0, int len0,
410 final boolean[] changed1, int len1) {
411 Change script = null;
412 int i0 = 0, i1 = 0;
413 while (i0 < len0 || i1 < len1) {
414 if (changed0[1+i0] || changed1[1+i1]) {
415 int line0 = i0, line1 = i1;
416
417 /* Find # lines changed here in each file. */
418 while (changed0[1+i0]) {
419 ++i0;
420 }
421 while (changed1[1+i1]) {
422 ++i1;
423 }
424
425 /* Record this change. */
426 script = new Change(line0, line1, i0 - line0, i1 - line1, script);
427 }
428
429 /* We have reached lines in the two files that match each other. */
430 i0++; i1++;
431 }
432
433 return script;
434 }
435 }
436
437 static class ForwardScript implements ScriptBuilder {
438 /** Scan the tables of which lines are inserted and deleted,
439 producing an edit script in forward order. */
440 @Override
441 public Change build_script(
442 final boolean[] changed0, int len0,
443 final boolean[] changed1, int len1) {
444 Change script = null;
445 int i0 = len0, i1 = len1;
446
447 while (i0 >= 0 || i1 >= 0) {
448 if (changed0[i0] || changed1[i1]) {
449 int line0 = i0, line1 = i1;
450
451 /* Find # lines changed here in each file. */
452 while (changed0[i0]) {
453 --i0;
454 }
455 while (changed1[i1]) {
456 --i1;
457 }
458
459 /* Record this change. */
460 script = new Change(i0, i1, line0 - i0, line1 - i1, script);
461 }
462
463 /* We have reached lines in the two files that match each other. */
464 i0--; i1--;
465 }
466
467 return script;
468 }
469 }
470
471 /** Standard ScriptBuilders. */
472 public static final ScriptBuilder
473 forwardScript = new ForwardScript(),
474 reverseScript = new ReverseScript();
475
476 /** Report the differences of two files. DEPTH is the current directory
477 depth. */
478 public final Change diff_2(final boolean reverse) {
479 return diff(reverse ? reverseScript : forwardScript);
480 }
481
482 /** Get the results of comparison as an edit script. The script
483 is described by a list of changes. The standard ScriptBuilder
484 implementations provide for forward and reverse edit scripts.
485 Alternate implementations could, for instance, list common elements
486 instead of differences.
487 @param bld an object to build the script from change flags
488 @return the head of a list of changes
489 */
490 public Change diff(final ScriptBuilder bld) {
491
492 /* Some lines are obviously insertions or deletions
493 because they don't match anything. Detect them now,
494 and avoid even thinking about them in the main comparison algorithm. */
495
496 discard_confusing_lines();
497
498 /* Now do the main comparison algorithm, considering just the
499 undiscarded lines. */
500
501 xvec = filevec[0].undiscarded;
502 yvec = filevec[1].undiscarded;
503
504 int diags =
505 filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
506 fdiag = new int[diags];
507 fdiagoff = filevec[1].nondiscardedLines + 1;
508 bdiag = new int[diags];
509 bdiagoff = filevec[1].nondiscardedLines + 1;
510
511 compareseq(0, filevec[0].nondiscardedLines,
512 0, filevec[1].nondiscardedLines);
513 fdiag = null;
514 bdiag = null;
515
516 /* Modify the results slightly to make them prettier
517 in cases where that can validly be done. */
518
519 shift_boundaries();
520
521 /* Get the results of comparison in the form of a chain
522 of `struct change's -- an edit script. */
523 return bld.build_script(
524 filevec[0].changedFlag,
525 filevec[0].bufferedLines,
526 filevec[1].changedFlag,
527 filevec[1].bufferedLines
528 );
529
530 }
531
532 /** The result of comparison is an "edit script": a chain of change objects.
533 Each change represents one place where some lines are deleted
534 and some are inserted.
535
536 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
537 DELETED is the number of lines deleted here from file 0.
538 INSERTED is the number of lines inserted here in file 1.
539
540 If DELETED is 0 then LINE0 is the number of the line before
541 which the insertion was done; vice versa for INSERTED and LINE1. */
542
543 public static class Change {
544 /** Previous or next edit command. */
545 public Change link;
546 /** # lines of file 1 changed here. */
547 public final int inserted;
548 /** # lines of file 0 changed here. */
549 public final int deleted;
550 /** Line number of 1st deleted line. */
551 public final int line0;
552 /** Line number of 1st inserted line. */
553 public final int line1;
554
555 /** Cons an additional entry onto the front of an edit script OLD.
556 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
557 DELETED is the number of lines deleted here from file 0.
558 INSERTED is the number of lines inserted here in file 1.
559
560 If DELETED is 0 then LINE0 is the number of the line before
561 which the insertion was done; vice versa for INSERTED and LINE1. */
562 public Change(int line0, int line1, int deleted, int inserted, Change old) {
563 this.line0 = line0;
564 this.line1 = line1;
565 this.inserted = inserted;
566 this.deleted = deleted;
567 this.link = old;
568 }
569
570 @Override
571 public String toString() {
572 String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1);
573 return (link != null) ? s = s + '\n' + link : s;
574 }
575 }
576
577 /** Data on one input file being compared.
578 */
579 class FileData {
580
581 /** Allocate changed array for the results of comparison. */
582 void clear() {
583 /* Allocate a flag for each line of each file, saying whether that line
584 is an insertion or deletion.
585 Allocate an extra element, always zero, at each end of each vector.
586 */
587 changedFlag = new boolean[bufferedLines + 2];
588 }
589
590 /** Return equiv_count[I] as the number of lines in this file
591 that fall in equivalence class I.
592 @return the array of equivalence class counts.
593 */
594 int[] equivCount() {
595 int[] equiv_count = new int[equivMax];
596 for (int i = 0; i < bufferedLines; ++i) {
597 ++equiv_count[equivs[i]];
598 }
599 return equiv_count;
600 }
601
602 /** Discard lines that have no matches in another file.
603
604 A line which is discarded will not be considered by the actual
605 comparison algorithm; it will be as if that line were not in the file.
606 The file's `realindexes' table maps virtual line numbers
607 (which don't count the discarded lines) into real line numbers;
608 this is how the actual comparison algorithm produces results
609 that are comprehensible when the discarded lines are counted.
610<p>
611 When we discard a line, we also mark it as a deletion or insertion
612 so that it will be printed in the output.
613 @param f the other file
614 */
615 void discard_confusing_lines(FileData f) {
616 clear();
617 /* Set up table of which lines are going to be discarded. */
618 final byte[] discarded = discardable(f.equivCount());
619
620 /* Don't really discard the provisional lines except when they occur
621 in a run of discardables, with nonprovisionals at the beginning
622 and end. */
623 filterDiscards(discarded);
624
625 /* Actually discard the lines. */
626 discard(discarded);
627 }
628
629 /**
630 * Mark to be discarded each line that matches no line of another file.
631 * If a line matches many lines, mark it as provisionally discardable.
632 * @param counts The count of each equivalence number for the other file.
633 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
634 * @see #equivCount()
635 */
636 private byte[] discardable(final int[] counts) {
637 final int end = bufferedLines;
638 final byte[] discards = new byte[end];
639 final int[] equivs = this.equivs;
640 int many = 5;
641 int tem = end / 64;
642
643 /* Multiply MANY by approximate square root of number of lines.
644 That is the threshold for provisionally discardable lines. */
645 while ((tem = tem >> 2) > 0) {
646 many *= 2;
647 }
648
649 for (int i = 0; i < end; i++) {
650 int nmatch;
651 if (equivs[i] == 0) {
652 continue;
653 }
654 nmatch = counts[equivs[i]];
655 if (nmatch == 0) {
656 discards[i] = 1;
657 } else if (nmatch > many) {
658 discards[i] = 2;
659 }
660 }
661 return discards;
662 }
663
664 /**
665 * Don't really discard the provisional lines except when they occur
666 * in a run of discardables, with nonprovisionals at the beginning
667 * and end.
668 */
669 private void filterDiscards(final byte[] discards) {
670 final int end = bufferedLines;
671
672 for (int i = 0; i < end; i++) {
673 /* Cancel provisional discards not in middle of run of discards. */
674 if (discards[i] == 2) {
675 discards[i] = 0;
676 } else if (discards[i] != 0) {
677 /* We have found a nonprovisional discard. */
678 int j;
679 int length;
680 int provisional = 0;
681
682 /* Find end of this run of discardable lines.
683 Count how many are provisionally discardable. */
684 for (j = i; j < end; j++) {
685 if (discards[j] == 0) {
686 break;
687 }
688 if (discards[j] == 2) {
689 ++provisional;
690 }
691 }
692
693 /* Cancel provisional discards at end, and shrink the run. */
694 while (j > i && discards[j - 1] == 2) {
695 discards[--j] = 0; --provisional;
696 }
697
698 /* Now we have the length of a run of discardable lines
699 whose first and last are not provisional. */
700 length = j - i;
701
702 /* If 1/4 of the lines in the run are provisional,
703 cancel discarding of all provisional lines in the run. */
704 if (provisional * 4 > length) {
705 while (j > i)
706 if (discards[--j] == 2) {
707 discards[j] = 0;
708 }
709 } else {
710 int consec;
711 int minimum = 1;
712 int tem = length / 4;
713
714 /* MINIMUM is approximate square root of LENGTH/4.
715 A subrun of two or more provisionals can stand
716 when LENGTH is at least 16.
717 A subrun of 4 or more can stand when LENGTH >= 64. */
718 while ((tem = tem >> 2) > 0) {
719 minimum *= 2;
720 }
721 minimum++;
722
723 /* Cancel any subrun of MINIMUM or more provisionals
724 within the larger run. */
725 for (j = 0, consec = 0; j < length; j++)
726 if (discards[i + j] != 2) {
727 consec = 0;
728 } else if (minimum == ++consec) {
729 /* Back up to start of subrun, to cancel it all. */
730 j -= consec;
731 } else if (minimum < consec) {
732 discards[i + j] = 0;
733 }
734
735 /* Scan from beginning of run
736 until we find 3 or more nonprovisionals in a row
737 or until the first nonprovisional at least 8 lines in.
738 Until that point, cancel any provisionals. */
739 for (j = 0, consec = 0; j < length; j++) {
740 if (j >= 8 && discards[i + j] == 1) {
741 break;
742 }
743 if (discards[i + j] == 2) {
744 consec = 0; discards[i + j] = 0;
745 } else if (discards[i + j] == 0) {
746 consec = 0;
747 } else {
748 consec++;
749 }
750 if (consec == 3) {
751 break;
752 }
753 }
754
755 /* I advances to the last line of the run. */
756 i += length - 1;
757
758 /* Same thing, from end. */
759 for (j = 0, consec = 0; j < length; j++) {
760 if (j >= 8 && discards[i - j] == 1) {
761 break;
762 }
763 if (discards[i - j] == 2) {
764 consec = 0; discards[i - j] = 0;
765 } else if (discards[i - j] == 0) {
766 consec = 0;
767 } else {
768 consec++;
769 }
770 if (consec == 3) {
771 break;
772 }
773 }
774 }
775 }
776 }
777 }
778
779 /** Actually discard the lines.
780 @param discards flags lines to be discarded
781 */
782 private void discard(final byte[] discards) {
783 final int end = bufferedLines;
784 int j = 0;
785 for (int i = 0; i < end; ++i)
786 if (noDiscards || discards[i] == 0) {
787 undiscarded[j] = equivs[i];
788 realindexes[j++] = i;
789 } else {
790 changedFlag[1+i] = true;
791 }
792 nondiscardedLines = j;
793 }
794
795 FileData(int length) {
796 bufferedLines = length;
797 equivs = new int[length];
798 undiscarded = new int[bufferedLines];
799 realindexes = new int[bufferedLines];
800 }
801
802 FileData(Object[] data, Map<Object, Integer> h) {
803 this(data.length);
804 // FIXME: diff 2.7 removes common prefix and common suffix
805 for (int i = 0; i < data.length; ++i) {
806 Integer ir = h.get(data[i]);
807 if (ir == null) {
808 h.put(data[i], equivs[i] = equivMax++);
809 } else {
810 equivs[i] = ir.intValue();
811 }
812 }
813 }
814
815 /** Adjust inserts/deletes of blank lines to join changes
816 as much as possible.
817
818 We do something when a run of changed lines include a blank
819 line at one end and have an excluded blank line at the other.
820 We are free to choose which blank line is included.
821 `compareseq' always chooses the one at the beginning,
822 but usually it is cleaner to consider the following blank line
823 to be the "change". The only exception is if the preceding blank line
824 would join this change to other changes.
825 @param f the file being compared against
826 */
827 void shift_boundaries(FileData f) {
828 final boolean[] changed = changedFlag;
829 final boolean[] other_changed = f.changedFlag;
830 int i = 0;
831 int j = 0;
832 int i_end = bufferedLines;
833 int preceding = -1;
834 int other_preceding = -1;
835
836 for (;;) {
837 int start, end, other_start;
838
839 /* Scan forwards to find beginning of another run of changes.
840 Also keep track of the corresponding point in the other file. */
841
842 while (i < i_end && !changed[1+i]) {
843 while (other_changed[1+j++]) {
844 /* Non-corresponding lines in the other file
845 will count as the preceding batch of changes. */
846 other_preceding = j;
847 }
848 i++;
849 }
850
851 if (i == i_end) {
852 break;
853 }
854
855 start = i;
856 other_start = j;
857
858 for (;;) {
859 /* Now find the end of this run of changes. */
860
861 while (i < i_end && changed[1+i]) {
862 i++;
863 }
864 end = i;
865
866 /* If the first changed line matches the following unchanged one,
867 and this run does not follow right after a previous run,
868 and there are no lines deleted from the other file here,
869 then classify the first changed line as unchanged
870 and the following line as changed in its place. */
871
872 /* You might ask, how could this run follow right after another?
873 Only because the previous run was shifted here. */
874
875 if (end != i_end && equivs[start] == equivs[end] && !other_changed[1+j]
876 && !((preceding >= 0 && start == preceding) || (other_preceding >= 0 && other_start == other_preceding))) {
877 changed[1+end++] = true;
878 changed[1+start++] = false;
879 ++i;
880 /* Since one line-that-matches is now before this run
881 instead of after, we must advance in the other file
882 to keep in synch. */
883 ++j;
884 } else {
885 break;
886 }
887 }
888
889 preceding = i;
890 other_preceding = j;
891 }
892 }
893
894 /** Number of elements (lines) in this file. */
895 private final int bufferedLines;
896
897 /** Vector, indexed by line number, containing an equivalence code for
898 each line. It is this vector that is actually compared with that
899 of another file to generate differences. */
900 private final int[] equivs;
901
902 /** Vector, like the previous one except that
903 the elements for discarded lines have been squeezed out. */
904 private final int[] undiscarded;
905
906 /** Vector mapping virtual line numbers (not counting discarded lines)
907 to real ones (counting those lines). Both are origin-0. */
908 private final int[] realindexes;
909
910 /** Total number of nondiscarded lines. */
911 private int nondiscardedLines;
912
913 /** Array, indexed by real origin-1 line number,
914 containing true for a line that is an insertion or a deletion.
915 The results of comparison are stored here. */
916 private boolean[] changedFlag;
917 }
918}
Note: See TracBrowser for help on using the repository browser.