source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 8994

Last change on this file since 8994 was 8976, checked in by simon04, 8 years ago

fix #12016 - History dialog sometimes incorrectly shows a way as reversed

  • Property svn:eol-style set to native
File size: 35.7 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.15 2013/04/01 16:27:31 stuart
11 * Fix DiffPrint unified format with test cases.
12 * Begin porting some diff-2.7 features.
13 *
14 * Revision 1.14 2010/03/03 21:21:25 stuart
15 * Test new direct equivalence API
16 *
17 * Revision 1.13 2009/12/07 17:43:17 stuart
18 * Compute equivMax for int[] ctor
19 *
20 * Revision 1.12 2009/12/07 17:34:46 stuart
21 * Ctor with int[].
22 *
23 * Revision 1.11 2009/11/15 01:11:54 stuart
24 * Diff doesn't need to be generic
25 *
26 * Revision 1.10 2009/11/15 00:54:03 stuart
27 * Update to Java 5 containers
28 *
29 * Revision 1.7 2009/01/19 03:05:26 stuart
30 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31 *
32 * Revision 1.6 2003/03/06 22:51:32 stuart
33 * Convert to CVS
34 *
35 * Revision 1.5 2002/07/19 19:14:40 stuart
36 * fix reverseScript, make change ctor public, update docs
37 *
38 * Revision 1.4 2002/04/09 17:53:39 stuart
39 * More flexible interface for diff() function.
40 *
41 * Revision 1.3 2000/03/03 21:58:03 stuart
42 * move discard_confusing_lines and shift_boundaries to class file_data
43 *
44 * Revision 1.2 2000/03/02 16:37:38 stuart
45 * Add GPL and copyright
46 *
47 */
48
49import java.util.HashMap;
50import java.util.Map;
51
52/** A class to compare vectors of objects. The result of comparison
53 is a list of <code>change</code> objects which form an
54 edit script. The objects compared are traditionally lines
55 of text from two files. Comparison options such as "ignore
56 whitespace" are implemented by modifying the <code>equals</code>
57 and <code>hashcode</code> methods for the objects compared.
58<p>
59 The basic algorithm is described in: <br>
60 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61 Algorithmica Vol. 1 No. 2, 1986, p 251.
62<p>
63 This class outputs different results from GNU diff 1.15 on some
64 inputs. Our results are actually better (smaller change list, smaller
65 total size of changes), but it would be nice to know why. Perhaps
66 there is a memory overwrite bug in GNU diff 1.15.
67
68 @author Stuart D. Gathman, translated from GNU diff 1.15
69 Copyright (C) 2000 Business Management Systems, Inc.
70<p>
71 This program is free software; you can redistribute it and/or modify
72 it under the terms of the GNU General Public License as published by
73 the Free Software Foundation; either version 1, or (at your option)
74 any later version.
75<p>
76 This program is distributed in the hope that it will be useful,
77 but WITHOUT ANY WARRANTY; without even the implied warranty of
78 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79 GNU General Public License for more details.
80<p>
81 You should have received a copy of the <a href=COPYING.txt>
82 GNU General Public License</a>
83 along with this program; if not, write to the Free Software
84 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85 */
86public class Diff {
87
88 /** Prepare to find differences between two arrays. Each element of
89 the arrays is translated to an "equivalence number" based on
90 the result of <code>equals</code>. The original Object arrays
91 are no longer needed for computing the differences. They will
92 be needed again later to print the results of the comparison as
93 an edit script, if desired.
94 * @param a first array
95 * @param b second array
96 */
97 public Diff(Object[] a, Object[] b) {
98 Map<Object, Integer> h = new HashMap<>(a.length + b.length);
99 filevec = new FileData[] {new FileData(a, h), new FileData(b, h)};
100 }
101
102 /** 1 more than the maximum equivalence value used for this or its
103 sibling file. */
104 private int equivMax = 1;
105
106 /** When set to true, the comparison uses a heuristic to speed it up.
107 With this heuristic, for files with a constant small density
108 of changes, the algorithm is linear in the file size. */
109 public boolean heuristic;
110
111 /** When set to true, the algorithm returns a guarranteed minimal
112 set of changes. This makes things slower, sometimes much slower. */
113 public boolean noDiscards;
114
115 private int[] xvec, yvec; /* Vectors being compared. */
116 private int[] fdiag; /* Vector, indexed by diagonal, containing
117 the X coordinate of the point furthest
118 along the given diagonal in the forward
119 search of the edit matrix. */
120 private int[] bdiag; /* Vector, indexed by diagonal, containing
121 the X coordinate of the point furthest
122 along the given diagonal in the backward
123 search of the edit matrix. */
124 private int fdiagoff, bdiagoff;
125 private final FileData[] filevec;
126 private int cost;
127 /** Snakes bigger than this are considered "big". */
128 private static final int SNAKE_LIMIT = 20;
129
130 /**
131 * Find the midpoint of the shortest edit script for a specified
132 * portion of the two files.
133 *
134 * We scan from the beginnings of the files, and simultaneously from the ends,
135 * doing a breadth-first search through the space of edit-sequence.
136 * When the two searches meet, we have found the midpoint of the shortest
137 * edit sequence.
138 *
139 * The value returned is the number of the diagonal on which the midpoint lies.
140 * The diagonal number equals the number of inserted lines minus the number
141 * of deleted lines (counting only lines before the midpoint).
142 * The edit cost is stored into COST; this is the total number of
143 * lines inserted or deleted (counting only lines before the midpoint).
144 *
145 * This function assumes that the first lines of the specified portions
146 * of the two files do not match, and likewise that the last lines do not
147 * match. The caller must trim matching lines from the beginning and end
148 * of the portions it is going to specify.
149 *
150 * Note that if we return the "wrong" diagonal value, or if
151 * the value of bdiag at that diagonal is "wrong",
152 * the worst this can do is cause suboptimal diff output.
153 * It cannot cause incorrect diff output.
154 */
155 private int diag(int xoff, int xlim, int yoff, int ylim) {
156 final int[] fd = fdiag; // Give the compiler a chance.
157 final int[] bd = bdiag; // Additional help for the compiler.
158 final int[] xv = xvec; // Still more help for the compiler.
159 final int[] yv = yvec; // And more and more . . .
160 final int dmin = xoff - ylim; // Minimum valid diagonal.
161 final int dmax = xlim - yoff; // Maximum valid diagonal.
162 final int fmid = xoff - yoff; // Center diagonal of top-down search.
163 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
164 int fmin = fmid, fmax = fmid; // Limits of top-down search.
165 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
166 /* True if southeast corner is on an odd
167 diagonal with respect to the northwest. */
168 final boolean odd = (fmid - bmid & 1) != 0;
169
170 fd[fdiagoff + fmid] = xoff;
171 bd[bdiagoff + bmid] = xlim;
172
173 for (int c = 1;; ++c) {
174 int d; /* Active diagonal. */
175 boolean big_snake = false;
176
177 /* Extend the top-down search by an edit step in each diagonal. */
178 if (fmin > dmin) {
179 fd[fdiagoff + --fmin - 1] = -1;
180 } else {
181 ++fmin;
182 }
183 if (fmax < dmax) {
184 fd[fdiagoff + ++fmax + 1] = -1;
185 } else {
186 --fmax;
187 }
188 for (d = fmax; d >= fmin; d -= 2) {
189 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
190
191 if (tlo >= thi) {
192 x = tlo + 1;
193 } else {
194 x = thi;
195 }
196 oldx = x;
197 y = x - d;
198 while (x < xlim && y < ylim && xv[x] == yv[y]) {
199 ++x; ++y;
200 }
201 if (x - oldx > SNAKE_LIMIT) {
202 big_snake = true;
203 }
204 fd[fdiagoff + d] = x;
205 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
206 cost = 2 * c - 1;
207 return d;
208 }
209 }
210
211 /* Similar extend the bottom-up search. */
212 if (bmin > dmin) {
213 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
214 } else {
215 ++bmin;
216 }
217 if (bmax < dmax) {
218 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
219 } else {
220 --bmax;
221 }
222 for (d = bmax; d >= bmin; d -= 2) {
223 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
224
225 if (tlo < thi) {
226 x = tlo;
227 } else {
228 x = thi - 1;
229 }
230 oldx = x;
231 y = x - d;
232 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
233 --x; --y;
234 }
235 if (oldx - x > SNAKE_LIMIT) {
236 big_snake = true;
237 }
238 bd[bdiagoff + d] = x;
239 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
240 cost = 2 * c;
241 return d;
242 }
243 }
244
245 /* Heuristic: check occasionally for a diagonal that has made
246 lots of progress compared with the edit distance.
247 If we have any such, find the one that has made the most
248 progress and return it as if it had succeeded.
249
250 With this heuristic, for files with a constant small density
251 of changes, the algorithm is linear in the file size. */
252
253 if (c > 200 && big_snake && heuristic) {
254 int best = 0;
255 int bestpos = -1;
256
257 for (d = fmax; d >= fmin; d -= 2) {
258 int dd = d - fmid;
259 int x = fd[fdiagoff + d];
260 int y = x - d;
261 int v = (x - xoff) * 2 - dd;
262 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
263 if (v > best
264 && xoff + SNAKE_LIMIT <= x && x < xlim
265 && yoff + SNAKE_LIMIT <= y && y < ylim) {
266 /* We have a good enough best diagonal.
267 now insist that it end with a significant snake. */
268 int k;
269
270 for (k = 1; xvec[x - k] == yvec[y - k]; k++) {
271 if (k == SNAKE_LIMIT) {
272 best = v;
273 bestpos = d;
274 break;
275 }
276 }
277 }
278 }
279 }
280 if (best > 0) {
281 cost = 2 * c - 1;
282 return bestpos;
283 }
284
285 best = 0;
286 for (d = bmax; d >= bmin; d -= 2) {
287 int dd = d - bmid;
288 int x = bd[bdiagoff + d];
289 int y = x - d;
290 int v = (xlim - x) * 2 + dd;
291 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
292 if (v > best
293 && xoff < x && x <= xlim - SNAKE_LIMIT
294 && yoff < y && y <= ylim - SNAKE_LIMIT) {
295 /* We have a good enough best diagonal.
296 now insist that it end with a significant snake. */
297 int k;
298
299 for (k = 0; xvec[x + k] == yvec[y + k]; k++) {
300 if (k == SNAKE_LIMIT) {
301 best = v;
302 bestpos = d;
303 break;
304 }
305 }
306 }
307 }
308 }
309 if (best > 0) {
310 cost = 2 * c - 1;
311 return bestpos;
312 }
313 }
314 }
315 }
316
317 /** Compare in detail contiguous subsequences of the two files
318 which are known, as a whole, to match each other.
319
320 The results are recorded in the vectors filevec[N].changed_flag, by
321 storing a 1 in the element for each line that is an insertion or deletion.
322
323 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
324
325 Note that XLIM, YLIM are exclusive bounds.
326 All line numbers are origin-0 and discarded lines are not counted. */
327
328 private void compareseq(int xoff, int xlim, int yoff, int ylim) {
329 /* Slide down the bottom initial diagonal. */
330 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
331 ++xoff; ++yoff;
332 }
333 /* Slide up the top initial diagonal. */
334 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
335 --xlim; --ylim;
336 }
337
338 /* Handle simple cases. */
339 if (xoff == xlim) {
340 while (yoff < ylim) {
341 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
342 }
343 } else if (yoff == ylim) {
344 while (xoff < xlim) {
345 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
346 }
347 } else {
348 /* Find a point of correspondence in the middle of the files. */
349
350 int d = diag(xoff, xlim, yoff, ylim);
351 int c = cost;
352 int b = bdiag[bdiagoff + d];
353
354 if (c == 1)
355 /* This should be impossible, because it implies that
356 one of the two subsequences is empty,
357 and that case was handled above without calling `diag'.
358 Let's verify that this is true. */
359 throw new IllegalArgumentException("Empty subsequence");
360 else {
361 /* Use that point to split this problem into two subproblems. */
362 compareseq(xoff, b, yoff, b - d);
363 /* This used to use f instead of b,
364 but that is incorrect!
365 It is not necessarily the case that diagonal d
366 has a snake from b to f. */
367 compareseq(b, xlim, b - d, ylim);
368 }
369 }
370 }
371
372 /** Discard lines from one file that have no matches in the other file.
373 */
374 private void discard_confusing_lines() {
375 filevec[0].discard_confusing_lines(filevec[1]);
376 filevec[1].discard_confusing_lines(filevec[0]);
377 }
378
379 private boolean inhibit;
380
381 /** Adjust inserts/deletes of blank lines to join changes
382 as much as possible.
383 */
384 private void shift_boundaries() {
385 if (inhibit)
386 return;
387 filevec[0].shift_boundaries(filevec[1]);
388 filevec[1].shift_boundaries(filevec[0]);
389 }
390
391 public interface ScriptBuilder {
392 /** Scan the tables of which lines are inserted and deleted,
393 producing an edit script.
394 @param changed0 true for lines in first file which do not match 2nd
395 @param len0 number of lines in first file
396 @param changed1 true for lines in 2nd file which do not match 1st
397 @param len1 number of lines in 2nd file
398 @return a linked list of changes - or null
399 */
400 Change build_script(
401 boolean[] changed0, int len0,
402 boolean[] changed1, int len1
403 );
404 }
405
406 /** Scan the tables of which lines are inserted and deleted,
407 producing an edit script in reverse order. */
408
409 static class ReverseScript implements ScriptBuilder {
410 @Override
411 public Change build_script(
412 final boolean[] changed0, int len0,
413 final boolean[] changed1, int len1) {
414 Change script = null;
415 int i0 = 0, i1 = 0;
416 while (i0 < len0 || i1 < len1) {
417 if (changed0[1+i0] || changed1[1+i1]) {
418 int line0 = i0, line1 = i1;
419
420 /* Find # lines changed here in each file. */
421 while (changed0[1+i0]) {
422 ++i0;
423 }
424 while (changed1[1+i1]) {
425 ++i1;
426 }
427
428 /* Record this change. */
429 script = new Change(line0, line1, i0 - line0, i1 - line1, script);
430 }
431
432 /* We have reached lines in the two files that match each other. */
433 i0++; i1++;
434 }
435
436 return script;
437 }
438 }
439
440 static class ForwardScript implements ScriptBuilder {
441 /** Scan the tables of which lines are inserted and deleted,
442 producing an edit script in forward order. */
443 @Override
444 public Change build_script(
445 final boolean[] changed0, int len0,
446 final boolean[] changed1, int len1) {
447 Change script = null;
448 int i0 = len0, i1 = len1;
449
450 while (i0 >= 0 || i1 >= 0) {
451 if (changed0[i0] || changed1[i1]) {
452 int line0 = i0, line1 = i1;
453
454 /* Find # lines changed here in each file. */
455 while (changed0[i0]) {
456 --i0;
457 }
458 while (changed1[i1]) {
459 --i1;
460 }
461
462 /* Record this change. */
463 script = new Change(i0, i1, line0 - i0, line1 - i1, script);
464 }
465
466 /* We have reached lines in the two files that match each other. */
467 i0--; i1--;
468 }
469
470 return script;
471 }
472 }
473
474 /** Standard ScriptBuilders. */
475 public static final ScriptBuilder
476 forwardScript = new ForwardScript(),
477 reverseScript = new ReverseScript();
478
479 /** Report the differences of two files. DEPTH is the current directory depth. */
480 public final Change diff_2(final boolean reverse) {
481 return diff(reverse ? reverseScript : forwardScript);
482 }
483
484 /** Get the results of comparison as an edit script. The script
485 is described by a list of changes. The standard ScriptBuilder
486 implementations provide for forward and reverse edit scripts.
487 Alternate implementations could, for instance, list common elements
488 instead of differences.
489 @param bld an object to build the script from change flags
490 @return the head of a list of changes
491 */
492 public Change diff(final ScriptBuilder bld) {
493
494 /* Some lines are obviously insertions or deletions
495 because they don't match anything. Detect them now,
496 and avoid even thinking about them in the main comparison algorithm. */
497
498 discard_confusing_lines();
499
500 /* Now do the main comparison algorithm, considering just the
501 undiscarded lines. */
502
503 xvec = filevec[0].undiscarded;
504 yvec = filevec[1].undiscarded;
505
506 int diags =
507 filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
508 fdiag = new int[diags];
509 fdiagoff = filevec[1].nondiscardedLines + 1;
510 bdiag = new int[diags];
511 bdiagoff = filevec[1].nondiscardedLines + 1;
512
513 compareseq(0, filevec[0].nondiscardedLines,
514 0, filevec[1].nondiscardedLines);
515 fdiag = null;
516 bdiag = null;
517
518 /* Modify the results slightly to make them prettier
519 in cases where that can validly be done. */
520
521 shift_boundaries();
522
523 /* Get the results of comparison in the form of a chain
524 of `struct change's -- an edit script. */
525 return bld.build_script(
526 filevec[0].changedFlag,
527 filevec[0].bufferedLines,
528 filevec[1].changedFlag,
529 filevec[1].bufferedLines
530 );
531
532 }
533
534 /** The result of comparison is an "edit script": a chain of change objects.
535 Each change represents one place where some lines are deleted
536 and some are inserted.
537
538 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
539 DELETED is the number of lines deleted here from file 0.
540 INSERTED is the number of lines inserted here in file 1.
541
542 If DELETED is 0 then LINE0 is the number of the line before
543 which the insertion was done; vice versa for INSERTED and LINE1. */
544
545 public static class Change {
546 /** Previous or next edit command. */
547 public Change link;
548 /** # lines of file 1 changed here. */
549 public final int inserted;
550 /** # lines of file 0 changed here. */
551 public final int deleted;
552 /** Line number of 1st deleted line. */
553 public final int line0;
554 /** Line number of 1st inserted line. */
555 public final int line1;
556
557 /** Cons an additional entry onto the front of an edit script OLD.
558 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
559 DELETED is the number of lines deleted here from file 0.
560 INSERTED is the number of lines inserted here in file 1.
561
562 If DELETED is 0 then LINE0 is the number of the line before
563 which the insertion was done; vice versa for INSERTED and LINE1. */
564 public Change(int line0, int line1, int deleted, int inserted, Change old) {
565 this.line0 = line0;
566 this.line1 = line1;
567 this.inserted = inserted;
568 this.deleted = deleted;
569 this.link = old;
570 }
571
572 /**
573 * Returns the number of insertions and deletions of this change as well as
574 * (recursively) the changes linked via {@link #link}.
575 */
576 public int getTotalNumberOfChanges() {
577 return inserted + deleted + (link != null ? link.getTotalNumberOfChanges() : 0);
578 }
579
580 @Override
581 public String toString() {
582 String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1);
583 return (link != null) ? s = s + '\n' + link : s;
584 }
585 }
586
587 /** Data on one input file being compared.
588 */
589 class FileData {
590
591 /** Allocate changed array for the results of comparison. */
592 void clear() {
593 /* Allocate a flag for each line of each file, saying whether that line
594 is an insertion or deletion.
595 Allocate an extra element, always zero, at each end of each vector.
596 */
597 changedFlag = new boolean[bufferedLines + 2];
598 }
599
600 /** Return equiv_count[I] as the number of lines in this file
601 that fall in equivalence class I.
602 @return the array of equivalence class counts.
603 */
604 int[] equivCount() {
605 int[] equiv_count = new int[equivMax];
606 for (int i = 0; i < bufferedLines; ++i) {
607 ++equiv_count[equivs[i]];
608 }
609 return equiv_count;
610 }
611
612 /** Discard lines that have no matches in another file.
613
614 A line which is discarded will not be considered by the actual
615 comparison algorithm; it will be as if that line were not in the file.
616 The file's `realindexes' table maps virtual line numbers
617 (which don't count the discarded lines) into real line numbers;
618 this is how the actual comparison algorithm produces results
619 that are comprehensible when the discarded lines are counted.
620<p>
621 When we discard a line, we also mark it as a deletion or insertion
622 so that it will be printed in the output.
623 @param f the other file
624 */
625 void discard_confusing_lines(FileData f) {
626 clear();
627 /* Set up table of which lines are going to be discarded. */
628 final byte[] discarded = discardable(f.equivCount());
629
630 /* Don't really discard the provisional lines except when they occur
631 in a run of discardables, with nonprovisionals at the beginning
632 and end. */
633 filterDiscards(discarded);
634
635 /* Actually discard the lines. */
636 discard(discarded);
637 }
638
639 /**
640 * Mark to be discarded each line that matches no line of another file.
641 * If a line matches many lines, mark it as provisionally discardable.
642 * @param counts The count of each equivalence number for the other file.
643 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
644 * @see #equivCount()
645 */
646 private byte[] discardable(final int[] counts) {
647 final int end = bufferedLines;
648 final byte[] discards = new byte[end];
649 final int[] equivs = this.equivs;
650 int many = 5;
651 int tem = end / 64;
652
653 /* Multiply MANY by approximate square root of number of lines.
654 That is the threshold for provisionally discardable lines. */
655 while ((tem = tem >> 2) > 0) {
656 many *= 2;
657 }
658
659 for (int i = 0; i < end; i++) {
660 int nmatch;
661 if (equivs[i] == 0) {
662 continue;
663 }
664 nmatch = counts[equivs[i]];
665 if (nmatch == 0) {
666 discards[i] = 1;
667 } else if (nmatch > many) {
668 discards[i] = 2;
669 }
670 }
671 return discards;
672 }
673
674 /**
675 * Don't really discard the provisional lines except when they occur
676 * in a run of discardables, with nonprovisionals at the beginning
677 * and end.
678 */
679 private void filterDiscards(final byte[] discards) {
680 final int end = bufferedLines;
681
682 for (int i = 0; i < end; i++) {
683 /* Cancel provisional discards not in middle of run of discards. */
684 if (discards[i] == 2) {
685 discards[i] = 0;
686 } else if (discards[i] != 0) {
687 /* We have found a nonprovisional discard. */
688 int j;
689 int length;
690 int provisional = 0;
691
692 /* Find end of this run of discardable lines.
693 Count how many are provisionally discardable. */
694 for (j = i; j < end; j++) {
695 if (discards[j] == 0) {
696 break;
697 }
698 if (discards[j] == 2) {
699 ++provisional;
700 }
701 }
702
703 /* Cancel provisional discards at end, and shrink the run. */
704 while (j > i && discards[j - 1] == 2) {
705 discards[--j] = 0; --provisional;
706 }
707
708 /* Now we have the length of a run of discardable lines
709 whose first and last are not provisional. */
710 length = j - i;
711
712 /* If 1/4 of the lines in the run are provisional,
713 cancel discarding of all provisional lines in the run. */
714 if (provisional * 4 > length) {
715 while (j > i)
716 if (discards[--j] == 2) {
717 discards[j] = 0;
718 }
719 } else {
720 int consec;
721 int minimum = 1;
722 int tem = length / 4;
723
724 /* MINIMUM is approximate square root of LENGTH/4.
725 A subrun of two or more provisionals can stand
726 when LENGTH is at least 16.
727 A subrun of 4 or more can stand when LENGTH >= 64. */
728 while ((tem = tem >> 2) > 0) {
729 minimum *= 2;
730 }
731 minimum++;
732
733 /* Cancel any subrun of MINIMUM or more provisionals
734 within the larger run. */
735 for (j = 0, consec = 0; j < length; j++) {
736 if (discards[i + j] != 2) {
737 consec = 0;
738 } else if (minimum == ++consec) {
739 /* Back up to start of subrun, to cancel it all. */
740 j -= consec;
741 } else if (minimum < consec) {
742 discards[i + j] = 0;
743 }
744 }
745
746 /* Scan from beginning of run
747 until we find 3 or more nonprovisionals in a row
748 or until the first nonprovisional at least 8 lines in.
749 Until that point, cancel any provisionals. */
750 for (j = 0, consec = 0; j < length; j++) {
751 if (j >= 8 && discards[i + j] == 1) {
752 break;
753 }
754 if (discards[i + j] == 2) {
755 consec = 0; discards[i + j] = 0;
756 } else if (discards[i + j] == 0) {
757 consec = 0;
758 } else {
759 consec++;
760 }
761 if (consec == 3) {
762 break;
763 }
764 }
765
766 /* I advances to the last line of the run. */
767 i += length - 1;
768
769 /* Same thing, from end. */
770 for (j = 0, consec = 0; j < length; j++) {
771 if (j >= 8 && discards[i - j] == 1) {
772 break;
773 }
774 if (discards[i - j] == 2) {
775 consec = 0; discards[i - j] = 0;
776 } else if (discards[i - j] == 0) {
777 consec = 0;
778 } else {
779 consec++;
780 }
781 if (consec == 3) {
782 break;
783 }
784 }
785 }
786 }
787 }
788 }
789
790 /** Actually discard the lines.
791 @param discards flags lines to be discarded
792 */
793 private void discard(final byte[] discards) {
794 final int end = bufferedLines;
795 int j = 0;
796 for (int i = 0; i < end; ++i) {
797 if (noDiscards || discards[i] == 0) {
798 undiscarded[j] = equivs[i];
799 realindexes[j++] = i;
800 } else {
801 changedFlag[1+i] = true;
802 }
803 }
804 nondiscardedLines = j;
805 }
806
807 FileData(int length) {
808 bufferedLines = length;
809 equivs = new int[length];
810 undiscarded = new int[bufferedLines];
811 realindexes = new int[bufferedLines];
812 }
813
814 FileData(Object[] data, Map<Object, Integer> h) {
815 this(data.length);
816 // FIXME: diff 2.7 removes common prefix and common suffix
817 for (int i = 0; i < data.length; ++i) {
818 Integer ir = h.get(data[i]);
819 if (ir == null) {
820 h.put(data[i], equivs[i] = equivMax++);
821 } else {
822 equivs[i] = ir.intValue();
823 }
824 }
825 }
826
827 /** Adjust inserts/deletes of blank lines to join changes
828 as much as possible.
829
830 We do something when a run of changed lines include a blank
831 line at one end and have an excluded blank line at the other.
832 We are free to choose which blank line is included.
833 `compareseq' always chooses the one at the beginning,
834 but usually it is cleaner to consider the following blank line
835 to be the "change". The only exception is if the preceding blank line
836 would join this change to other changes.
837 @param f the file being compared against
838 */
839 void shift_boundaries(FileData f) {
840 final boolean[] changed = changedFlag;
841 final boolean[] other_changed = f.changedFlag;
842 int i = 0;
843 int j = 0;
844 int i_end = bufferedLines;
845 int preceding = -1;
846 int other_preceding = -1;
847
848 for (;;) {
849 int start, end, other_start;
850
851 /* Scan forwards to find beginning of another run of changes.
852 Also keep track of the corresponding point in the other file. */
853
854 while (i < i_end && !changed[1+i]) {
855 while (other_changed[1+j++]) {
856 /* Non-corresponding lines in the other file
857 will count as the preceding batch of changes. */
858 other_preceding = j;
859 }
860 i++;
861 }
862
863 if (i == i_end) {
864 break;
865 }
866
867 start = i;
868 other_start = j;
869
870 for (;;) {
871 /* Now find the end of this run of changes. */
872
873 while (i < i_end && changed[1+i]) {
874 i++;
875 }
876 end = i;
877
878 /* If the first changed line matches the following unchanged one,
879 and this run does not follow right after a previous run,
880 and there are no lines deleted from the other file here,
881 then classify the first changed line as unchanged
882 and the following line as changed in its place. */
883
884 /* You might ask, how could this run follow right after another?
885 Only because the previous run was shifted here. */
886
887 if (end != i_end && equivs[start] == equivs[end] && !other_changed[1+j]
888 && !((preceding >= 0 && start == preceding) || (other_preceding >= 0 && other_start == other_preceding))) {
889 changed[1+end++] = true;
890 changed[1+start++] = false;
891 ++i;
892 /* Since one line-that-matches is now before this run
893 instead of after, we must advance in the other file
894 to keep in synch. */
895 ++j;
896 } else {
897 break;
898 }
899 }
900
901 preceding = i;
902 other_preceding = j;
903 }
904 }
905
906 /** Number of elements (lines) in this file. */
907 private final int bufferedLines;
908
909 /** Vector, indexed by line number, containing an equivalence code for
910 each line. It is this vector that is actually compared with that
911 of another file to generate differences. */
912 private final int[] equivs;
913
914 /** Vector, like the previous one except that
915 the elements for discarded lines have been squeezed out. */
916 private final int[] undiscarded;
917
918 /** Vector mapping virtual line numbers (not counting discarded lines)
919 to real ones (counting those lines). Both are origin-0. */
920 private final int[] realindexes;
921
922 /** Total number of nondiscarded lines. */
923 private int nondiscardedLines;
924
925 /** Array, indexed by real origin-1 line number,
926 containing true for a line that is an insertion or a deletion.
927 The results of comparison are stored here. */
928 private boolean[] changedFlag;
929 }
930}
Note: See TracBrowser for help on using the repository browser.