source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 8449

Last change on this file since 8449 was 8443, checked in by Don-vip, 9 years ago

remove extra whitespaces

File size: 35.2 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.15 2013/04/01 16:27:31 stuart
11 * Fix DiffPrint unified format with test cases.
12 * Begin porting some diff-2.7 features.
13 *
14 * Revision 1.14 2010/03/03 21:21:25 stuart
15 * Test new direct equivalence API
16 *
17 * Revision 1.13 2009/12/07 17:43:17 stuart
18 * Compute equivMax for int[] ctor
19 *
20 * Revision 1.12 2009/12/07 17:34:46 stuart
21 * Ctor with int[].
22 *
23 * Revision 1.11 2009/11/15 01:11:54 stuart
24 * Diff doesn't need to be generic
25 *
26 * Revision 1.10 2009/11/15 00:54:03 stuart
27 * Update to Java 5 containers
28 *
29 * Revision 1.7 2009/01/19 03:05:26 stuart
30 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31 *
32 * Revision 1.6 2003/03/06 22:51:32 stuart
33 * Convert to CVS
34 *
35 * Revision 1.5 2002/07/19 19:14:40 stuart
36 * fix reverseScript, make change ctor public, update docs
37 *
38 * Revision 1.4 2002/04/09 17:53:39 stuart
39 * More flexible interface for diff() function.
40 *
41 * Revision 1.3 2000/03/03 21:58:03 stuart
42 * move discard_confusing_lines and shift_boundaries to class file_data
43 *
44 * Revision 1.2 2000/03/02 16:37:38 stuart
45 * Add GPL and copyright
46 *
47 */
48
49import java.util.HashMap;
50import java.util.Map;
51
52/** A class to compare vectors of objects. The result of comparison
53 is a list of <code>change</code> objects which form an
54 edit script. The objects compared are traditionally lines
55 of text from two files. Comparison options such as "ignore
56 whitespace" are implemented by modifying the <code>equals</code>
57 and <code>hashcode</code> methods for the objects compared.
58<p>
59 The basic algorithm is described in: <br>
60 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61 Algorithmica Vol. 1 No. 2, 1986, p 251.
62<p>
63 This class outputs different results from GNU diff 1.15 on some
64 inputs. Our results are actually better (smaller change list, smaller
65 total size of changes), but it would be nice to know why. Perhaps
66 there is a memory overwrite bug in GNU diff 1.15.
67
68 @author Stuart D. Gathman, translated from GNU diff 1.15
69 Copyright (C) 2000 Business Management Systems, Inc.
70<p>
71 This program is free software; you can redistribute it and/or modify
72 it under the terms of the GNU General Public License as published by
73 the Free Software Foundation; either version 1, or (at your option)
74 any later version.
75<p>
76 This program is distributed in the hope that it will be useful,
77 but WITHOUT ANY WARRANTY; without even the implied warranty of
78 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79 GNU General Public License for more details.
80<p>
81 You should have received a copy of the <a href=COPYING.txt>
82 GNU General Public License</a>
83 along with this program; if not, write to the Free Software
84 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85
86 */
87
88public class Diff {
89
90 /** Prepare to find differences between two arrays. Each element of
91 the arrays is translated to an "equivalence number" based on
92 the result of <code>equals</code>. The original Object arrays
93 are no longer needed for computing the differences. They will
94 be needed again later to print the results of the comparison as
95 an edit script, if desired.
96 */
97 public Diff(Object[] a, Object[] b) {
98 Map<Object,Integer> h = new HashMap<>(a.length + b.length);
99 filevec = new FileData[] {new FileData(a,h), new FileData(b,h)};
100 }
101
102 /** 1 more than the maximum equivalence value used for this or its
103 sibling file. */
104 private int equivMax = 1;
105
106 /** When set to true, the comparison uses a heuristic to speed it up.
107 With this heuristic, for files with a constant small density
108 of changes, the algorithm is linear in the file size. */
109 public boolean heuristic = false;
110
111 /** When set to true, the algorithm returns a guarranteed minimal
112 set of changes. This makes things slower, sometimes much slower. */
113 public boolean noDiscards = false;
114
115 private int[] xvec, yvec; /* Vectors being compared. */
116 private int[] fdiag; /* Vector, indexed by diagonal, containing
117 the X coordinate of the point furthest
118 along the given diagonal in the forward
119 search of the edit matrix. */
120 private int[] bdiag; /* Vector, indexed by diagonal, containing
121 the X coordinate of the point furthest
122 along the given diagonal in the backward
123 search of the edit matrix. */
124 private int fdiagoff, bdiagoff;
125 private final FileData[] filevec;
126 private int cost;
127 /** Snakes bigger than this are considered "big". */
128 private static final int SNAKE_LIMIT = 20;
129
130 /** Find the midpoint of the shortest edit script for a specified
131 portion of the two files.
132
133 We scan from the beginnings of the files, and simultaneously from the ends,
134 doing a breadth-first search through the space of edit-sequence.
135 When the two searches meet, we have found the midpoint of the shortest
136 edit sequence.
137
138 The value returned is the number of the diagonal on which the midpoint lies.
139 The diagonal number equals the number of inserted lines minus the number
140 of deleted lines (counting only lines before the midpoint).
141 The edit cost is stored into COST; this is the total number of
142 lines inserted or deleted (counting only lines before the midpoint).
143
144 This function assumes that the first lines of the specified portions
145 of the two files do not match, and likewise that the last lines do not
146 match. The caller must trim matching lines from the beginning and end
147 of the portions it is going to specify.
148
149 Note that if we return the "wrong" diagonal value, or if
150 the value of bdiag at that diagonal is "wrong",
151 the worst this can do is cause suboptimal diff output.
152 It cannot cause incorrect diff output. */
153
154 private int diag(int xoff, int xlim, int yoff, int ylim) {
155 final int[] fd = fdiag; // Give the compiler a chance.
156 final int[] bd = bdiag; // Additional help for the compiler.
157 final int[] xv = xvec; // Still more help for the compiler.
158 final int[] yv = yvec; // And more and more . . .
159 final int dmin = xoff - ylim; // Minimum valid diagonal.
160 final int dmax = xlim - yoff; // Maximum valid diagonal.
161 final int fmid = xoff - yoff; // Center diagonal of top-down search.
162 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
163 int fmin = fmid, fmax = fmid; // Limits of top-down search.
164 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
165 /* True if southeast corner is on an odd
166 diagonal with respect to the northwest. */
167 final boolean odd = (fmid - bmid & 1) != 0;
168
169 fd[fdiagoff + fmid] = xoff;
170 bd[bdiagoff + bmid] = xlim;
171
172 for (int c = 1;; ++c) {
173 int d; /* Active diagonal. */
174 boolean big_snake = false;
175
176 /* Extend the top-down search by an edit step in each diagonal. */
177 if (fmin > dmin) {
178 fd[fdiagoff + --fmin - 1] = -1;
179 } else {
180 ++fmin;
181 }
182 if (fmax < dmax) {
183 fd[fdiagoff + ++fmax + 1] = -1;
184 } else {
185 --fmax;
186 }
187 for (d = fmax; d >= fmin; d -= 2) {
188 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
189
190 if (tlo >= thi) {
191 x = tlo + 1;
192 } else {
193 x = thi;
194 }
195 oldx = x;
196 y = x - d;
197 while (x < xlim && y < ylim && xv[x] == yv[y]) {
198 ++x; ++y;
199 }
200 if (x - oldx > SNAKE_LIMIT) {
201 big_snake = true;
202 }
203 fd[fdiagoff + d] = x;
204 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
205 cost = 2 * c - 1;
206 return d;
207 }
208 }
209
210 /* Similar extend the bottom-up search. */
211 if (bmin > dmin) {
212 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
213 } else {
214 ++bmin;
215 }
216 if (bmax < dmax) {
217 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
218 } else {
219 --bmax;
220 }
221 for (d = bmax; d >= bmin; d -= 2) {
222 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
223
224 if (tlo < thi) {
225 x = tlo;
226 } else {
227 x = thi - 1;
228 }
229 oldx = x;
230 y = x - d;
231 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
232 --x; --y;
233 }
234 if (oldx - x > SNAKE_LIMIT) {
235 big_snake = true;
236 }
237 bd[bdiagoff + d] = x;
238 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
239 cost = 2 * c;
240 return d;
241 }
242 }
243
244 /* Heuristic: check occasionally for a diagonal that has made
245 lots of progress compared with the edit distance.
246 If we have any such, find the one that has made the most
247 progress and return it as if it had succeeded.
248
249 With this heuristic, for files with a constant small density
250 of changes, the algorithm is linear in the file size. */
251
252 if (c > 200 && big_snake && heuristic) {
253 int best = 0;
254 int bestpos = -1;
255
256 for (d = fmax; d >= fmin; d -= 2) {
257 int dd = d - fmid;
258 int x = fd[fdiagoff + d];
259 int y = x - d;
260 int v = (x - xoff) * 2 - dd;
261 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
262 if (v > best
263 && xoff + SNAKE_LIMIT <= x && x < xlim
264 && yoff + SNAKE_LIMIT <= y && y < ylim) {
265 /* We have a good enough best diagonal.
266 now insist that it end with a significant snake. */
267 int k;
268
269 for (k = 1; xvec[x - k] == yvec[y - k]; k++)
270 if (k == SNAKE_LIMIT) {
271 best = v;
272 bestpos = d;
273 break;
274 }
275 }
276 }
277 }
278 if (best > 0) {
279 cost = 2 * c - 1;
280 return bestpos;
281 }
282
283 best = 0;
284 for (d = bmax; d >= bmin; d -= 2) {
285 int dd = d - bmid;
286 int x = bd[bdiagoff + d];
287 int y = x - d;
288 int v = (xlim - x) * 2 + dd;
289 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
290 if (v > best
291 && xoff < x && x <= xlim - SNAKE_LIMIT
292 && yoff < y && y <= ylim - SNAKE_LIMIT) {
293 /* We have a good enough best diagonal.
294 now insist that it end with a significant snake. */
295 int k;
296
297 for (k = 0; xvec[x + k] == yvec[y + k]; k++)
298 if (k == SNAKE_LIMIT) {
299 best = v;
300 bestpos = d;
301 break;
302 }
303 }
304 }
305 }
306 if (best > 0) {
307 cost = 2 * c - 1;
308 return bestpos;
309 }
310 }
311 }
312 }
313
314 /** Compare in detail contiguous subsequences of the two files
315 which are known, as a whole, to match each other.
316
317 The results are recorded in the vectors filevec[N].changed_flag, by
318 storing a 1 in the element for each line that is an insertion or deletion.
319
320 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
321
322 Note that XLIM, YLIM are exclusive bounds.
323 All line numbers are origin-0 and discarded lines are not counted. */
324
325 private void compareseq(int xoff, int xlim, int yoff, int ylim) {
326 /* Slide down the bottom initial diagonal. */
327 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
328 ++xoff; ++yoff;
329 }
330 /* Slide up the top initial diagonal. */
331 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
332 --xlim; --ylim;
333 }
334
335 /* Handle simple cases. */
336 if (xoff == xlim) {
337 while (yoff < ylim) {
338 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
339 }
340 } else if (yoff == ylim) {
341 while (xoff < xlim) {
342 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
343 }
344 } else {
345 /* Find a point of correspondence in the middle of the files. */
346
347 int d = diag(xoff, xlim, yoff, ylim);
348 int c = cost;
349 int b = bdiag[bdiagoff + d];
350
351 if (c == 1)
352 /* This should be impossible, because it implies that
353 one of the two subsequences is empty,
354 and that case was handled above without calling `diag'.
355 Let's verify that this is true. */
356 throw new IllegalArgumentException("Empty subsequence");
357 else {
358 /* Use that point to split this problem into two subproblems. */
359 compareseq(xoff, b, yoff, b - d);
360 /* This used to use f instead of b,
361 but that is incorrect!
362 It is not necessarily the case that diagonal d
363 has a snake from b to f. */
364 compareseq(b, xlim, b - d, ylim);
365 }
366 }
367 }
368
369 /** Discard lines from one file that have no matches in the other file.
370 */
371 private void discard_confusing_lines() {
372 filevec[0].discard_confusing_lines(filevec[1]);
373 filevec[1].discard_confusing_lines(filevec[0]);
374 }
375
376 private boolean inhibit = false;
377
378 /** Adjust inserts/deletes of blank lines to join changes
379 as much as possible.
380 */
381 private void shift_boundaries() {
382 if (inhibit)
383 return;
384 filevec[0].shift_boundaries(filevec[1]);
385 filevec[1].shift_boundaries(filevec[0]);
386 }
387
388 public interface ScriptBuilder {
389 /** Scan the tables of which lines are inserted and deleted,
390 producing an edit script.
391 @param changed0 true for lines in first file which do not match 2nd
392 @param len0 number of lines in first file
393 @param changed1 true for lines in 2nd file which do not match 1st
394 @param len1 number of lines in 2nd file
395 @return a linked list of changes - or null
396 */
397 public Change build_script(
398 boolean[] changed0,int len0,
399 boolean[] changed1,int len1
400 );
401 }
402
403 /** Scan the tables of which lines are inserted and deleted,
404 producing an edit script in reverse order. */
405
406 static class ReverseScript implements ScriptBuilder {
407 @Override
408 public Change build_script(
409 final boolean[] changed0,int len0,
410 final boolean[] changed1,int len1) {
411 Change script = null;
412 int i0 = 0, i1 = 0;
413 while (i0 < len0 || i1 < len1) {
414 if (changed0[1+i0] || changed1[1+i1]) {
415 int line0 = i0, line1 = i1;
416
417 /* Find # lines changed here in each file. */
418 while (changed0[1+i0]) {
419 ++i0;
420 }
421 while (changed1[1+i1]) {
422 ++i1;
423 }
424
425 /* Record this change. */
426 script = new Change(line0, line1, i0 - line0, i1 - line1, script);
427 }
428
429 /* We have reached lines in the two files that match each other. */
430 i0++; i1++;
431 }
432
433 return script;
434 }
435 }
436
437 static class ForwardScript implements ScriptBuilder {
438 /** Scan the tables of which lines are inserted and deleted,
439 producing an edit script in forward order. */
440 @Override
441 public Change build_script(
442 final boolean[] changed0,int len0,
443 final boolean[] changed1,int len1) {
444 Change script = null;
445 int i0 = len0, i1 = len1;
446
447 while (i0 >= 0 || i1 >= 0) {
448 if (changed0[i0] || changed1[i1]) {
449 int line0 = i0, line1 = i1;
450
451 /* Find # lines changed here in each file. */
452 while (changed0[i0]) {
453 --i0;
454 }
455 while (changed1[i1]) {
456 --i1;
457 }
458
459 /* Record this change. */
460 script = new Change(i0, i1, line0 - i0, line1 - i1, script);
461 }
462
463 /* We have reached lines in the two files that match each other. */
464 i0--; i1--;
465 }
466
467 return script;
468 }
469 }
470
471 /** Standard ScriptBuilders. */
472 public static final ScriptBuilder
473 forwardScript = new ForwardScript(),
474 reverseScript = new ReverseScript();
475
476 /** Report the differences of two files. DEPTH is the current directory
477 depth. */
478 public final Change diff_2(final boolean reverse) {
479 return diff(reverse ? reverseScript : forwardScript);
480 }
481
482 /** Get the results of comparison as an edit script. The script
483 is described by a list of changes. The standard ScriptBuilder
484 implementations provide for forward and reverse edit scripts.
485 Alternate implementations could, for instance, list common elements
486 instead of differences.
487 @param bld an object to build the script from change flags
488 @return the head of a list of changes
489 */
490 public Change diff(final ScriptBuilder bld) {
491
492 /* Some lines are obviously insertions or deletions
493 because they don't match anything. Detect them now,
494 and avoid even thinking about them in the main comparison algorithm. */
495
496 discard_confusing_lines();
497
498 /* Now do the main comparison algorithm, considering just the
499 undiscarded lines. */
500
501 xvec = filevec[0].undiscarded;
502 yvec = filevec[1].undiscarded;
503
504 int diags =
505 filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
506 fdiag = new int[diags];
507 fdiagoff = filevec[1].nondiscardedLines + 1;
508 bdiag = new int[diags];
509 bdiagoff = filevec[1].nondiscardedLines + 1;
510
511 compareseq(0, filevec[0].nondiscardedLines,
512 0, filevec[1].nondiscardedLines);
513 fdiag = null;
514 bdiag = null;
515
516 /* Modify the results slightly to make them prettier
517 in cases where that can validly be done. */
518
519 shift_boundaries();
520
521 /* Get the results of comparison in the form of a chain
522 of `struct change's -- an edit script. */
523 return bld.build_script(
524 filevec[0].changedFlag,
525 filevec[0].bufferedLines,
526 filevec[1].changedFlag,
527 filevec[1].bufferedLines
528 );
529
530 }
531
532 /** The result of comparison is an "edit script": a chain of change objects.
533 Each change represents one place where some lines are deleted
534 and some are inserted.
535
536 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
537 DELETED is the number of lines deleted here from file 0.
538 INSERTED is the number of lines inserted here in file 1.
539
540 If DELETED is 0 then LINE0 is the number of the line before
541 which the insertion was done; vice versa for INSERTED and LINE1. */
542
543 public static class Change {
544 /** Previous or next edit command. */
545 public Change link;
546 /** # lines of file 1 changed here. */
547 public final int inserted;
548 /** # lines of file 0 changed here. */
549 public final int deleted;
550 /** Line number of 1st deleted line. */
551 public final int line0;
552 /** Line number of 1st inserted line. */
553 public final int line1;
554
555 /** Cons an additional entry onto the front of an edit script OLD.
556 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
557 DELETED is the number of lines deleted here from file 0.
558 INSERTED is the number of lines inserted here in file 1.
559
560 If DELETED is 0 then LINE0 is the number of the line before
561 which the insertion was done; vice versa for INSERTED and LINE1. */
562 public Change(int line0, int line1, int deleted, int inserted, Change old) {
563 this.line0 = line0;
564 this.line1 = line1;
565 this.inserted = inserted;
566 this.deleted = deleted;
567 this.link = old;
568 }
569
570 @Override
571 public String toString() {
572 String s = String.format("%d -%d +%d %d",line0,deleted,inserted,line1);
573 return (link != null) ? s = s + '\n' + link : s;
574 }
575 }
576
577 /** Data on one input file being compared.
578 */
579
580 class FileData {
581
582 /** Allocate changed array for the results of comparison. */
583 void clear() {
584 /* Allocate a flag for each line of each file, saying whether that line
585 is an insertion or deletion.
586 Allocate an extra element, always zero, at each end of each vector.
587 */
588 changedFlag = new boolean[bufferedLines + 2];
589 }
590
591 /** Return equiv_count[I] as the number of lines in this file
592 that fall in equivalence class I.
593 @return the array of equivalence class counts.
594 */
595 int[] equivCount() {
596 int[] equiv_count = new int[equivMax];
597 for (int i = 0; i < bufferedLines; ++i) {
598 ++equiv_count[equivs[i]];
599 }
600 return equiv_count;
601 }
602
603 /** Discard lines that have no matches in another file.
604
605 A line which is discarded will not be considered by the actual
606 comparison algorithm; it will be as if that line were not in the file.
607 The file's `realindexes' table maps virtual line numbers
608 (which don't count the discarded lines) into real line numbers;
609 this is how the actual comparison algorithm produces results
610 that are comprehensible when the discarded lines are counted.
611<p>
612 When we discard a line, we also mark it as a deletion or insertion
613 so that it will be printed in the output.
614 @param f the other file
615 */
616 void discard_confusing_lines(FileData f) {
617 clear();
618 /* Set up table of which lines are going to be discarded. */
619 final byte[] discarded = discardable(f.equivCount());
620
621 /* Don't really discard the provisional lines except when they occur
622 in a run of discardables, with nonprovisionals at the beginning
623 and end. */
624 filterDiscards(discarded);
625
626 /* Actually discard the lines. */
627 discard(discarded);
628 }
629
630 /**
631 * Mark to be discarded each line that matches no line of another file.
632 * If a line matches many lines, mark it as provisionally discardable.
633 * @param counts The count of each equivalence number for the other file.
634 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
635 * @see #equivCount()
636 */
637 private byte[] discardable(final int[] counts) {
638 final int end = bufferedLines;
639 final byte[] discards = new byte[end];
640 final int[] equivs = this.equivs;
641 int many = 5;
642 int tem = end / 64;
643
644 /* Multiply MANY by approximate square root of number of lines.
645 That is the threshold for provisionally discardable lines. */
646 while ((tem = tem >> 2) > 0) {
647 many *= 2;
648 }
649
650 for (int i = 0; i < end; i++) {
651 int nmatch;
652 if (equivs[i] == 0) {
653 continue;
654 }
655 nmatch = counts[equivs[i]];
656 if (nmatch == 0) {
657 discards[i] = 1;
658 } else if (nmatch > many) {
659 discards[i] = 2;
660 }
661 }
662 return discards;
663 }
664
665 /**
666 * Don't really discard the provisional lines except when they occur
667 * in a run of discardables, with nonprovisionals at the beginning
668 * and end.
669 */
670 private void filterDiscards(final byte[] discards) {
671 final int end = bufferedLines;
672
673 for (int i = 0; i < end; i++) {
674 /* Cancel provisional discards not in middle of run of discards. */
675 if (discards[i] == 2) {
676 discards[i] = 0;
677 } else if (discards[i] != 0) {
678 /* We have found a nonprovisional discard. */
679 int j;
680 int length;
681 int provisional = 0;
682
683 /* Find end of this run of discardable lines.
684 Count how many are provisionally discardable. */
685 for (j = i; j < end; j++) {
686 if (discards[j] == 0) {
687 break;
688 }
689 if (discards[j] == 2) {
690 ++provisional;
691 }
692 }
693
694 /* Cancel provisional discards at end, and shrink the run. */
695 while (j > i && discards[j - 1] == 2) {
696 discards[--j] = 0; --provisional;
697 }
698
699 /* Now we have the length of a run of discardable lines
700 whose first and last are not provisional. */
701 length = j - i;
702
703 /* If 1/4 of the lines in the run are provisional,
704 cancel discarding of all provisional lines in the run. */
705 if (provisional * 4 > length) {
706 while (j > i)
707 if (discards[--j] == 2) {
708 discards[j] = 0;
709 }
710 } else {
711 int consec;
712 int minimum = 1;
713 int tem = length / 4;
714
715 /* MINIMUM is approximate square root of LENGTH/4.
716 A subrun of two or more provisionals can stand
717 when LENGTH is at least 16.
718 A subrun of 4 or more can stand when LENGTH >= 64. */
719 while ((tem = tem >> 2) > 0) {
720 minimum *= 2;
721 }
722 minimum++;
723
724 /* Cancel any subrun of MINIMUM or more provisionals
725 within the larger run. */
726 for (j = 0, consec = 0; j < length; j++)
727 if (discards[i + j] != 2) {
728 consec = 0;
729 } else if (minimum == ++consec) {
730 /* Back up to start of subrun, to cancel it all. */
731 j -= consec;
732 } else if (minimum < consec) {
733 discards[i + j] = 0;
734 }
735
736 /* Scan from beginning of run
737 until we find 3 or more nonprovisionals in a row
738 or until the first nonprovisional at least 8 lines in.
739 Until that point, cancel any provisionals. */
740 for (j = 0, consec = 0; j < length; j++) {
741 if (j >= 8 && discards[i + j] == 1) {
742 break;
743 }
744 if (discards[i + j] == 2) {
745 consec = 0; discards[i + j] = 0;
746 } else if (discards[i + j] == 0) {
747 consec = 0;
748 } else {
749 consec++;
750 }
751 if (consec == 3) {
752 break;
753 }
754 }
755
756 /* I advances to the last line of the run. */
757 i += length - 1;
758
759 /* Same thing, from end. */
760 for (j = 0, consec = 0; j < length; j++) {
761 if (j >= 8 && discards[i - j] == 1) {
762 break;
763 }
764 if (discards[i - j] == 2) {
765 consec = 0; discards[i - j] = 0;
766 } else if (discards[i - j] == 0) {
767 consec = 0;
768 } else {
769 consec++;
770 }
771 if (consec == 3) {
772 break;
773 }
774 }
775 }
776 }
777 }
778 }
779
780 /** Actually discard the lines.
781 @param discards flags lines to be discarded
782 */
783 private void discard(final byte[] discards) {
784 final int end = bufferedLines;
785 int j = 0;
786 for (int i = 0; i < end; ++i)
787 if (noDiscards || discards[i] == 0) {
788 undiscarded[j] = equivs[i];
789 realindexes[j++] = i;
790 } else {
791 changedFlag[1+i] = true;
792 }
793 nondiscardedLines = j;
794 }
795
796 FileData(int length) {
797 bufferedLines = length;
798 equivs = new int[length];
799 undiscarded = new int[bufferedLines];
800 realindexes = new int[bufferedLines];
801 }
802
803 FileData(Object[] data, Map<Object,Integer> h) {
804 this(data.length);
805 // FIXME: diff 2.7 removes common prefix and common suffix
806 for (int i = 0; i < data.length; ++i) {
807 Integer ir = h.get(data[i]);
808 if (ir == null) {
809 h.put(data[i],equivs[i] = equivMax++);
810 } else {
811 equivs[i] = ir.intValue();
812 }
813 }
814 }
815
816 /** Adjust inserts/deletes of blank lines to join changes
817 as much as possible.
818
819 We do something when a run of changed lines include a blank
820 line at one end and have an excluded blank line at the other.
821 We are free to choose which blank line is included.
822 `compareseq' always chooses the one at the beginning,
823 but usually it is cleaner to consider the following blank line
824 to be the "change". The only exception is if the preceding blank line
825 would join this change to other changes.
826 @param f the file being compared against
827 */
828 void shift_boundaries(FileData f) {
829 final boolean[] changed = changedFlag;
830 final boolean[] other_changed = f.changedFlag;
831 int i = 0;
832 int j = 0;
833 int i_end = bufferedLines;
834 int preceding = -1;
835 int other_preceding = -1;
836
837 for (;;) {
838 int start, end, other_start;
839
840 /* Scan forwards to find beginning of another run of changes.
841 Also keep track of the corresponding point in the other file. */
842
843 while (i < i_end && !changed[1+i]) {
844 while (other_changed[1+j++]) {
845 /* Non-corresponding lines in the other file
846 will count as the preceding batch of changes. */
847 other_preceding = j;
848 }
849 i++;
850 }
851
852 if (i == i_end) {
853 break;
854 }
855
856 start = i;
857 other_start = j;
858
859 for (;;) {
860 /* Now find the end of this run of changes. */
861
862 while (i < i_end && changed[1+i]) {
863 i++;
864 }
865 end = i;
866
867 /* If the first changed line matches the following unchanged one,
868 and this run does not follow right after a previous run,
869 and there are no lines deleted from the other file here,
870 then classify the first changed line as unchanged
871 and the following line as changed in its place. */
872
873 /* You might ask, how could this run follow right after another?
874 Only because the previous run was shifted here. */
875
876 if (end != i_end && equivs[start] == equivs[end] && !other_changed[1+j]
877 && !((preceding >= 0 && start == preceding) || (other_preceding >= 0 && other_start == other_preceding))) {
878 changed[1+end++] = true;
879 changed[1+start++] = false;
880 ++i;
881 /* Since one line-that-matches is now before this run
882 instead of after, we must advance in the other file
883 to keep in synch. */
884 ++j;
885 } else {
886 break;
887 }
888 }
889
890 preceding = i;
891 other_preceding = j;
892 }
893 }
894
895 /** Number of elements (lines) in this file. */
896 private final int bufferedLines;
897
898 /** Vector, indexed by line number, containing an equivalence code for
899 each line. It is this vector that is actually compared with that
900 of another file to generate differences. */
901 private final int[] equivs;
902
903 /** Vector, like the previous one except that
904 the elements for discarded lines have been squeezed out. */
905 private final int[] undiscarded;
906
907 /** Vector mapping virtual line numbers (not counting discarded lines)
908 to real ones (counting those lines). Both are origin-0. */
909 private final int[] realindexes;
910
911 /** Total number of nondiscarded lines. */
912 private int nondiscardedLines;
913
914 /** Array, indexed by real origin-1 line number,
915 containing true for a line that is an insertion or a deletion.
916 The results of comparison are stored here. */
917 private boolean[] changedFlag;
918 }
919}
Note: See TracBrowser for help on using the repository browser.