source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 10302

Last change on this file since 10302 was 10246, checked in by Don-vip, 8 years ago

findbugs - UWF_UNWRITTEN_PUBLIC_OR_PROTECTED_FIELD - remove unused code from Diff

  • Property svn:eol-style set to native
File size: 33.4 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.15 2013/04/01 16:27:31 stuart
11 * Fix DiffPrint unified format with test cases.
12 * Begin porting some diff-2.7 features.
13 *
14 * Revision 1.14 2010/03/03 21:21:25 stuart
15 * Test new direct equivalence API
16 *
17 * Revision 1.13 2009/12/07 17:43:17 stuart
18 * Compute equivMax for int[] ctor
19 *
20 * Revision 1.12 2009/12/07 17:34:46 stuart
21 * Ctor with int[].
22 *
23 * Revision 1.11 2009/11/15 01:11:54 stuart
24 * Diff doesn't need to be generic
25 *
26 * Revision 1.10 2009/11/15 00:54:03 stuart
27 * Update to Java 5 containers
28 *
29 * Revision 1.7 2009/01/19 03:05:26 stuart
30 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31 *
32 * Revision 1.6 2003/03/06 22:51:32 stuart
33 * Convert to CVS
34 *
35 * Revision 1.5 2002/07/19 19:14:40 stuart
36 * fix reverseScript, make change ctor public, update docs
37 *
38 * Revision 1.4 2002/04/09 17:53:39 stuart
39 * More flexible interface for diff() function.
40 *
41 * Revision 1.3 2000/03/03 21:58:03 stuart
42 * move discard_confusing_lines and shift_boundaries to class file_data
43 *
44 * Revision 1.2 2000/03/02 16:37:38 stuart
45 * Add GPL and copyright
46 *
47 */
48
49import java.util.HashMap;
50import java.util.Map;
51
52/** A class to compare vectors of objects. The result of comparison
53 is a list of <code>change</code> objects which form an
54 edit script. The objects compared are traditionally lines
55 of text from two files. Comparison options such as "ignore
56 whitespace" are implemented by modifying the <code>equals</code>
57 and <code>hashcode</code> methods for the objects compared.
58<p>
59 The basic algorithm is described in: <br>
60 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61 Algorithmica Vol. 1 No. 2, 1986, p 251.
62<p>
63 This class outputs different results from GNU diff 1.15 on some
64 inputs. Our results are actually better (smaller change list, smaller
65 total size of changes), but it would be nice to know why. Perhaps
66 there is a memory overwrite bug in GNU diff 1.15.
67
68 @author Stuart D. Gathman, translated from GNU diff 1.15
69 Copyright (C) 2000 Business Management Systems, Inc.
70<p>
71 This program is free software; you can redistribute it and/or modify
72 it under the terms of the GNU General Public License as published by
73 the Free Software Foundation; either version 1, or (at your option)
74 any later version.
75<p>
76 This program is distributed in the hope that it will be useful,
77 but WITHOUT ANY WARRANTY; without even the implied warranty of
78 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79 GNU General Public License for more details.
80<p>
81 You should have received a copy of the <a href=COPYING.txt>
82 GNU General Public License</a>
83 along with this program; if not, write to the Free Software
84 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85 */
86public class Diff {
87
88 /** Prepare to find differences between two arrays. Each element of
89 the arrays is translated to an "equivalence number" based on
90 the result of <code>equals</code>. The original Object arrays
91 are no longer needed for computing the differences. They will
92 be needed again later to print the results of the comparison as
93 an edit script, if desired.
94 * @param a first array
95 * @param b second array
96 */
97 public Diff(Object[] a, Object[] b) {
98 Map<Object, Integer> h = new HashMap<>(a.length + b.length);
99 filevec = new FileData[] {new FileData(a, h), new FileData(b, h)};
100 }
101
102 /** 1 more than the maximum equivalence value used for this or its
103 sibling file. */
104 private int equivMax = 1;
105
106 private int[] xvec, yvec; /* Vectors being compared. */
107 private int[] fdiag; /* Vector, indexed by diagonal, containing
108 the X coordinate of the point furthest
109 along the given diagonal in the forward
110 search of the edit matrix. */
111 private int[] bdiag; /* Vector, indexed by diagonal, containing
112 the X coordinate of the point furthest
113 along the given diagonal in the backward
114 search of the edit matrix. */
115 private int fdiagoff, bdiagoff;
116 private final FileData[] filevec;
117 private int cost;
118 /** Snakes bigger than this are considered "big". */
119 private static final int SNAKE_LIMIT = 20;
120
121 /**
122 * Find the midpoint of the shortest edit script for a specified
123 * portion of the two files.
124 *
125 * We scan from the beginnings of the files, and simultaneously from the ends,
126 * doing a breadth-first search through the space of edit-sequence.
127 * When the two searches meet, we have found the midpoint of the shortest
128 * edit sequence.
129 *
130 * The value returned is the number of the diagonal on which the midpoint lies.
131 * The diagonal number equals the number of inserted lines minus the number
132 * of deleted lines (counting only lines before the midpoint).
133 * The edit cost is stored into COST; this is the total number of
134 * lines inserted or deleted (counting only lines before the midpoint).
135 *
136 * This function assumes that the first lines of the specified portions
137 * of the two files do not match, and likewise that the last lines do not
138 * match. The caller must trim matching lines from the beginning and end
139 * of the portions it is going to specify.
140 *
141 * Note that if we return the "wrong" diagonal value, or if
142 * the value of bdiag at that diagonal is "wrong",
143 * the worst this can do is cause suboptimal diff output.
144 * It cannot cause incorrect diff output.
145 * @param xoff xoff
146 * @param xlim xlim
147 * @param yoff yoff
148 * @param ylim ylim
149 * @return midpoint of the shortest edit script
150 */
151 private int diag(int xoff, int xlim, int yoff, int ylim) {
152 final int[] fd = fdiag; // Give the compiler a chance.
153 final int[] bd = bdiag; // Additional help for the compiler.
154 final int[] xv = xvec; // Still more help for the compiler.
155 final int[] yv = yvec; // And more and more . . .
156 final int dmin = xoff - ylim; // Minimum valid diagonal.
157 final int dmax = xlim - yoff; // Maximum valid diagonal.
158 final int fmid = xoff - yoff; // Center diagonal of top-down search.
159 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
160 int fmin = fmid, fmax = fmid; // Limits of top-down search.
161 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
162 // True if southeast corner is on an odd diagonal with respect to the northwest.
163 final boolean odd = (fmid - bmid & 1) != 0;
164
165 fd[fdiagoff + fmid] = xoff;
166 bd[bdiagoff + bmid] = xlim;
167
168 for (int c = 1;; ++c) {
169 int d; /* Active diagonal. */
170 boolean bigSnake = false;
171
172 /* Extend the top-down search by an edit step in each diagonal. */
173 if (fmin > dmin) {
174 fd[fdiagoff + --fmin - 1] = -1;
175 } else {
176 ++fmin;
177 }
178 if (fmax < dmax) {
179 fd[fdiagoff + ++fmax + 1] = -1;
180 } else {
181 --fmax;
182 }
183 for (d = fmax; d >= fmin; d -= 2) {
184 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
185
186 if (tlo >= thi) {
187 x = tlo + 1;
188 } else {
189 x = thi;
190 }
191 oldx = x;
192 y = x - d;
193 while (x < xlim && y < ylim && xv[x] == yv[y]) {
194 ++x; ++y;
195 }
196 if (x - oldx > SNAKE_LIMIT) {
197 bigSnake = true;
198 }
199 fd[fdiagoff + d] = x;
200 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
201 cost = 2 * c - 1;
202 return d;
203 }
204 }
205
206 /* Similar extend the bottom-up search. */
207 if (bmin > dmin) {
208 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
209 } else {
210 ++bmin;
211 }
212 if (bmax < dmax) {
213 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
214 } else {
215 --bmax;
216 }
217 for (d = bmax; d >= bmin; d -= 2) {
218 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
219
220 if (tlo < thi) {
221 x = tlo;
222 } else {
223 x = thi - 1;
224 }
225 oldx = x;
226 y = x - d;
227 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
228 --x; --y;
229 }
230 if (oldx - x > SNAKE_LIMIT) {
231 bigSnake = true;
232 }
233 bd[bdiagoff + d] = x;
234 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
235 cost = 2 * c;
236 return d;
237 }
238 }
239 }
240 }
241
242 /**
243 * Compare in detail contiguous subsequences of the two files
244 * which are known, as a whole, to match each other.
245 *
246 * The results are recorded in the vectors filevec[N].changed_flag, by
247 * storing a 1 in the element for each line that is an insertion or deletion.
248 *
249 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
250 *
251 * Note that XLIM, YLIM are exclusive bounds.
252 * All line numbers are origin-0 and discarded lines are not counted.
253 * @param xoff xoff
254 * @param xlim xlim
255 * @param yoff yoff
256 * @param ylim ylim
257 */
258 private void compareseq(int xoff, int xlim, int yoff, int ylim) {
259 /* Slide down the bottom initial diagonal. */
260 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
261 ++xoff; ++yoff;
262 }
263 /* Slide up the top initial diagonal. */
264 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
265 --xlim; --ylim;
266 }
267
268 /* Handle simple cases. */
269 if (xoff == xlim) {
270 while (yoff < ylim) {
271 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
272 }
273 } else if (yoff == ylim) {
274 while (xoff < xlim) {
275 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
276 }
277 } else {
278 /* Find a point of correspondence in the middle of the files. */
279
280 int d = diag(xoff, xlim, yoff, ylim);
281 int c = cost;
282 int b = bdiag[bdiagoff + d];
283
284 if (c == 1)
285 /* This should be impossible, because it implies that
286 one of the two subsequences is empty,
287 and that case was handled above without calling `diag'.
288 Let's verify that this is true. */
289 throw new IllegalArgumentException("Empty subsequence");
290 else {
291 /* Use that point to split this problem into two subproblems. */
292 compareseq(xoff, b, yoff, b - d);
293 /* This used to use f instead of b,
294 but that is incorrect!
295 It is not necessarily the case that diagonal d
296 has a snake from b to f. */
297 compareseq(b, xlim, b - d, ylim);
298 }
299 }
300 }
301
302 /** Discard lines from one file that have no matches in the other file.
303 */
304 private void discard_confusing_lines() {
305 filevec[0].discard_confusing_lines(filevec[1]);
306 filevec[1].discard_confusing_lines(filevec[0]);
307 }
308
309 /**
310 * Adjust inserts/deletes of blank lines to join changes as much as possible.
311 */
312 private void shift_boundaries() {
313 filevec[0].shift_boundaries(filevec[1]);
314 filevec[1].shift_boundaries(filevec[0]);
315 }
316
317 /**
318 * Script builder.
319 */
320 public interface ScriptBuilder {
321 /**
322 * Scan the tables of which lines are inserted and deleted, producing an edit script.
323 * @param changed0 true for lines in first file which do not match 2nd
324 * @param len0 number of lines in first file
325 * @param changed1 true for lines in 2nd file which do not match 1st
326 * @param len1 number of lines in 2nd file
327 * @return a linked list of changes - or null
328 */
329 Change build_script(
330 boolean[] changed0, int len0,
331 boolean[] changed1, int len1
332 );
333 }
334
335 /** Scan the tables of which lines are inserted and deleted,
336 producing an edit script in reverse order. */
337
338 static class ReverseScript implements ScriptBuilder {
339 @Override
340 public Change build_script(
341 final boolean[] changed0, int len0,
342 final boolean[] changed1, int len1) {
343 Change script = null;
344 int i0 = 0, i1 = 0;
345 while (i0 < len0 || i1 < len1) {
346 if (changed0[1+i0] || changed1[1+i1]) {
347 int line0 = i0, line1 = i1;
348
349 /* Find # lines changed here in each file. */
350 while (changed0[1+i0]) {
351 ++i0;
352 }
353 while (changed1[1+i1]) {
354 ++i1;
355 }
356
357 /* Record this change. */
358 script = new Change(line0, line1, i0 - line0, i1 - line1, script);
359 }
360
361 /* We have reached lines in the two files that match each other. */
362 i0++; i1++;
363 }
364
365 return script;
366 }
367 }
368
369 static class ForwardScript implements ScriptBuilder {
370 /** Scan the tables of which lines are inserted and deleted,
371 producing an edit script in forward order. */
372 @Override
373 public Change build_script(
374 final boolean[] changed0, int len0,
375 final boolean[] changed1, int len1) {
376 Change script = null;
377 int i0 = len0, i1 = len1;
378
379 while (i0 >= 0 || i1 >= 0) {
380 if (changed0[i0] || changed1[i1]) {
381 int line0 = i0, line1 = i1;
382
383 /* Find # lines changed here in each file. */
384 while (changed0[i0]) {
385 --i0;
386 }
387 while (changed1[i1]) {
388 --i1;
389 }
390
391 /* Record this change. */
392 script = new Change(i0, i1, line0 - i0, line1 - i1, script);
393 }
394
395 /* We have reached lines in the two files that match each other. */
396 i0--; i1--;
397 }
398
399 return script;
400 }
401 }
402
403 /** Standard Forward ScriptBuilder. */
404 public static final ScriptBuilder forwardScript = new ForwardScript();
405 /** Standard Reverse ScriptBuilder. */
406 public static final ScriptBuilder reverseScript = new ReverseScript();
407
408 /**
409 * Report the differences of two files. DEPTH is the current directory depth.
410 * @param reverse if {@code true} use {@link #reverseScript} else use {@link #forwardScript}
411 * @return the differences of two files
412 */
413 public final Change diff_2(final boolean reverse) {
414 return diff(reverse ? reverseScript : forwardScript);
415 }
416
417 /**
418 * Get the results of comparison as an edit script. The script
419 * is described by a list of changes. The standard ScriptBuilder
420 * implementations provide for forward and reverse edit scripts.
421 * Alternate implementations could, for instance, list common elements
422 * instead of differences.
423 * @param bld an object to build the script from change flags
424 * @return the head of a list of changes
425 */
426 public Change diff(final ScriptBuilder bld) {
427
428 // Some lines are obviously insertions or deletions because they don't match anything.
429 // Detect them now, and avoid even thinking about them in the main comparison algorithm.
430 discard_confusing_lines();
431
432 // Now do the main comparison algorithm, considering just the undiscarded lines.
433 xvec = filevec[0].undiscarded;
434 yvec = filevec[1].undiscarded;
435
436 int diags = filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
437 fdiag = new int[diags];
438 fdiagoff = filevec[1].nondiscardedLines + 1;
439 bdiag = new int[diags];
440 bdiagoff = filevec[1].nondiscardedLines + 1;
441
442 compareseq(0, filevec[0].nondiscardedLines,
443 0, filevec[1].nondiscardedLines);
444 fdiag = null;
445 bdiag = null;
446
447 // Modify the results slightly to make them prettier in cases where that can validly be done.
448 shift_boundaries();
449
450 // Get the results of comparison in the form of a chain of `struct change's -- an edit script.
451 return bld.build_script(
452 filevec[0].changedFlag,
453 filevec[0].bufferedLines,
454 filevec[1].changedFlag,
455 filevec[1].bufferedLines
456 );
457 }
458
459 /** The result of comparison is an "edit script": a chain of change objects.
460 Each change represents one place where some lines are deleted
461 and some are inserted.
462
463 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
464 DELETED is the number of lines deleted here from file 0.
465 INSERTED is the number of lines inserted here in file 1.
466
467 If DELETED is 0 then LINE0 is the number of the line before
468 which the insertion was done; vice versa for INSERTED and LINE1. */
469
470 public static class Change {
471 /** Previous or next edit command. */
472 public Change link;
473 /** # lines of file 1 changed here. */
474 public final int inserted;
475 /** # lines of file 0 changed here. */
476 public final int deleted;
477 /** Line number of 1st deleted line. */
478 public final int line0;
479 /** Line number of 1st inserted line. */
480 public final int line1;
481
482 /**
483 * Cons an additional entry onto the front of an edit script OLD.
484 * LINE0 and LINE1 are the first affected lines in the two files (origin 0).
485 * DELETED is the number of lines deleted here from file 0.
486 * INSERTED is the number of lines inserted here in file 1.
487 *
488 * If DELETED is 0 then LINE0 is the number of the line before
489 * which the insertion was done; vice versa for INSERTED and LINE1.
490 * @param line0 first affected lines in the two files (origin 0)
491 * @param line1 first affected lines in the two files (origin 0)
492 * @param deleted the number of lines deleted here from file 0
493 * @param inserted the number of lines inserted here in file 1
494 * @param old edit script
495 */
496 public Change(int line0, int line1, int deleted, int inserted, Change old) {
497 this.line0 = line0;
498 this.line1 = line1;
499 this.inserted = inserted;
500 this.deleted = deleted;
501 this.link = old;
502 }
503
504 /**
505 * Returns the number of insertions and deletions of this change as well as
506 * (recursively) the changes linked via {@link #link}.
507 * @return recursive number of insertions and deletions
508 */
509 public int getTotalNumberOfChanges() {
510 return inserted + deleted + (link != null ? link.getTotalNumberOfChanges() : 0);
511 }
512
513 @Override
514 public String toString() {
515 String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1);
516 return (link != null) ? s + '\n' + link : s;
517 }
518 }
519
520 /**
521 * Data on one input file being compared.
522 */
523 class FileData {
524
525 /** Allocate changed array for the results of comparison. */
526 void clear() {
527 // Allocate a flag for each line of each file, saying whether that line is an insertion or deletion.
528 // Allocate an extra element, always zero, at each end of each vector.
529 changedFlag = new boolean[bufferedLines + 2];
530 }
531
532 /**
533 * Return equiv_count[I] as the number of lines in this file that fall in equivalence class I.
534 * @return the array of equivalence class counts.
535 */
536 int[] equivCount() {
537 int[] equivCount = new int[equivMax];
538 for (int i = 0; i < bufferedLines; ++i) {
539 ++equivCount[equivs[i]];
540 }
541 return equivCount;
542 }
543
544 /**
545 * Discard lines that have no matches in another file.
546 *
547 * A line which is discarded will not be considered by the actual comparison algorithm;
548 * it will be as if that line were not in the file.
549 * The file's `realindexes' table maps virtual line numbers
550 * (which don't count the discarded lines) into real line numbers;
551 * this is how the actual comparison algorithm produces results
552 * that are comprehensible when the discarded lines are counted.
553 * <p>
554 * When we discard a line, we also mark it as a deletion or insertion so that it will be printed in the output.
555 * @param f the other file
556 */
557 void discard_confusing_lines(FileData f) {
558 clear();
559 // Set up table of which lines are going to be discarded.
560 final byte[] discarded = discardable(f.equivCount());
561
562 // Don't really discard the provisional lines except when they occur in a run of discardables,
563 // with nonprovisionals at the beginning and end.
564 filterDiscards(discarded);
565
566 // Actually discard the lines.
567 discard(discarded);
568 }
569
570 /**
571 * Mark to be discarded each line that matches no line of another file.
572 * If a line matches many lines, mark it as provisionally discardable.
573 * @param counts The count of each equivalence number for the other file.
574 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
575 * @see #equivCount()
576 */
577 private byte[] discardable(final int[] counts) {
578 final int end = bufferedLines;
579 final byte[] discards = new byte[end];
580 final int[] equivs = this.equivs;
581 int many = 5;
582 int tem = end / 64;
583
584 /* Multiply MANY by approximate square root of number of lines.
585 That is the threshold for provisionally discardable lines. */
586 while ((tem = tem >> 2) > 0) {
587 many *= 2;
588 }
589
590 for (int i = 0; i < end; i++) {
591 int nmatch;
592 if (equivs[i] == 0) {
593 continue;
594 }
595 nmatch = counts[equivs[i]];
596 if (nmatch == 0) {
597 discards[i] = 1;
598 } else if (nmatch > many) {
599 discards[i] = 2;
600 }
601 }
602 return discards;
603 }
604
605 /**
606 * Don't really discard the provisional lines except when they occur
607 * in a run of discardables, with nonprovisionals at the beginning and end.
608 * @param discards discards
609 */
610 private void filterDiscards(final byte[] discards) {
611 final int end = bufferedLines;
612
613 for (int i = 0; i < end; i++) {
614 /* Cancel provisional discards not in middle of run of discards. */
615 if (discards[i] == 2) {
616 discards[i] = 0;
617 } else if (discards[i] != 0) {
618 /* We have found a nonprovisional discard. */
619 int j;
620 int length;
621 int provisional = 0;
622
623 /* Find end of this run of discardable lines.
624 Count how many are provisionally discardable. */
625 for (j = i; j < end; j++) {
626 if (discards[j] == 0) {
627 break;
628 }
629 if (discards[j] == 2) {
630 ++provisional;
631 }
632 }
633
634 /* Cancel provisional discards at end, and shrink the run. */
635 while (j > i && discards[j - 1] == 2) {
636 discards[--j] = 0; --provisional;
637 }
638
639 /* Now we have the length of a run of discardable lines
640 whose first and last are not provisional. */
641 length = j - i;
642
643 /* If 1/4 of the lines in the run are provisional,
644 cancel discarding of all provisional lines in the run. */
645 if (provisional * 4 > length) {
646 while (j > i)
647 if (discards[--j] == 2) {
648 discards[j] = 0;
649 }
650 } else {
651 int consec;
652 int minimum = 1;
653 int tem = length / 4;
654
655 /* MINIMUM is approximate square root of LENGTH/4.
656 A subrun of two or more provisionals can stand
657 when LENGTH is at least 16.
658 A subrun of 4 or more can stand when LENGTH >= 64. */
659 while ((tem = tem >> 2) > 0) {
660 minimum *= 2;
661 }
662 minimum++;
663
664 /* Cancel any subrun of MINIMUM or more provisionals
665 within the larger run. */
666 for (j = 0, consec = 0; j < length; j++) {
667 if (discards[i + j] != 2) {
668 consec = 0;
669 } else if (minimum == ++consec) {
670 /* Back up to start of subrun, to cancel it all. */
671 j -= consec;
672 } else if (minimum < consec) {
673 discards[i + j] = 0;
674 }
675 }
676
677 /* Scan from beginning of run
678 until we find 3 or more nonprovisionals in a row
679 or until the first nonprovisional at least 8 lines in.
680 Until that point, cancel any provisionals. */
681 for (j = 0, consec = 0; j < length; j++) {
682 if (j >= 8 && discards[i + j] == 1) {
683 break;
684 }
685 if (discards[i + j] == 2) {
686 consec = 0; discards[i + j] = 0;
687 } else if (discards[i + j] == 0) {
688 consec = 0;
689 } else {
690 consec++;
691 }
692 if (consec == 3) {
693 break;
694 }
695 }
696
697 /* I advances to the last line of the run. */
698 i += length - 1;
699
700 /* Same thing, from end. */
701 for (j = 0, consec = 0; j < length; j++) {
702 if (j >= 8 && discards[i - j] == 1) {
703 break;
704 }
705 if (discards[i - j] == 2) {
706 consec = 0; discards[i - j] = 0;
707 } else if (discards[i - j] == 0) {
708 consec = 0;
709 } else {
710 consec++;
711 }
712 if (consec == 3) {
713 break;
714 }
715 }
716 }
717 }
718 }
719 }
720
721 /** Actually discard the lines.
722 @param discards flags lines to be discarded
723 */
724 private void discard(final byte[] discards) {
725 final int end = bufferedLines;
726 int j = 0;
727 for (int i = 0; i < end; ++i) {
728 if (discards[i] == 0) {
729 undiscarded[j] = equivs[i];
730 realindexes[j++] = i;
731 } else {
732 changedFlag[1+i] = true;
733 }
734 }
735 nondiscardedLines = j;
736 }
737
738 FileData(int length) {
739 bufferedLines = length;
740 equivs = new int[length];
741 undiscarded = new int[bufferedLines];
742 realindexes = new int[bufferedLines];
743 }
744
745 FileData(Object[] data, Map<Object, Integer> h) {
746 this(data.length);
747 // FIXME: diff 2.7 removes common prefix and common suffix
748 for (int i = 0; i < data.length; ++i) {
749 Integer ir = h.get(data[i]);
750 if (ir == null) {
751 equivs[i] = equivMax++;
752 h.put(data[i], equivs[i]);
753 } else {
754 equivs[i] = ir.intValue();
755 }
756 }
757 }
758
759 /** Adjust inserts/deletes of blank lines to join changes
760 as much as possible.
761
762 We do something when a run of changed lines include a blank
763 line at one end and have an excluded blank line at the other.
764 We are free to choose which blank line is included.
765 `compareseq' always chooses the one at the beginning,
766 but usually it is cleaner to consider the following blank line
767 to be the "change". The only exception is if the preceding blank line
768 would join this change to other changes.
769 @param f the file being compared against
770 */
771 void shift_boundaries(FileData f) {
772 final boolean[] changed = changedFlag;
773 final boolean[] otherChanged = f.changedFlag;
774 int i = 0;
775 int j = 0;
776 int iEnd = bufferedLines;
777 int preceding = -1;
778 int otherPreceding = -1;
779
780 for (;;) {
781 int start, end, otherStart;
782
783 /* Scan forwards to find beginning of another run of changes.
784 Also keep track of the corresponding point in the other file. */
785
786 while (i < iEnd && !changed[1+i]) {
787 while (otherChanged[1+j++]) {
788 /* Non-corresponding lines in the other file
789 will count as the preceding batch of changes. */
790 otherPreceding = j;
791 }
792 i++;
793 }
794
795 if (i == iEnd) {
796 break;
797 }
798
799 start = i;
800 otherStart = j;
801
802 for (;;) {
803 /* Now find the end of this run of changes. */
804
805 while (i < iEnd && changed[1+i]) {
806 i++;
807 }
808 end = i;
809
810 /* If the first changed line matches the following unchanged one,
811 and this run does not follow right after a previous run,
812 and there are no lines deleted from the other file here,
813 then classify the first changed line as unchanged
814 and the following line as changed in its place. */
815
816 /* You might ask, how could this run follow right after another?
817 Only because the previous run was shifted here. */
818
819 if (end != iEnd && equivs[start] == equivs[end] && !otherChanged[1+j]
820 && !((preceding >= 0 && start == preceding) || (otherPreceding >= 0 && otherStart == otherPreceding))) {
821 changed[1+end++] = true;
822 changed[1+start++] = false;
823 ++i;
824 /* Since one line-that-matches is now before this run
825 instead of after, we must advance in the other file
826 to keep in synch. */
827 ++j;
828 } else {
829 break;
830 }
831 }
832
833 preceding = i;
834 otherPreceding = j;
835 }
836 }
837
838 /** Number of elements (lines) in this file. */
839 private final int bufferedLines;
840
841 /** Vector, indexed by line number, containing an equivalence code for
842 each line. It is this vector that is actually compared with that
843 of another file to generate differences. */
844 private final int[] equivs;
845
846 /** Vector, like the previous one except that
847 the elements for discarded lines have been squeezed out. */
848 private final int[] undiscarded;
849
850 /** Vector mapping virtual line numbers (not counting discarded lines)
851 to real ones (counting those lines). Both are origin-0. */
852 private final int[] realindexes;
853
854 /** Total number of nondiscarded lines. */
855 private int nondiscardedLines;
856
857 /** Array, indexed by real origin-1 line number,
858 containing true for a line that is an insertion or a deletion.
859 The results of comparison are stored here. */
860 private boolean[] changedFlag;
861 }
862}
Note: See TracBrowser for help on using the repository browser.