source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 10644

Last change on this file since 10644 was 10600, checked in by Don-vip, 8 years ago

see #11390 - sonar - squid:S1609 - Java 8: @FunctionalInterface annotation should be used to flag Single Abstract Method interfaces

  • Property svn:eol-style set to native
File size: 33.0 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.15 2013/04/01 16:27:31 stuart
11 * Fix DiffPrint unified format with test cases.
12 * Begin porting some diff-2.7 features.
13 *
14 * Revision 1.14 2010/03/03 21:21:25 stuart
15 * Test new direct equivalence API
16 *
17 * Revision 1.13 2009/12/07 17:43:17 stuart
18 * Compute equivMax for int[] ctor
19 *
20 * Revision 1.12 2009/12/07 17:34:46 stuart
21 * Ctor with int[].
22 *
23 * Revision 1.11 2009/11/15 01:11:54 stuart
24 * Diff doesn't need to be generic
25 *
26 * Revision 1.10 2009/11/15 00:54:03 stuart
27 * Update to Java 5 containers
28 *
29 * Revision 1.7 2009/01/19 03:05:26 stuart
30 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31 *
32 * Revision 1.6 2003/03/06 22:51:32 stuart
33 * Convert to CVS
34 *
35 * Revision 1.5 2002/07/19 19:14:40 stuart
36 * fix reverseScript, make change ctor public, update docs
37 *
38 * Revision 1.4 2002/04/09 17:53:39 stuart
39 * More flexible interface for diff() function.
40 *
41 * Revision 1.3 2000/03/03 21:58:03 stuart
42 * move discard_confusing_lines and shift_boundaries to class file_data
43 *
44 * Revision 1.2 2000/03/02 16:37:38 stuart
45 * Add GPL and copyright
46 *
47 */
48
49import java.util.HashMap;
50import java.util.Map;
51
52/** A class to compare vectors of objects. The result of comparison
53 is a list of <code>change</code> objects which form an
54 edit script. The objects compared are traditionally lines
55 of text from two files. Comparison options such as "ignore
56 whitespace" are implemented by modifying the <code>equals</code>
57 and <code>hashcode</code> methods for the objects compared.
58<p>
59 The basic algorithm is described in: <br>
60 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61 Algorithmica Vol. 1 No. 2, 1986, p 251.
62<p>
63 This class outputs different results from GNU diff 1.15 on some
64 inputs. Our results are actually better (smaller change list, smaller
65 total size of changes), but it would be nice to know why. Perhaps
66 there is a memory overwrite bug in GNU diff 1.15.
67
68 @author Stuart D. Gathman, translated from GNU diff 1.15
69 Copyright (C) 2000 Business Management Systems, Inc.
70<p>
71 This program is free software; you can redistribute it and/or modify
72 it under the terms of the GNU General Public License as published by
73 the Free Software Foundation; either version 1, or (at your option)
74 any later version.
75<p>
76 This program is distributed in the hope that it will be useful,
77 but WITHOUT ANY WARRANTY; without even the implied warranty of
78 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79 GNU General Public License for more details.
80<p>
81 You should have received a copy of the <a href=COPYING.txt>
82 GNU General Public License</a>
83 along with this program; if not, write to the Free Software
84 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85 */
86public class Diff {
87
88 /** Prepare to find differences between two arrays. Each element of
89 the arrays is translated to an "equivalence number" based on
90 the result of <code>equals</code>. The original Object arrays
91 are no longer needed for computing the differences. They will
92 be needed again later to print the results of the comparison as
93 an edit script, if desired.
94 * @param a first array
95 * @param b second array
96 */
97 public Diff(Object[] a, Object[] b) {
98 Map<Object, Integer> h = new HashMap<>(a.length + b.length);
99 filevec = new FileData[] {new FileData(a, h), new FileData(b, h)};
100 }
101
102 /** 1 more than the maximum equivalence value used for this or its
103 sibling file. */
104 private int equivMax = 1;
105
106 private int[] xvec, yvec; /* Vectors being compared. */
107 private int[] fdiag; /* Vector, indexed by diagonal, containing
108 the X coordinate of the point furthest
109 along the given diagonal in the forward
110 search of the edit matrix. */
111 private int[] bdiag; /* Vector, indexed by diagonal, containing
112 the X coordinate of the point furthest
113 along the given diagonal in the backward
114 search of the edit matrix. */
115 private int fdiagoff, bdiagoff;
116 private final FileData[] filevec;
117 private int cost;
118
119 /**
120 * Find the midpoint of the shortest edit script for a specified
121 * portion of the two files.
122 *
123 * We scan from the beginnings of the files, and simultaneously from the ends,
124 * doing a breadth-first search through the space of edit-sequence.
125 * When the two searches meet, we have found the midpoint of the shortest
126 * edit sequence.
127 *
128 * The value returned is the number of the diagonal on which the midpoint lies.
129 * The diagonal number equals the number of inserted lines minus the number
130 * of deleted lines (counting only lines before the midpoint).
131 * The edit cost is stored into COST; this is the total number of
132 * lines inserted or deleted (counting only lines before the midpoint).
133 *
134 * This function assumes that the first lines of the specified portions
135 * of the two files do not match, and likewise that the last lines do not
136 * match. The caller must trim matching lines from the beginning and end
137 * of the portions it is going to specify.
138 *
139 * Note that if we return the "wrong" diagonal value, or if
140 * the value of bdiag at that diagonal is "wrong",
141 * the worst this can do is cause suboptimal diff output.
142 * It cannot cause incorrect diff output.
143 * @param xoff xoff
144 * @param xlim xlim
145 * @param yoff yoff
146 * @param ylim ylim
147 * @return midpoint of the shortest edit script
148 */
149 private int diag(int xoff, int xlim, int yoff, int ylim) {
150 final int[] fd = fdiag; // Give the compiler a chance.
151 final int[] bd = bdiag; // Additional help for the compiler.
152 final int[] xv = xvec; // Still more help for the compiler.
153 final int[] yv = yvec; // And more and more . . .
154 final int dmin = xoff - ylim; // Minimum valid diagonal.
155 final int dmax = xlim - yoff; // Maximum valid diagonal.
156 final int fmid = xoff - yoff; // Center diagonal of top-down search.
157 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
158 int fmin = fmid, fmax = fmid; // Limits of top-down search.
159 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
160 // True if southeast corner is on an odd diagonal with respect to the northwest.
161 final boolean odd = (fmid - bmid & 1) != 0;
162
163 fd[fdiagoff + fmid] = xoff;
164 bd[bdiagoff + bmid] = xlim;
165
166 for (int c = 1;; ++c) {
167 int d; /* Active diagonal. */
168
169 /* Extend the top-down search by an edit step in each diagonal. */
170 if (fmin > dmin) {
171 fd[fdiagoff + --fmin - 1] = -1;
172 } else {
173 ++fmin;
174 }
175 if (fmax < dmax) {
176 fd[fdiagoff + ++fmax + 1] = -1;
177 } else {
178 --fmax;
179 }
180 for (d = fmax; d >= fmin; d -= 2) {
181 int x, y, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
182
183 if (tlo >= thi) {
184 x = tlo + 1;
185 } else {
186 x = thi;
187 }
188 y = x - d;
189 while (x < xlim && y < ylim && xv[x] == yv[y]) {
190 ++x; ++y;
191 }
192 fd[fdiagoff + d] = x;
193 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
194 cost = 2 * c - 1;
195 return d;
196 }
197 }
198
199 /* Similar extend the bottom-up search. */
200 if (bmin > dmin) {
201 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
202 } else {
203 ++bmin;
204 }
205 if (bmax < dmax) {
206 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
207 } else {
208 --bmax;
209 }
210 for (d = bmax; d >= bmin; d -= 2) {
211 int x, y, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
212
213 if (tlo < thi) {
214 x = tlo;
215 } else {
216 x = thi - 1;
217 }
218 y = x - d;
219 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
220 --x; --y;
221 }
222 bd[bdiagoff + d] = x;
223 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
224 cost = 2 * c;
225 return d;
226 }
227 }
228 }
229 }
230
231 /**
232 * Compare in detail contiguous subsequences of the two files
233 * which are known, as a whole, to match each other.
234 *
235 * The results are recorded in the vectors filevec[N].changed_flag, by
236 * storing a 1 in the element for each line that is an insertion or deletion.
237 *
238 * The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
239 *
240 * Note that XLIM, YLIM are exclusive bounds.
241 * All line numbers are origin-0 and discarded lines are not counted.
242 * @param xoff xoff
243 * @param xlim xlim
244 * @param yoff yoff
245 * @param ylim ylim
246 */
247 private void compareseq(int xoff, int xlim, int yoff, int ylim) {
248 /* Slide down the bottom initial diagonal. */
249 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
250 ++xoff; ++yoff;
251 }
252 /* Slide up the top initial diagonal. */
253 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
254 --xlim; --ylim;
255 }
256
257 /* Handle simple cases. */
258 if (xoff == xlim) {
259 while (yoff < ylim) {
260 filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
261 }
262 } else if (yoff == ylim) {
263 while (xoff < xlim) {
264 filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
265 }
266 } else {
267 /* Find a point of correspondence in the middle of the files. */
268
269 int d = diag(xoff, xlim, yoff, ylim);
270 int c = cost;
271 int b = bdiag[bdiagoff + d];
272
273 if (c == 1)
274 /* This should be impossible, because it implies that
275 one of the two subsequences is empty,
276 and that case was handled above without calling `diag'.
277 Let's verify that this is true. */
278 throw new IllegalArgumentException("Empty subsequence");
279 else {
280 /* Use that point to split this problem into two subproblems. */
281 compareseq(xoff, b, yoff, b - d);
282 /* This used to use f instead of b,
283 but that is incorrect!
284 It is not necessarily the case that diagonal d
285 has a snake from b to f. */
286 compareseq(b, xlim, b - d, ylim);
287 }
288 }
289 }
290
291 /** Discard lines from one file that have no matches in the other file.
292 */
293 private void discard_confusing_lines() {
294 filevec[0].discard_confusing_lines(filevec[1]);
295 filevec[1].discard_confusing_lines(filevec[0]);
296 }
297
298 /**
299 * Adjust inserts/deletes of blank lines to join changes as much as possible.
300 */
301 private void shift_boundaries() {
302 filevec[0].shift_boundaries(filevec[1]);
303 filevec[1].shift_boundaries(filevec[0]);
304 }
305
306 /**
307 * Script builder.
308 * @since 10600 (functional interface)
309 */
310 @FunctionalInterface
311 public interface ScriptBuilder {
312 /**
313 * Scan the tables of which lines are inserted and deleted, producing an edit script.
314 * @param changed0 true for lines in first file which do not match 2nd
315 * @param len0 number of lines in first file
316 * @param changed1 true for lines in 2nd file which do not match 1st
317 * @param len1 number of lines in 2nd file
318 * @return a linked list of changes - or null
319 * @since 10600 (renamed)
320 */
321 Change buildScript(
322 boolean[] changed0, int len0,
323 boolean[] changed1, int len1
324 );
325 }
326
327 /**
328 * Scan the tables of which lines are inserted and deleted, producing an edit script in reverse order.
329 */
330 static class ReverseScript implements ScriptBuilder {
331 @Override
332 public Change buildScript(
333 final boolean[] changed0, int len0,
334 final boolean[] changed1, int len1) {
335 Change script = null;
336 int i0 = 0, i1 = 0;
337 while (i0 < len0 || i1 < len1) {
338 if (changed0[1+i0] || changed1[1+i1]) {
339 int line0 = i0, line1 = i1;
340
341 /* Find # lines changed here in each file. */
342 while (changed0[1+i0]) {
343 ++i0;
344 }
345 while (changed1[1+i1]) {
346 ++i1;
347 }
348
349 /* Record this change. */
350 script = new Change(line0, line1, i0 - line0, i1 - line1, script);
351 }
352
353 /* We have reached lines in the two files that match each other. */
354 i0++; i1++;
355 }
356
357 return script;
358 }
359 }
360
361 static class ForwardScript implements ScriptBuilder {
362 /** Scan the tables of which lines are inserted and deleted,
363 producing an edit script in forward order. */
364 @Override
365 public Change buildScript(
366 final boolean[] changed0, int len0,
367 final boolean[] changed1, int len1) {
368 Change script = null;
369 int i0 = len0, i1 = len1;
370
371 while (i0 >= 0 || i1 >= 0) {
372 if (changed0[i0] || changed1[i1]) {
373 int line0 = i0, line1 = i1;
374
375 /* Find # lines changed here in each file. */
376 while (changed0[i0]) {
377 --i0;
378 }
379 while (changed1[i1]) {
380 --i1;
381 }
382
383 /* Record this change. */
384 script = new Change(i0, i1, line0 - i0, line1 - i1, script);
385 }
386
387 /* We have reached lines in the two files that match each other. */
388 i0--; i1--;
389 }
390
391 return script;
392 }
393 }
394
395 /** Standard Forward ScriptBuilder. */
396 public static final ScriptBuilder forwardScript = new ForwardScript();
397 /** Standard Reverse ScriptBuilder. */
398 public static final ScriptBuilder reverseScript = new ReverseScript();
399
400 /**
401 * Report the differences of two files. DEPTH is the current directory depth.
402 * @param reverse if {@code true} use {@link #reverseScript} else use {@link #forwardScript}
403 * @return the differences of two files
404 */
405 public final Change diff_2(final boolean reverse) {
406 return diff(reverse ? reverseScript : forwardScript);
407 }
408
409 /**
410 * Get the results of comparison as an edit script. The script
411 * is described by a list of changes. The standard ScriptBuilder
412 * implementations provide for forward and reverse edit scripts.
413 * Alternate implementations could, for instance, list common elements
414 * instead of differences.
415 * @param bld an object to build the script from change flags
416 * @return the head of a list of changes
417 */
418 public Change diff(final ScriptBuilder bld) {
419
420 // Some lines are obviously insertions or deletions because they don't match anything.
421 // Detect them now, and avoid even thinking about them in the main comparison algorithm.
422 discard_confusing_lines();
423
424 // Now do the main comparison algorithm, considering just the undiscarded lines.
425 xvec = filevec[0].undiscarded;
426 yvec = filevec[1].undiscarded;
427
428 int diags = filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
429 fdiag = new int[diags];
430 fdiagoff = filevec[1].nondiscardedLines + 1;
431 bdiag = new int[diags];
432 bdiagoff = filevec[1].nondiscardedLines + 1;
433
434 compareseq(0, filevec[0].nondiscardedLines,
435 0, filevec[1].nondiscardedLines);
436 fdiag = null;
437 bdiag = null;
438
439 // Modify the results slightly to make them prettier in cases where that can validly be done.
440 shift_boundaries();
441
442 // Get the results of comparison in the form of a chain of `struct change's -- an edit script.
443 return bld.buildScript(
444 filevec[0].changedFlag,
445 filevec[0].bufferedLines,
446 filevec[1].changedFlag,
447 filevec[1].bufferedLines
448 );
449 }
450
451 /** The result of comparison is an "edit script": a chain of change objects.
452 Each change represents one place where some lines are deleted
453 and some are inserted.
454
455 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
456 DELETED is the number of lines deleted here from file 0.
457 INSERTED is the number of lines inserted here in file 1.
458
459 If DELETED is 0 then LINE0 is the number of the line before
460 which the insertion was done; vice versa for INSERTED and LINE1. */
461
462 public static class Change {
463 /** Previous or next edit command. */
464 public Change link;
465 /** # lines of file 1 changed here. */
466 public final int inserted;
467 /** # lines of file 0 changed here. */
468 public final int deleted;
469 /** Line number of 1st deleted line. */
470 public final int line0;
471 /** Line number of 1st inserted line. */
472 public final int line1;
473
474 /**
475 * Cons an additional entry onto the front of an edit script OLD.
476 * LINE0 and LINE1 are the first affected lines in the two files (origin 0).
477 * DELETED is the number of lines deleted here from file 0.
478 * INSERTED is the number of lines inserted here in file 1.
479 *
480 * If DELETED is 0 then LINE0 is the number of the line before
481 * which the insertion was done; vice versa for INSERTED and LINE1.
482 * @param line0 first affected lines in the two files (origin 0)
483 * @param line1 first affected lines in the two files (origin 0)
484 * @param deleted the number of lines deleted here from file 0
485 * @param inserted the number of lines inserted here in file 1
486 * @param old edit script
487 */
488 public Change(int line0, int line1, int deleted, int inserted, Change old) {
489 this.line0 = line0;
490 this.line1 = line1;
491 this.inserted = inserted;
492 this.deleted = deleted;
493 this.link = old;
494 }
495
496 /**
497 * Returns the number of insertions and deletions of this change as well as
498 * (recursively) the changes linked via {@link #link}.
499 * @return recursive number of insertions and deletions
500 */
501 public int getTotalNumberOfChanges() {
502 return inserted + deleted + (link != null ? link.getTotalNumberOfChanges() : 0);
503 }
504
505 @Override
506 public String toString() {
507 String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1);
508 return (link != null) ? s + '\n' + link : s;
509 }
510 }
511
512 /**
513 * Data on one input file being compared.
514 */
515 class FileData {
516
517 /** Allocate changed array for the results of comparison. */
518 void clear() {
519 // Allocate a flag for each line of each file, saying whether that line is an insertion or deletion.
520 // Allocate an extra element, always zero, at each end of each vector.
521 changedFlag = new boolean[bufferedLines + 2];
522 }
523
524 /**
525 * Return equiv_count[I] as the number of lines in this file that fall in equivalence class I.
526 * @return the array of equivalence class counts.
527 */
528 int[] equivCount() {
529 int[] equivCount = new int[equivMax];
530 for (int i = 0; i < bufferedLines; ++i) {
531 ++equivCount[equivs[i]];
532 }
533 return equivCount;
534 }
535
536 /**
537 * Discard lines that have no matches in another file.
538 *
539 * A line which is discarded will not be considered by the actual comparison algorithm;
540 * it will be as if that line were not in the file.
541 * The file's `realindexes' table maps virtual line numbers
542 * (which don't count the discarded lines) into real line numbers;
543 * this is how the actual comparison algorithm produces results
544 * that are comprehensible when the discarded lines are counted.
545 * <p>
546 * When we discard a line, we also mark it as a deletion or insertion so that it will be printed in the output.
547 * @param f the other file
548 */
549 void discard_confusing_lines(FileData f) {
550 clear();
551 // Set up table of which lines are going to be discarded.
552 final byte[] discarded = discardable(f.equivCount());
553
554 // Don't really discard the provisional lines except when they occur in a run of discardables,
555 // with nonprovisionals at the beginning and end.
556 filterDiscards(discarded);
557
558 // Actually discard the lines.
559 discard(discarded);
560 }
561
562 /**
563 * Mark to be discarded each line that matches no line of another file.
564 * If a line matches many lines, mark it as provisionally discardable.
565 * @param counts The count of each equivalence number for the other file.
566 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
567 * @see #equivCount()
568 */
569 private byte[] discardable(final int[] counts) {
570 final int end = bufferedLines;
571 final byte[] discards = new byte[end];
572 final int[] equivs = this.equivs;
573 int many = 5;
574 int tem = end / 64;
575
576 /* Multiply MANY by approximate square root of number of lines.
577 That is the threshold for provisionally discardable lines. */
578 while ((tem = tem >> 2) > 0) {
579 many *= 2;
580 }
581
582 for (int i = 0; i < end; i++) {
583 int nmatch;
584 if (equivs[i] == 0) {
585 continue;
586 }
587 nmatch = counts[equivs[i]];
588 if (nmatch == 0) {
589 discards[i] = 1;
590 } else if (nmatch > many) {
591 discards[i] = 2;
592 }
593 }
594 return discards;
595 }
596
597 /**
598 * Don't really discard the provisional lines except when they occur
599 * in a run of discardables, with nonprovisionals at the beginning and end.
600 * @param discards discards
601 */
602 private void filterDiscards(final byte[] discards) {
603 final int end = bufferedLines;
604
605 for (int i = 0; i < end; i++) {
606 /* Cancel provisional discards not in middle of run of discards. */
607 if (discards[i] == 2) {
608 discards[i] = 0;
609 } else if (discards[i] != 0) {
610 /* We have found a nonprovisional discard. */
611 int j;
612 int length;
613 int provisional = 0;
614
615 /* Find end of this run of discardable lines.
616 Count how many are provisionally discardable. */
617 for (j = i; j < end; j++) {
618 if (discards[j] == 0) {
619 break;
620 }
621 if (discards[j] == 2) {
622 ++provisional;
623 }
624 }
625
626 /* Cancel provisional discards at end, and shrink the run. */
627 while (j > i && discards[j - 1] == 2) {
628 discards[--j] = 0; --provisional;
629 }
630
631 /* Now we have the length of a run of discardable lines
632 whose first and last are not provisional. */
633 length = j - i;
634
635 /* If 1/4 of the lines in the run are provisional,
636 cancel discarding of all provisional lines in the run. */
637 if (provisional * 4 > length) {
638 while (j > i)
639 if (discards[--j] == 2) {
640 discards[j] = 0;
641 }
642 } else {
643 int consec;
644 int minimum = 1;
645 int tem = length / 4;
646
647 /* MINIMUM is approximate square root of LENGTH/4.
648 A subrun of two or more provisionals can stand
649 when LENGTH is at least 16.
650 A subrun of 4 or more can stand when LENGTH >= 64. */
651 while ((tem = tem >> 2) > 0) {
652 minimum *= 2;
653 }
654 minimum++;
655
656 /* Cancel any subrun of MINIMUM or more provisionals
657 within the larger run. */
658 for (j = 0, consec = 0; j < length; j++) {
659 if (discards[i + j] != 2) {
660 consec = 0;
661 } else if (minimum == ++consec) {
662 /* Back up to start of subrun, to cancel it all. */
663 j -= consec;
664 } else if (minimum < consec) {
665 discards[i + j] = 0;
666 }
667 }
668
669 /* Scan from beginning of run
670 until we find 3 or more nonprovisionals in a row
671 or until the first nonprovisional at least 8 lines in.
672 Until that point, cancel any provisionals. */
673 for (j = 0, consec = 0; j < length; j++) {
674 if (j >= 8 && discards[i + j] == 1) {
675 break;
676 }
677 if (discards[i + j] == 2) {
678 consec = 0; discards[i + j] = 0;
679 } else if (discards[i + j] == 0) {
680 consec = 0;
681 } else {
682 consec++;
683 }
684 if (consec == 3) {
685 break;
686 }
687 }
688
689 /* I advances to the last line of the run. */
690 i += length - 1;
691
692 /* Same thing, from end. */
693 for (j = 0, consec = 0; j < length; j++) {
694 if (j >= 8 && discards[i - j] == 1) {
695 break;
696 }
697 if (discards[i - j] == 2) {
698 consec = 0; discards[i - j] = 0;
699 } else if (discards[i - j] == 0) {
700 consec = 0;
701 } else {
702 consec++;
703 }
704 if (consec == 3) {
705 break;
706 }
707 }
708 }
709 }
710 }
711 }
712
713 /** Actually discard the lines.
714 @param discards flags lines to be discarded
715 */
716 private void discard(final byte[] discards) {
717 final int end = bufferedLines;
718 int j = 0;
719 for (int i = 0; i < end; ++i) {
720 if (discards[i] == 0) {
721 undiscarded[j] = equivs[i];
722 realindexes[j++] = i;
723 } else {
724 changedFlag[1+i] = true;
725 }
726 }
727 nondiscardedLines = j;
728 }
729
730 FileData(int length) {
731 bufferedLines = length;
732 equivs = new int[length];
733 undiscarded = new int[bufferedLines];
734 realindexes = new int[bufferedLines];
735 }
736
737 FileData(Object[] data, Map<Object, Integer> h) {
738 this(data.length);
739 // FIXME: diff 2.7 removes common prefix and common suffix
740 for (int i = 0; i < data.length; ++i) {
741 Integer ir = h.get(data[i]);
742 if (ir == null) {
743 equivs[i] = equivMax++;
744 h.put(data[i], equivs[i]);
745 } else {
746 equivs[i] = ir.intValue();
747 }
748 }
749 }
750
751 /** Adjust inserts/deletes of blank lines to join changes
752 as much as possible.
753
754 We do something when a run of changed lines include a blank
755 line at one end and have an excluded blank line at the other.
756 We are free to choose which blank line is included.
757 `compareseq' always chooses the one at the beginning,
758 but usually it is cleaner to consider the following blank line
759 to be the "change". The only exception is if the preceding blank line
760 would join this change to other changes.
761 @param f the file being compared against
762 */
763 void shift_boundaries(FileData f) {
764 final boolean[] changed = changedFlag;
765 final boolean[] otherChanged = f.changedFlag;
766 int i = 0;
767 int j = 0;
768 int iEnd = bufferedLines;
769 int preceding = -1;
770 int otherPreceding = -1;
771
772 for (;;) {
773 int start, end, otherStart;
774
775 /* Scan forwards to find beginning of another run of changes.
776 Also keep track of the corresponding point in the other file. */
777
778 while (i < iEnd && !changed[1+i]) {
779 while (otherChanged[1+j++]) {
780 /* Non-corresponding lines in the other file
781 will count as the preceding batch of changes. */
782 otherPreceding = j;
783 }
784 i++;
785 }
786
787 if (i == iEnd) {
788 break;
789 }
790
791 start = i;
792 otherStart = j;
793
794 for (;;) {
795 /* Now find the end of this run of changes. */
796
797 while (i < iEnd && changed[1+i]) {
798 i++;
799 }
800 end = i;
801
802 /* If the first changed line matches the following unchanged one,
803 and this run does not follow right after a previous run,
804 and there are no lines deleted from the other file here,
805 then classify the first changed line as unchanged
806 and the following line as changed in its place. */
807
808 /* You might ask, how could this run follow right after another?
809 Only because the previous run was shifted here. */
810
811 if (end != iEnd && equivs[start] == equivs[end] && !otherChanged[1+j]
812 && !((preceding >= 0 && start == preceding) || (otherPreceding >= 0 && otherStart == otherPreceding))) {
813 changed[1+end++] = true;
814 changed[1+start++] = false;
815 ++i;
816 /* Since one line-that-matches is now before this run
817 instead of after, we must advance in the other file
818 to keep in synch. */
819 ++j;
820 } else {
821 break;
822 }
823 }
824
825 preceding = i;
826 otherPreceding = j;
827 }
828 }
829
830 /** Number of elements (lines) in this file. */
831 private final int bufferedLines;
832
833 /** Vector, indexed by line number, containing an equivalence code for each line.
834 * It is this vector that is actually compared with that of another file to generate differences. */
835 private final int[] equivs;
836
837 /** Vector, like the previous one except that the elements for discarded lines have been squeezed out. */
838 private final int[] undiscarded;
839
840 /** Vector mapping virtual line numbers (not counting discarded lines) to real ones (counting those lines).
841 * Both are origin-0. */
842 private final int[] realindexes;
843
844 /** Total number of nondiscarded lines. */
845 private int nondiscardedLines;
846
847 /** Array, indexed by real origin-1 line number, containing true for a line that is an insertion or a deletion.
848 The results of comparison are stored here. */
849 private boolean[] changedFlag;
850 }
851}
Note: See TracBrowser for help on using the repository browser.