source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 6084

Last change on this file since 6084 was 6084, checked in by bastiK, 11 years ago

see #8902 - add missing @Override annotations (patch by shinigami)

File size: 35.1 KB
Line 
1// License: GPL.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.7 2009/01/19 03:05:26 stuart
11 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
12 *
13 * Revision 1.6 2003/03/06 22:51:32 stuart
14 * Convert to CVS
15 *
16 * Revision 1.5 2002/07/19 19:14:40 stuart
17 * fix reverseScript, make change ctor public, update docs
18 *
19 * Revision 1.4 2002/04/09 17:53:39 stuart
20 * More flexible interface for diff() function.
21 *
22 * Revision 1.3 2000/03/03 21:58:03 stuart
23 * move discard_confusing_lines and shift_boundaries to class file_data
24 *
25 * Revision 1.2 2000/03/02 16:37:38 stuart
26 * Add GPL and copyright
27 *
28 */
29
30import java.util.Hashtable;
31
32/** A class to compare vectors of objects. The result of comparison
33 is a list of <code>change</code> objects which form an
34 edit script. The objects compared are traditionally lines
35 of text from two files. Comparison options such as "ignore
36 whitespace" are implemented by modifying the <code>equals</code>
37 and <code>hashcode</code> methods for the objects compared.
38<p>
39 The basic algorithm is described in: </br>
40 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
41 Algorithmica Vol. 1 No. 2, 1986, p 251.
42<p>
43 This class outputs different results from GNU diff 1.15 on some
44 inputs. Our results are actually better (smaller change list, smaller
45 total size of changes), but it would be nice to know why. Perhaps
46 there is a memory overwrite bug in GNU diff 1.15.
47
48 @author Stuart D. Gathman, translated from GNU diff 1.15
49 Copyright (C) 2000 Business Management Systems, Inc.
50<p>
51 This program is free software; you can redistribute it and/or modify
52 it under the terms of the GNU General Public License as published by
53 the Free Software Foundation; either version 1, or (at your option)
54 any later version.
55<p>
56 This program is distributed in the hope that it will be useful,
57 but WITHOUT ANY WARRANTY; without even the implied warranty of
58 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
59 GNU General Public License for more details.
60<p>
61 You should have received a copy of the <a href=COPYING.txt>
62 GNU General Public License</a>
63 along with this program; if not, write to the Free Software
64 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
65
66 */
67
68public class Diff {
69
70 /** Prepare to find differences between two arrays. Each element of
71 the arrays is translated to an "equivalence number" based on
72 the result of <code>equals</code>. The original Object arrays
73 are no longer needed for computing the differences. They will
74 be needed again later to print the results of the comparison as
75 an edit script, if desired.
76 */
77 public Diff(Object[] a,Object[] b) {
78 Hashtable<Object, Integer> h = new Hashtable<Object, Integer>(a.length + b.length);
79 filevec[0] = new file_data(a,h);
80 filevec[1] = new file_data(b,h);
81 }
82
83 /** 1 more than the maximum equivalence value used for this or its
84 sibling file. */
85 private int equiv_max = 1;
86
87 /** When set to true, the comparison uses a heuristic to speed it up.
88 With this heuristic, for files with a constant small density
89 of changes, the algorithm is linear in the file size. */
90 public boolean heuristic = false;
91
92 /** When set to true, the algorithm returns a guarranteed minimal
93 set of changes. This makes things slower, sometimes much slower. */
94 public boolean no_discards = false;
95
96 private int[] xvec, yvec; /* Vectors being compared. */
97 private int[] fdiag; /* Vector, indexed by diagonal, containing
98 the X coordinate of the point furthest
99 along the given diagonal in the forward
100 search of the edit matrix. */
101 private int[] bdiag; /* Vector, indexed by diagonal, containing
102 the X coordinate of the point furthest
103 along the given diagonal in the backward
104 search of the edit matrix. */
105 private int fdiagoff, bdiagoff;
106 private final file_data[] filevec = new file_data[2];
107 private int cost;
108 /** Snakes bigger than this are considered "big". */
109 private static final int SNAKE_LIMIT = 20;
110
111 /** Find the midpoint of the shortest edit script for a specified
112 portion of the two files.
113
114 We scan from the beginnings of the files, and simultaneously from the ends,
115 doing a breadth-first search through the space of edit-sequence.
116 When the two searches meet, we have found the midpoint of the shortest
117 edit sequence.
118
119 The value returned is the number of the diagonal on which the midpoint lies.
120 The diagonal number equals the number of inserted lines minus the number
121 of deleted lines (counting only lines before the midpoint).
122 The edit cost is stored into COST; this is the total number of
123 lines inserted or deleted (counting only lines before the midpoint).
124
125 This function assumes that the first lines of the specified portions
126 of the two files do not match, and likewise that the last lines do not
127 match. The caller must trim matching lines from the beginning and end
128 of the portions it is going to specify.
129
130 Note that if we return the "wrong" diagonal value, or if
131 the value of bdiag at that diagonal is "wrong",
132 the worst this can do is cause suboptimal diff output.
133 It cannot cause incorrect diff output. */
134
135 private int diag (int xoff, int xlim, int yoff, int ylim) {
136 final int[] fd = fdiag; // Give the compiler a chance.
137 final int[] bd = bdiag; // Additional help for the compiler.
138 final int[] xv = xvec; // Still more help for the compiler.
139 final int[] yv = yvec; // And more and more . . .
140 final int dmin = xoff - ylim; // Minimum valid diagonal.
141 final int dmax = xlim - yoff; // Maximum valid diagonal.
142 final int fmid = xoff - yoff; // Center diagonal of top-down search.
143 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
144 int fmin = fmid, fmax = fmid; // Limits of top-down search.
145 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
146 /* True if southeast corner is on an odd
147 diagonal with respect to the northwest. */
148 final boolean odd = (fmid - bmid & 1) != 0;
149
150 fd[fdiagoff + fmid] = xoff;
151 bd[bdiagoff + bmid] = xlim;
152
153 for (int c = 1;; ++c)
154 {
155 int d; /* Active diagonal. */
156 boolean big_snake = false;
157
158 /* Extend the top-down search by an edit step in each diagonal. */
159 if (fmin > dmin) {
160 fd[fdiagoff + --fmin - 1] = -1;
161 } else {
162 ++fmin;
163 }
164 if (fmax < dmax) {
165 fd[fdiagoff + ++fmax + 1] = -1;
166 } else {
167 --fmax;
168 }
169 for (d = fmax; d >= fmin; d -= 2)
170 {
171 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
172
173 if (tlo >= thi) {
174 x = tlo + 1;
175 } else {
176 x = thi;
177 }
178 oldx = x;
179 y = x - d;
180 while (x < xlim && y < ylim && xv[x] == yv[y]) {
181 ++x; ++y;
182 }
183 if (x - oldx > SNAKE_LIMIT) {
184 big_snake = true;
185 }
186 fd[fdiagoff + d] = x;
187 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
188 {
189 cost = 2 * c - 1;
190 return d;
191 }
192 }
193
194 /* Similar extend the bottom-up search. */
195 if (bmin > dmin) {
196 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
197 } else {
198 ++bmin;
199 }
200 if (bmax < dmax) {
201 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
202 } else {
203 --bmax;
204 }
205 for (d = bmax; d >= bmin; d -= 2)
206 {
207 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
208
209 if (tlo < thi) {
210 x = tlo;
211 } else {
212 x = thi - 1;
213 }
214 oldx = x;
215 y = x - d;
216 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
217 --x; --y;
218 }
219 if (oldx - x > SNAKE_LIMIT) {
220 big_snake = true;
221 }
222 bd[bdiagoff + d] = x;
223 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
224 {
225 cost = 2 * c;
226 return d;
227 }
228 }
229
230 /* Heuristic: check occasionally for a diagonal that has made
231 lots of progress compared with the edit distance.
232 If we have any such, find the one that has made the most
233 progress and return it as if it had succeeded.
234
235 With this heuristic, for files with a constant small density
236 of changes, the algorithm is linear in the file size. */
237
238 if (c > 200 && big_snake && heuristic)
239 {
240 int best = 0;
241 int bestpos = -1;
242
243 for (d = fmax; d >= fmin; d -= 2)
244 {
245 int dd = d - fmid;
246 int x = fd[fdiagoff + d];
247 int y = x - d;
248 int v = (x - xoff) * 2 - dd;
249 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
250 {
251 if (v > best
252 && xoff + SNAKE_LIMIT <= x && x < xlim
253 && yoff + SNAKE_LIMIT <= y && y < ylim)
254 {
255 /* We have a good enough best diagonal;
256 now insist that it end with a significant snake. */
257 int k;
258
259 for (k = 1; xvec[x - k] == yvec[y - k]; k++)
260 if (k == SNAKE_LIMIT)
261 {
262 best = v;
263 bestpos = d;
264 break;
265 }
266 }
267 }
268 }
269 if (best > 0)
270 {
271 cost = 2 * c - 1;
272 return bestpos;
273 }
274
275 best = 0;
276 for (d = bmax; d >= bmin; d -= 2)
277 {
278 int dd = d - bmid;
279 int x = bd[bdiagoff + d];
280 int y = x - d;
281 int v = (xlim - x) * 2 + dd;
282 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
283 {
284 if (v > best
285 && xoff < x && x <= xlim - SNAKE_LIMIT
286 && yoff < y && y <= ylim - SNAKE_LIMIT)
287 {
288 /* We have a good enough best diagonal;
289 now insist that it end with a significant snake. */
290 int k;
291
292 for (k = 0; xvec[x + k] == yvec[y + k]; k++)
293 if (k == SNAKE_LIMIT)
294 {
295 best = v;
296 bestpos = d;
297 break;
298 }
299 }
300 }
301 }
302 if (best > 0)
303 {
304 cost = 2 * c - 1;
305 return bestpos;
306 }
307 }
308 }
309 }
310
311 /** Compare in detail contiguous subsequences of the two files
312 which are known, as a whole, to match each other.
313
314 The results are recorded in the vectors filevec[N].changed_flag, by
315 storing a 1 in the element for each line that is an insertion or deletion.
316
317 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
318
319 Note that XLIM, YLIM are exclusive bounds.
320 All line numbers are origin-0 and discarded lines are not counted. */
321
322 private void compareseq (int xoff, int xlim, int yoff, int ylim) {
323 /* Slide down the bottom initial diagonal. */
324 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
325 ++xoff; ++yoff;
326 }
327 /* Slide up the top initial diagonal. */
328 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
329 --xlim; --ylim;
330 }
331
332 /* Handle simple cases. */
333 if (xoff == xlim) {
334 while (yoff < ylim) {
335 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
336 }
337 } else if (yoff == ylim) {
338 while (xoff < xlim) {
339 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
340 }
341 } else
342 {
343 /* Find a point of correspondence in the middle of the files. */
344
345 int d = diag (xoff, xlim, yoff, ylim);
346 int c = cost;
347 //int f = fdiag[fdiagoff + d];
348 int b = bdiag[bdiagoff + d];
349
350 if (c == 1)
351 /* This should be impossible, because it implies that
352 one of the two subsequences is empty,
353 and that case was handled above without calling `diag'.
354 Let's verify that this is true. */
355 throw new IllegalArgumentException("Empty subsequence");
356 else
357 {
358 /* Use that point to split this problem into two subproblems. */
359 compareseq (xoff, b, yoff, b - d);
360 /* This used to use f instead of b,
361 but that is incorrect!
362 It is not necessarily the case that diagonal d
363 has a snake from b to f. */
364 compareseq (b, xlim, b - d, ylim);
365 }
366 }
367 }
368
369 /** Discard lines from one file that have no matches in the other file.
370 */
371
372 private void discard_confusing_lines() {
373 filevec[0].discard_confusing_lines(filevec[1]);
374 filevec[1].discard_confusing_lines(filevec[0]);
375 }
376
377 private boolean inhibit = false;
378
379 /** Adjust inserts/deletes of blank lines to join changes
380 as much as possible.
381 */
382
383 private void shift_boundaries() {
384 if (inhibit)
385 return;
386 filevec[0].shift_boundaries(filevec[1]);
387 filevec[1].shift_boundaries(filevec[0]);
388 }
389
390 public interface ScriptBuilder {
391 /** Scan the tables of which lines are inserted and deleted,
392 producing an edit script.
393 @param changed0 true for lines in first file which do not match 2nd
394 @param len0 number of lines in first file
395 @param changed1 true for lines in 2nd file which do not match 1st
396 @param len1 number of lines in 2nd file
397 @return a linked list of changes - or null
398 */
399 public change build_script(
400 boolean[] changed0,int len0,
401 boolean[] changed1,int len1
402 );
403 }
404
405 /** Scan the tables of which lines are inserted and deleted,
406 producing an edit script in reverse order. */
407
408 static class ReverseScript implements ScriptBuilder {
409 @Override
410 public change build_script(
411 final boolean[] changed0,int len0,
412 final boolean[] changed1,int len1)
413 {
414 change script = null;
415 int i0 = 0, i1 = 0;
416 while (i0 < len0 || i1 < len1) {
417 if (changed0[1+i0] || changed1[1+i1]) {
418 int line0 = i0, line1 = i1;
419
420 /* Find # lines changed here in each file. */
421 while (changed0[1+i0]) {
422 ++i0;
423 }
424 while (changed1[1+i1]) {
425 ++i1;
426 }
427
428 /* Record this change. */
429 script = new change(line0, line1, i0 - line0, i1 - line1, script);
430 }
431
432 /* We have reached lines in the two files that match each other. */
433 i0++; i1++;
434 }
435
436 return script;
437 }
438 }
439
440 static class ForwardScript implements ScriptBuilder {
441 /** Scan the tables of which lines are inserted and deleted,
442 producing an edit script in forward order. */
443 @Override
444 public change build_script(
445 final boolean[] changed0,int len0,
446 final boolean[] changed1,int len1)
447 {
448 change script = null;
449 int i0 = len0, i1 = len1;
450
451 while (i0 >= 0 || i1 >= 0)
452 {
453 if (changed0[i0] || changed1[i1])
454 {
455 int line0 = i0, line1 = i1;
456
457 /* Find # lines changed here in each file. */
458 while (changed0[i0]) {
459 --i0;
460 }
461 while (changed1[i1]) {
462 --i1;
463 }
464
465 /* Record this change. */
466 script = new change(i0, i1, line0 - i0, line1 - i1, script);
467 }
468
469 /* We have reached lines in the two files that match each other. */
470 i0--; i1--;
471 }
472
473 return script;
474 }
475 }
476
477 /** Standard ScriptBuilders. */
478 public final static ScriptBuilder
479 forwardScript = new ForwardScript(),
480 reverseScript = new ReverseScript();
481
482 /* Report the differences of two files. DEPTH is the current directory
483 depth. */
484 public final change diff_2(final boolean reverse) {
485 return diff(reverse ? reverseScript : forwardScript);
486 }
487
488 /** Get the results of comparison as an edit script. The script
489 is described by a list of changes. The standard ScriptBuilder
490 implementations provide for forward and reverse edit scripts.
491 Alternate implementations could, for instance, list common elements
492 instead of differences.
493 @param bld an object to build the script from change flags
494 @return the head of a list of changes
495 */
496 public change diff(final ScriptBuilder bld) {
497
498 /* Some lines are obviously insertions or deletions
499 because they don't match anything. Detect them now,
500 and avoid even thinking about them in the main comparison algorithm. */
501
502 discard_confusing_lines ();
503
504 /* Now do the main comparison algorithm, considering just the
505 undiscarded lines. */
506
507 xvec = filevec[0].undiscarded;
508 yvec = filevec[1].undiscarded;
509
510 int diags =
511 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
512 fdiag = new int[diags];
513 fdiagoff = filevec[1].nondiscarded_lines + 1;
514 bdiag = new int[diags];
515 bdiagoff = filevec[1].nondiscarded_lines + 1;
516
517 compareseq (0, filevec[0].nondiscarded_lines,
518 0, filevec[1].nondiscarded_lines);
519 fdiag = null;
520 bdiag = null;
521
522 /* Modify the results slightly to make them prettier
523 in cases where that can validly be done. */
524
525 shift_boundaries ();
526
527 /* Get the results of comparison in the form of a chain
528 of `struct change's -- an edit script. */
529 return bld.build_script(
530 filevec[0].changed_flag,
531 filevec[0].buffered_lines,
532 filevec[1].changed_flag,
533 filevec[1].buffered_lines
534 );
535
536 }
537
538 /** The result of comparison is an "edit script": a chain of change objects.
539 Each change represents one place where some lines are deleted
540 and some are inserted.
541
542 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
543 DELETED is the number of lines deleted here from file 0.
544 INSERTED is the number of lines inserted here in file 1.
545
546 If DELETED is 0 then LINE0 is the number of the line before
547 which the insertion was done; vice versa for INSERTED and LINE1. */
548
549 public static class change {
550 /** Previous or next edit command. */
551 public change link;
552 /** # lines of file 1 changed here. */
553 public final int inserted;
554 /** # lines of file 0 changed here. */
555 public final int deleted;
556 /** Line number of 1st deleted line. */
557 public final int line0;
558 /** Line number of 1st inserted line. */
559 public final int line1;
560
561 /** Cons an additional entry onto the front of an edit script OLD.
562 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
563 DELETED is the number of lines deleted here from file 0.
564 INSERTED is the number of lines inserted here in file 1.
565
566 If DELETED is 0 then LINE0 is the number of the line before
567 which the insertion was done; vice versa for INSERTED and LINE1. */
568 public change(int line0, int line1, int deleted, int inserted, change old) {
569 this.line0 = line0;
570 this.line1 = line1;
571 this.inserted = inserted;
572 this.deleted = deleted;
573 this.link = old;
574 //System.err.println(line0+","+line1+","+inserted+","+deleted);
575 }
576 }
577
578 /** Data on one input file being compared.
579 */
580
581 class file_data {
582
583 /** Allocate changed array for the results of comparison. */
584 void clear() {
585 /* Allocate a flag for each line of each file, saying whether that line
586 is an insertion or deletion.
587 Allocate an extra element, always zero, at each end of each vector.
588 */
589 changed_flag = new boolean[buffered_lines + 2];
590 }
591
592 /** Return equiv_count[I] as the number of lines in this file
593 that fall in equivalence class I.
594 @return the array of equivalence class counts.
595 */
596 int[] equivCount() {
597 int[] equiv_count = new int[equiv_max];
598 for (int i = 0; i < buffered_lines; ++i) {
599 ++equiv_count[equivs[i]];
600 }
601 return equiv_count;
602 }
603
604 /** Discard lines that have no matches in another file.
605
606 A line which is discarded will not be considered by the actual
607 comparison algorithm; it will be as if that line were not in the file.
608 The file's `realindexes' table maps virtual line numbers
609 (which don't count the discarded lines) into real line numbers;
610 this is how the actual comparison algorithm produces results
611 that are comprehensible when the discarded lines are counted.
612<p>
613 When we discard a line, we also mark it as a deletion or insertion
614 so that it will be printed in the output.
615 @param f the other file
616 */
617 void discard_confusing_lines(file_data f) {
618 clear();
619 /* Set up table of which lines are going to be discarded. */
620 final byte[] discarded = discardable(f.equivCount());
621
622 /* Don't really discard the provisional lines except when they occur
623 in a run of discardables, with nonprovisionals at the beginning
624 and end. */
625 filterDiscards(discarded);
626
627 /* Actually discard the lines. */
628 discard(discarded);
629 }
630
631 /**
632 * Mark to be discarded each line that matches no line of another file.
633 * If a line matches many lines, mark it as provisionally discardable.
634 * @see #equivCount()
635 * @param counts The count of each equivalence number for the other file.
636 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
637 * for each line
638 */
639 private byte[] discardable(final int[] counts) {
640 final int end = buffered_lines;
641 final byte[] discards = new byte[end];
642 final int[] equivs = this.equivs;
643 int many = 5;
644 int tem = end / 64;
645
646 /* Multiply MANY by approximate square root of number of lines.
647 That is the threshold for provisionally discardable lines. */
648 while ((tem = tem >> 2) > 0) {
649 many *= 2;
650 }
651
652 for (int i = 0; i < end; i++)
653 {
654 int nmatch;
655 if (equivs[i] == 0) {
656 continue;
657 }
658 nmatch = counts[equivs[i]];
659 if (nmatch == 0) {
660 discards[i] = 1;
661 } else if (nmatch > many) {
662 discards[i] = 2;
663 }
664 }
665 return discards;
666 }
667
668 /**
669 * Don't really discard the provisional lines except when they occur
670 * in a run of discardables, with nonprovisionals at the beginning
671 * and end.
672 */
673 private void filterDiscards(final byte[] discards) {
674 final int end = buffered_lines;
675
676 for (int i = 0; i < end; i++)
677 {
678 /* Cancel provisional discards not in middle of run of discards. */
679 if (discards[i] == 2) {
680 discards[i] = 0;
681 } else if (discards[i] != 0)
682 {
683 /* We have found a nonprovisional discard. */
684 int j;
685 int length;
686 int provisional = 0;
687
688 /* Find end of this run of discardable lines.
689 Count how many are provisionally discardable. */
690 for (j = i; j < end; j++)
691 {
692 if (discards[j] == 0) {
693 break;
694 }
695 if (discards[j] == 2) {
696 ++provisional;
697 }
698 }
699
700 /* Cancel provisional discards at end, and shrink the run. */
701 while (j > i && discards[j - 1] == 2) {
702 discards[--j] = 0; --provisional;
703 }
704
705 /* Now we have the length of a run of discardable lines
706 whose first and last are not provisional. */
707 length = j - i;
708
709 /* If 1/4 of the lines in the run are provisional,
710 cancel discarding of all provisional lines in the run. */
711 if (provisional * 4 > length)
712 {
713 while (j > i)
714 if (discards[--j] == 2) {
715 discards[j] = 0;
716 }
717 }
718 else
719 {
720 int consec;
721 int minimum = 1;
722 int tem = length / 4;
723
724 /* MINIMUM is approximate square root of LENGTH/4.
725 A subrun of two or more provisionals can stand
726 when LENGTH is at least 16.
727 A subrun of 4 or more can stand when LENGTH >= 64. */
728 while ((tem = tem >> 2) > 0) {
729 minimum *= 2;
730 }
731 minimum++;
732
733 /* Cancel any subrun of MINIMUM or more provisionals
734 within the larger run. */
735 for (j = 0, consec = 0; j < length; j++)
736 if (discards[i + j] != 2) {
737 consec = 0;
738 } else if (minimum == ++consec) {
739 /* Back up to start of subrun, to cancel it all. */
740 j -= consec;
741 } else if (minimum < consec) {
742 discards[i + j] = 0;
743 }
744
745 /* Scan from beginning of run
746 until we find 3 or more nonprovisionals in a row
747 or until the first nonprovisional at least 8 lines in.
748 Until that point, cancel any provisionals. */
749 for (j = 0, consec = 0; j < length; j++)
750 {
751 if (j >= 8 && discards[i + j] == 1) {
752 break;
753 }
754 if (discards[i + j] == 2) {
755 consec = 0; discards[i + j] = 0;
756 }
757 else if (discards[i + j] == 0) {
758 consec = 0;
759 } else {
760 consec++;
761 }
762 if (consec == 3) {
763 break;
764 }
765 }
766
767 /* I advances to the last line of the run. */
768 i += length - 1;
769
770 /* Same thing, from end. */
771 for (j = 0, consec = 0; j < length; j++)
772 {
773 if (j >= 8 && discards[i - j] == 1) {
774 break;
775 }
776 if (discards[i - j] == 2) {
777 consec = 0; discards[i - j] = 0;
778 }
779 else if (discards[i - j] == 0) {
780 consec = 0;
781 } else {
782 consec++;
783 }
784 if (consec == 3) {
785 break;
786 }
787 }
788 }
789 }
790 }
791 }
792
793 /** Actually discard the lines.
794 @param discards flags lines to be discarded
795 */
796 private void discard(final byte[] discards) {
797 final int end = buffered_lines;
798 int j = 0;
799 for (int i = 0; i < end; ++i)
800 if (no_discards || discards[i] == 0)
801 {
802 undiscarded[j] = equivs[i];
803 realindexes[j++] = i;
804 } else {
805 changed_flag[1+i] = true;
806 }
807 nondiscarded_lines = j;
808 }
809
810 file_data(Object[] data,Hashtable<Object, Integer> h) {
811 buffered_lines = data.length;
812
813 equivs = new int[buffered_lines];
814 undiscarded = new int[buffered_lines];
815 realindexes = new int[buffered_lines];
816
817 for (int i = 0; i < data.length; ++i) {
818 Integer ir = (Integer)h.get(data[i]);
819 if (ir == null) {
820 h.put(data[i],new Integer(equivs[i] = equiv_max++));
821 } else {
822 equivs[i] = ir.intValue();
823 }
824 }
825 }
826
827 /** Adjust inserts/deletes of blank lines to join changes
828 as much as possible.
829
830 We do something when a run of changed lines include a blank
831 line at one end and have an excluded blank line at the other.
832 We are free to choose which blank line is included.
833 `compareseq' always chooses the one at the beginning,
834 but usually it is cleaner to consider the following blank line
835 to be the "change". The only exception is if the preceding blank line
836 would join this change to other changes.
837 @param f the file being compared against
838 */
839
840 void shift_boundaries(file_data f) {
841 final boolean[] changed = changed_flag;
842 final boolean[] other_changed = f.changed_flag;
843 int i = 0;
844 int j = 0;
845 int i_end = buffered_lines;
846 int preceding = -1;
847 int other_preceding = -1;
848
849 for (;;)
850 {
851 int start, end, other_start;
852
853 /* Scan forwards to find beginning of another run of changes.
854 Also keep track of the corresponding point in the other file. */
855
856 while (i < i_end && !changed[1+i])
857 {
858 while (other_changed[1+j++]) {
859 /* Non-corresponding lines in the other file
860 will count as the preceding batch of changes. */
861 other_preceding = j;
862 }
863 i++;
864 }
865
866 if (i == i_end) {
867 break;
868 }
869
870 start = i;
871 other_start = j;
872
873 for (;;)
874 {
875 /* Now find the end of this run of changes. */
876
877 while (i < i_end && changed[1+i]) {
878 i++;
879 }
880 end = i;
881
882 /* If the first changed line matches the following unchanged one,
883 and this run does not follow right after a previous run,
884 and there are no lines deleted from the other file here,
885 then classify the first changed line as unchanged
886 and the following line as changed in its place. */
887
888 /* You might ask, how could this run follow right after another?
889 Only because the previous run was shifted here. */
890
891 if (end != i_end
892 && equivs[start] == equivs[end]
893 && !other_changed[1+j]
894 && end != i_end
895 && !((preceding >= 0 && start == preceding)
896 || (other_preceding >= 0
897 && other_start == other_preceding)))
898 {
899 changed[1+end++] = true;
900 changed[1+start++] = false;
901 ++i;
902 /* Since one line-that-matches is now before this run
903 instead of after, we must advance in the other file
904 to keep in synch. */
905 ++j;
906 } else {
907 break;
908 }
909 }
910
911 preceding = i;
912 other_preceding = j;
913 }
914 }
915
916 /** Number of elements (lines) in this file. */
917 final int buffered_lines;
918
919 /** Vector, indexed by line number, containing an equivalence code for
920 each line. It is this vector that is actually compared with that
921 of another file to generate differences. */
922 private final int[] equivs;
923
924 /** Vector, like the previous one except that
925 the elements for discarded lines have been squeezed out. */
926 final int[] undiscarded;
927
928 /** Vector mapping virtual line numbers (not counting discarded lines)
929 to real ones (counting those lines). Both are origin-0. */
930 final int[] realindexes;
931
932 /** Total number of nondiscarded lines. */
933 int nondiscarded_lines;
934
935 /** Array, indexed by real origin-1 line number,
936 containing true for a line that is an insertion or a deletion.
937 The results of comparison are stored here. */
938 boolean[] changed_flag;
939
940 }
941}
Note: See TracBrowser for help on using the repository browser.