source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 5863

Last change on this file since 5863 was 5863, checked in by stoecker, 11 years ago

javadoc fixes

File size: 35.1 KB
Line 
1package org.openstreetmap.josm.tools;
2
3//// Taken from http://www.bmsi.com/java/#diff
4
5// http://www.bmsi.com/java/DiffPrint.java could also be useful
6
7/*
8 * $Log: Diff.java,v $
9 * Revision 1.7 2009/01/19 03:05:26 stuart
10 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
11 *
12 * Revision 1.6 2003/03/06 22:51:32 stuart
13 * Convert to CVS
14 *
15 * Revision 1.5 2002/07/19 19:14:40 stuart
16 * fix reverseScript, make change ctor public, update docs
17 *
18 * Revision 1.4 2002/04/09 17:53:39 stuart
19 * More flexible interface for diff() function.
20 *
21 * Revision 1.3 2000/03/03 21:58:03 stuart
22 * move discard_confusing_lines and shift_boundaries to class file_data
23 *
24 * Revision 1.2 2000/03/02 16:37:38 stuart
25 * Add GPL and copyright
26 *
27 */
28
29import java.util.Hashtable;
30
31/** A class to compare vectors of objects. The result of comparison
32 is a list of <code>change</code> objects which form an
33 edit script. The objects compared are traditionally lines
34 of text from two files. Comparison options such as "ignore
35 whitespace" are implemented by modifying the <code>equals</code>
36 and <code>hashcode</code> methods for the objects compared.
37<p>
38 The basic algorithm is described in: </br>
39 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
40 Algorithmica Vol. 1 No. 2, 1986, p 251.
41<p>
42 This class outputs different results from GNU diff 1.15 on some
43 inputs. Our results are actually better (smaller change list, smaller
44 total size of changes), but it would be nice to know why. Perhaps
45 there is a memory overwrite bug in GNU diff 1.15.
46
47 @author Stuart D. Gathman, translated from GNU diff 1.15
48 Copyright (C) 2000 Business Management Systems, Inc.
49<p>
50 This program is free software; you can redistribute it and/or modify
51 it under the terms of the GNU General Public License as published by
52 the Free Software Foundation; either version 1, or (at your option)
53 any later version.
54<p>
55 This program is distributed in the hope that it will be useful,
56 but WITHOUT ANY WARRANTY; without even the implied warranty of
57 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
58 GNU General Public License for more details.
59<p>
60 You should have received a copy of the <a href=COPYING.txt>
61 GNU General Public License</a>
62 along with this program; if not, write to the Free Software
63 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
64
65 */
66
67public class Diff {
68
69 /** Prepare to find differences between two arrays. Each element of
70 the arrays is translated to an "equivalence number" based on
71 the result of <code>equals</code>. The original Object arrays
72 are no longer needed for computing the differences. They will
73 be needed again later to print the results of the comparison as
74 an edit script, if desired.
75 */
76 public Diff(Object[] a,Object[] b) {
77 Hashtable<Object, Integer> h = new Hashtable<Object, Integer>(a.length + b.length);
78 filevec[0] = new file_data(a,h);
79 filevec[1] = new file_data(b,h);
80 }
81
82 /** 1 more than the maximum equivalence value used for this or its
83 sibling file. */
84 private int equiv_max = 1;
85
86 /** When set to true, the comparison uses a heuristic to speed it up.
87 With this heuristic, for files with a constant small density
88 of changes, the algorithm is linear in the file size. */
89 public boolean heuristic = false;
90
91 /** When set to true, the algorithm returns a guarranteed minimal
92 set of changes. This makes things slower, sometimes much slower. */
93 public boolean no_discards = false;
94
95 private int[] xvec, yvec; /* Vectors being compared. */
96 private int[] fdiag; /* Vector, indexed by diagonal, containing
97 the X coordinate of the point furthest
98 along the given diagonal in the forward
99 search of the edit matrix. */
100 private int[] bdiag; /* Vector, indexed by diagonal, containing
101 the X coordinate of the point furthest
102 along the given diagonal in the backward
103 search of the edit matrix. */
104 private int fdiagoff, bdiagoff;
105 private final file_data[] filevec = new file_data[2];
106 private int cost;
107 /** Snakes bigger than this are considered "big". */
108 private static final int SNAKE_LIMIT = 20;
109
110 /** Find the midpoint of the shortest edit script for a specified
111 portion of the two files.
112
113 We scan from the beginnings of the files, and simultaneously from the ends,
114 doing a breadth-first search through the space of edit-sequence.
115 When the two searches meet, we have found the midpoint of the shortest
116 edit sequence.
117
118 The value returned is the number of the diagonal on which the midpoint lies.
119 The diagonal number equals the number of inserted lines minus the number
120 of deleted lines (counting only lines before the midpoint).
121 The edit cost is stored into COST; this is the total number of
122 lines inserted or deleted (counting only lines before the midpoint).
123
124 This function assumes that the first lines of the specified portions
125 of the two files do not match, and likewise that the last lines do not
126 match. The caller must trim matching lines from the beginning and end
127 of the portions it is going to specify.
128
129 Note that if we return the "wrong" diagonal value, or if
130 the value of bdiag at that diagonal is "wrong",
131 the worst this can do is cause suboptimal diff output.
132 It cannot cause incorrect diff output. */
133
134 private int diag (int xoff, int xlim, int yoff, int ylim) {
135 final int[] fd = fdiag; // Give the compiler a chance.
136 final int[] bd = bdiag; // Additional help for the compiler.
137 final int[] xv = xvec; // Still more help for the compiler.
138 final int[] yv = yvec; // And more and more . . .
139 final int dmin = xoff - ylim; // Minimum valid diagonal.
140 final int dmax = xlim - yoff; // Maximum valid diagonal.
141 final int fmid = xoff - yoff; // Center diagonal of top-down search.
142 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
143 int fmin = fmid, fmax = fmid; // Limits of top-down search.
144 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
145 /* True if southeast corner is on an odd
146 diagonal with respect to the northwest. */
147 final boolean odd = (fmid - bmid & 1) != 0;
148
149 fd[fdiagoff + fmid] = xoff;
150 bd[bdiagoff + bmid] = xlim;
151
152 for (int c = 1;; ++c)
153 {
154 int d; /* Active diagonal. */
155 boolean big_snake = false;
156
157 /* Extend the top-down search by an edit step in each diagonal. */
158 if (fmin > dmin) {
159 fd[fdiagoff + --fmin - 1] = -1;
160 } else {
161 ++fmin;
162 }
163 if (fmax < dmax) {
164 fd[fdiagoff + ++fmax + 1] = -1;
165 } else {
166 --fmax;
167 }
168 for (d = fmax; d >= fmin; d -= 2)
169 {
170 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
171
172 if (tlo >= thi) {
173 x = tlo + 1;
174 } else {
175 x = thi;
176 }
177 oldx = x;
178 y = x - d;
179 while (x < xlim && y < ylim && xv[x] == yv[y]) {
180 ++x; ++y;
181 }
182 if (x - oldx > SNAKE_LIMIT) {
183 big_snake = true;
184 }
185 fd[fdiagoff + d] = x;
186 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
187 {
188 cost = 2 * c - 1;
189 return d;
190 }
191 }
192
193 /* Similar extend the bottom-up search. */
194 if (bmin > dmin) {
195 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
196 } else {
197 ++bmin;
198 }
199 if (bmax < dmax) {
200 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
201 } else {
202 --bmax;
203 }
204 for (d = bmax; d >= bmin; d -= 2)
205 {
206 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
207
208 if (tlo < thi) {
209 x = tlo;
210 } else {
211 x = thi - 1;
212 }
213 oldx = x;
214 y = x - d;
215 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
216 --x; --y;
217 }
218 if (oldx - x > SNAKE_LIMIT) {
219 big_snake = true;
220 }
221 bd[bdiagoff + d] = x;
222 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
223 {
224 cost = 2 * c;
225 return d;
226 }
227 }
228
229 /* Heuristic: check occasionally for a diagonal that has made
230 lots of progress compared with the edit distance.
231 If we have any such, find the one that has made the most
232 progress and return it as if it had succeeded.
233
234 With this heuristic, for files with a constant small density
235 of changes, the algorithm is linear in the file size. */
236
237 if (c > 200 && big_snake && heuristic)
238 {
239 int best = 0;
240 int bestpos = -1;
241
242 for (d = fmax; d >= fmin; d -= 2)
243 {
244 int dd = d - fmid;
245 int x = fd[fdiagoff + d];
246 int y = x - d;
247 int v = (x - xoff) * 2 - dd;
248 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
249 {
250 if (v > best
251 && xoff + SNAKE_LIMIT <= x && x < xlim
252 && yoff + SNAKE_LIMIT <= y && y < ylim)
253 {
254 /* We have a good enough best diagonal;
255 now insist that it end with a significant snake. */
256 int k;
257
258 for (k = 1; xvec[x - k] == yvec[y - k]; k++)
259 if (k == SNAKE_LIMIT)
260 {
261 best = v;
262 bestpos = d;
263 break;
264 }
265 }
266 }
267 }
268 if (best > 0)
269 {
270 cost = 2 * c - 1;
271 return bestpos;
272 }
273
274 best = 0;
275 for (d = bmax; d >= bmin; d -= 2)
276 {
277 int dd = d - bmid;
278 int x = bd[bdiagoff + d];
279 int y = x - d;
280 int v = (xlim - x) * 2 + dd;
281 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
282 {
283 if (v > best
284 && xoff < x && x <= xlim - SNAKE_LIMIT
285 && yoff < y && y <= ylim - SNAKE_LIMIT)
286 {
287 /* We have a good enough best diagonal;
288 now insist that it end with a significant snake. */
289 int k;
290
291 for (k = 0; xvec[x + k] == yvec[y + k]; k++)
292 if (k == SNAKE_LIMIT)
293 {
294 best = v;
295 bestpos = d;
296 break;
297 }
298 }
299 }
300 }
301 if (best > 0)
302 {
303 cost = 2 * c - 1;
304 return bestpos;
305 }
306 }
307 }
308 }
309
310 /** Compare in detail contiguous subsequences of the two files
311 which are known, as a whole, to match each other.
312
313 The results are recorded in the vectors filevec[N].changed_flag, by
314 storing a 1 in the element for each line that is an insertion or deletion.
315
316 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
317
318 Note that XLIM, YLIM are exclusive bounds.
319 All line numbers are origin-0 and discarded lines are not counted. */
320
321 private void compareseq (int xoff, int xlim, int yoff, int ylim) {
322 /* Slide down the bottom initial diagonal. */
323 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
324 ++xoff; ++yoff;
325 }
326 /* Slide up the top initial diagonal. */
327 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
328 --xlim; --ylim;
329 }
330
331 /* Handle simple cases. */
332 if (xoff == xlim) {
333 while (yoff < ylim) {
334 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
335 }
336 } else if (yoff == ylim) {
337 while (xoff < xlim) {
338 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
339 }
340 } else
341 {
342 /* Find a point of correspondence in the middle of the files. */
343
344 int d = diag (xoff, xlim, yoff, ylim);
345 int c = cost;
346 //int f = fdiag[fdiagoff + d];
347 int b = bdiag[bdiagoff + d];
348
349 if (c == 1)
350 /* This should be impossible, because it implies that
351 one of the two subsequences is empty,
352 and that case was handled above without calling `diag'.
353 Let's verify that this is true. */
354 throw new IllegalArgumentException("Empty subsequence");
355 else
356 {
357 /* Use that point to split this problem into two subproblems. */
358 compareseq (xoff, b, yoff, b - d);
359 /* This used to use f instead of b,
360 but that is incorrect!
361 It is not necessarily the case that diagonal d
362 has a snake from b to f. */
363 compareseq (b, xlim, b - d, ylim);
364 }
365 }
366 }
367
368 /** Discard lines from one file that have no matches in the other file.
369 */
370
371 private void discard_confusing_lines() {
372 filevec[0].discard_confusing_lines(filevec[1]);
373 filevec[1].discard_confusing_lines(filevec[0]);
374 }
375
376 private boolean inhibit = false;
377
378 /** Adjust inserts/deletes of blank lines to join changes
379 as much as possible.
380 */
381
382 private void shift_boundaries() {
383 if (inhibit)
384 return;
385 filevec[0].shift_boundaries(filevec[1]);
386 filevec[1].shift_boundaries(filevec[0]);
387 }
388
389 public interface ScriptBuilder {
390 /** Scan the tables of which lines are inserted and deleted,
391 producing an edit script.
392 @param changed0 true for lines in first file which do not match 2nd
393 @param len0 number of lines in first file
394 @param changed1 true for lines in 2nd file which do not match 1st
395 @param len1 number of lines in 2nd file
396 @return a linked list of changes - or null
397 */
398 public change build_script(
399 boolean[] changed0,int len0,
400 boolean[] changed1,int len1
401 );
402 }
403
404 /** Scan the tables of which lines are inserted and deleted,
405 producing an edit script in reverse order. */
406
407 static class ReverseScript implements ScriptBuilder {
408 public change build_script(
409 final boolean[] changed0,int len0,
410 final boolean[] changed1,int len1)
411 {
412 change script = null;
413 int i0 = 0, i1 = 0;
414 while (i0 < len0 || i1 < len1) {
415 if (changed0[1+i0] || changed1[1+i1]) {
416 int line0 = i0, line1 = i1;
417
418 /* Find # lines changed here in each file. */
419 while (changed0[1+i0]) {
420 ++i0;
421 }
422 while (changed1[1+i1]) {
423 ++i1;
424 }
425
426 /* Record this change. */
427 script = new change(line0, line1, i0 - line0, i1 - line1, script);
428 }
429
430 /* We have reached lines in the two files that match each other. */
431 i0++; i1++;
432 }
433
434 return script;
435 }
436 }
437
438 static class ForwardScript implements ScriptBuilder {
439 /** Scan the tables of which lines are inserted and deleted,
440 producing an edit script in forward order. */
441 public change build_script(
442 final boolean[] changed0,int len0,
443 final boolean[] changed1,int len1)
444 {
445 change script = null;
446 int i0 = len0, i1 = len1;
447
448 while (i0 >= 0 || i1 >= 0)
449 {
450 if (changed0[i0] || changed1[i1])
451 {
452 int line0 = i0, line1 = i1;
453
454 /* Find # lines changed here in each file. */
455 while (changed0[i0]) {
456 --i0;
457 }
458 while (changed1[i1]) {
459 --i1;
460 }
461
462 /* Record this change. */
463 script = new change(i0, i1, line0 - i0, line1 - i1, script);
464 }
465
466 /* We have reached lines in the two files that match each other. */
467 i0--; i1--;
468 }
469
470 return script;
471 }
472 }
473
474 /** Standard ScriptBuilders. */
475 public final static ScriptBuilder
476 forwardScript = new ForwardScript(),
477 reverseScript = new ReverseScript();
478
479 /* Report the differences of two files. DEPTH is the current directory
480 depth. */
481 public final change diff_2(final boolean reverse) {
482 return diff(reverse ? reverseScript : forwardScript);
483 }
484
485 /** Get the results of comparison as an edit script. The script
486 is described by a list of changes. The standard ScriptBuilder
487 implementations provide for forward and reverse edit scripts.
488 Alternate implementations could, for instance, list common elements
489 instead of differences.
490 @param bld an object to build the script from change flags
491 @return the head of a list of changes
492 */
493 public change diff(final ScriptBuilder bld) {
494
495 /* Some lines are obviously insertions or deletions
496 because they don't match anything. Detect them now,
497 and avoid even thinking about them in the main comparison algorithm. */
498
499 discard_confusing_lines ();
500
501 /* Now do the main comparison algorithm, considering just the
502 undiscarded lines. */
503
504 xvec = filevec[0].undiscarded;
505 yvec = filevec[1].undiscarded;
506
507 int diags =
508 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
509 fdiag = new int[diags];
510 fdiagoff = filevec[1].nondiscarded_lines + 1;
511 bdiag = new int[diags];
512 bdiagoff = filevec[1].nondiscarded_lines + 1;
513
514 compareseq (0, filevec[0].nondiscarded_lines,
515 0, filevec[1].nondiscarded_lines);
516 fdiag = null;
517 bdiag = null;
518
519 /* Modify the results slightly to make them prettier
520 in cases where that can validly be done. */
521
522 shift_boundaries ();
523
524 /* Get the results of comparison in the form of a chain
525 of `struct change's -- an edit script. */
526 return bld.build_script(
527 filevec[0].changed_flag,
528 filevec[0].buffered_lines,
529 filevec[1].changed_flag,
530 filevec[1].buffered_lines
531 );
532
533 }
534
535 /** The result of comparison is an "edit script": a chain of change objects.
536 Each change represents one place where some lines are deleted
537 and some are inserted.
538
539 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
540 DELETED is the number of lines deleted here from file 0.
541 INSERTED is the number of lines inserted here in file 1.
542
543 If DELETED is 0 then LINE0 is the number of the line before
544 which the insertion was done; vice versa for INSERTED and LINE1. */
545
546 public static class change {
547 /** Previous or next edit command. */
548 public change link;
549 /** # lines of file 1 changed here. */
550 public final int inserted;
551 /** # lines of file 0 changed here. */
552 public final int deleted;
553 /** Line number of 1st deleted line. */
554 public final int line0;
555 /** Line number of 1st inserted line. */
556 public final int line1;
557
558 /** Cons an additional entry onto the front of an edit script OLD.
559 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
560 DELETED is the number of lines deleted here from file 0.
561 INSERTED is the number of lines inserted here in file 1.
562
563 If DELETED is 0 then LINE0 is the number of the line before
564 which the insertion was done; vice versa for INSERTED and LINE1. */
565 public change(int line0, int line1, int deleted, int inserted, change old) {
566 this.line0 = line0;
567 this.line1 = line1;
568 this.inserted = inserted;
569 this.deleted = deleted;
570 this.link = old;
571 //System.err.println(line0+","+line1+","+inserted+","+deleted);
572 }
573 }
574
575 /** Data on one input file being compared.
576 */
577
578 class file_data {
579
580 /** Allocate changed array for the results of comparison. */
581 void clear() {
582 /* Allocate a flag for each line of each file, saying whether that line
583 is an insertion or deletion.
584 Allocate an extra element, always zero, at each end of each vector.
585 */
586 changed_flag = new boolean[buffered_lines + 2];
587 }
588
589 /** Return equiv_count[I] as the number of lines in this file
590 that fall in equivalence class I.
591 @return the array of equivalence class counts.
592 */
593 int[] equivCount() {
594 int[] equiv_count = new int[equiv_max];
595 for (int i = 0; i < buffered_lines; ++i) {
596 ++equiv_count[equivs[i]];
597 }
598 return equiv_count;
599 }
600
601 /** Discard lines that have no matches in another file.
602
603 A line which is discarded will not be considered by the actual
604 comparison algorithm; it will be as if that line were not in the file.
605 The file's `realindexes' table maps virtual line numbers
606 (which don't count the discarded lines) into real line numbers;
607 this is how the actual comparison algorithm produces results
608 that are comprehensible when the discarded lines are counted.
609<p>
610 When we discard a line, we also mark it as a deletion or insertion
611 so that it will be printed in the output.
612 @param f the other file
613 */
614 void discard_confusing_lines(file_data f) {
615 clear();
616 /* Set up table of which lines are going to be discarded. */
617 final byte[] discarded = discardable(f.equivCount());
618
619 /* Don't really discard the provisional lines except when they occur
620 in a run of discardables, with nonprovisionals at the beginning
621 and end. */
622 filterDiscards(discarded);
623
624 /* Actually discard the lines. */
625 discard(discarded);
626 }
627
628 /**
629 * Mark to be discarded each line that matches no line of another file.
630 * If a line matches many lines, mark it as provisionally discardable.
631 * @see #equivCount()
632 * @param counts The count of each equivalence number for the other file.
633 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
634 * for each line
635 */
636 private byte[] discardable(final int[] counts) {
637 final int end = buffered_lines;
638 final byte[] discards = new byte[end];
639 final int[] equivs = this.equivs;
640 int many = 5;
641 int tem = end / 64;
642
643 /* Multiply MANY by approximate square root of number of lines.
644 That is the threshold for provisionally discardable lines. */
645 while ((tem = tem >> 2) > 0) {
646 many *= 2;
647 }
648
649 for (int i = 0; i < end; i++)
650 {
651 int nmatch;
652 if (equivs[i] == 0) {
653 continue;
654 }
655 nmatch = counts[equivs[i]];
656 if (nmatch == 0) {
657 discards[i] = 1;
658 } else if (nmatch > many) {
659 discards[i] = 2;
660 }
661 }
662 return discards;
663 }
664
665 /**
666 * Don't really discard the provisional lines except when they occur
667 * in a run of discardables, with nonprovisionals at the beginning
668 * and end.
669 */
670 private void filterDiscards(final byte[] discards) {
671 final int end = buffered_lines;
672
673 for (int i = 0; i < end; i++)
674 {
675 /* Cancel provisional discards not in middle of run of discards. */
676 if (discards[i] == 2) {
677 discards[i] = 0;
678 } else if (discards[i] != 0)
679 {
680 /* We have found a nonprovisional discard. */
681 int j;
682 int length;
683 int provisional = 0;
684
685 /* Find end of this run of discardable lines.
686 Count how many are provisionally discardable. */
687 for (j = i; j < end; j++)
688 {
689 if (discards[j] == 0) {
690 break;
691 }
692 if (discards[j] == 2) {
693 ++provisional;
694 }
695 }
696
697 /* Cancel provisional discards at end, and shrink the run. */
698 while (j > i && discards[j - 1] == 2) {
699 discards[--j] = 0; --provisional;
700 }
701
702 /* Now we have the length of a run of discardable lines
703 whose first and last are not provisional. */
704 length = j - i;
705
706 /* If 1/4 of the lines in the run are provisional,
707 cancel discarding of all provisional lines in the run. */
708 if (provisional * 4 > length)
709 {
710 while (j > i)
711 if (discards[--j] == 2) {
712 discards[j] = 0;
713 }
714 }
715 else
716 {
717 int consec;
718 int minimum = 1;
719 int tem = length / 4;
720
721 /* MINIMUM is approximate square root of LENGTH/4.
722 A subrun of two or more provisionals can stand
723 when LENGTH is at least 16.
724 A subrun of 4 or more can stand when LENGTH >= 64. */
725 while ((tem = tem >> 2) > 0) {
726 minimum *= 2;
727 }
728 minimum++;
729
730 /* Cancel any subrun of MINIMUM or more provisionals
731 within the larger run. */
732 for (j = 0, consec = 0; j < length; j++)
733 if (discards[i + j] != 2) {
734 consec = 0;
735 } else if (minimum == ++consec) {
736 /* Back up to start of subrun, to cancel it all. */
737 j -= consec;
738 } else if (minimum < consec) {
739 discards[i + j] = 0;
740 }
741
742 /* Scan from beginning of run
743 until we find 3 or more nonprovisionals in a row
744 or until the first nonprovisional at least 8 lines in.
745 Until that point, cancel any provisionals. */
746 for (j = 0, consec = 0; j < length; j++)
747 {
748 if (j >= 8 && discards[i + j] == 1) {
749 break;
750 }
751 if (discards[i + j] == 2) {
752 consec = 0; discards[i + j] = 0;
753 }
754 else if (discards[i + j] == 0) {
755 consec = 0;
756 } else {
757 consec++;
758 }
759 if (consec == 3) {
760 break;
761 }
762 }
763
764 /* I advances to the last line of the run. */
765 i += length - 1;
766
767 /* Same thing, from end. */
768 for (j = 0, consec = 0; j < length; j++)
769 {
770 if (j >= 8 && discards[i - j] == 1) {
771 break;
772 }
773 if (discards[i - j] == 2) {
774 consec = 0; discards[i - j] = 0;
775 }
776 else if (discards[i - j] == 0) {
777 consec = 0;
778 } else {
779 consec++;
780 }
781 if (consec == 3) {
782 break;
783 }
784 }
785 }
786 }
787 }
788 }
789
790 /** Actually discard the lines.
791 @param discards flags lines to be discarded
792 */
793 private void discard(final byte[] discards) {
794 final int end = buffered_lines;
795 int j = 0;
796 for (int i = 0; i < end; ++i)
797 if (no_discards || discards[i] == 0)
798 {
799 undiscarded[j] = equivs[i];
800 realindexes[j++] = i;
801 } else {
802 changed_flag[1+i] = true;
803 }
804 nondiscarded_lines = j;
805 }
806
807 file_data(Object[] data,Hashtable<Object, Integer> h) {
808 buffered_lines = data.length;
809
810 equivs = new int[buffered_lines];
811 undiscarded = new int[buffered_lines];
812 realindexes = new int[buffered_lines];
813
814 for (int i = 0; i < data.length; ++i) {
815 Integer ir = (Integer)h.get(data[i]);
816 if (ir == null) {
817 h.put(data[i],new Integer(equivs[i] = equiv_max++));
818 } else {
819 equivs[i] = ir.intValue();
820 }
821 }
822 }
823
824 /** Adjust inserts/deletes of blank lines to join changes
825 as much as possible.
826
827 We do something when a run of changed lines include a blank
828 line at one end and have an excluded blank line at the other.
829 We are free to choose which blank line is included.
830 `compareseq' always chooses the one at the beginning,
831 but usually it is cleaner to consider the following blank line
832 to be the "change". The only exception is if the preceding blank line
833 would join this change to other changes.
834 @param f the file being compared against
835 */
836
837 void shift_boundaries(file_data f) {
838 final boolean[] changed = changed_flag;
839 final boolean[] other_changed = f.changed_flag;
840 int i = 0;
841 int j = 0;
842 int i_end = buffered_lines;
843 int preceding = -1;
844 int other_preceding = -1;
845
846 for (;;)
847 {
848 int start, end, other_start;
849
850 /* Scan forwards to find beginning of another run of changes.
851 Also keep track of the corresponding point in the other file. */
852
853 while (i < i_end && !changed[1+i])
854 {
855 while (other_changed[1+j++]) {
856 /* Non-corresponding lines in the other file
857 will count as the preceding batch of changes. */
858 other_preceding = j;
859 }
860 i++;
861 }
862
863 if (i == i_end) {
864 break;
865 }
866
867 start = i;
868 other_start = j;
869
870 for (;;)
871 {
872 /* Now find the end of this run of changes. */
873
874 while (i < i_end && changed[1+i]) {
875 i++;
876 }
877 end = i;
878
879 /* If the first changed line matches the following unchanged one,
880 and this run does not follow right after a previous run,
881 and there are no lines deleted from the other file here,
882 then classify the first changed line as unchanged
883 and the following line as changed in its place. */
884
885 /* You might ask, how could this run follow right after another?
886 Only because the previous run was shifted here. */
887
888 if (end != i_end
889 && equivs[start] == equivs[end]
890 && !other_changed[1+j]
891 && end != i_end
892 && !((preceding >= 0 && start == preceding)
893 || (other_preceding >= 0
894 && other_start == other_preceding)))
895 {
896 changed[1+end++] = true;
897 changed[1+start++] = false;
898 ++i;
899 /* Since one line-that-matches is now before this run
900 instead of after, we must advance in the other file
901 to keep in synch. */
902 ++j;
903 } else {
904 break;
905 }
906 }
907
908 preceding = i;
909 other_preceding = j;
910 }
911 }
912
913 /** Number of elements (lines) in this file. */
914 final int buffered_lines;
915
916 /** Vector, indexed by line number, containing an equivalence code for
917 each line. It is this vector that is actually compared with that
918 of another file to generate differences. */
919 private final int[] equivs;
920
921 /** Vector, like the previous one except that
922 the elements for discarded lines have been squeezed out. */
923 final int[] undiscarded;
924
925 /** Vector mapping virtual line numbers (not counting discarded lines)
926 to real ones (counting those lines). Both are origin-0. */
927 final int[] realindexes;
928
929 /** Total number of nondiscarded lines. */
930 int nondiscarded_lines;
931
932 /** Array, indexed by real origin-1 line number,
933 containing true for a line that is an insertion or a deletion.
934 The results of comparison are stored here. */
935 boolean[] changed_flag;
936
937 }
938}
Note: See TracBrowser for help on using the repository browser.