source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 5508

Last change on this file since 5508 was 4534, checked in by Don-vip, 13 years ago

Fixed a bunch of compile warnings

File size: 35.0 KB
Line 
1package org.openstreetmap.josm.tools;
2
3//// Taken from http://www.bmsi.com/java/#diff
4
5// http://www.bmsi.com/java/DiffPrint.java could also be useful
6
7/*
8 * $Log: Diff.java,v $
9 * Revision 1.7 2009/01/19 03:05:26 stuart
10 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
11 *
12 * Revision 1.6 2003/03/06 22:51:32 stuart
13 * Convert to CVS
14 *
15 * Revision 1.5 2002/07/19 19:14:40 stuart
16 * fix reverseScript, make change ctor public, update docs
17 *
18 * Revision 1.4 2002/04/09 17:53:39 stuart
19 * More flexible interface for diff() function.
20 *
21 * Revision 1.3 2000/03/03 21:58:03 stuart
22 * move discard_confusing_lines and shift_boundaries to class file_data
23 *
24 * Revision 1.2 2000/03/02 16:37:38 stuart
25 * Add GPL and copyright
26 *
27 */
28
29import java.util.Hashtable;
30
31/** A class to compare vectors of objects. The result of comparison
32 is a list of <code>change</code> objects which form an
33 edit script. The objects compared are traditionally lines
34 of text from two files. Comparison options such as "ignore
35 whitespace" are implemented by modifying the <code>equals</code>
36 and <code>hashcode</code> methods for the objects compared.
37<p>
38 The basic algorithm is described in: </br>
39 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
40 Algorithmica Vol. 1 No. 2, 1986, p 251.
41<p>
42 This class outputs different results from GNU diff 1.15 on some
43 inputs. Our results are actually better (smaller change list, smaller
44 total size of changes), but it would be nice to know why. Perhaps
45 there is a memory overwrite bug in GNU diff 1.15.
46
47 @author Stuart D. Gathman, translated from GNU diff 1.15
48 Copyright (C) 2000 Business Management Systems, Inc.
49<p>
50 This program is free software; you can redistribute it and/or modify
51 it under the terms of the GNU General Public License as published by
52 the Free Software Foundation; either version 1, or (at your option)
53 any later version.
54<p>
55 This program is distributed in the hope that it will be useful,
56 but WITHOUT ANY WARRANTY; without even the implied warranty of
57 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
58 GNU General Public License for more details.
59<p>
60 You should have received a copy of the <a href=COPYING.txt>
61 GNU General Public License</a>
62 along with this program; if not, write to the Free Software
63 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
64
65 */
66
67public class Diff {
68
69 /** Prepare to find differences between two arrays. Each element of
70 the arrays is translated to an "equivalence number" based on
71 the result of <code>equals</code>. The original Object arrays
72 are no longer needed for computing the differences. They will
73 be needed again later to print the results of the comparison as
74 an edit script, if desired.
75 */
76 public Diff(Object[] a,Object[] b) {
77 Hashtable<Object, Integer> h = new Hashtable<Object, Integer>(a.length + b.length);
78 filevec[0] = new file_data(a,h);
79 filevec[1] = new file_data(b,h);
80 }
81
82 /** 1 more than the maximum equivalence value used for this or its
83 sibling file. */
84 private int equiv_max = 1;
85
86 /** When set to true, the comparison uses a heuristic to speed it up.
87 With this heuristic, for files with a constant small density
88 of changes, the algorithm is linear in the file size. */
89 public boolean heuristic = false;
90
91 /** When set to true, the algorithm returns a guarranteed minimal
92 set of changes. This makes things slower, sometimes much slower. */
93 public boolean no_discards = false;
94
95 private int[] xvec, yvec; /* Vectors being compared. */
96 private int[] fdiag; /* Vector, indexed by diagonal, containing
97 the X coordinate of the point furthest
98 along the given diagonal in the forward
99 search of the edit matrix. */
100 private int[] bdiag; /* Vector, indexed by diagonal, containing
101 the X coordinate of the point furthest
102 along the given diagonal in the backward
103 search of the edit matrix. */
104 private int fdiagoff, bdiagoff;
105 private final file_data[] filevec = new file_data[2];
106 private int cost;
107 /** Snakes bigger than this are considered "big". */
108 private static final int SNAKE_LIMIT = 20;
109
110 /** Find the midpoint of the shortest edit script for a specified
111 portion of the two files.
112
113 We scan from the beginnings of the files, and simultaneously from the ends,
114 doing a breadth-first search through the space of edit-sequence.
115 When the two searches meet, we have found the midpoint of the shortest
116 edit sequence.
117
118 The value returned is the number of the diagonal on which the midpoint lies.
119 The diagonal number equals the number of inserted lines minus the number
120 of deleted lines (counting only lines before the midpoint).
121 The edit cost is stored into COST; this is the total number of
122 lines inserted or deleted (counting only lines before the midpoint).
123
124 This function assumes that the first lines of the specified portions
125 of the two files do not match, and likewise that the last lines do not
126 match. The caller must trim matching lines from the beginning and end
127 of the portions it is going to specify.
128
129 Note that if we return the "wrong" diagonal value, or if
130 the value of bdiag at that diagonal is "wrong",
131 the worst this can do is cause suboptimal diff output.
132 It cannot cause incorrect diff output. */
133
134 private int diag (int xoff, int xlim, int yoff, int ylim) {
135 final int[] fd = fdiag; // Give the compiler a chance.
136 final int[] bd = bdiag; // Additional help for the compiler.
137 final int[] xv = xvec; // Still more help for the compiler.
138 final int[] yv = yvec; // And more and more . . .
139 final int dmin = xoff - ylim; // Minimum valid diagonal.
140 final int dmax = xlim - yoff; // Maximum valid diagonal.
141 final int fmid = xoff - yoff; // Center diagonal of top-down search.
142 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
143 int fmin = fmid, fmax = fmid; // Limits of top-down search.
144 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
145 /* True if southeast corner is on an odd
146 diagonal with respect to the northwest. */
147 final boolean odd = (fmid - bmid & 1) != 0;
148
149 fd[fdiagoff + fmid] = xoff;
150 bd[bdiagoff + bmid] = xlim;
151
152 for (int c = 1;; ++c)
153 {
154 int d; /* Active diagonal. */
155 boolean big_snake = false;
156
157 /* Extend the top-down search by an edit step in each diagonal. */
158 if (fmin > dmin) {
159 fd[fdiagoff + --fmin - 1] = -1;
160 } else {
161 ++fmin;
162 }
163 if (fmax < dmax) {
164 fd[fdiagoff + ++fmax + 1] = -1;
165 } else {
166 --fmax;
167 }
168 for (d = fmax; d >= fmin; d -= 2)
169 {
170 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
171
172 if (tlo >= thi) {
173 x = tlo + 1;
174 } else {
175 x = thi;
176 }
177 oldx = x;
178 y = x - d;
179 while (x < xlim && y < ylim && xv[x] == yv[y]) {
180 ++x; ++y;
181 }
182 if (x - oldx > SNAKE_LIMIT) {
183 big_snake = true;
184 }
185 fd[fdiagoff + d] = x;
186 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
187 {
188 cost = 2 * c - 1;
189 return d;
190 }
191 }
192
193 /* Similar extend the bottom-up search. */
194 if (bmin > dmin) {
195 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
196 } else {
197 ++bmin;
198 }
199 if (bmax < dmax) {
200 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
201 } else {
202 --bmax;
203 }
204 for (d = bmax; d >= bmin; d -= 2)
205 {
206 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
207
208 if (tlo < thi) {
209 x = tlo;
210 } else {
211 x = thi - 1;
212 }
213 oldx = x;
214 y = x - d;
215 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
216 --x; --y;
217 }
218 if (oldx - x > SNAKE_LIMIT) {
219 big_snake = true;
220 }
221 bd[bdiagoff + d] = x;
222 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
223 {
224 cost = 2 * c;
225 return d;
226 }
227 }
228
229 /* Heuristic: check occasionally for a diagonal that has made
230 lots of progress compared with the edit distance.
231 If we have any such, find the one that has made the most
232 progress and return it as if it had succeeded.
233
234 With this heuristic, for files with a constant small density
235 of changes, the algorithm is linear in the file size. */
236
237 if (c > 200 && big_snake && heuristic)
238 {
239 int best = 0;
240 int bestpos = -1;
241
242 for (d = fmax; d >= fmin; d -= 2)
243 {
244 int dd = d - fmid;
245 int x = fd[fdiagoff + d];
246 int y = x - d;
247 int v = (x - xoff) * 2 - dd;
248 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
249 {
250 if (v > best
251 && xoff + SNAKE_LIMIT <= x && x < xlim
252 && yoff + SNAKE_LIMIT <= y && y < ylim)
253 {
254 /* We have a good enough best diagonal;
255 now insist that it end with a significant snake. */
256 int k;
257
258 for (k = 1; xvec[x - k] == yvec[y - k]; k++)
259 if (k == SNAKE_LIMIT)
260 {
261 best = v;
262 bestpos = d;
263 break;
264 }
265 }
266 }
267 }
268 if (best > 0)
269 {
270 cost = 2 * c - 1;
271 return bestpos;
272 }
273
274 best = 0;
275 for (d = bmax; d >= bmin; d -= 2)
276 {
277 int dd = d - bmid;
278 int x = bd[bdiagoff + d];
279 int y = x - d;
280 int v = (xlim - x) * 2 + dd;
281 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
282 {
283 if (v > best
284 && xoff < x && x <= xlim - SNAKE_LIMIT
285 && yoff < y && y <= ylim - SNAKE_LIMIT)
286 {
287 /* We have a good enough best diagonal;
288 now insist that it end with a significant snake. */
289 int k;
290
291 for (k = 0; xvec[x + k] == yvec[y + k]; k++)
292 if (k == SNAKE_LIMIT)
293 {
294 best = v;
295 bestpos = d;
296 break;
297 }
298 }
299 }
300 }
301 if (best > 0)
302 {
303 cost = 2 * c - 1;
304 return bestpos;
305 }
306 }
307 }
308 }
309
310 /** Compare in detail contiguous subsequences of the two files
311 which are known, as a whole, to match each other.
312
313 The results are recorded in the vectors filevec[N].changed_flag, by
314 storing a 1 in the element for each line that is an insertion or deletion.
315
316 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
317
318 Note that XLIM, YLIM are exclusive bounds.
319 All line numbers are origin-0 and discarded lines are not counted. */
320
321 private void compareseq (int xoff, int xlim, int yoff, int ylim) {
322 /* Slide down the bottom initial diagonal. */
323 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
324 ++xoff; ++yoff;
325 }
326 /* Slide up the top initial diagonal. */
327 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
328 --xlim; --ylim;
329 }
330
331 /* Handle simple cases. */
332 if (xoff == xlim) {
333 while (yoff < ylim) {
334 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
335 }
336 } else if (yoff == ylim) {
337 while (xoff < xlim) {
338 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
339 }
340 } else
341 {
342 /* Find a point of correspondence in the middle of the files. */
343
344 int d = diag (xoff, xlim, yoff, ylim);
345 int c = cost;
346 //int f = fdiag[fdiagoff + d];
347 int b = bdiag[bdiagoff + d];
348
349 if (c == 1)
350 /* This should be impossible, because it implies that
351 one of the two subsequences is empty,
352 and that case was handled above without calling `diag'.
353 Let's verify that this is true. */
354 throw new IllegalArgumentException("Empty subsequence");
355 else
356 {
357 /* Use that point to split this problem into two subproblems. */
358 compareseq (xoff, b, yoff, b - d);
359 /* This used to use f instead of b,
360 but that is incorrect!
361 It is not necessarily the case that diagonal d
362 has a snake from b to f. */
363 compareseq (b, xlim, b - d, ylim);
364 }
365 }
366 }
367
368 /** Discard lines from one file that have no matches in the other file.
369 */
370
371 private void discard_confusing_lines() {
372 filevec[0].discard_confusing_lines(filevec[1]);
373 filevec[1].discard_confusing_lines(filevec[0]);
374 }
375
376 private boolean inhibit = false;
377
378 /** Adjust inserts/deletes of blank lines to join changes
379 as much as possible.
380 */
381
382 private void shift_boundaries() {
383 if (inhibit)
384 return;
385 filevec[0].shift_boundaries(filevec[1]);
386 filevec[1].shift_boundaries(filevec[0]);
387 }
388
389 public interface ScriptBuilder {
390 /** Scan the tables of which lines are inserted and deleted,
391 producing an edit script.
392 @param changed0 true for lines in first file which do not match 2nd
393 @param len0 number of lines in first file
394 @param changed1 true for lines in 2nd file which do not match 1st
395 @param len1 number of lines in 2nd file
396 @return a linked list of changes - or null
397 */
398 public change build_script(
399 boolean[] changed0,int len0,
400 boolean[] changed1,int len1
401 );
402 }
403
404 /** Scan the tables of which lines are inserted and deleted,
405 producing an edit script in reverse order. */
406
407 static class ReverseScript implements ScriptBuilder {
408 public change build_script(
409 final boolean[] changed0,int len0,
410 final boolean[] changed1,int len1)
411 {
412 change script = null;
413 int i0 = 0, i1 = 0;
414 while (i0 < len0 || i1 < len1) {
415 if (changed0[1+i0] || changed1[1+i1]) {
416 int line0 = i0, line1 = i1;
417
418 /* Find # lines changed here in each file. */
419 while (changed0[1+i0]) {
420 ++i0;
421 }
422 while (changed1[1+i1]) {
423 ++i1;
424 }
425
426 /* Record this change. */
427 script = new change(line0, line1, i0 - line0, i1 - line1, script);
428 }
429
430 /* We have reached lines in the two files that match each other. */
431 i0++; i1++;
432 }
433
434 return script;
435 }
436 }
437
438 static class ForwardScript implements ScriptBuilder {
439 /** Scan the tables of which lines are inserted and deleted,
440 producing an edit script in forward order. */
441 public change build_script(
442 final boolean[] changed0,int len0,
443 final boolean[] changed1,int len1)
444 {
445 change script = null;
446 int i0 = len0, i1 = len1;
447
448 while (i0 >= 0 || i1 >= 0)
449 {
450 if (changed0[i0] || changed1[i1])
451 {
452 int line0 = i0, line1 = i1;
453
454 /* Find # lines changed here in each file. */
455 while (changed0[i0]) {
456 --i0;
457 }
458 while (changed1[i1]) {
459 --i1;
460 }
461
462 /* Record this change. */
463 script = new change(i0, i1, line0 - i0, line1 - i1, script);
464 }
465
466 /* We have reached lines in the two files that match each other. */
467 i0--; i1--;
468 }
469
470 return script;
471 }
472 }
473
474 /** Standard ScriptBuilders. */
475 public final static ScriptBuilder
476 forwardScript = new ForwardScript(),
477 reverseScript = new ReverseScript();
478
479 /* Report the differences of two files. DEPTH is the current directory
480 depth. */
481 public final change diff_2(final boolean reverse) {
482 return diff(reverse ? reverseScript : forwardScript);
483 }
484
485 /** Get the results of comparison as an edit script. The script
486 is described by a list of changes. The standard ScriptBuilder
487 implementations provide for forward and reverse edit scripts.
488 Alternate implementations could, for instance, list common elements
489 instead of differences.
490 @param bld an object to build the script from change flags
491 @return the head of a list of changes
492 */
493 public change diff(final ScriptBuilder bld) {
494
495 /* Some lines are obviously insertions or deletions
496 because they don't match anything. Detect them now,
497 and avoid even thinking about them in the main comparison algorithm. */
498
499 discard_confusing_lines ();
500
501 /* Now do the main comparison algorithm, considering just the
502 undiscarded lines. */
503
504 xvec = filevec[0].undiscarded;
505 yvec = filevec[1].undiscarded;
506
507 int diags =
508 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
509 fdiag = new int[diags];
510 fdiagoff = filevec[1].nondiscarded_lines + 1;
511 bdiag = new int[diags];
512 bdiagoff = filevec[1].nondiscarded_lines + 1;
513
514 compareseq (0, filevec[0].nondiscarded_lines,
515 0, filevec[1].nondiscarded_lines);
516 fdiag = null;
517 bdiag = null;
518
519 /* Modify the results slightly to make them prettier
520 in cases where that can validly be done. */
521
522 shift_boundaries ();
523
524 /* Get the results of comparison in the form of a chain
525 of `struct change's -- an edit script. */
526 return bld.build_script(
527 filevec[0].changed_flag,
528 filevec[0].buffered_lines,
529 filevec[1].changed_flag,
530 filevec[1].buffered_lines
531 );
532
533 }
534
535 /** The result of comparison is an "edit script": a chain of change objects.
536 Each change represents one place where some lines are deleted
537 and some are inserted.
538
539 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
540 DELETED is the number of lines deleted here from file 0.
541 INSERTED is the number of lines inserted here in file 1.
542
543 If DELETED is 0 then LINE0 is the number of the line before
544 which the insertion was done; vice versa for INSERTED and LINE1. */
545
546 public static class change {
547 /** Previous or next edit command. */
548 public change link;
549 /** # lines of file 1 changed here. */
550 public final int inserted;
551 /** # lines of file 0 changed here. */
552 public final int deleted;
553 /** Line number of 1st deleted line. */
554 public final int line0;
555 /** Line number of 1st inserted line. */
556 public final int line1;
557
558 /** Cons an additional entry onto the front of an edit script OLD.
559 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
560 DELETED is the number of lines deleted here from file 0.
561 INSERTED is the number of lines inserted here in file 1.
562
563 If DELETED is 0 then LINE0 is the number of the line before
564 which the insertion was done; vice versa for INSERTED and LINE1. */
565 public change(int line0, int line1, int deleted, int inserted, change old) {
566 this.line0 = line0;
567 this.line1 = line1;
568 this.inserted = inserted;
569 this.deleted = deleted;
570 this.link = old;
571 //System.err.println(line0+","+line1+","+inserted+","+deleted);
572 }
573 }
574
575 /** Data on one input file being compared.
576 */
577
578 class file_data {
579
580 /** Allocate changed array for the results of comparison. */
581 void clear() {
582 /* Allocate a flag for each line of each file, saying whether that line
583 is an insertion or deletion.
584 Allocate an extra element, always zero, at each end of each vector.
585 */
586 changed_flag = new boolean[buffered_lines + 2];
587 }
588
589 /** Return equiv_count[I] as the number of lines in this file
590 that fall in equivalence class I.
591 @return the array of equivalence class counts.
592 */
593 int[] equivCount() {
594 int[] equiv_count = new int[equiv_max];
595 for (int i = 0; i < buffered_lines; ++i) {
596 ++equiv_count[equivs[i]];
597 }
598 return equiv_count;
599 }
600
601 /** Discard lines that have no matches in another file.
602
603 A line which is discarded will not be considered by the actual
604 comparison algorithm; it will be as if that line were not in the file.
605 The file's `realindexes' table maps virtual line numbers
606 (which don't count the discarded lines) into real line numbers;
607 this is how the actual comparison algorithm produces results
608 that are comprehensible when the discarded lines are counted.
609<p>
610 When we discard a line, we also mark it as a deletion or insertion
611 so that it will be printed in the output.
612 @param f the other file
613 */
614 void discard_confusing_lines(file_data f) {
615 clear();
616 /* Set up table of which lines are going to be discarded. */
617 final byte[] discarded = discardable(f.equivCount());
618
619 /* Don't really discard the provisional lines except when they occur
620 in a run of discardables, with nonprovisionals at the beginning
621 and end. */
622 filterDiscards(discarded);
623
624 /* Actually discard the lines. */
625 discard(discarded);
626 }
627
628 /** Mark to be discarded each line that matches no line of another file.
629 If a line matches many lines, mark it as provisionally discardable.
630 @see equivCount()
631 @param counts The count of each equivalence number for the other file.
632 @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
633 for each line
634 */
635
636 private byte[] discardable(final int[] counts) {
637 final int end = buffered_lines;
638 final byte[] discards = new byte[end];
639 final int[] equivs = this.equivs;
640 int many = 5;
641 int tem = end / 64;
642
643 /* Multiply MANY by approximate square root of number of lines.
644 That is the threshold for provisionally discardable lines. */
645 while ((tem = tem >> 2) > 0) {
646 many *= 2;
647 }
648
649 for (int i = 0; i < end; i++)
650 {
651 int nmatch;
652 if (equivs[i] == 0) {
653 continue;
654 }
655 nmatch = counts[equivs[i]];
656 if (nmatch == 0) {
657 discards[i] = 1;
658 } else if (nmatch > many) {
659 discards[i] = 2;
660 }
661 }
662 return discards;
663 }
664
665 /** Don't really discard the provisional lines except when they occur
666 in a run of discardables, with nonprovisionals at the beginning
667 and end. */
668
669 private void filterDiscards(final byte[] discards) {
670 final int end = buffered_lines;
671
672 for (int i = 0; i < end; i++)
673 {
674 /* Cancel provisional discards not in middle of run of discards. */
675 if (discards[i] == 2) {
676 discards[i] = 0;
677 } else if (discards[i] != 0)
678 {
679 /* We have found a nonprovisional discard. */
680 int j;
681 int length;
682 int provisional = 0;
683
684 /* Find end of this run of discardable lines.
685 Count how many are provisionally discardable. */
686 for (j = i; j < end; j++)
687 {
688 if (discards[j] == 0) {
689 break;
690 }
691 if (discards[j] == 2) {
692 ++provisional;
693 }
694 }
695
696 /* Cancel provisional discards at end, and shrink the run. */
697 while (j > i && discards[j - 1] == 2) {
698 discards[--j] = 0; --provisional;
699 }
700
701 /* Now we have the length of a run of discardable lines
702 whose first and last are not provisional. */
703 length = j - i;
704
705 /* If 1/4 of the lines in the run are provisional,
706 cancel discarding of all provisional lines in the run. */
707 if (provisional * 4 > length)
708 {
709 while (j > i)
710 if (discards[--j] == 2) {
711 discards[j] = 0;
712 }
713 }
714 else
715 {
716 int consec;
717 int minimum = 1;
718 int tem = length / 4;
719
720 /* MINIMUM is approximate square root of LENGTH/4.
721 A subrun of two or more provisionals can stand
722 when LENGTH is at least 16.
723 A subrun of 4 or more can stand when LENGTH >= 64. */
724 while ((tem = tem >> 2) > 0) {
725 minimum *= 2;
726 }
727 minimum++;
728
729 /* Cancel any subrun of MINIMUM or more provisionals
730 within the larger run. */
731 for (j = 0, consec = 0; j < length; j++)
732 if (discards[i + j] != 2) {
733 consec = 0;
734 } else if (minimum == ++consec) {
735 /* Back up to start of subrun, to cancel it all. */
736 j -= consec;
737 } else if (minimum < consec) {
738 discards[i + j] = 0;
739 }
740
741 /* Scan from beginning of run
742 until we find 3 or more nonprovisionals in a row
743 or until the first nonprovisional at least 8 lines in.
744 Until that point, cancel any provisionals. */
745 for (j = 0, consec = 0; j < length; j++)
746 {
747 if (j >= 8 && discards[i + j] == 1) {
748 break;
749 }
750 if (discards[i + j] == 2) {
751 consec = 0; discards[i + j] = 0;
752 }
753 else if (discards[i + j] == 0) {
754 consec = 0;
755 } else {
756 consec++;
757 }
758 if (consec == 3) {
759 break;
760 }
761 }
762
763 /* I advances to the last line of the run. */
764 i += length - 1;
765
766 /* Same thing, from end. */
767 for (j = 0, consec = 0; j < length; j++)
768 {
769 if (j >= 8 && discards[i - j] == 1) {
770 break;
771 }
772 if (discards[i - j] == 2) {
773 consec = 0; discards[i - j] = 0;
774 }
775 else if (discards[i - j] == 0) {
776 consec = 0;
777 } else {
778 consec++;
779 }
780 if (consec == 3) {
781 break;
782 }
783 }
784 }
785 }
786 }
787 }
788
789 /** Actually discard the lines.
790 @param discards flags lines to be discarded
791 */
792 private void discard(final byte[] discards) {
793 final int end = buffered_lines;
794 int j = 0;
795 for (int i = 0; i < end; ++i)
796 if (no_discards || discards[i] == 0)
797 {
798 undiscarded[j] = equivs[i];
799 realindexes[j++] = i;
800 } else {
801 changed_flag[1+i] = true;
802 }
803 nondiscarded_lines = j;
804 }
805
806 file_data(Object[] data,Hashtable<Object, Integer> h) {
807 buffered_lines = data.length;
808
809 equivs = new int[buffered_lines];
810 undiscarded = new int[buffered_lines];
811 realindexes = new int[buffered_lines];
812
813 for (int i = 0; i < data.length; ++i) {
814 Integer ir = (Integer)h.get(data[i]);
815 if (ir == null) {
816 h.put(data[i],new Integer(equivs[i] = equiv_max++));
817 } else {
818 equivs[i] = ir.intValue();
819 }
820 }
821 }
822
823 /** Adjust inserts/deletes of blank lines to join changes
824 as much as possible.
825
826 We do something when a run of changed lines include a blank
827 line at one end and have an excluded blank line at the other.
828 We are free to choose which blank line is included.
829 `compareseq' always chooses the one at the beginning,
830 but usually it is cleaner to consider the following blank line
831 to be the "change". The only exception is if the preceding blank line
832 would join this change to other changes.
833 @param f the file being compared against
834 */
835
836 void shift_boundaries(file_data f) {
837 final boolean[] changed = changed_flag;
838 final boolean[] other_changed = f.changed_flag;
839 int i = 0;
840 int j = 0;
841 int i_end = buffered_lines;
842 int preceding = -1;
843 int other_preceding = -1;
844
845 for (;;)
846 {
847 int start, end, other_start;
848
849 /* Scan forwards to find beginning of another run of changes.
850 Also keep track of the corresponding point in the other file. */
851
852 while (i < i_end && !changed[1+i])
853 {
854 while (other_changed[1+j++]) {
855 /* Non-corresponding lines in the other file
856 will count as the preceding batch of changes. */
857 other_preceding = j;
858 }
859 i++;
860 }
861
862 if (i == i_end) {
863 break;
864 }
865
866 start = i;
867 other_start = j;
868
869 for (;;)
870 {
871 /* Now find the end of this run of changes. */
872
873 while (i < i_end && changed[1+i]) {
874 i++;
875 }
876 end = i;
877
878 /* If the first changed line matches the following unchanged one,
879 and this run does not follow right after a previous run,
880 and there are no lines deleted from the other file here,
881 then classify the first changed line as unchanged
882 and the following line as changed in its place. */
883
884 /* You might ask, how could this run follow right after another?
885 Only because the previous run was shifted here. */
886
887 if (end != i_end
888 && equivs[start] == equivs[end]
889 && !other_changed[1+j]
890 && end != i_end
891 && !((preceding >= 0 && start == preceding)
892 || (other_preceding >= 0
893 && other_start == other_preceding)))
894 {
895 changed[1+end++] = true;
896 changed[1+start++] = false;
897 ++i;
898 /* Since one line-that-matches is now before this run
899 instead of after, we must advance in the other file
900 to keep in synch. */
901 ++j;
902 } else {
903 break;
904 }
905 }
906
907 preceding = i;
908 other_preceding = j;
909 }
910 }
911
912 /** Number of elements (lines) in this file. */
913 final int buffered_lines;
914
915 /** Vector, indexed by line number, containing an equivalence code for
916 each line. It is this vector that is actually compared with that
917 of another file to generate differences. */
918 private final int[] equivs;
919
920 /** Vector, like the previous one except that
921 the elements for discarded lines have been squeezed out. */
922 final int[] undiscarded;
923
924 /** Vector mapping virtual line numbers (not counting discarded lines)
925 to real ones (counting those lines). Both are origin-0. */
926 final int[] realindexes;
927
928 /** Total number of nondiscarded lines. */
929 int nondiscarded_lines;
930
931 /** Array, indexed by real origin-1 line number,
932 containing true for a line that is an insertion or a deletion.
933 The results of comparison are stored here. */
934 boolean[] changed_flag;
935
936 }
937}
Note: See TracBrowser for help on using the repository browser.