source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 7845

Last change on this file since 7845 was 7801, checked in by Don-vip, 9 years ago

fix some Sonar issues

File size: 36.1 KB
Line 
1// License: GPL.
2package org.openstreetmap.josm.tools;
3
4//// Taken from http://www.bmsi.com/java/#diff
5
6// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8/*
9 * $Log: Diff.java,v $
10 * Revision 1.15 2013/04/01 16:27:31 stuart
11 * Fix DiffPrint unified format with test cases.
12 * Begin porting some diff-2.7 features.
13 *
14 * Revision 1.14 2010/03/03 21:21:25 stuart
15 * Test new direct equivalence API
16 *
17 * Revision 1.13 2009/12/07 17:43:17 stuart
18 * Compute equiv_max for int[] ctor
19 *
20 * Revision 1.12 2009/12/07 17:34:46 stuart
21 * Ctor with int[].
22 *
23 * Revision 1.11 2009/11/15 01:11:54 stuart
24 * Diff doesn't need to be generic
25 *
26 * Revision 1.10 2009/11/15 00:54:03 stuart
27 * Update to Java 5 containers
28 *
29 * Revision 1.7 2009/01/19 03:05:26 stuart
30 * Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31 *
32 * Revision 1.6 2003/03/06 22:51:32 stuart
33 * Convert to CVS
34 *
35 * Revision 1.5 2002/07/19 19:14:40 stuart
36 * fix reverseScript, make change ctor public, update docs
37 *
38 * Revision 1.4 2002/04/09 17:53:39 stuart
39 * More flexible interface for diff() function.
40 *
41 * Revision 1.3 2000/03/03 21:58:03 stuart
42 * move discard_confusing_lines and shift_boundaries to class file_data
43 *
44 * Revision 1.2 2000/03/02 16:37:38 stuart
45 * Add GPL and copyright
46 *
47 */
48
49import java.util.HashMap;
50import java.util.Map;
51
52/** A class to compare vectors of objects. The result of comparison
53 is a list of <code>change</code> objects which form an
54 edit script. The objects compared are traditionally lines
55 of text from two files. Comparison options such as "ignore
56 whitespace" are implemented by modifying the <code>equals</code>
57 and <code>hashcode</code> methods for the objects compared.
58<p>
59 The basic algorithm is described in: <br>
60 "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61 Algorithmica Vol. 1 No. 2, 1986, p 251.
62<p>
63 This class outputs different results from GNU diff 1.15 on some
64 inputs. Our results are actually better (smaller change list, smaller
65 total size of changes), but it would be nice to know why. Perhaps
66 there is a memory overwrite bug in GNU diff 1.15.
67
68 @author Stuart D. Gathman, translated from GNU diff 1.15
69 Copyright (C) 2000 Business Management Systems, Inc.
70<p>
71 This program is free software; you can redistribute it and/or modify
72 it under the terms of the GNU General Public License as published by
73 the Free Software Foundation; either version 1, or (at your option)
74 any later version.
75<p>
76 This program is distributed in the hope that it will be useful,
77 but WITHOUT ANY WARRANTY; without even the implied warranty of
78 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79 GNU General Public License for more details.
80<p>
81 You should have received a copy of the <a href=COPYING.txt>
82 GNU General Public License</a>
83 along with this program; if not, write to the Free Software
84 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85
86 */
87
88public class Diff {
89
90 /** Prepare to find differences between two arrays. Each element of
91 the arrays is translated to an "equivalence number" based on
92 the result of <code>equals</code>. The original Object arrays
93 are no longer needed for computing the differences. They will
94 be needed again later to print the results of the comparison as
95 an edit script, if desired.
96 */
97 public Diff(Object[] a, Object[] b) {
98 Map<Object,Integer> h = new HashMap<>(a.length + b.length);
99 filevec = new FileData[] { new FileData(a,h),new FileData(b,h) };
100 }
101
102 /** 1 more than the maximum equivalence value used for this or its
103 sibling file. */
104 private int equiv_max = 1;
105
106 /** When set to true, the comparison uses a heuristic to speed it up.
107 With this heuristic, for files with a constant small density
108 of changes, the algorithm is linear in the file size. */
109 public boolean heuristic = false;
110
111 /** When set to true, the algorithm returns a guarranteed minimal
112 set of changes. This makes things slower, sometimes much slower. */
113 public boolean no_discards = false;
114
115 private int[] xvec, yvec; /* Vectors being compared. */
116 private int[] fdiag; /* Vector, indexed by diagonal, containing
117 the X coordinate of the point furthest
118 along the given diagonal in the forward
119 search of the edit matrix. */
120 private int[] bdiag; /* Vector, indexed by diagonal, containing
121 the X coordinate of the point furthest
122 along the given diagonal in the backward
123 search of the edit matrix. */
124 private int fdiagoff, bdiagoff;
125 private final FileData[] filevec;
126 private int cost;
127 /** Snakes bigger than this are considered "big". */
128 private static final int SNAKE_LIMIT = 20;
129
130 /** Find the midpoint of the shortest edit script for a specified
131 portion of the two files.
132
133 We scan from the beginnings of the files, and simultaneously from the ends,
134 doing a breadth-first search through the space of edit-sequence.
135 When the two searches meet, we have found the midpoint of the shortest
136 edit sequence.
137
138 The value returned is the number of the diagonal on which the midpoint lies.
139 The diagonal number equals the number of inserted lines minus the number
140 of deleted lines (counting only lines before the midpoint).
141 The edit cost is stored into COST; this is the total number of
142 lines inserted or deleted (counting only lines before the midpoint).
143
144 This function assumes that the first lines of the specified portions
145 of the two files do not match, and likewise that the last lines do not
146 match. The caller must trim matching lines from the beginning and end
147 of the portions it is going to specify.
148
149 Note that if we return the "wrong" diagonal value, or if
150 the value of bdiag at that diagonal is "wrong",
151 the worst this can do is cause suboptimal diff output.
152 It cannot cause incorrect diff output. */
153
154 private int diag (int xoff, int xlim, int yoff, int ylim) {
155 final int[] fd = fdiag; // Give the compiler a chance.
156 final int[] bd = bdiag; // Additional help for the compiler.
157 final int[] xv = xvec; // Still more help for the compiler.
158 final int[] yv = yvec; // And more and more . . .
159 final int dmin = xoff - ylim; // Minimum valid diagonal.
160 final int dmax = xlim - yoff; // Maximum valid diagonal.
161 final int fmid = xoff - yoff; // Center diagonal of top-down search.
162 final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
163 int fmin = fmid, fmax = fmid; // Limits of top-down search.
164 int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
165 /* True if southeast corner is on an odd
166 diagonal with respect to the northwest. */
167 final boolean odd = (fmid - bmid & 1) != 0;
168
169 fd[fdiagoff + fmid] = xoff;
170 bd[bdiagoff + bmid] = xlim;
171
172 for (int c = 1;; ++c)
173 {
174 int d; /* Active diagonal. */
175 boolean big_snake = false;
176
177 /* Extend the top-down search by an edit step in each diagonal. */
178 if (fmin > dmin) {
179 fd[fdiagoff + --fmin - 1] = -1;
180 } else {
181 ++fmin;
182 }
183 if (fmax < dmax) {
184 fd[fdiagoff + ++fmax + 1] = -1;
185 } else {
186 --fmax;
187 }
188 for (d = fmax; d >= fmin; d -= 2)
189 {
190 int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
191
192 if (tlo >= thi) {
193 x = tlo + 1;
194 } else {
195 x = thi;
196 }
197 oldx = x;
198 y = x - d;
199 while (x < xlim && y < ylim && xv[x] == yv[y]) {
200 ++x; ++y;
201 }
202 if (x - oldx > SNAKE_LIMIT) {
203 big_snake = true;
204 }
205 fd[fdiagoff + d] = x;
206 if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
207 {
208 cost = 2 * c - 1;
209 return d;
210 }
211 }
212
213 /* Similar extend the bottom-up search. */
214 if (bmin > dmin) {
215 bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
216 } else {
217 ++bmin;
218 }
219 if (bmax < dmax) {
220 bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
221 } else {
222 --bmax;
223 }
224 for (d = bmax; d >= bmin; d -= 2)
225 {
226 int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
227
228 if (tlo < thi) {
229 x = tlo;
230 } else {
231 x = thi - 1;
232 }
233 oldx = x;
234 y = x - d;
235 while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
236 --x; --y;
237 }
238 if (oldx - x > SNAKE_LIMIT) {
239 big_snake = true;
240 }
241 bd[bdiagoff + d] = x;
242 if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
243 {
244 cost = 2 * c;
245 return d;
246 }
247 }
248
249 /* Heuristic: check occasionally for a diagonal that has made
250 lots of progress compared with the edit distance.
251 If we have any such, find the one that has made the most
252 progress and return it as if it had succeeded.
253
254 With this heuristic, for files with a constant small density
255 of changes, the algorithm is linear in the file size. */
256
257 if (c > 200 && big_snake && heuristic)
258 {
259 int best = 0;
260 int bestpos = -1;
261
262 for (d = fmax; d >= fmin; d -= 2)
263 {
264 int dd = d - fmid;
265 int x = fd[fdiagoff + d];
266 int y = x - d;
267 int v = (x - xoff) * 2 - dd;
268 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
269 {
270 if (v > best
271 && xoff + SNAKE_LIMIT <= x && x < xlim
272 && yoff + SNAKE_LIMIT <= y && y < ylim)
273 {
274 /* We have a good enough best diagonal.
275 now insist that it end with a significant snake. */
276 int k;
277
278 for (k = 1; xvec[x - k] == yvec[y - k]; k++)
279 if (k == SNAKE_LIMIT)
280 {
281 best = v;
282 bestpos = d;
283 break;
284 }
285 }
286 }
287 }
288 if (best > 0)
289 {
290 cost = 2 * c - 1;
291 return bestpos;
292 }
293
294 best = 0;
295 for (d = bmax; d >= bmin; d -= 2)
296 {
297 int dd = d - bmid;
298 int x = bd[bdiagoff + d];
299 int y = x - d;
300 int v = (xlim - x) * 2 + dd;
301 if (v > 12 * (c + (dd < 0 ? -dd : dd)))
302 {
303 if (v > best
304 && xoff < x && x <= xlim - SNAKE_LIMIT
305 && yoff < y && y <= ylim - SNAKE_LIMIT)
306 {
307 /* We have a good enough best diagonal.
308 now insist that it end with a significant snake. */
309 int k;
310
311 for (k = 0; xvec[x + k] == yvec[y + k]; k++)
312 if (k == SNAKE_LIMIT)
313 {
314 best = v;
315 bestpos = d;
316 break;
317 }
318 }
319 }
320 }
321 if (best > 0)
322 {
323 cost = 2 * c - 1;
324 return bestpos;
325 }
326 }
327 }
328 }
329
330 /** Compare in detail contiguous subsequences of the two files
331 which are known, as a whole, to match each other.
332
333 The results are recorded in the vectors filevec[N].changed_flag, by
334 storing a 1 in the element for each line that is an insertion or deletion.
335
336 The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
337
338 Note that XLIM, YLIM are exclusive bounds.
339 All line numbers are origin-0 and discarded lines are not counted. */
340
341 private void compareseq (int xoff, int xlim, int yoff, int ylim) {
342 /* Slide down the bottom initial diagonal. */
343 while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
344 ++xoff; ++yoff;
345 }
346 /* Slide up the top initial diagonal. */
347 while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
348 --xlim; --ylim;
349 }
350
351 /* Handle simple cases. */
352 if (xoff == xlim) {
353 while (yoff < ylim) {
354 filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
355 }
356 } else if (yoff == ylim) {
357 while (xoff < xlim) {
358 filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
359 }
360 } else {
361 /* Find a point of correspondence in the middle of the files. */
362
363 int d = diag (xoff, xlim, yoff, ylim);
364 int c = cost;
365 int b = bdiag[bdiagoff + d];
366
367 if (c == 1)
368 /* This should be impossible, because it implies that
369 one of the two subsequences is empty,
370 and that case was handled above without calling `diag'.
371 Let's verify that this is true. */
372 throw new IllegalArgumentException("Empty subsequence");
373 else
374 {
375 /* Use that point to split this problem into two subproblems. */
376 compareseq (xoff, b, yoff, b - d);
377 /* This used to use f instead of b,
378 but that is incorrect!
379 It is not necessarily the case that diagonal d
380 has a snake from b to f. */
381 compareseq (b, xlim, b - d, ylim);
382 }
383 }
384 }
385
386 /** Discard lines from one file that have no matches in the other file.
387 */
388 private void discard_confusing_lines() {
389 filevec[0].discard_confusing_lines(filevec[1]);
390 filevec[1].discard_confusing_lines(filevec[0]);
391 }
392
393 private boolean inhibit = false;
394
395 /** Adjust inserts/deletes of blank lines to join changes
396 as much as possible.
397 */
398 private void shift_boundaries() {
399 if (inhibit)
400 return;
401 filevec[0].shift_boundaries(filevec[1]);
402 filevec[1].shift_boundaries(filevec[0]);
403 }
404
405 public interface ScriptBuilder {
406 /** Scan the tables of which lines are inserted and deleted,
407 producing an edit script.
408 @param changed0 true for lines in first file which do not match 2nd
409 @param len0 number of lines in first file
410 @param changed1 true for lines in 2nd file which do not match 1st
411 @param len1 number of lines in 2nd file
412 @return a linked list of changes - or null
413 */
414 public Change build_script(
415 boolean[] changed0,int len0,
416 boolean[] changed1,int len1
417 );
418 }
419
420 /** Scan the tables of which lines are inserted and deleted,
421 producing an edit script in reverse order. */
422
423 static class ReverseScript implements ScriptBuilder {
424 @Override
425 public Change build_script(
426 final boolean[] changed0,int len0,
427 final boolean[] changed1,int len1)
428 {
429 Change script = null;
430 int i0 = 0, i1 = 0;
431 while (i0 < len0 || i1 < len1) {
432 if (changed0[1+i0] || changed1[1+i1]) {
433 int line0 = i0, line1 = i1;
434
435 /* Find # lines changed here in each file. */
436 while (changed0[1+i0]) {
437 ++i0;
438 }
439 while (changed1[1+i1]) {
440 ++i1;
441 }
442
443 /* Record this change. */
444 script = new Change(line0, line1, i0 - line0, i1 - line1, script);
445 }
446
447 /* We have reached lines in the two files that match each other. */
448 i0++; i1++;
449 }
450
451 return script;
452 }
453 }
454
455 static class ForwardScript implements ScriptBuilder {
456 /** Scan the tables of which lines are inserted and deleted,
457 producing an edit script in forward order. */
458 @Override
459 public Change build_script(
460 final boolean[] changed0,int len0,
461 final boolean[] changed1,int len1)
462 {
463 Change script = null;
464 int i0 = len0, i1 = len1;
465
466 while (i0 >= 0 || i1 >= 0)
467 {
468 if (changed0[i0] || changed1[i1])
469 {
470 int line0 = i0, line1 = i1;
471
472 /* Find # lines changed here in each file. */
473 while (changed0[i0]) {
474 --i0;
475 }
476 while (changed1[i1]) {
477 --i1;
478 }
479
480 /* Record this change. */
481 script = new Change(i0, i1, line0 - i0, line1 - i1, script);
482 }
483
484 /* We have reached lines in the two files that match each other. */
485 i0--; i1--;
486 }
487
488 return script;
489 }
490 }
491
492 /** Standard ScriptBuilders. */
493 public static final ScriptBuilder
494 forwardScript = new ForwardScript(),
495 reverseScript = new ReverseScript();
496
497 /** Report the differences of two files. DEPTH is the current directory
498 depth. */
499 public final Change diff_2(final boolean reverse) {
500 return diff(reverse ? reverseScript : forwardScript);
501 }
502
503 /** Get the results of comparison as an edit script. The script
504 is described by a list of changes. The standard ScriptBuilder
505 implementations provide for forward and reverse edit scripts.
506 Alternate implementations could, for instance, list common elements
507 instead of differences.
508 @param bld an object to build the script from change flags
509 @return the head of a list of changes
510 */
511 public Change diff(final ScriptBuilder bld) {
512
513 /* Some lines are obviously insertions or deletions
514 because they don't match anything. Detect them now,
515 and avoid even thinking about them in the main comparison algorithm. */
516
517 discard_confusing_lines ();
518
519 /* Now do the main comparison algorithm, considering just the
520 undiscarded lines. */
521
522 xvec = filevec[0].undiscarded;
523 yvec = filevec[1].undiscarded;
524
525 int diags =
526 filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
527 fdiag = new int[diags];
528 fdiagoff = filevec[1].nondiscarded_lines + 1;
529 bdiag = new int[diags];
530 bdiagoff = filevec[1].nondiscarded_lines + 1;
531
532 compareseq (0, filevec[0].nondiscarded_lines,
533 0, filevec[1].nondiscarded_lines);
534 fdiag = null;
535 bdiag = null;
536
537 /* Modify the results slightly to make them prettier
538 in cases where that can validly be done. */
539
540 shift_boundaries ();
541
542 /* Get the results of comparison in the form of a chain
543 of `struct change's -- an edit script. */
544 return bld.build_script(
545 filevec[0].changed_flag,
546 filevec[0].buffered_lines,
547 filevec[1].changed_flag,
548 filevec[1].buffered_lines
549 );
550
551 }
552
553 /** The result of comparison is an "edit script": a chain of change objects.
554 Each change represents one place where some lines are deleted
555 and some are inserted.
556
557 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
558 DELETED is the number of lines deleted here from file 0.
559 INSERTED is the number of lines inserted here in file 1.
560
561 If DELETED is 0 then LINE0 is the number of the line before
562 which the insertion was done; vice versa for INSERTED and LINE1. */
563
564 public static class Change {
565 /** Previous or next edit command. */
566 public Change link;
567 /** # lines of file 1 changed here. */
568 public final int inserted;
569 /** # lines of file 0 changed here. */
570 public final int deleted;
571 /** Line number of 1st deleted line. */
572 public final int line0;
573 /** Line number of 1st inserted line. */
574 public final int line1;
575
576 /** Cons an additional entry onto the front of an edit script OLD.
577 LINE0 and LINE1 are the first affected lines in the two files (origin 0).
578 DELETED is the number of lines deleted here from file 0.
579 INSERTED is the number of lines inserted here in file 1.
580
581 If DELETED is 0 then LINE0 is the number of the line before
582 which the insertion was done; vice versa for INSERTED and LINE1. */
583 public Change(int line0, int line1, int deleted, int inserted, Change old) {
584 this.line0 = line0;
585 this.line1 = line1;
586 this.inserted = inserted;
587 this.deleted = deleted;
588 this.link = old;
589 }
590
591 @Override
592 public String toString() {
593 String s = String.format("%d -%d +%d %d",line0,deleted,inserted,line1);
594 return (link != null) ? s = s + '\n' + link : s;
595 }
596 }
597
598 /** Data on one input file being compared.
599 */
600
601 class FileData {
602
603 /** Allocate changed array for the results of comparison. */
604 void clear() {
605 /* Allocate a flag for each line of each file, saying whether that line
606 is an insertion or deletion.
607 Allocate an extra element, always zero, at each end of each vector.
608 */
609 changed_flag = new boolean[buffered_lines + 2];
610 }
611
612 /** Return equiv_count[I] as the number of lines in this file
613 that fall in equivalence class I.
614 @return the array of equivalence class counts.
615 */
616 int[] equivCount() {
617 int[] equiv_count = new int[equiv_max];
618 for (int i = 0; i < buffered_lines; ++i) {
619 ++equiv_count[equivs[i]];
620 }
621 return equiv_count;
622 }
623
624 /** Discard lines that have no matches in another file.
625
626 A line which is discarded will not be considered by the actual
627 comparison algorithm; it will be as if that line were not in the file.
628 The file's `realindexes' table maps virtual line numbers
629 (which don't count the discarded lines) into real line numbers;
630 this is how the actual comparison algorithm produces results
631 that are comprehensible when the discarded lines are counted.
632<p>
633 When we discard a line, we also mark it as a deletion or insertion
634 so that it will be printed in the output.
635 @param f the other file
636 */
637 void discard_confusing_lines(FileData f) {
638 clear();
639 /* Set up table of which lines are going to be discarded. */
640 final byte[] discarded = discardable(f.equivCount());
641
642 /* Don't really discard the provisional lines except when they occur
643 in a run of discardables, with nonprovisionals at the beginning
644 and end. */
645 filterDiscards(discarded);
646
647 /* Actually discard the lines. */
648 discard(discarded);
649 }
650
651 /**
652 * Mark to be discarded each line that matches no line of another file.
653 * If a line matches many lines, mark it as provisionally discardable.
654 * @see #equivCount()
655 * @param counts The count of each equivalence number for the other file.
656 * @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
657 * for each line
658 */
659 private byte[] discardable(final int[] counts) {
660 final int end = buffered_lines;
661 final byte[] discards = new byte[end];
662 final int[] equivs = this.equivs;
663 int many = 5;
664 int tem = end / 64;
665
666 /* Multiply MANY by approximate square root of number of lines.
667 That is the threshold for provisionally discardable lines. */
668 while ((tem = tem >> 2) > 0) {
669 many *= 2;
670 }
671
672 for (int i = 0; i < end; i++)
673 {
674 int nmatch;
675 if (equivs[i] == 0) {
676 continue;
677 }
678 nmatch = counts[equivs[i]];
679 if (nmatch == 0) {
680 discards[i] = 1;
681 } else if (nmatch > many) {
682 discards[i] = 2;
683 }
684 }
685 return discards;
686 }
687
688 /**
689 * Don't really discard the provisional lines except when they occur
690 * in a run of discardables, with nonprovisionals at the beginning
691 * and end.
692 */
693 private void filterDiscards(final byte[] discards) {
694 final int end = buffered_lines;
695
696 for (int i = 0; i < end; i++)
697 {
698 /* Cancel provisional discards not in middle of run of discards. */
699 if (discards[i] == 2) {
700 discards[i] = 0;
701 } else if (discards[i] != 0)
702 {
703 /* We have found a nonprovisional discard. */
704 int j;
705 int length;
706 int provisional = 0;
707
708 /* Find end of this run of discardable lines.
709 Count how many are provisionally discardable. */
710 for (j = i; j < end; j++)
711 {
712 if (discards[j] == 0) {
713 break;
714 }
715 if (discards[j] == 2) {
716 ++provisional;
717 }
718 }
719
720 /* Cancel provisional discards at end, and shrink the run. */
721 while (j > i && discards[j - 1] == 2) {
722 discards[--j] = 0; --provisional;
723 }
724
725 /* Now we have the length of a run of discardable lines
726 whose first and last are not provisional. */
727 length = j - i;
728
729 /* If 1/4 of the lines in the run are provisional,
730 cancel discarding of all provisional lines in the run. */
731 if (provisional * 4 > length) {
732 while (j > i)
733 if (discards[--j] == 2) {
734 discards[j] = 0;
735 }
736 } else {
737 int consec;
738 int minimum = 1;
739 int tem = length / 4;
740
741 /* MINIMUM is approximate square root of LENGTH/4.
742 A subrun of two or more provisionals can stand
743 when LENGTH is at least 16.
744 A subrun of 4 or more can stand when LENGTH >= 64. */
745 while ((tem = tem >> 2) > 0) {
746 minimum *= 2;
747 }
748 minimum++;
749
750 /* Cancel any subrun of MINIMUM or more provisionals
751 within the larger run. */
752 for (j = 0, consec = 0; j < length; j++)
753 if (discards[i + j] != 2) {
754 consec = 0;
755 } else if (minimum == ++consec) {
756 /* Back up to start of subrun, to cancel it all. */
757 j -= consec;
758 } else if (minimum < consec) {
759 discards[i + j] = 0;
760 }
761
762 /* Scan from beginning of run
763 until we find 3 or more nonprovisionals in a row
764 or until the first nonprovisional at least 8 lines in.
765 Until that point, cancel any provisionals. */
766 for (j = 0, consec = 0; j < length; j++)
767 {
768 if (j >= 8 && discards[i + j] == 1) {
769 break;
770 }
771 if (discards[i + j] == 2) {
772 consec = 0; discards[i + j] = 0;
773 }
774 else if (discards[i + j] == 0) {
775 consec = 0;
776 } else {
777 consec++;
778 }
779 if (consec == 3) {
780 break;
781 }
782 }
783
784 /* I advances to the last line of the run. */
785 i += length - 1;
786
787 /* Same thing, from end. */
788 for (j = 0, consec = 0; j < length; j++)
789 {
790 if (j >= 8 && discards[i - j] == 1) {
791 break;
792 }
793 if (discards[i - j] == 2) {
794 consec = 0; discards[i - j] = 0;
795 }
796 else if (discards[i - j] == 0) {
797 consec = 0;
798 } else {
799 consec++;
800 }
801 if (consec == 3) {
802 break;
803 }
804 }
805 }
806 }
807 }
808 }
809
810 /** Actually discard the lines.
811 @param discards flags lines to be discarded
812 */
813 private void discard(final byte[] discards) {
814 final int end = buffered_lines;
815 int j = 0;
816 for (int i = 0; i < end; ++i)
817 if (no_discards || discards[i] == 0)
818 {
819 undiscarded[j] = equivs[i];
820 realindexes[j++] = i;
821 } else {
822 changed_flag[1+i] = true;
823 }
824 nondiscarded_lines = j;
825 }
826
827 FileData(int length) {
828 buffered_lines = length;
829 equivs = new int[length];
830 undiscarded = new int[buffered_lines];
831 realindexes = new int[buffered_lines];
832 }
833
834 FileData(Object[] data, Map<Object,Integer> h) {
835 this(data.length);
836 // FIXME: diff 2.7 removes common prefix and common suffix
837 for (int i = 0; i < data.length; ++i) {
838 Integer ir = h.get(data[i]);
839 if (ir == null) {
840 h.put(data[i],equivs[i] = equiv_max++);
841 } else {
842 equivs[i] = ir.intValue();
843 }
844 }
845 }
846
847 /** Adjust inserts/deletes of blank lines to join changes
848 as much as possible.
849
850 We do something when a run of changed lines include a blank
851 line at one end and have an excluded blank line at the other.
852 We are free to choose which blank line is included.
853 `compareseq' always chooses the one at the beginning,
854 but usually it is cleaner to consider the following blank line
855 to be the "change". The only exception is if the preceding blank line
856 would join this change to other changes.
857 @param f the file being compared against
858 */
859
860 void shift_boundaries(FileData f) {
861 final boolean[] changed = changed_flag;
862 final boolean[] other_changed = f.changed_flag;
863 int i = 0;
864 int j = 0;
865 int i_end = buffered_lines;
866 int preceding = -1;
867 int other_preceding = -1;
868
869 for (;;)
870 {
871 int start, end, other_start;
872
873 /* Scan forwards to find beginning of another run of changes.
874 Also keep track of the corresponding point in the other file. */
875
876 while (i < i_end && !changed[1+i])
877 {
878 while (other_changed[1+j++]) {
879 /* Non-corresponding lines in the other file
880 will count as the preceding batch of changes. */
881 other_preceding = j;
882 }
883 i++;
884 }
885
886 if (i == i_end) {
887 break;
888 }
889
890 start = i;
891 other_start = j;
892
893 for (;;)
894 {
895 /* Now find the end of this run of changes. */
896
897 while (i < i_end && changed[1+i]) {
898 i++;
899 }
900 end = i;
901
902 /* If the first changed line matches the following unchanged one,
903 and this run does not follow right after a previous run,
904 and there are no lines deleted from the other file here,
905 then classify the first changed line as unchanged
906 and the following line as changed in its place. */
907
908 /* You might ask, how could this run follow right after another?
909 Only because the previous run was shifted here. */
910
911 if (end != i_end
912 && equivs[start] == equivs[end]
913 && !other_changed[1+j]
914 && !((preceding >= 0 && start == preceding)
915 || (other_preceding >= 0
916 && other_start == other_preceding)))
917 {
918 changed[1+end++] = true;
919 changed[1+start++] = false;
920 ++i;
921 /* Since one line-that-matches is now before this run
922 instead of after, we must advance in the other file
923 to keep in synch. */
924 ++j;
925 } else {
926 break;
927 }
928 }
929
930 preceding = i;
931 other_preceding = j;
932 }
933 }
934
935 /** Number of elements (lines) in this file. */
936 final int buffered_lines;
937
938 /** Vector, indexed by line number, containing an equivalence code for
939 each line. It is this vector that is actually compared with that
940 of another file to generate differences. */
941 private final int[] equivs;
942
943 /** Vector, like the previous one except that
944 the elements for discarded lines have been squeezed out. */
945 final int[] undiscarded;
946
947 /** Vector mapping virtual line numbers (not counting discarded lines)
948 to real ones (counting those lines). Both are origin-0. */
949 final int[] realindexes;
950
951 /** Total number of nondiscarded lines. */
952 int nondiscarded_lines;
953
954 /** Array, indexed by real origin-1 line number,
955 containing true for a line that is an insertion or a deletion.
956 The results of comparison are stored here. */
957 boolean[] changed_flag;
958 }
959}
Note: See TracBrowser for help on using the repository browser.