Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 8994

Last change on this file since 8994 was 8976, checked in by simon04, 8 years ago
fix #12016 - History dialog sometimes incorrectly shows a way as reversed
Property svn:eol-style set to `native`
File size: 35.7 KB

Line
1	// License: GPL. For details, see LICENSE file.
2	package org.openstreetmap.josm.tools;
3
4	//// Taken from http://www.bmsi.com/java/#diff
5
6	// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8	/*
9	* $Log: Diff.java,v $
10	* Revision 1.15 2013/04/01 16:27:31 stuart
11	* Fix DiffPrint unified format with test cases.
12	* Begin porting some diff-2.7 features.
13	*
14	* Revision 1.14 2010/03/03 21:21:25 stuart
15	* Test new direct equivalence API
16	*
17	* Revision 1.13 2009/12/07 17:43:17 stuart
18	* Compute equivMax for int[] ctor
19	*
20	* Revision 1.12 2009/12/07 17:34:46 stuart
21	* Ctor with int[].
22	*
23	* Revision 1.11 2009/11/15 01:11:54 stuart
24	* Diff doesn't need to be generic
25	*
26	* Revision 1.10 2009/11/15 00:54:03 stuart
27	* Update to Java 5 containers
28	*
29	* Revision 1.7 2009/01/19 03:05:26 stuart
30	* Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31	*
32	* Revision 1.6 2003/03/06 22:51:32 stuart
33	* Convert to CVS
34	*
35	* Revision 1.5 2002/07/19 19:14:40 stuart
36	* fix reverseScript, make change ctor public, update docs
37	*
38	* Revision 1.4 2002/04/09 17:53:39 stuart
39	* More flexible interface for diff() function.
40	*
41	* Revision 1.3 2000/03/03 21:58:03 stuart
42	* move discard_confusing_lines and shift_boundaries to class file_data
43	*
44	* Revision 1.2 2000/03/02 16:37:38 stuart
45	* Add GPL and copyright
46	*
47	*/
48
49	import java.util.HashMap;
50	import java.util.Map;
51
52	/** A class to compare vectors of objects. The result of comparison
53	is a list of <code>change</code> objects which form an
54	edit script. The objects compared are traditionally lines
55	of text from two files. Comparison options such as "ignore
56	whitespace" are implemented by modifying the <code>equals</code>
57	and <code>hashcode</code> methods for the objects compared.
58	<p>
59	The basic algorithm is described in: <br>
60	"An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61	Algorithmica Vol. 1 No. 2, 1986, p 251.
62	<p>
63	This class outputs different results from GNU diff 1.15 on some
64	inputs. Our results are actually better (smaller change list, smaller
65	total size of changes), but it would be nice to know why. Perhaps
66	there is a memory overwrite bug in GNU diff 1.15.
67
68	@author Stuart D. Gathman, translated from GNU diff 1.15
69	Copyright (C) 2000 Business Management Systems, Inc.
70	<p>
71	This program is free software; you can redistribute it and/or modify
72	it under the terms of the GNU General Public License as published by
73	the Free Software Foundation; either version 1, or (at your option)
74	any later version.
75	<p>
76	This program is distributed in the hope that it will be useful,
77	but WITHOUT ANY WARRANTY; without even the implied warranty of
78	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79	GNU General Public License for more details.
80	<p>
81	You should have received a copy of the <a href=COPYING.txt>
82	GNU General Public License</a>
83	along with this program; if not, write to the Free Software
84	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85	*/
86	public class Diff {
87
88	/** Prepare to find differences between two arrays. Each element of
89	the arrays is translated to an "equivalence number" based on
90	the result of <code>equals</code>. The original Object arrays
91	are no longer needed for computing the differences. They will
92	be needed again later to print the results of the comparison as
93	an edit script, if desired.
94	* @param a first array
95	* @param b second array
96	*/
97	public Diff(Object[] a, Object[] b) {
98	Map<Object, Integer> h = new HashMap<>(a.length + b.length);
99	filevec = new FileData[] {new FileData(a, h), new FileData(b, h)};
100	}
101
102	/** 1 more than the maximum equivalence value used for this or its
103	sibling file. */
104	private int equivMax = 1;
105
106	/** When set to true, the comparison uses a heuristic to speed it up.
107	With this heuristic, for files with a constant small density
108	of changes, the algorithm is linear in the file size. */
109	public boolean heuristic;
110
111	/** When set to true, the algorithm returns a guarranteed minimal
112	set of changes. This makes things slower, sometimes much slower. */
113	public boolean noDiscards;
114
115	private int[] xvec, yvec; /* Vectors being compared. */
116	private int[] fdiag; /* Vector, indexed by diagonal, containing
117	the X coordinate of the point furthest
118	along the given diagonal in the forward
119	search of the edit matrix. */
120	private int[] bdiag; /* Vector, indexed by diagonal, containing
121	the X coordinate of the point furthest
122	along the given diagonal in the backward
123	search of the edit matrix. */
124	private int fdiagoff, bdiagoff;
125	private final FileData[] filevec;
126	private int cost;
127	/** Snakes bigger than this are considered "big". */
128	private static final int SNAKE_LIMIT = 20;
129
130	/**
131	* Find the midpoint of the shortest edit script for a specified
132	* portion of the two files.
133	*
134	* We scan from the beginnings of the files, and simultaneously from the ends,
135	* doing a breadth-first search through the space of edit-sequence.
136	* When the two searches meet, we have found the midpoint of the shortest
137	* edit sequence.
138	*
139	* The value returned is the number of the diagonal on which the midpoint lies.
140	* The diagonal number equals the number of inserted lines minus the number
141	* of deleted lines (counting only lines before the midpoint).
142	* The edit cost is stored into COST; this is the total number of
143	* lines inserted or deleted (counting only lines before the midpoint).
144	*
145	* This function assumes that the first lines of the specified portions
146	* of the two files do not match, and likewise that the last lines do not
147	* match. The caller must trim matching lines from the beginning and end
148	* of the portions it is going to specify.
149	*
150	* Note that if we return the "wrong" diagonal value, or if
151	* the value of bdiag at that diagonal is "wrong",
152	* the worst this can do is cause suboptimal diff output.
153	* It cannot cause incorrect diff output.
154	*/
155	private int diag(int xoff, int xlim, int yoff, int ylim) {
156	final int[] fd = fdiag; // Give the compiler a chance.
157	final int[] bd = bdiag; // Additional help for the compiler.
158	final int[] xv = xvec; // Still more help for the compiler.
159	final int[] yv = yvec; // And more and more . . .
160	final int dmin = xoff - ylim; // Minimum valid diagonal.
161	final int dmax = xlim - yoff; // Maximum valid diagonal.
162	final int fmid = xoff - yoff; // Center diagonal of top-down search.
163	final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
164	int fmin = fmid, fmax = fmid; // Limits of top-down search.
165	int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
166	/* True if southeast corner is on an odd
167	diagonal with respect to the northwest. */
168	final boolean odd = (fmid - bmid & 1) != 0;
169
170	fd[fdiagoff + fmid] = xoff;
171	bd[bdiagoff + bmid] = xlim;
172
173	for (int c = 1;; ++c) {
174	int d; /* Active diagonal. */
175	boolean big_snake = false;
176
177	/* Extend the top-down search by an edit step in each diagonal. */
178	if (fmin > dmin) {
179	fd[fdiagoff + --fmin - 1] = -1;
180	} else {
181	++fmin;
182	}
183	if (fmax < dmax) {
184	fd[fdiagoff + ++fmax + 1] = -1;
185	} else {
186	--fmax;
187	}
188	for (d = fmax; d >= fmin; d -= 2) {
189	int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
190
191	if (tlo >= thi) {
192	x = tlo + 1;
193	} else {
194	x = thi;
195	}
196	oldx = x;
197	y = x - d;
198	while (x < xlim && y < ylim && xv[x] == yv[y]) {
199	++x; ++y;
200	}
201	if (x - oldx > SNAKE_LIMIT) {
202	big_snake = true;
203	}
204	fd[fdiagoff + d] = x;
205	if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
206	cost = 2 * c - 1;
207	return d;
208	}
209	}
210
211	/* Similar extend the bottom-up search. */
212	if (bmin > dmin) {
213	bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
214	} else {
215	++bmin;
216	}
217	if (bmax < dmax) {
218	bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
219	} else {
220	--bmax;
221	}
222	for (d = bmax; d >= bmin; d -= 2) {
223	int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
224
225	if (tlo < thi) {
226	x = tlo;
227	} else {
228	x = thi - 1;
229	}
230	oldx = x;
231	y = x - d;
232	while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
233	--x; --y;
234	}
235	if (oldx - x > SNAKE_LIMIT) {
236	big_snake = true;
237	}
238	bd[bdiagoff + d] = x;
239	if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d]) {
240	cost = 2 * c;
241	return d;
242	}
243	}
244
245	/* Heuristic: check occasionally for a diagonal that has made
246	lots of progress compared with the edit distance.
247	If we have any such, find the one that has made the most
248	progress and return it as if it had succeeded.
249
250	With this heuristic, for files with a constant small density
251	of changes, the algorithm is linear in the file size. */
252
253	if (c > 200 && big_snake && heuristic) {
254	int best = 0;
255	int bestpos = -1;
256
257	for (d = fmax; d >= fmin; d -= 2) {
258	int dd = d - fmid;
259	int x = fd[fdiagoff + d];
260	int y = x - d;
261	int v = (x - xoff) * 2 - dd;
262	if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
263	if (v > best
264	&& xoff + SNAKE_LIMIT <= x && x < xlim
265	&& yoff + SNAKE_LIMIT <= y && y < ylim) {
266	/* We have a good enough best diagonal.
267	now insist that it end with a significant snake. */
268	int k;
269
270	for (k = 1; xvec[x - k] == yvec[y - k]; k++) {
271	if (k == SNAKE_LIMIT) {
272	best = v;
273	bestpos = d;
274	break;
275	}
276	}
277	}
278	}
279	}
280	if (best > 0) {
281	cost = 2 * c - 1;
282	return bestpos;
283	}
284
285	best = 0;
286	for (d = bmax; d >= bmin; d -= 2) {
287	int dd = d - bmid;
288	int x = bd[bdiagoff + d];
289	int y = x - d;
290	int v = (xlim - x) * 2 + dd;
291	if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
292	if (v > best
293	&& xoff < x && x <= xlim - SNAKE_LIMIT
294	&& yoff < y && y <= ylim - SNAKE_LIMIT) {
295	/* We have a good enough best diagonal.
296	now insist that it end with a significant snake. */
297	int k;
298
299	for (k = 0; xvec[x + k] == yvec[y + k]; k++) {
300	if (k == SNAKE_LIMIT) {
301	best = v;
302	bestpos = d;
303	break;
304	}
305	}
306	}
307	}
308	}
309	if (best > 0) {
310	cost = 2 * c - 1;
311	return bestpos;
312	}
313	}
314	}
315	}
316
317	/** Compare in detail contiguous subsequences of the two files
318	which are known, as a whole, to match each other.
319
320	The results are recorded in the vectors filevec[N].changed_flag, by
321	storing a 1 in the element for each line that is an insertion or deletion.
322
323	The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
324
325	Note that XLIM, YLIM are exclusive bounds.
326	All line numbers are origin-0 and discarded lines are not counted. */
327
328	private void compareseq(int xoff, int xlim, int yoff, int ylim) {
329	/* Slide down the bottom initial diagonal. */
330	while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
331	++xoff; ++yoff;
332	}
333	/* Slide up the top initial diagonal. */
334	while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
335	--xlim; --ylim;
336	}
337
338	/* Handle simple cases. */
339	if (xoff == xlim) {
340	while (yoff < ylim) {
341	filevec[1].changedFlag[1+filevec[1].realindexes[yoff++]] = true;
342	}
343	} else if (yoff == ylim) {
344	while (xoff < xlim) {
345	filevec[0].changedFlag[1+filevec[0].realindexes[xoff++]] = true;
346	}
347	} else {
348	/* Find a point of correspondence in the middle of the files. */
349
350	int d = diag(xoff, xlim, yoff, ylim);
351	int c = cost;
352	int b = bdiag[bdiagoff + d];
353
354	if (c == 1)
355	/* This should be impossible, because it implies that
356	one of the two subsequences is empty,
357	and that case was handled above without calling `diag'.
358	Let's verify that this is true. */
359	throw new IllegalArgumentException("Empty subsequence");
360	else {
361	/* Use that point to split this problem into two subproblems. */
362	compareseq(xoff, b, yoff, b - d);
363	/* This used to use f instead of b,
364	but that is incorrect!
365	It is not necessarily the case that diagonal d
366	has a snake from b to f. */
367	compareseq(b, xlim, b - d, ylim);
368	}
369	}
370	}
371
372	/** Discard lines from one file that have no matches in the other file.
373	*/
374	private void discard_confusing_lines() {
375	filevec[0].discard_confusing_lines(filevec[1]);
376	filevec[1].discard_confusing_lines(filevec[0]);
377	}
378
379	private boolean inhibit;
380
381	/** Adjust inserts/deletes of blank lines to join changes
382	as much as possible.
383	*/
384	private void shift_boundaries() {
385	if (inhibit)
386	return;
387	filevec[0].shift_boundaries(filevec[1]);
388	filevec[1].shift_boundaries(filevec[0]);
389	}
390
391	public interface ScriptBuilder {
392	/** Scan the tables of which lines are inserted and deleted,
393	producing an edit script.
394	@param changed0 true for lines in first file which do not match 2nd
395	@param len0 number of lines in first file
396	@param changed1 true for lines in 2nd file which do not match 1st
397	@param len1 number of lines in 2nd file
398	@return a linked list of changes - or null
399	*/
400	Change build_script(
401	boolean[] changed0, int len0,
402	boolean[] changed1, int len1
403	);
404	}
405
406	/** Scan the tables of which lines are inserted and deleted,
407	producing an edit script in reverse order. */
408
409	static class ReverseScript implements ScriptBuilder {
410	@Override
411	public Change build_script(
412	final boolean[] changed0, int len0,
413	final boolean[] changed1, int len1) {
414	Change script = null;
415	int i0 = 0, i1 = 0;
416	while (i0 < len0 \|\| i1 < len1) {
417	if (changed0[1+i0] \|\| changed1[1+i1]) {
418	int line0 = i0, line1 = i1;
419
420	/* Find # lines changed here in each file. */
421	while (changed0[1+i0]) {
422	++i0;
423	}
424	while (changed1[1+i1]) {
425	++i1;
426	}
427
428	/* Record this change. */
429	script = new Change(line0, line1, i0 - line0, i1 - line1, script);
430	}
431
432	/* We have reached lines in the two files that match each other. */
433	i0++; i1++;
434	}
435
436	return script;
437	}
438	}
439
440	static class ForwardScript implements ScriptBuilder {
441	/** Scan the tables of which lines are inserted and deleted,
442	producing an edit script in forward order. */
443	@Override
444	public Change build_script(
445	final boolean[] changed0, int len0,
446	final boolean[] changed1, int len1) {
447	Change script = null;
448	int i0 = len0, i1 = len1;
449
450	while (i0 >= 0 \|\| i1 >= 0) {
451	if (changed0[i0] \|\| changed1[i1]) {
452	int line0 = i0, line1 = i1;
453
454	/* Find # lines changed here in each file. */
455	while (changed0[i0]) {
456	--i0;
457	}
458	while (changed1[i1]) {
459	--i1;
460	}
461
462	/* Record this change. */
463	script = new Change(i0, i1, line0 - i0, line1 - i1, script);
464	}
465
466	/* We have reached lines in the two files that match each other. */
467	i0--; i1--;
468	}
469
470	return script;
471	}
472	}
473
474	/** Standard ScriptBuilders. */
475	public static final ScriptBuilder
476	forwardScript = new ForwardScript(),
477	reverseScript = new ReverseScript();
478
479	/** Report the differences of two files. DEPTH is the current directory depth. */
480	public final Change diff_2(final boolean reverse) {
481	return diff(reverse ? reverseScript : forwardScript);
482	}
483
484	/** Get the results of comparison as an edit script. The script
485	is described by a list of changes. The standard ScriptBuilder
486	implementations provide for forward and reverse edit scripts.
487	Alternate implementations could, for instance, list common elements
488	instead of differences.
489	@param bld an object to build the script from change flags
490	@return the head of a list of changes
491	*/
492	public Change diff(final ScriptBuilder bld) {
493
494	/* Some lines are obviously insertions or deletions
495	because they don't match anything. Detect them now,
496	and avoid even thinking about them in the main comparison algorithm. */
497
498	discard_confusing_lines();
499
500	/* Now do the main comparison algorithm, considering just the
501	undiscarded lines. */
502
503	xvec = filevec[0].undiscarded;
504	yvec = filevec[1].undiscarded;
505
506	int diags =
507	filevec[0].nondiscardedLines + filevec[1].nondiscardedLines + 3;
508	fdiag = new int[diags];
509	fdiagoff = filevec[1].nondiscardedLines + 1;
510	bdiag = new int[diags];
511	bdiagoff = filevec[1].nondiscardedLines + 1;
512
513	compareseq(0, filevec[0].nondiscardedLines,
514	0, filevec[1].nondiscardedLines);
515	fdiag = null;
516	bdiag = null;
517
518	/* Modify the results slightly to make them prettier
519	in cases where that can validly be done. */
520
521	shift_boundaries();
522
523	/* Get the results of comparison in the form of a chain
524	of `struct change's -- an edit script. */
525	return bld.build_script(
526	filevec[0].changedFlag,
527	filevec[0].bufferedLines,
528	filevec[1].changedFlag,
529	filevec[1].bufferedLines
530	);
531
532	}
533
534	/** The result of comparison is an "edit script": a chain of change objects.
535	Each change represents one place where some lines are deleted
536	and some are inserted.
537
538	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
539	DELETED is the number of lines deleted here from file 0.
540	INSERTED is the number of lines inserted here in file 1.
541
542	If DELETED is 0 then LINE0 is the number of the line before
543	which the insertion was done; vice versa for INSERTED and LINE1. */
544
545	public static class Change {
546	/** Previous or next edit command. */
547	public Change link;
548	/** # lines of file 1 changed here. */
549	public final int inserted;
550	/** # lines of file 0 changed here. */
551	public final int deleted;
552	/** Line number of 1st deleted line. */
553	public final int line0;
554	/** Line number of 1st inserted line. */
555	public final int line1;
556
557	/** Cons an additional entry onto the front of an edit script OLD.
558	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
559	DELETED is the number of lines deleted here from file 0.
560	INSERTED is the number of lines inserted here in file 1.
561
562	If DELETED is 0 then LINE0 is the number of the line before
563	which the insertion was done; vice versa for INSERTED and LINE1. */
564	public Change(int line0, int line1, int deleted, int inserted, Change old) {
565	this.line0 = line0;
566	this.line1 = line1;
567	this.inserted = inserted;
568	this.deleted = deleted;
569	this.link = old;
570	}
571
572	/**
573	* Returns the number of insertions and deletions of this change as well as
574	* (recursively) the changes linked via {@link #link}.
575	*/
576	public int getTotalNumberOfChanges() {
577	return inserted + deleted + (link != null ? link.getTotalNumberOfChanges() : 0);
578	}
579
580	@Override
581	public String toString() {
582	String s = String.format("%d -%d +%d %d", line0, deleted, inserted, line1);
583	return (link != null) ? s = s + '\n' + link : s;
584	}
585	}
586
587	/** Data on one input file being compared.
588	*/
589	class FileData {
590
591	/** Allocate changed array for the results of comparison. */
592	void clear() {
593	/* Allocate a flag for each line of each file, saying whether that line
594	is an insertion or deletion.
595	Allocate an extra element, always zero, at each end of each vector.
596	*/
597	changedFlag = new boolean[bufferedLines + 2];
598	}
599
600	/** Return equiv_count[I] as the number of lines in this file
601	that fall in equivalence class I.
602	@return the array of equivalence class counts.
603	*/
604	int[] equivCount() {
605	int[] equiv_count = new int[equivMax];
606	for (int i = 0; i < bufferedLines; ++i) {
607	++equiv_count[equivs[i]];
608	}
609	return equiv_count;
610	}
611
612	/** Discard lines that have no matches in another file.
613
614	A line which is discarded will not be considered by the actual
615	comparison algorithm; it will be as if that line were not in the file.
616	The file's `realindexes' table maps virtual line numbers
617	(which don't count the discarded lines) into real line numbers;
618	this is how the actual comparison algorithm produces results
619	that are comprehensible when the discarded lines are counted.
620	<p>
621	When we discard a line, we also mark it as a deletion or insertion
622	so that it will be printed in the output.
623	@param f the other file
624	*/
625	void discard_confusing_lines(FileData f) {
626	clear();
627	/* Set up table of which lines are going to be discarded. */
628	final byte[] discarded = discardable(f.equivCount());
629
630	/* Don't really discard the provisional lines except when they occur
631	in a run of discardables, with nonprovisionals at the beginning
632	and end. */
633	filterDiscards(discarded);
634
635	/* Actually discard the lines. */
636	discard(discarded);
637	}
638
639	/**
640	* Mark to be discarded each line that matches no line of another file.
641	* If a line matches many lines, mark it as provisionally discardable.
642	* @param counts The count of each equivalence number for the other file.
643	* @return 0=nondiscardable, 1=discardable or 2=provisionally discardable for each line
644	* @see #equivCount()
645	*/
646	private byte[] discardable(final int[] counts) {
647	final int end = bufferedLines;
648	final byte[] discards = new byte[end];
649	final int[] equivs = this.equivs;
650	int many = 5;
651	int tem = end / 64;
652
653	/* Multiply MANY by approximate square root of number of lines.
654	That is the threshold for provisionally discardable lines. */
655	while ((tem = tem >> 2) > 0) {
656	many *= 2;
657	}
658
659	for (int i = 0; i < end; i++) {
660	int nmatch;
661	if (equivs[i] == 0) {
662	continue;
663	}
664	nmatch = counts[equivs[i]];
665	if (nmatch == 0) {
666	discards[i] = 1;
667	} else if (nmatch > many) {
668	discards[i] = 2;
669	}
670	}
671	return discards;
672	}
673
674	/**
675	* Don't really discard the provisional lines except when they occur
676	* in a run of discardables, with nonprovisionals at the beginning
677	* and end.
678	*/
679	private void filterDiscards(final byte[] discards) {
680	final int end = bufferedLines;
681
682	for (int i = 0; i < end; i++) {
683	/* Cancel provisional discards not in middle of run of discards. */
684	if (discards[i] == 2) {
685	discards[i] = 0;
686	} else if (discards[i] != 0) {
687	/* We have found a nonprovisional discard. */
688	int j;
689	int length;
690	int provisional = 0;
691
692	/* Find end of this run of discardable lines.
693	Count how many are provisionally discardable. */
694	for (j = i; j < end; j++) {
695	if (discards[j] == 0) {
696	break;
697	}
698	if (discards[j] == 2) {
699	++provisional;
700	}
701	}
702
703	/* Cancel provisional discards at end, and shrink the run. */
704	while (j > i && discards[j - 1] == 2) {
705	discards[--j] = 0; --provisional;
706	}
707
708	/* Now we have the length of a run of discardable lines
709	whose first and last are not provisional. */
710	length = j - i;
711
712	/* If 1/4 of the lines in the run are provisional,
713	cancel discarding of all provisional lines in the run. */
714	if (provisional * 4 > length) {
715	while (j > i)
716	if (discards[--j] == 2) {
717	discards[j] = 0;
718	}
719	} else {
720	int consec;
721	int minimum = 1;
722	int tem = length / 4;
723
724	/* MINIMUM is approximate square root of LENGTH/4.
725	A subrun of two or more provisionals can stand
726	when LENGTH is at least 16.
727	A subrun of 4 or more can stand when LENGTH >= 64. */
728	while ((tem = tem >> 2) > 0) {
729	minimum *= 2;
730	}
731	minimum++;
732
733	/* Cancel any subrun of MINIMUM or more provisionals
734	within the larger run. */
735	for (j = 0, consec = 0; j < length; j++) {
736	if (discards[i + j] != 2) {
737	consec = 0;
738	} else if (minimum == ++consec) {
739	/* Back up to start of subrun, to cancel it all. */
740	j -= consec;
741	} else if (minimum < consec) {
742	discards[i + j] = 0;
743	}
744	}
745
746	/* Scan from beginning of run
747	until we find 3 or more nonprovisionals in a row
748	or until the first nonprovisional at least 8 lines in.
749	Until that point, cancel any provisionals. */
750	for (j = 0, consec = 0; j < length; j++) {
751	if (j >= 8 && discards[i + j] == 1) {
752	break;
753	}
754	if (discards[i + j] == 2) {
755	consec = 0; discards[i + j] = 0;
756	} else if (discards[i + j] == 0) {
757	consec = 0;
758	} else {
759	consec++;
760	}
761	if (consec == 3) {
762	break;
763	}
764	}
765
766	/* I advances to the last line of the run. */
767	i += length - 1;
768
769	/* Same thing, from end. */
770	for (j = 0, consec = 0; j < length; j++) {
771	if (j >= 8 && discards[i - j] == 1) {
772	break;
773	}
774	if (discards[i - j] == 2) {
775	consec = 0; discards[i - j] = 0;
776	} else if (discards[i - j] == 0) {
777	consec = 0;
778	} else {
779	consec++;
780	}
781	if (consec == 3) {
782	break;
783	}
784	}
785	}
786	}
787	}
788	}
789
790	/** Actually discard the lines.
791	@param discards flags lines to be discarded
792	*/
793	private void discard(final byte[] discards) {
794	final int end = bufferedLines;
795	int j = 0;
796	for (int i = 0; i < end; ++i) {
797	if (noDiscards \|\| discards[i] == 0) {
798	undiscarded[j] = equivs[i];
799	realindexes[j++] = i;
800	} else {
801	changedFlag[1+i] = true;
802	}
803	}
804	nondiscardedLines = j;
805	}
806
807	FileData(int length) {
808	bufferedLines = length;
809	equivs = new int[length];
810	undiscarded = new int[bufferedLines];
811	realindexes = new int[bufferedLines];
812	}
813
814	FileData(Object[] data, Map<Object, Integer> h) {
815	this(data.length);
816	// FIXME: diff 2.7 removes common prefix and common suffix
817	for (int i = 0; i < data.length; ++i) {
818	Integer ir = h.get(data[i]);
819	if (ir == null) {
820	h.put(data[i], equivs[i] = equivMax++);
821	} else {
822	equivs[i] = ir.intValue();
823	}
824	}
825	}
826
827	/** Adjust inserts/deletes of blank lines to join changes
828	as much as possible.
829
830	We do something when a run of changed lines include a blank
831	line at one end and have an excluded blank line at the other.
832	We are free to choose which blank line is included.
833	`compareseq' always chooses the one at the beginning,
834	but usually it is cleaner to consider the following blank line
835	to be the "change". The only exception is if the preceding blank line
836	would join this change to other changes.
837	@param f the file being compared against
838	*/
839	void shift_boundaries(FileData f) {
840	final boolean[] changed = changedFlag;
841	final boolean[] other_changed = f.changedFlag;
842	int i = 0;
843	int j = 0;
844	int i_end = bufferedLines;
845	int preceding = -1;
846	int other_preceding = -1;
847
848	for (;;) {
849	int start, end, other_start;
850
851	/* Scan forwards to find beginning of another run of changes.
852	Also keep track of the corresponding point in the other file. */
853
854	while (i < i_end && !changed[1+i]) {
855	while (other_changed[1+j++]) {
856	/* Non-corresponding lines in the other file
857	will count as the preceding batch of changes. */
858	other_preceding = j;
859	}
860	i++;
861	}
862
863	if (i == i_end) {
864	break;
865	}
866
867	start = i;
868	other_start = j;
869
870	for (;;) {
871	/* Now find the end of this run of changes. */
872
873	while (i < i_end && changed[1+i]) {
874	i++;
875	}
876	end = i;
877
878	/* If the first changed line matches the following unchanged one,
879	and this run does not follow right after a previous run,
880	and there are no lines deleted from the other file here,
881	then classify the first changed line as unchanged
882	and the following line as changed in its place. */
883
884	/* You might ask, how could this run follow right after another?
885	Only because the previous run was shifted here. */
886
887	if (end != i_end && equivs[start] == equivs[end] && !other_changed[1+j]
888	&& !((preceding >= 0 && start == preceding) \|\| (other_preceding >= 0 && other_start == other_preceding))) {
889	changed[1+end++] = true;
890	changed[1+start++] = false;
891	++i;
892	/* Since one line-that-matches is now before this run
893	instead of after, we must advance in the other file
894	to keep in synch. */
895	++j;
896	} else {
897	break;
898	}
899	}
900
901	preceding = i;
902	other_preceding = j;
903	}
904	}
905
906	/** Number of elements (lines) in this file. */
907	private final int bufferedLines;
908
909	/** Vector, indexed by line number, containing an equivalence code for
910	each line. It is this vector that is actually compared with that
911	of another file to generate differences. */
912	private final int[] equivs;
913
914	/** Vector, like the previous one except that
915	the elements for discarded lines have been squeezed out. */
916	private final int[] undiscarded;
917
918	/** Vector mapping virtual line numbers (not counting discarded lines)
919	to real ones (counting those lines). Both are origin-0. */
920	private final int[] realindexes;
921
922	/** Total number of nondiscarded lines. */
923	private int nondiscardedLines;
924
925	/** Array, indexed by real origin-1 line number,
926	containing true for a line that is an insertion or a deletion.
927	The results of comparison are stored here. */
928	private boolean[] changedFlag;
929	}
930	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: