Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 7845

Last change on this file since 7845 was 7801, checked in by Don-vip, 9 years ago
fix some Sonar issues
File size: 36.1 KB

Line
1	// License: GPL.
2	package org.openstreetmap.josm.tools;
3
4	//// Taken from http://www.bmsi.com/java/#diff
5
6	// http://www.bmsi.com/java/DiffPrint.java could also be useful
7
8	/*
9	* $Log: Diff.java,v $
10	* Revision 1.15 2013/04/01 16:27:31 stuart
11	* Fix DiffPrint unified format with test cases.
12	* Begin porting some diff-2.7 features.
13	*
14	* Revision 1.14 2010/03/03 21:21:25 stuart
15	* Test new direct equivalence API
16	*
17	* Revision 1.13 2009/12/07 17:43:17 stuart
18	* Compute equiv_max for int[] ctor
19	*
20	* Revision 1.12 2009/12/07 17:34:46 stuart
21	* Ctor with int[].
22	*
23	* Revision 1.11 2009/11/15 01:11:54 stuart
24	* Diff doesn't need to be generic
25	*
26	* Revision 1.10 2009/11/15 00:54:03 stuart
27	* Update to Java 5 containers
28	*
29	* Revision 1.7 2009/01/19 03:05:26 stuart
30	* Fix StackOverflow bug with heuristic on reported by Jimmy Han.
31	*
32	* Revision 1.6 2003/03/06 22:51:32 stuart
33	* Convert to CVS
34	*
35	* Revision 1.5 2002/07/19 19:14:40 stuart
36	* fix reverseScript, make change ctor public, update docs
37	*
38	* Revision 1.4 2002/04/09 17:53:39 stuart
39	* More flexible interface for diff() function.
40	*
41	* Revision 1.3 2000/03/03 21:58:03 stuart
42	* move discard_confusing_lines and shift_boundaries to class file_data
43	*
44	* Revision 1.2 2000/03/02 16:37:38 stuart
45	* Add GPL and copyright
46	*
47	*/
48
49	import java.util.HashMap;
50	import java.util.Map;
51
52	/** A class to compare vectors of objects. The result of comparison
53	is a list of <code>change</code> objects which form an
54	edit script. The objects compared are traditionally lines
55	of text from two files. Comparison options such as "ignore
56	whitespace" are implemented by modifying the <code>equals</code>
57	and <code>hashcode</code> methods for the objects compared.
58	<p>
59	The basic algorithm is described in: <br>
60	"An O(ND) Difference Algorithm and its Variations", Eugene Myers,
61	Algorithmica Vol. 1 No. 2, 1986, p 251.
62	<p>
63	This class outputs different results from GNU diff 1.15 on some
64	inputs. Our results are actually better (smaller change list, smaller
65	total size of changes), but it would be nice to know why. Perhaps
66	there is a memory overwrite bug in GNU diff 1.15.
67
68	@author Stuart D. Gathman, translated from GNU diff 1.15
69	Copyright (C) 2000 Business Management Systems, Inc.
70	<p>
71	This program is free software; you can redistribute it and/or modify
72	it under the terms of the GNU General Public License as published by
73	the Free Software Foundation; either version 1, or (at your option)
74	any later version.
75	<p>
76	This program is distributed in the hope that it will be useful,
77	but WITHOUT ANY WARRANTY; without even the implied warranty of
78	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
79	GNU General Public License for more details.
80	<p>
81	You should have received a copy of the <a href=COPYING.txt>
82	GNU General Public License</a>
83	along with this program; if not, write to the Free Software
84	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
85
86	*/
87
88	public class Diff {
89
90	/** Prepare to find differences between two arrays. Each element of
91	the arrays is translated to an "equivalence number" based on
92	the result of <code>equals</code>. The original Object arrays
93	are no longer needed for computing the differences. They will
94	be needed again later to print the results of the comparison as
95	an edit script, if desired.
96	*/
97	public Diff(Object[] a, Object[] b) {
98	Map<Object,Integer> h = new HashMap<>(a.length + b.length);
99	filevec = new FileData[] { new FileData(a,h),new FileData(b,h) };
100	}
101
102	/** 1 more than the maximum equivalence value used for this or its
103	sibling file. */
104	private int equiv_max = 1;
105
106	/** When set to true, the comparison uses a heuristic to speed it up.
107	With this heuristic, for files with a constant small density
108	of changes, the algorithm is linear in the file size. */
109	public boolean heuristic = false;
110
111	/** When set to true, the algorithm returns a guarranteed minimal
112	set of changes. This makes things slower, sometimes much slower. */
113	public boolean no_discards = false;
114
115	private int[] xvec, yvec; /* Vectors being compared. */
116	private int[] fdiag; /* Vector, indexed by diagonal, containing
117	the X coordinate of the point furthest
118	along the given diagonal in the forward
119	search of the edit matrix. */
120	private int[] bdiag; /* Vector, indexed by diagonal, containing
121	the X coordinate of the point furthest
122	along the given diagonal in the backward
123	search of the edit matrix. */
124	private int fdiagoff, bdiagoff;
125	private final FileData[] filevec;
126	private int cost;
127	/** Snakes bigger than this are considered "big". */
128	private static final int SNAKE_LIMIT = 20;
129
130	/** Find the midpoint of the shortest edit script for a specified
131	portion of the two files.
132
133	We scan from the beginnings of the files, and simultaneously from the ends,
134	doing a breadth-first search through the space of edit-sequence.
135	When the two searches meet, we have found the midpoint of the shortest
136	edit sequence.
137
138	The value returned is the number of the diagonal on which the midpoint lies.
139	The diagonal number equals the number of inserted lines minus the number
140	of deleted lines (counting only lines before the midpoint).
141	The edit cost is stored into COST; this is the total number of
142	lines inserted or deleted (counting only lines before the midpoint).
143
144	This function assumes that the first lines of the specified portions
145	of the two files do not match, and likewise that the last lines do not
146	match. The caller must trim matching lines from the beginning and end
147	of the portions it is going to specify.
148
149	Note that if we return the "wrong" diagonal value, or if
150	the value of bdiag at that diagonal is "wrong",
151	the worst this can do is cause suboptimal diff output.
152	It cannot cause incorrect diff output. */
153
154	private int diag (int xoff, int xlim, int yoff, int ylim) {
155	final int[] fd = fdiag; // Give the compiler a chance.
156	final int[] bd = bdiag; // Additional help for the compiler.
157	final int[] xv = xvec; // Still more help for the compiler.
158	final int[] yv = yvec; // And more and more . . .
159	final int dmin = xoff - ylim; // Minimum valid diagonal.
160	final int dmax = xlim - yoff; // Maximum valid diagonal.
161	final int fmid = xoff - yoff; // Center diagonal of top-down search.
162	final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
163	int fmin = fmid, fmax = fmid; // Limits of top-down search.
164	int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
165	/* True if southeast corner is on an odd
166	diagonal with respect to the northwest. */
167	final boolean odd = (fmid - bmid & 1) != 0;
168
169	fd[fdiagoff + fmid] = xoff;
170	bd[bdiagoff + bmid] = xlim;
171
172	for (int c = 1;; ++c)
173	{
174	int d; /* Active diagonal. */
175	boolean big_snake = false;
176
177	/* Extend the top-down search by an edit step in each diagonal. */
178	if (fmin > dmin) {
179	fd[fdiagoff + --fmin - 1] = -1;
180	} else {
181	++fmin;
182	}
183	if (fmax < dmax) {
184	fd[fdiagoff + ++fmax + 1] = -1;
185	} else {
186	--fmax;
187	}
188	for (d = fmax; d >= fmin; d -= 2)
189	{
190	int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
191
192	if (tlo >= thi) {
193	x = tlo + 1;
194	} else {
195	x = thi;
196	}
197	oldx = x;
198	y = x - d;
199	while (x < xlim && y < ylim && xv[x] == yv[y]) {
200	++x; ++y;
201	}
202	if (x - oldx > SNAKE_LIMIT) {
203	big_snake = true;
204	}
205	fd[fdiagoff + d] = x;
206	if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
207	{
208	cost = 2 * c - 1;
209	return d;
210	}
211	}
212
213	/* Similar extend the bottom-up search. */
214	if (bmin > dmin) {
215	bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
216	} else {
217	++bmin;
218	}
219	if (bmax < dmax) {
220	bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
221	} else {
222	--bmax;
223	}
224	for (d = bmax; d >= bmin; d -= 2)
225	{
226	int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
227
228	if (tlo < thi) {
229	x = tlo;
230	} else {
231	x = thi - 1;
232	}
233	oldx = x;
234	y = x - d;
235	while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
236	--x; --y;
237	}
238	if (oldx - x > SNAKE_LIMIT) {
239	big_snake = true;
240	}
241	bd[bdiagoff + d] = x;
242	if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
243	{
244	cost = 2 * c;
245	return d;
246	}
247	}
248
249	/* Heuristic: check occasionally for a diagonal that has made
250	lots of progress compared with the edit distance.
251	If we have any such, find the one that has made the most
252	progress and return it as if it had succeeded.
253
254	With this heuristic, for files with a constant small density
255	of changes, the algorithm is linear in the file size. */
256
257	if (c > 200 && big_snake && heuristic)
258	{
259	int best = 0;
260	int bestpos = -1;
261
262	for (d = fmax; d >= fmin; d -= 2)
263	{
264	int dd = d - fmid;
265	int x = fd[fdiagoff + d];
266	int y = x - d;
267	int v = (x - xoff) * 2 - dd;
268	if (v > 12 * (c + (dd < 0 ? -dd : dd)))
269	{
270	if (v > best
271	&& xoff + SNAKE_LIMIT <= x && x < xlim
272	&& yoff + SNAKE_LIMIT <= y && y < ylim)
273	{
274	/* We have a good enough best diagonal.
275	now insist that it end with a significant snake. */
276	int k;
277
278	for (k = 1; xvec[x - k] == yvec[y - k]; k++)
279	if (k == SNAKE_LIMIT)
280	{
281	best = v;
282	bestpos = d;
283	break;
284	}
285	}
286	}
287	}
288	if (best > 0)
289	{
290	cost = 2 * c - 1;
291	return bestpos;
292	}
293
294	best = 0;
295	for (d = bmax; d >= bmin; d -= 2)
296	{
297	int dd = d - bmid;
298	int x = bd[bdiagoff + d];
299	int y = x - d;
300	int v = (xlim - x) * 2 + dd;
301	if (v > 12 * (c + (dd < 0 ? -dd : dd)))
302	{
303	if (v > best
304	&& xoff < x && x <= xlim - SNAKE_LIMIT
305	&& yoff < y && y <= ylim - SNAKE_LIMIT)
306	{
307	/* We have a good enough best diagonal.
308	now insist that it end with a significant snake. */
309	int k;
310
311	for (k = 0; xvec[x + k] == yvec[y + k]; k++)
312	if (k == SNAKE_LIMIT)
313	{
314	best = v;
315	bestpos = d;
316	break;
317	}
318	}
319	}
320	}
321	if (best > 0)
322	{
323	cost = 2 * c - 1;
324	return bestpos;
325	}
326	}
327	}
328	}
329
330	/** Compare in detail contiguous subsequences of the two files
331	which are known, as a whole, to match each other.
332
333	The results are recorded in the vectors filevec[N].changed_flag, by
334	storing a 1 in the element for each line that is an insertion or deletion.
335
336	The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
337
338	Note that XLIM, YLIM are exclusive bounds.
339	All line numbers are origin-0 and discarded lines are not counted. */
340
341	private void compareseq (int xoff, int xlim, int yoff, int ylim) {
342	/* Slide down the bottom initial diagonal. */
343	while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
344	++xoff; ++yoff;
345	}
346	/* Slide up the top initial diagonal. */
347	while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
348	--xlim; --ylim;
349	}
350
351	/* Handle simple cases. */
352	if (xoff == xlim) {
353	while (yoff < ylim) {
354	filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
355	}
356	} else if (yoff == ylim) {
357	while (xoff < xlim) {
358	filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
359	}
360	} else {
361	/* Find a point of correspondence in the middle of the files. */
362
363	int d = diag (xoff, xlim, yoff, ylim);
364	int c = cost;
365	int b = bdiag[bdiagoff + d];
366
367	if (c == 1)
368	/* This should be impossible, because it implies that
369	one of the two subsequences is empty,
370	and that case was handled above without calling `diag'.
371	Let's verify that this is true. */
372	throw new IllegalArgumentException("Empty subsequence");
373	else
374	{
375	/* Use that point to split this problem into two subproblems. */
376	compareseq (xoff, b, yoff, b - d);
377	/* This used to use f instead of b,
378	but that is incorrect!
379	It is not necessarily the case that diagonal d
380	has a snake from b to f. */
381	compareseq (b, xlim, b - d, ylim);
382	}
383	}
384	}
385
386	/** Discard lines from one file that have no matches in the other file.
387	*/
388	private void discard_confusing_lines() {
389	filevec[0].discard_confusing_lines(filevec[1]);
390	filevec[1].discard_confusing_lines(filevec[0]);
391	}
392
393	private boolean inhibit = false;
394
395	/** Adjust inserts/deletes of blank lines to join changes
396	as much as possible.
397	*/
398	private void shift_boundaries() {
399	if (inhibit)
400	return;
401	filevec[0].shift_boundaries(filevec[1]);
402	filevec[1].shift_boundaries(filevec[0]);
403	}
404
405	public interface ScriptBuilder {
406	/** Scan the tables of which lines are inserted and deleted,
407	producing an edit script.
408	@param changed0 true for lines in first file which do not match 2nd
409	@param len0 number of lines in first file
410	@param changed1 true for lines in 2nd file which do not match 1st
411	@param len1 number of lines in 2nd file
412	@return a linked list of changes - or null
413	*/
414	public Change build_script(
415	boolean[] changed0,int len0,
416	boolean[] changed1,int len1
417	);
418	}
419
420	/** Scan the tables of which lines are inserted and deleted,
421	producing an edit script in reverse order. */
422
423	static class ReverseScript implements ScriptBuilder {
424	@Override
425	public Change build_script(
426	final boolean[] changed0,int len0,
427	final boolean[] changed1,int len1)
428	{
429	Change script = null;
430	int i0 = 0, i1 = 0;
431	while (i0 < len0 \|\| i1 < len1) {
432	if (changed0[1+i0] \|\| changed1[1+i1]) {
433	int line0 = i0, line1 = i1;
434
435	/* Find # lines changed here in each file. */
436	while (changed0[1+i0]) {
437	++i0;
438	}
439	while (changed1[1+i1]) {
440	++i1;
441	}
442
443	/* Record this change. */
444	script = new Change(line0, line1, i0 - line0, i1 - line1, script);
445	}
446
447	/* We have reached lines in the two files that match each other. */
448	i0++; i1++;
449	}
450
451	return script;
452	}
453	}
454
455	static class ForwardScript implements ScriptBuilder {
456	/** Scan the tables of which lines are inserted and deleted,
457	producing an edit script in forward order. */
458	@Override
459	public Change build_script(
460	final boolean[] changed0,int len0,
461	final boolean[] changed1,int len1)
462	{
463	Change script = null;
464	int i0 = len0, i1 = len1;
465
466	while (i0 >= 0 \|\| i1 >= 0)
467	{
468	if (changed0[i0] \|\| changed1[i1])
469	{
470	int line0 = i0, line1 = i1;
471
472	/* Find # lines changed here in each file. */
473	while (changed0[i0]) {
474	--i0;
475	}
476	while (changed1[i1]) {
477	--i1;
478	}
479
480	/* Record this change. */
481	script = new Change(i0, i1, line0 - i0, line1 - i1, script);
482	}
483
484	/* We have reached lines in the two files that match each other. */
485	i0--; i1--;
486	}
487
488	return script;
489	}
490	}
491
492	/** Standard ScriptBuilders. */
493	public static final ScriptBuilder
494	forwardScript = new ForwardScript(),
495	reverseScript = new ReverseScript();
496
497	/** Report the differences of two files. DEPTH is the current directory
498	depth. */
499	public final Change diff_2(final boolean reverse) {
500	return diff(reverse ? reverseScript : forwardScript);
501	}
502
503	/** Get the results of comparison as an edit script. The script
504	is described by a list of changes. The standard ScriptBuilder
505	implementations provide for forward and reverse edit scripts.
506	Alternate implementations could, for instance, list common elements
507	instead of differences.
508	@param bld an object to build the script from change flags
509	@return the head of a list of changes
510	*/
511	public Change diff(final ScriptBuilder bld) {
512
513	/* Some lines are obviously insertions or deletions
514	because they don't match anything. Detect them now,
515	and avoid even thinking about them in the main comparison algorithm. */
516
517	discard_confusing_lines ();
518
519	/* Now do the main comparison algorithm, considering just the
520	undiscarded lines. */
521
522	xvec = filevec[0].undiscarded;
523	yvec = filevec[1].undiscarded;
524
525	int diags =
526	filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
527	fdiag = new int[diags];
528	fdiagoff = filevec[1].nondiscarded_lines + 1;
529	bdiag = new int[diags];
530	bdiagoff = filevec[1].nondiscarded_lines + 1;
531
532	compareseq (0, filevec[0].nondiscarded_lines,
533	0, filevec[1].nondiscarded_lines);
534	fdiag = null;
535	bdiag = null;
536
537	/* Modify the results slightly to make them prettier
538	in cases where that can validly be done. */
539
540	shift_boundaries ();
541
542	/* Get the results of comparison in the form of a chain
543	of `struct change's -- an edit script. */
544	return bld.build_script(
545	filevec[0].changed_flag,
546	filevec[0].buffered_lines,
547	filevec[1].changed_flag,
548	filevec[1].buffered_lines
549	);
550
551	}
552
553	/** The result of comparison is an "edit script": a chain of change objects.
554	Each change represents one place where some lines are deleted
555	and some are inserted.
556
557	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
558	DELETED is the number of lines deleted here from file 0.
559	INSERTED is the number of lines inserted here in file 1.
560
561	If DELETED is 0 then LINE0 is the number of the line before
562	which the insertion was done; vice versa for INSERTED and LINE1. */
563
564	public static class Change {
565	/** Previous or next edit command. */
566	public Change link;
567	/** # lines of file 1 changed here. */
568	public final int inserted;
569	/** # lines of file 0 changed here. */
570	public final int deleted;
571	/** Line number of 1st deleted line. */
572	public final int line0;
573	/** Line number of 1st inserted line. */
574	public final int line1;
575
576	/** Cons an additional entry onto the front of an edit script OLD.
577	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
578	DELETED is the number of lines deleted here from file 0.
579	INSERTED is the number of lines inserted here in file 1.
580
581	If DELETED is 0 then LINE0 is the number of the line before
582	which the insertion was done; vice versa for INSERTED and LINE1. */
583	public Change(int line0, int line1, int deleted, int inserted, Change old) {
584	this.line0 = line0;
585	this.line1 = line1;
586	this.inserted = inserted;
587	this.deleted = deleted;
588	this.link = old;
589	}
590
591	@Override
592	public String toString() {
593	String s = String.format("%d -%d +%d %d",line0,deleted,inserted,line1);
594	return (link != null) ? s = s + '\n' + link : s;
595	}
596	}
597
598	/** Data on one input file being compared.
599	*/
600
601	class FileData {
602
603	/** Allocate changed array for the results of comparison. */
604	void clear() {
605	/* Allocate a flag for each line of each file, saying whether that line
606	is an insertion or deletion.
607	Allocate an extra element, always zero, at each end of each vector.
608	*/
609	changed_flag = new boolean[buffered_lines + 2];
610	}
611
612	/** Return equiv_count[I] as the number of lines in this file
613	that fall in equivalence class I.
614	@return the array of equivalence class counts.
615	*/
616	int[] equivCount() {
617	int[] equiv_count = new int[equiv_max];
618	for (int i = 0; i < buffered_lines; ++i) {
619	++equiv_count[equivs[i]];
620	}
621	return equiv_count;
622	}
623
624	/** Discard lines that have no matches in another file.
625
626	A line which is discarded will not be considered by the actual
627	comparison algorithm; it will be as if that line were not in the file.
628	The file's `realindexes' table maps virtual line numbers
629	(which don't count the discarded lines) into real line numbers;
630	this is how the actual comparison algorithm produces results
631	that are comprehensible when the discarded lines are counted.
632	<p>
633	When we discard a line, we also mark it as a deletion or insertion
634	so that it will be printed in the output.
635	@param f the other file
636	*/
637	void discard_confusing_lines(FileData f) {
638	clear();
639	/* Set up table of which lines are going to be discarded. */
640	final byte[] discarded = discardable(f.equivCount());
641
642	/* Don't really discard the provisional lines except when they occur
643	in a run of discardables, with nonprovisionals at the beginning
644	and end. */
645	filterDiscards(discarded);
646
647	/* Actually discard the lines. */
648	discard(discarded);
649	}
650
651	/**
652	* Mark to be discarded each line that matches no line of another file.
653	* If a line matches many lines, mark it as provisionally discardable.
654	* @see #equivCount()
655	* @param counts The count of each equivalence number for the other file.
656	* @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
657	* for each line
658	*/
659	private byte[] discardable(final int[] counts) {
660	final int end = buffered_lines;
661	final byte[] discards = new byte[end];
662	final int[] equivs = this.equivs;
663	int many = 5;
664	int tem = end / 64;
665
666	/* Multiply MANY by approximate square root of number of lines.
667	That is the threshold for provisionally discardable lines. */
668	while ((tem = tem >> 2) > 0) {
669	many *= 2;
670	}
671
672	for (int i = 0; i < end; i++)
673	{
674	int nmatch;
675	if (equivs[i] == 0) {
676	continue;
677	}
678	nmatch = counts[equivs[i]];
679	if (nmatch == 0) {
680	discards[i] = 1;
681	} else if (nmatch > many) {
682	discards[i] = 2;
683	}
684	}
685	return discards;
686	}
687
688	/**
689	* Don't really discard the provisional lines except when they occur
690	* in a run of discardables, with nonprovisionals at the beginning
691	* and end.
692	*/
693	private void filterDiscards(final byte[] discards) {
694	final int end = buffered_lines;
695
696	for (int i = 0; i < end; i++)
697	{
698	/* Cancel provisional discards not in middle of run of discards. */
699	if (discards[i] == 2) {
700	discards[i] = 0;
701	} else if (discards[i] != 0)
702	{
703	/* We have found a nonprovisional discard. */
704	int j;
705	int length;
706	int provisional = 0;
707
708	/* Find end of this run of discardable lines.
709	Count how many are provisionally discardable. */
710	for (j = i; j < end; j++)
711	{
712	if (discards[j] == 0) {
713	break;
714	}
715	if (discards[j] == 2) {
716	++provisional;
717	}
718	}
719
720	/* Cancel provisional discards at end, and shrink the run. */
721	while (j > i && discards[j - 1] == 2) {
722	discards[--j] = 0; --provisional;
723	}
724
725	/* Now we have the length of a run of discardable lines
726	whose first and last are not provisional. */
727	length = j - i;
728
729	/* If 1/4 of the lines in the run are provisional,
730	cancel discarding of all provisional lines in the run. */
731	if (provisional * 4 > length) {
732	while (j > i)
733	if (discards[--j] == 2) {
734	discards[j] = 0;
735	}
736	} else {
737	int consec;
738	int minimum = 1;
739	int tem = length / 4;
740
741	/* MINIMUM is approximate square root of LENGTH/4.
742	A subrun of two or more provisionals can stand
743	when LENGTH is at least 16.
744	A subrun of 4 or more can stand when LENGTH >= 64. */
745	while ((tem = tem >> 2) > 0) {
746	minimum *= 2;
747	}
748	minimum++;
749
750	/* Cancel any subrun of MINIMUM or more provisionals
751	within the larger run. */
752	for (j = 0, consec = 0; j < length; j++)
753	if (discards[i + j] != 2) {
754	consec = 0;
755	} else if (minimum == ++consec) {
756	/* Back up to start of subrun, to cancel it all. */
757	j -= consec;
758	} else if (minimum < consec) {
759	discards[i + j] = 0;
760	}
761
762	/* Scan from beginning of run
763	until we find 3 or more nonprovisionals in a row
764	or until the first nonprovisional at least 8 lines in.
765	Until that point, cancel any provisionals. */
766	for (j = 0, consec = 0; j < length; j++)
767	{
768	if (j >= 8 && discards[i + j] == 1) {
769	break;
770	}
771	if (discards[i + j] == 2) {
772	consec = 0; discards[i + j] = 0;
773	}
774	else if (discards[i + j] == 0) {
775	consec = 0;
776	} else {
777	consec++;
778	}
779	if (consec == 3) {
780	break;
781	}
782	}
783
784	/* I advances to the last line of the run. */
785	i += length - 1;
786
787	/* Same thing, from end. */
788	for (j = 0, consec = 0; j < length; j++)
789	{
790	if (j >= 8 && discards[i - j] == 1) {
791	break;
792	}
793	if (discards[i - j] == 2) {
794	consec = 0; discards[i - j] = 0;
795	}
796	else if (discards[i - j] == 0) {
797	consec = 0;
798	} else {
799	consec++;
800	}
801	if (consec == 3) {
802	break;
803	}
804	}
805	}
806	}
807	}
808	}
809
810	/** Actually discard the lines.
811	@param discards flags lines to be discarded
812	*/
813	private void discard(final byte[] discards) {
814	final int end = buffered_lines;
815	int j = 0;
816	for (int i = 0; i < end; ++i)
817	if (no_discards \|\| discards[i] == 0)
818	{
819	undiscarded[j] = equivs[i];
820	realindexes[j++] = i;
821	} else {
822	changed_flag[1+i] = true;
823	}
824	nondiscarded_lines = j;
825	}
826
827	FileData(int length) {
828	buffered_lines = length;
829	equivs = new int[length];
830	undiscarded = new int[buffered_lines];
831	realindexes = new int[buffered_lines];
832	}
833
834	FileData(Object[] data, Map<Object,Integer> h) {
835	this(data.length);
836	// FIXME: diff 2.7 removes common prefix and common suffix
837	for (int i = 0; i < data.length; ++i) {
838	Integer ir = h.get(data[i]);
839	if (ir == null) {
840	h.put(data[i],equivs[i] = equiv_max++);
841	} else {
842	equivs[i] = ir.intValue();
843	}
844	}
845	}
846
847	/** Adjust inserts/deletes of blank lines to join changes
848	as much as possible.
849
850	We do something when a run of changed lines include a blank
851	line at one end and have an excluded blank line at the other.
852	We are free to choose which blank line is included.
853	`compareseq' always chooses the one at the beginning,
854	but usually it is cleaner to consider the following blank line
855	to be the "change". The only exception is if the preceding blank line
856	would join this change to other changes.
857	@param f the file being compared against
858	*/
859
860	void shift_boundaries(FileData f) {
861	final boolean[] changed = changed_flag;
862	final boolean[] other_changed = f.changed_flag;
863	int i = 0;
864	int j = 0;
865	int i_end = buffered_lines;
866	int preceding = -1;
867	int other_preceding = -1;
868
869	for (;;)
870	{
871	int start, end, other_start;
872
873	/* Scan forwards to find beginning of another run of changes.
874	Also keep track of the corresponding point in the other file. */
875
876	while (i < i_end && !changed[1+i])
877	{
878	while (other_changed[1+j++]) {
879	/* Non-corresponding lines in the other file
880	will count as the preceding batch of changes. */
881	other_preceding = j;
882	}
883	i++;
884	}
885
886	if (i == i_end) {
887	break;
888	}
889
890	start = i;
891	other_start = j;
892
893	for (;;)
894	{
895	/* Now find the end of this run of changes. */
896
897	while (i < i_end && changed[1+i]) {
898	i++;
899	}
900	end = i;
901
902	/* If the first changed line matches the following unchanged one,
903	and this run does not follow right after a previous run,
904	and there are no lines deleted from the other file here,
905	then classify the first changed line as unchanged
906	and the following line as changed in its place. */
907
908	/* You might ask, how could this run follow right after another?
909	Only because the previous run was shifted here. */
910
911	if (end != i_end
912	&& equivs[start] == equivs[end]
913	&& !other_changed[1+j]
914	&& !((preceding >= 0 && start == preceding)
915	\|\| (other_preceding >= 0
916	&& other_start == other_preceding)))
917	{
918	changed[1+end++] = true;
919	changed[1+start++] = false;
920	++i;
921	/* Since one line-that-matches is now before this run
922	instead of after, we must advance in the other file
923	to keep in synch. */
924	++j;
925	} else {
926	break;
927	}
928	}
929
930	preceding = i;
931	other_preceding = j;
932	}
933	}
934
935	/** Number of elements (lines) in this file. */
936	final int buffered_lines;
937
938	/** Vector, indexed by line number, containing an equivalence code for
939	each line. It is this vector that is actually compared with that
940	of another file to generate differences. */
941	private final int[] equivs;
942
943	/** Vector, like the previous one except that
944	the elements for discarded lines have been squeezed out. */
945	final int[] undiscarded;
946
947	/** Vector mapping virtual line numbers (not counting discarded lines)
948	to real ones (counting those lines). Both are origin-0. */
949	final int[] realindexes;
950
951	/** Total number of nondiscarded lines. */
952	int nondiscarded_lines;
953
954	/** Array, indexed by real origin-1 line number,
955	containing true for a line that is an insertion or a deletion.
956	The results of comparison are stored here. */
957	boolean[] changed_flag;
958	}
959	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: