Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 5863

Last change on this file since 5863 was 5863, checked in by stoecker, 11 years ago
javadoc fixes
File size: 35.1 KB

Line
1	package org.openstreetmap.josm.tools;
2
3	//// Taken from http://www.bmsi.com/java/#diff
4
5	// http://www.bmsi.com/java/DiffPrint.java could also be useful
6
7	/*
8	* $Log: Diff.java,v $
9	* Revision 1.7 2009/01/19 03:05:26 stuart
10	* Fix StackOverflow bug with heuristic on reported by Jimmy Han.
11	*
12	* Revision 1.6 2003/03/06 22:51:32 stuart
13	* Convert to CVS
14	*
15	* Revision 1.5 2002/07/19 19:14:40 stuart
16	* fix reverseScript, make change ctor public, update docs
17	*
18	* Revision 1.4 2002/04/09 17:53:39 stuart
19	* More flexible interface for diff() function.
20	*
21	* Revision 1.3 2000/03/03 21:58:03 stuart
22	* move discard_confusing_lines and shift_boundaries to class file_data
23	*
24	* Revision 1.2 2000/03/02 16:37:38 stuart
25	* Add GPL and copyright
26	*
27	*/
28
29	import java.util.Hashtable;
30
31	/** A class to compare vectors of objects. The result of comparison
32	is a list of <code>change</code> objects which form an
33	edit script. The objects compared are traditionally lines
34	of text from two files. Comparison options such as "ignore
35	whitespace" are implemented by modifying the <code>equals</code>
36	and <code>hashcode</code> methods for the objects compared.
37	<p>
38	The basic algorithm is described in: </br>
39	"An O(ND) Difference Algorithm and its Variations", Eugene Myers,
40	Algorithmica Vol. 1 No. 2, 1986, p 251.
41	<p>
42	This class outputs different results from GNU diff 1.15 on some
43	inputs. Our results are actually better (smaller change list, smaller
44	total size of changes), but it would be nice to know why. Perhaps
45	there is a memory overwrite bug in GNU diff 1.15.
46
47	@author Stuart D. Gathman, translated from GNU diff 1.15
48	Copyright (C) 2000 Business Management Systems, Inc.
49	<p>
50	This program is free software; you can redistribute it and/or modify
51	it under the terms of the GNU General Public License as published by
52	the Free Software Foundation; either version 1, or (at your option)
53	any later version.
54	<p>
55	This program is distributed in the hope that it will be useful,
56	but WITHOUT ANY WARRANTY; without even the implied warranty of
57	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
58	GNU General Public License for more details.
59	<p>
60	You should have received a copy of the <a href=COPYING.txt>
61	GNU General Public License</a>
62	along with this program; if not, write to the Free Software
63	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
64
65	*/
66
67	public class Diff {
68
69	/** Prepare to find differences between two arrays. Each element of
70	the arrays is translated to an "equivalence number" based on
71	the result of <code>equals</code>. The original Object arrays
72	are no longer needed for computing the differences. They will
73	be needed again later to print the results of the comparison as
74	an edit script, if desired.
75	*/
76	public Diff(Object[] a,Object[] b) {
77	Hashtable<Object, Integer> h = new Hashtable<Object, Integer>(a.length + b.length);
78	filevec[0] = new file_data(a,h);
79	filevec[1] = new file_data(b,h);
80	}
81
82	/** 1 more than the maximum equivalence value used for this or its
83	sibling file. */
84	private int equiv_max = 1;
85
86	/** When set to true, the comparison uses a heuristic to speed it up.
87	With this heuristic, for files with a constant small density
88	of changes, the algorithm is linear in the file size. */
89	public boolean heuristic = false;
90
91	/** When set to true, the algorithm returns a guarranteed minimal
92	set of changes. This makes things slower, sometimes much slower. */
93	public boolean no_discards = false;
94
95	private int[] xvec, yvec; /* Vectors being compared. */
96	private int[] fdiag; /* Vector, indexed by diagonal, containing
97	the X coordinate of the point furthest
98	along the given diagonal in the forward
99	search of the edit matrix. */
100	private int[] bdiag; /* Vector, indexed by diagonal, containing
101	the X coordinate of the point furthest
102	along the given diagonal in the backward
103	search of the edit matrix. */
104	private int fdiagoff, bdiagoff;
105	private final file_data[] filevec = new file_data[2];
106	private int cost;
107	/** Snakes bigger than this are considered "big". */
108	private static final int SNAKE_LIMIT = 20;
109
110	/** Find the midpoint of the shortest edit script for a specified
111	portion of the two files.
112
113	We scan from the beginnings of the files, and simultaneously from the ends,
114	doing a breadth-first search through the space of edit-sequence.
115	When the two searches meet, we have found the midpoint of the shortest
116	edit sequence.
117
118	The value returned is the number of the diagonal on which the midpoint lies.
119	The diagonal number equals the number of inserted lines minus the number
120	of deleted lines (counting only lines before the midpoint).
121	The edit cost is stored into COST; this is the total number of
122	lines inserted or deleted (counting only lines before the midpoint).
123
124	This function assumes that the first lines of the specified portions
125	of the two files do not match, and likewise that the last lines do not
126	match. The caller must trim matching lines from the beginning and end
127	of the portions it is going to specify.
128
129	Note that if we return the "wrong" diagonal value, or if
130	the value of bdiag at that diagonal is "wrong",
131	the worst this can do is cause suboptimal diff output.
132	It cannot cause incorrect diff output. */
133
134	private int diag (int xoff, int xlim, int yoff, int ylim) {
135	final int[] fd = fdiag; // Give the compiler a chance.
136	final int[] bd = bdiag; // Additional help for the compiler.
137	final int[] xv = xvec; // Still more help for the compiler.
138	final int[] yv = yvec; // And more and more . . .
139	final int dmin = xoff - ylim; // Minimum valid diagonal.
140	final int dmax = xlim - yoff; // Maximum valid diagonal.
141	final int fmid = xoff - yoff; // Center diagonal of top-down search.
142	final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
143	int fmin = fmid, fmax = fmid; // Limits of top-down search.
144	int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
145	/* True if southeast corner is on an odd
146	diagonal with respect to the northwest. */
147	final boolean odd = (fmid - bmid & 1) != 0;
148
149	fd[fdiagoff + fmid] = xoff;
150	bd[bdiagoff + bmid] = xlim;
151
152	for (int c = 1;; ++c)
153	{
154	int d; /* Active diagonal. */
155	boolean big_snake = false;
156
157	/* Extend the top-down search by an edit step in each diagonal. */
158	if (fmin > dmin) {
159	fd[fdiagoff + --fmin - 1] = -1;
160	} else {
161	++fmin;
162	}
163	if (fmax < dmax) {
164	fd[fdiagoff + ++fmax + 1] = -1;
165	} else {
166	--fmax;
167	}
168	for (d = fmax; d >= fmin; d -= 2)
169	{
170	int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
171
172	if (tlo >= thi) {
173	x = tlo + 1;
174	} else {
175	x = thi;
176	}
177	oldx = x;
178	y = x - d;
179	while (x < xlim && y < ylim && xv[x] == yv[y]) {
180	++x; ++y;
181	}
182	if (x - oldx > SNAKE_LIMIT) {
183	big_snake = true;
184	}
185	fd[fdiagoff + d] = x;
186	if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
187	{
188	cost = 2 * c - 1;
189	return d;
190	}
191	}
192
193	/* Similar extend the bottom-up search. */
194	if (bmin > dmin) {
195	bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
196	} else {
197	++bmin;
198	}
199	if (bmax < dmax) {
200	bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
201	} else {
202	--bmax;
203	}
204	for (d = bmax; d >= bmin; d -= 2)
205	{
206	int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
207
208	if (tlo < thi) {
209	x = tlo;
210	} else {
211	x = thi - 1;
212	}
213	oldx = x;
214	y = x - d;
215	while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
216	--x; --y;
217	}
218	if (oldx - x > SNAKE_LIMIT) {
219	big_snake = true;
220	}
221	bd[bdiagoff + d] = x;
222	if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
223	{
224	cost = 2 * c;
225	return d;
226	}
227	}
228
229	/* Heuristic: check occasionally for a diagonal that has made
230	lots of progress compared with the edit distance.
231	If we have any such, find the one that has made the most
232	progress and return it as if it had succeeded.
233
234	With this heuristic, for files with a constant small density
235	of changes, the algorithm is linear in the file size. */
236
237	if (c > 200 && big_snake && heuristic)
238	{
239	int best = 0;
240	int bestpos = -1;
241
242	for (d = fmax; d >= fmin; d -= 2)
243	{
244	int dd = d - fmid;
245	int x = fd[fdiagoff + d];
246	int y = x - d;
247	int v = (x - xoff) * 2 - dd;
248	if (v > 12 * (c + (dd < 0 ? -dd : dd)))
249	{
250	if (v > best
251	&& xoff + SNAKE_LIMIT <= x && x < xlim
252	&& yoff + SNAKE_LIMIT <= y && y < ylim)
253	{
254	/* We have a good enough best diagonal;
255	now insist that it end with a significant snake. */
256	int k;
257
258	for (k = 1; xvec[x - k] == yvec[y - k]; k++)
259	if (k == SNAKE_LIMIT)
260	{
261	best = v;
262	bestpos = d;
263	break;
264	}
265	}
266	}
267	}
268	if (best > 0)
269	{
270	cost = 2 * c - 1;
271	return bestpos;
272	}
273
274	best = 0;
275	for (d = bmax; d >= bmin; d -= 2)
276	{
277	int dd = d - bmid;
278	int x = bd[bdiagoff + d];
279	int y = x - d;
280	int v = (xlim - x) * 2 + dd;
281	if (v > 12 * (c + (dd < 0 ? -dd : dd)))
282	{
283	if (v > best
284	&& xoff < x && x <= xlim - SNAKE_LIMIT
285	&& yoff < y && y <= ylim - SNAKE_LIMIT)
286	{
287	/* We have a good enough best diagonal;
288	now insist that it end with a significant snake. */
289	int k;
290
291	for (k = 0; xvec[x + k] == yvec[y + k]; k++)
292	if (k == SNAKE_LIMIT)
293	{
294	best = v;
295	bestpos = d;
296	break;
297	}
298	}
299	}
300	}
301	if (best > 0)
302	{
303	cost = 2 * c - 1;
304	return bestpos;
305	}
306	}
307	}
308	}
309
310	/** Compare in detail contiguous subsequences of the two files
311	which are known, as a whole, to match each other.
312
313	The results are recorded in the vectors filevec[N].changed_flag, by
314	storing a 1 in the element for each line that is an insertion or deletion.
315
316	The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
317
318	Note that XLIM, YLIM are exclusive bounds.
319	All line numbers are origin-0 and discarded lines are not counted. */
320
321	private void compareseq (int xoff, int xlim, int yoff, int ylim) {
322	/* Slide down the bottom initial diagonal. */
323	while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
324	++xoff; ++yoff;
325	}
326	/* Slide up the top initial diagonal. */
327	while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
328	--xlim; --ylim;
329	}
330
331	/* Handle simple cases. */
332	if (xoff == xlim) {
333	while (yoff < ylim) {
334	filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
335	}
336	} else if (yoff == ylim) {
337	while (xoff < xlim) {
338	filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
339	}
340	} else
341	{
342	/* Find a point of correspondence in the middle of the files. */
343
344	int d = diag (xoff, xlim, yoff, ylim);
345	int c = cost;
346	//int f = fdiag[fdiagoff + d];
347	int b = bdiag[bdiagoff + d];
348
349	if (c == 1)
350	/* This should be impossible, because it implies that
351	one of the two subsequences is empty,
352	and that case was handled above without calling `diag'.
353	Let's verify that this is true. */
354	throw new IllegalArgumentException("Empty subsequence");
355	else
356	{
357	/* Use that point to split this problem into two subproblems. */
358	compareseq (xoff, b, yoff, b - d);
359	/* This used to use f instead of b,
360	but that is incorrect!
361	It is not necessarily the case that diagonal d
362	has a snake from b to f. */
363	compareseq (b, xlim, b - d, ylim);
364	}
365	}
366	}
367
368	/** Discard lines from one file that have no matches in the other file.
369	*/
370
371	private void discard_confusing_lines() {
372	filevec[0].discard_confusing_lines(filevec[1]);
373	filevec[1].discard_confusing_lines(filevec[0]);
374	}
375
376	private boolean inhibit = false;
377
378	/** Adjust inserts/deletes of blank lines to join changes
379	as much as possible.
380	*/
381
382	private void shift_boundaries() {
383	if (inhibit)
384	return;
385	filevec[0].shift_boundaries(filevec[1]);
386	filevec[1].shift_boundaries(filevec[0]);
387	}
388
389	public interface ScriptBuilder {
390	/** Scan the tables of which lines are inserted and deleted,
391	producing an edit script.
392	@param changed0 true for lines in first file which do not match 2nd
393	@param len0 number of lines in first file
394	@param changed1 true for lines in 2nd file which do not match 1st
395	@param len1 number of lines in 2nd file
396	@return a linked list of changes - or null
397	*/
398	public change build_script(
399	boolean[] changed0,int len0,
400	boolean[] changed1,int len1
401	);
402	}
403
404	/** Scan the tables of which lines are inserted and deleted,
405	producing an edit script in reverse order. */
406
407	static class ReverseScript implements ScriptBuilder {
408	public change build_script(
409	final boolean[] changed0,int len0,
410	final boolean[] changed1,int len1)
411	{
412	change script = null;
413	int i0 = 0, i1 = 0;
414	while (i0 < len0 \|\| i1 < len1) {
415	if (changed0[1+i0] \|\| changed1[1+i1]) {
416	int line0 = i0, line1 = i1;
417
418	/* Find # lines changed here in each file. */
419	while (changed0[1+i0]) {
420	++i0;
421	}
422	while (changed1[1+i1]) {
423	++i1;
424	}
425
426	/* Record this change. */
427	script = new change(line0, line1, i0 - line0, i1 - line1, script);
428	}
429
430	/* We have reached lines in the two files that match each other. */
431	i0++; i1++;
432	}
433
434	return script;
435	}
436	}
437
438	static class ForwardScript implements ScriptBuilder {
439	/** Scan the tables of which lines are inserted and deleted,
440	producing an edit script in forward order. */
441	public change build_script(
442	final boolean[] changed0,int len0,
443	final boolean[] changed1,int len1)
444	{
445	change script = null;
446	int i0 = len0, i1 = len1;
447
448	while (i0 >= 0 \|\| i1 >= 0)
449	{
450	if (changed0[i0] \|\| changed1[i1])
451	{
452	int line0 = i0, line1 = i1;
453
454	/* Find # lines changed here in each file. */
455	while (changed0[i0]) {
456	--i0;
457	}
458	while (changed1[i1]) {
459	--i1;
460	}
461
462	/* Record this change. */
463	script = new change(i0, i1, line0 - i0, line1 - i1, script);
464	}
465
466	/* We have reached lines in the two files that match each other. */
467	i0--; i1--;
468	}
469
470	return script;
471	}
472	}
473
474	/** Standard ScriptBuilders. */
475	public final static ScriptBuilder
476	forwardScript = new ForwardScript(),
477	reverseScript = new ReverseScript();
478
479	/* Report the differences of two files. DEPTH is the current directory
480	depth. */
481	public final change diff_2(final boolean reverse) {
482	return diff(reverse ? reverseScript : forwardScript);
483	}
484
485	/** Get the results of comparison as an edit script. The script
486	is described by a list of changes. The standard ScriptBuilder
487	implementations provide for forward and reverse edit scripts.
488	Alternate implementations could, for instance, list common elements
489	instead of differences.
490	@param bld an object to build the script from change flags
491	@return the head of a list of changes
492	*/
493	public change diff(final ScriptBuilder bld) {
494
495	/* Some lines are obviously insertions or deletions
496	because they don't match anything. Detect them now,
497	and avoid even thinking about them in the main comparison algorithm. */
498
499	discard_confusing_lines ();
500
501	/* Now do the main comparison algorithm, considering just the
502	undiscarded lines. */
503
504	xvec = filevec[0].undiscarded;
505	yvec = filevec[1].undiscarded;
506
507	int diags =
508	filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
509	fdiag = new int[diags];
510	fdiagoff = filevec[1].nondiscarded_lines + 1;
511	bdiag = new int[diags];
512	bdiagoff = filevec[1].nondiscarded_lines + 1;
513
514	compareseq (0, filevec[0].nondiscarded_lines,
515	0, filevec[1].nondiscarded_lines);
516	fdiag = null;
517	bdiag = null;
518
519	/* Modify the results slightly to make them prettier
520	in cases where that can validly be done. */
521
522	shift_boundaries ();
523
524	/* Get the results of comparison in the form of a chain
525	of `struct change's -- an edit script. */
526	return bld.build_script(
527	filevec[0].changed_flag,
528	filevec[0].buffered_lines,
529	filevec[1].changed_flag,
530	filevec[1].buffered_lines
531	);
532
533	}
534
535	/** The result of comparison is an "edit script": a chain of change objects.
536	Each change represents one place where some lines are deleted
537	and some are inserted.
538
539	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
540	DELETED is the number of lines deleted here from file 0.
541	INSERTED is the number of lines inserted here in file 1.
542
543	If DELETED is 0 then LINE0 is the number of the line before
544	which the insertion was done; vice versa for INSERTED and LINE1. */
545
546	public static class change {
547	/** Previous or next edit command. */
548	public change link;
549	/** # lines of file 1 changed here. */
550	public final int inserted;
551	/** # lines of file 0 changed here. */
552	public final int deleted;
553	/** Line number of 1st deleted line. */
554	public final int line0;
555	/** Line number of 1st inserted line. */
556	public final int line1;
557
558	/** Cons an additional entry onto the front of an edit script OLD.
559	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
560	DELETED is the number of lines deleted here from file 0.
561	INSERTED is the number of lines inserted here in file 1.
562
563	If DELETED is 0 then LINE0 is the number of the line before
564	which the insertion was done; vice versa for INSERTED and LINE1. */
565	public change(int line0, int line1, int deleted, int inserted, change old) {
566	this.line0 = line0;
567	this.line1 = line1;
568	this.inserted = inserted;
569	this.deleted = deleted;
570	this.link = old;
571	//System.err.println(line0+","+line1+","+inserted+","+deleted);
572	}
573	}
574
575	/** Data on one input file being compared.
576	*/
577
578	class file_data {
579
580	/** Allocate changed array for the results of comparison. */
581	void clear() {
582	/* Allocate a flag for each line of each file, saying whether that line
583	is an insertion or deletion.
584	Allocate an extra element, always zero, at each end of each vector.
585	*/
586	changed_flag = new boolean[buffered_lines + 2];
587	}
588
589	/** Return equiv_count[I] as the number of lines in this file
590	that fall in equivalence class I.
591	@return the array of equivalence class counts.
592	*/
593	int[] equivCount() {
594	int[] equiv_count = new int[equiv_max];
595	for (int i = 0; i < buffered_lines; ++i) {
596	++equiv_count[equivs[i]];
597	}
598	return equiv_count;
599	}
600
601	/** Discard lines that have no matches in another file.
602
603	A line which is discarded will not be considered by the actual
604	comparison algorithm; it will be as if that line were not in the file.
605	The file's `realindexes' table maps virtual line numbers
606	(which don't count the discarded lines) into real line numbers;
607	this is how the actual comparison algorithm produces results
608	that are comprehensible when the discarded lines are counted.
609	<p>
610	When we discard a line, we also mark it as a deletion or insertion
611	so that it will be printed in the output.
612	@param f the other file
613	*/
614	void discard_confusing_lines(file_data f) {
615	clear();
616	/* Set up table of which lines are going to be discarded. */
617	final byte[] discarded = discardable(f.equivCount());
618
619	/* Don't really discard the provisional lines except when they occur
620	in a run of discardables, with nonprovisionals at the beginning
621	and end. */
622	filterDiscards(discarded);
623
624	/* Actually discard the lines. */
625	discard(discarded);
626	}
627
628	/**
629	* Mark to be discarded each line that matches no line of another file.
630	* If a line matches many lines, mark it as provisionally discardable.
631	* @see #equivCount()
632	* @param counts The count of each equivalence number for the other file.
633	* @return 0=nondiscardable, 1=discardable or 2=provisionally discardable
634	* for each line
635	*/
636	private byte[] discardable(final int[] counts) {
637	final int end = buffered_lines;
638	final byte[] discards = new byte[end];
639	final int[] equivs = this.equivs;
640	int many = 5;
641	int tem = end / 64;
642
643	/* Multiply MANY by approximate square root of number of lines.
644	That is the threshold for provisionally discardable lines. */
645	while ((tem = tem >> 2) > 0) {
646	many *= 2;
647	}
648
649	for (int i = 0; i < end; i++)
650	{
651	int nmatch;
652	if (equivs[i] == 0) {
653	continue;
654	}
655	nmatch = counts[equivs[i]];
656	if (nmatch == 0) {
657	discards[i] = 1;
658	} else if (nmatch > many) {
659	discards[i] = 2;
660	}
661	}
662	return discards;
663	}
664
665	/**
666	* Don't really discard the provisional lines except when they occur
667	* in a run of discardables, with nonprovisionals at the beginning
668	* and end.
669	*/
670	private void filterDiscards(final byte[] discards) {
671	final int end = buffered_lines;
672
673	for (int i = 0; i < end; i++)
674	{
675	/* Cancel provisional discards not in middle of run of discards. */
676	if (discards[i] == 2) {
677	discards[i] = 0;
678	} else if (discards[i] != 0)
679	{
680	/* We have found a nonprovisional discard. */
681	int j;
682	int length;
683	int provisional = 0;
684
685	/* Find end of this run of discardable lines.
686	Count how many are provisionally discardable. */
687	for (j = i; j < end; j++)
688	{
689	if (discards[j] == 0) {
690	break;
691	}
692	if (discards[j] == 2) {
693	++provisional;
694	}
695	}
696
697	/* Cancel provisional discards at end, and shrink the run. */
698	while (j > i && discards[j - 1] == 2) {
699	discards[--j] = 0; --provisional;
700	}
701
702	/* Now we have the length of a run of discardable lines
703	whose first and last are not provisional. */
704	length = j - i;
705
706	/* If 1/4 of the lines in the run are provisional,
707	cancel discarding of all provisional lines in the run. */
708	if (provisional * 4 > length)
709	{
710	while (j > i)
711	if (discards[--j] == 2) {
712	discards[j] = 0;
713	}
714	}
715	else
716	{
717	int consec;
718	int minimum = 1;
719	int tem = length / 4;
720
721	/* MINIMUM is approximate square root of LENGTH/4.
722	A subrun of two or more provisionals can stand
723	when LENGTH is at least 16.
724	A subrun of 4 or more can stand when LENGTH >= 64. */
725	while ((tem = tem >> 2) > 0) {
726	minimum *= 2;
727	}
728	minimum++;
729
730	/* Cancel any subrun of MINIMUM or more provisionals
731	within the larger run. */
732	for (j = 0, consec = 0; j < length; j++)
733	if (discards[i + j] != 2) {
734	consec = 0;
735	} else if (minimum == ++consec) {
736	/* Back up to start of subrun, to cancel it all. */
737	j -= consec;
738	} else if (minimum < consec) {
739	discards[i + j] = 0;
740	}
741
742	/* Scan from beginning of run
743	until we find 3 or more nonprovisionals in a row
744	or until the first nonprovisional at least 8 lines in.
745	Until that point, cancel any provisionals. */
746	for (j = 0, consec = 0; j < length; j++)
747	{
748	if (j >= 8 && discards[i + j] == 1) {
749	break;
750	}
751	if (discards[i + j] == 2) {
752	consec = 0; discards[i + j] = 0;
753	}
754	else if (discards[i + j] == 0) {
755	consec = 0;
756	} else {
757	consec++;
758	}
759	if (consec == 3) {
760	break;
761	}
762	}
763
764	/* I advances to the last line of the run. */
765	i += length - 1;
766
767	/* Same thing, from end. */
768	for (j = 0, consec = 0; j < length; j++)
769	{
770	if (j >= 8 && discards[i - j] == 1) {
771	break;
772	}
773	if (discards[i - j] == 2) {
774	consec = 0; discards[i - j] = 0;
775	}
776	else if (discards[i - j] == 0) {
777	consec = 0;
778	} else {
779	consec++;
780	}
781	if (consec == 3) {
782	break;
783	}
784	}
785	}
786	}
787	}
788	}
789
790	/** Actually discard the lines.
791	@param discards flags lines to be discarded
792	*/
793	private void discard(final byte[] discards) {
794	final int end = buffered_lines;
795	int j = 0;
796	for (int i = 0; i < end; ++i)
797	if (no_discards \|\| discards[i] == 0)
798	{
799	undiscarded[j] = equivs[i];
800	realindexes[j++] = i;
801	} else {
802	changed_flag[1+i] = true;
803	}
804	nondiscarded_lines = j;
805	}
806
807	file_data(Object[] data,Hashtable<Object, Integer> h) {
808	buffered_lines = data.length;
809
810	equivs = new int[buffered_lines];
811	undiscarded = new int[buffered_lines];
812	realindexes = new int[buffered_lines];
813
814	for (int i = 0; i < data.length; ++i) {
815	Integer ir = (Integer)h.get(data[i]);
816	if (ir == null) {
817	h.put(data[i],new Integer(equivs[i] = equiv_max++));
818	} else {
819	equivs[i] = ir.intValue();
820	}
821	}
822	}
823
824	/** Adjust inserts/deletes of blank lines to join changes
825	as much as possible.
826
827	We do something when a run of changed lines include a blank
828	line at one end and have an excluded blank line at the other.
829	We are free to choose which blank line is included.
830	`compareseq' always chooses the one at the beginning,
831	but usually it is cleaner to consider the following blank line
832	to be the "change". The only exception is if the preceding blank line
833	would join this change to other changes.
834	@param f the file being compared against
835	*/
836
837	void shift_boundaries(file_data f) {
838	final boolean[] changed = changed_flag;
839	final boolean[] other_changed = f.changed_flag;
840	int i = 0;
841	int j = 0;
842	int i_end = buffered_lines;
843	int preceding = -1;
844	int other_preceding = -1;
845
846	for (;;)
847	{
848	int start, end, other_start;
849
850	/* Scan forwards to find beginning of another run of changes.
851	Also keep track of the corresponding point in the other file. */
852
853	while (i < i_end && !changed[1+i])
854	{
855	while (other_changed[1+j++]) {
856	/* Non-corresponding lines in the other file
857	will count as the preceding batch of changes. */
858	other_preceding = j;
859	}
860	i++;
861	}
862
863	if (i == i_end) {
864	break;
865	}
866
867	start = i;
868	other_start = j;
869
870	for (;;)
871	{
872	/* Now find the end of this run of changes. */
873
874	while (i < i_end && changed[1+i]) {
875	i++;
876	}
877	end = i;
878
879	/* If the first changed line matches the following unchanged one,
880	and this run does not follow right after a previous run,
881	and there are no lines deleted from the other file here,
882	then classify the first changed line as unchanged
883	and the following line as changed in its place. */
884
885	/* You might ask, how could this run follow right after another?
886	Only because the previous run was shifted here. */
887
888	if (end != i_end
889	&& equivs[start] == equivs[end]
890	&& !other_changed[1+j]
891	&& end != i_end
892	&& !((preceding >= 0 && start == preceding)
893	\|\| (other_preceding >= 0
894	&& other_start == other_preceding)))
895	{
896	changed[1+end++] = true;
897	changed[1+start++] = false;
898	++i;
899	/* Since one line-that-matches is now before this run
900	instead of after, we must advance in the other file
901	to keep in synch. */
902	++j;
903	} else {
904	break;
905	}
906	}
907
908	preceding = i;
909	other_preceding = j;
910	}
911	}
912
913	/** Number of elements (lines) in this file. */
914	final int buffered_lines;
915
916	/** Vector, indexed by line number, containing an equivalence code for
917	each line. It is this vector that is actually compared with that
918	of another file to generate differences. */
919	private final int[] equivs;
920
921	/** Vector, like the previous one except that
922	the elements for discarded lines have been squeezed out. */
923	final int[] undiscarded;
924
925	/** Vector mapping virtual line numbers (not counting discarded lines)
926	to real ones (counting those lines). Both are origin-0. */
927	final int[] realindexes;
928
929	/** Total number of nondiscarded lines. */
930	int nondiscarded_lines;
931
932	/** Array, indexed by real origin-1 line number,
933	containing true for a line that is an insertion or a deletion.
934	The results of comparison are stored here. */
935	boolean[] changed_flag;
936
937	}
938	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: