Context Navigation

source: josm/trunk/src/org/openstreetmap/josm/tools/Diff.java@ 5508

Last change on this file since 5508 was 4534, checked in by Don-vip, 13 years ago
Fixed a bunch of compile warnings
File size: 35.0 KB

Line
1	package org.openstreetmap.josm.tools;
2
3	//// Taken from http://www.bmsi.com/java/#diff
4
5	// http://www.bmsi.com/java/DiffPrint.java could also be useful
6
7	/*
8	* $Log: Diff.java,v $
9	* Revision 1.7 2009/01/19 03:05:26 stuart
10	* Fix StackOverflow bug with heuristic on reported by Jimmy Han.
11	*
12	* Revision 1.6 2003/03/06 22:51:32 stuart
13	* Convert to CVS
14	*
15	* Revision 1.5 2002/07/19 19:14:40 stuart
16	* fix reverseScript, make change ctor public, update docs
17	*
18	* Revision 1.4 2002/04/09 17:53:39 stuart
19	* More flexible interface for diff() function.
20	*
21	* Revision 1.3 2000/03/03 21:58:03 stuart
22	* move discard_confusing_lines and shift_boundaries to class file_data
23	*
24	* Revision 1.2 2000/03/02 16:37:38 stuart
25	* Add GPL and copyright
26	*
27	*/
28
29	import java.util.Hashtable;
30
31	/** A class to compare vectors of objects. The result of comparison
32	is a list of <code>change</code> objects which form an
33	edit script. The objects compared are traditionally lines
34	of text from two files. Comparison options such as "ignore
35	whitespace" are implemented by modifying the <code>equals</code>
36	and <code>hashcode</code> methods for the objects compared.
37	<p>
38	The basic algorithm is described in: </br>
39	"An O(ND) Difference Algorithm and its Variations", Eugene Myers,
40	Algorithmica Vol. 1 No. 2, 1986, p 251.
41	<p>
42	This class outputs different results from GNU diff 1.15 on some
43	inputs. Our results are actually better (smaller change list, smaller
44	total size of changes), but it would be nice to know why. Perhaps
45	there is a memory overwrite bug in GNU diff 1.15.
46
47	@author Stuart D. Gathman, translated from GNU diff 1.15
48	Copyright (C) 2000 Business Management Systems, Inc.
49	<p>
50	This program is free software; you can redistribute it and/or modify
51	it under the terms of the GNU General Public License as published by
52	the Free Software Foundation; either version 1, or (at your option)
53	any later version.
54	<p>
55	This program is distributed in the hope that it will be useful,
56	but WITHOUT ANY WARRANTY; without even the implied warranty of
57	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
58	GNU General Public License for more details.
59	<p>
60	You should have received a copy of the <a href=COPYING.txt>
61	GNU General Public License</a>
62	along with this program; if not, write to the Free Software
63	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
64
65	*/
66
67	public class Diff {
68
69	/** Prepare to find differences between two arrays. Each element of
70	the arrays is translated to an "equivalence number" based on
71	the result of <code>equals</code>. The original Object arrays
72	are no longer needed for computing the differences. They will
73	be needed again later to print the results of the comparison as
74	an edit script, if desired.
75	*/
76	public Diff(Object[] a,Object[] b) {
77	Hashtable<Object, Integer> h = new Hashtable<Object, Integer>(a.length + b.length);
78	filevec[0] = new file_data(a,h);
79	filevec[1] = new file_data(b,h);
80	}
81
82	/** 1 more than the maximum equivalence value used for this or its
83	sibling file. */
84	private int equiv_max = 1;
85
86	/** When set to true, the comparison uses a heuristic to speed it up.
87	With this heuristic, for files with a constant small density
88	of changes, the algorithm is linear in the file size. */
89	public boolean heuristic = false;
90
91	/** When set to true, the algorithm returns a guarranteed minimal
92	set of changes. This makes things slower, sometimes much slower. */
93	public boolean no_discards = false;
94
95	private int[] xvec, yvec; /* Vectors being compared. */
96	private int[] fdiag; /* Vector, indexed by diagonal, containing
97	the X coordinate of the point furthest
98	along the given diagonal in the forward
99	search of the edit matrix. */
100	private int[] bdiag; /* Vector, indexed by diagonal, containing
101	the X coordinate of the point furthest
102	along the given diagonal in the backward
103	search of the edit matrix. */
104	private int fdiagoff, bdiagoff;
105	private final file_data[] filevec = new file_data[2];
106	private int cost;
107	/** Snakes bigger than this are considered "big". */
108	private static final int SNAKE_LIMIT = 20;
109
110	/** Find the midpoint of the shortest edit script for a specified
111	portion of the two files.
112
113	We scan from the beginnings of the files, and simultaneously from the ends,
114	doing a breadth-first search through the space of edit-sequence.
115	When the two searches meet, we have found the midpoint of the shortest
116	edit sequence.
117
118	The value returned is the number of the diagonal on which the midpoint lies.
119	The diagonal number equals the number of inserted lines minus the number
120	of deleted lines (counting only lines before the midpoint).
121	The edit cost is stored into COST; this is the total number of
122	lines inserted or deleted (counting only lines before the midpoint).
123
124	This function assumes that the first lines of the specified portions
125	of the two files do not match, and likewise that the last lines do not
126	match. The caller must trim matching lines from the beginning and end
127	of the portions it is going to specify.
128
129	Note that if we return the "wrong" diagonal value, or if
130	the value of bdiag at that diagonal is "wrong",
131	the worst this can do is cause suboptimal diff output.
132	It cannot cause incorrect diff output. */
133
134	private int diag (int xoff, int xlim, int yoff, int ylim) {
135	final int[] fd = fdiag; // Give the compiler a chance.
136	final int[] bd = bdiag; // Additional help for the compiler.
137	final int[] xv = xvec; // Still more help for the compiler.
138	final int[] yv = yvec; // And more and more . . .
139	final int dmin = xoff - ylim; // Minimum valid diagonal.
140	final int dmax = xlim - yoff; // Maximum valid diagonal.
141	final int fmid = xoff - yoff; // Center diagonal of top-down search.
142	final int bmid = xlim - ylim; // Center diagonal of bottom-up search.
143	int fmin = fmid, fmax = fmid; // Limits of top-down search.
144	int bmin = bmid, bmax = bmid; // Limits of bottom-up search.
145	/* True if southeast corner is on an odd
146	diagonal with respect to the northwest. */
147	final boolean odd = (fmid - bmid & 1) != 0;
148
149	fd[fdiagoff + fmid] = xoff;
150	bd[bdiagoff + bmid] = xlim;
151
152	for (int c = 1;; ++c)
153	{
154	int d; /* Active diagonal. */
155	boolean big_snake = false;
156
157	/* Extend the top-down search by an edit step in each diagonal. */
158	if (fmin > dmin) {
159	fd[fdiagoff + --fmin - 1] = -1;
160	} else {
161	++fmin;
162	}
163	if (fmax < dmax) {
164	fd[fdiagoff + ++fmax + 1] = -1;
165	} else {
166	--fmax;
167	}
168	for (d = fmax; d >= fmin; d -= 2)
169	{
170	int x, y, oldx, tlo = fd[fdiagoff + d - 1], thi = fd[fdiagoff + d + 1];
171
172	if (tlo >= thi) {
173	x = tlo + 1;
174	} else {
175	x = thi;
176	}
177	oldx = x;
178	y = x - d;
179	while (x < xlim && y < ylim && xv[x] == yv[y]) {
180	++x; ++y;
181	}
182	if (x - oldx > SNAKE_LIMIT) {
183	big_snake = true;
184	}
185	fd[fdiagoff + d] = x;
186	if (odd && bmin <= d && d <= bmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
187	{
188	cost = 2 * c - 1;
189	return d;
190	}
191	}
192
193	/* Similar extend the bottom-up search. */
194	if (bmin > dmin) {
195	bd[bdiagoff + --bmin - 1] = Integer.MAX_VALUE;
196	} else {
197	++bmin;
198	}
199	if (bmax < dmax) {
200	bd[bdiagoff + ++bmax + 1] = Integer.MAX_VALUE;
201	} else {
202	--bmax;
203	}
204	for (d = bmax; d >= bmin; d -= 2)
205	{
206	int x, y, oldx, tlo = bd[bdiagoff + d - 1], thi = bd[bdiagoff + d + 1];
207
208	if (tlo < thi) {
209	x = tlo;
210	} else {
211	x = thi - 1;
212	}
213	oldx = x;
214	y = x - d;
215	while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1]) {
216	--x; --y;
217	}
218	if (oldx - x > SNAKE_LIMIT) {
219	big_snake = true;
220	}
221	bd[bdiagoff + d] = x;
222	if (!odd && fmin <= d && d <= fmax && bd[bdiagoff + d] <= fd[fdiagoff + d])
223	{
224	cost = 2 * c;
225	return d;
226	}
227	}
228
229	/* Heuristic: check occasionally for a diagonal that has made
230	lots of progress compared with the edit distance.
231	If we have any such, find the one that has made the most
232	progress and return it as if it had succeeded.
233
234	With this heuristic, for files with a constant small density
235	of changes, the algorithm is linear in the file size. */
236
237	if (c > 200 && big_snake && heuristic)
238	{
239	int best = 0;
240	int bestpos = -1;
241
242	for (d = fmax; d >= fmin; d -= 2)
243	{
244	int dd = d - fmid;
245	int x = fd[fdiagoff + d];
246	int y = x - d;
247	int v = (x - xoff) * 2 - dd;
248	if (v > 12 * (c + (dd < 0 ? -dd : dd)))
249	{
250	if (v > best
251	&& xoff + SNAKE_LIMIT <= x && x < xlim
252	&& yoff + SNAKE_LIMIT <= y && y < ylim)
253	{
254	/* We have a good enough best diagonal;
255	now insist that it end with a significant snake. */
256	int k;
257
258	for (k = 1; xvec[x - k] == yvec[y - k]; k++)
259	if (k == SNAKE_LIMIT)
260	{
261	best = v;
262	bestpos = d;
263	break;
264	}
265	}
266	}
267	}
268	if (best > 0)
269	{
270	cost = 2 * c - 1;
271	return bestpos;
272	}
273
274	best = 0;
275	for (d = bmax; d >= bmin; d -= 2)
276	{
277	int dd = d - bmid;
278	int x = bd[bdiagoff + d];
279	int y = x - d;
280	int v = (xlim - x) * 2 + dd;
281	if (v > 12 * (c + (dd < 0 ? -dd : dd)))
282	{
283	if (v > best
284	&& xoff < x && x <= xlim - SNAKE_LIMIT
285	&& yoff < y && y <= ylim - SNAKE_LIMIT)
286	{
287	/* We have a good enough best diagonal;
288	now insist that it end with a significant snake. */
289	int k;
290
291	for (k = 0; xvec[x + k] == yvec[y + k]; k++)
292	if (k == SNAKE_LIMIT)
293	{
294	best = v;
295	bestpos = d;
296	break;
297	}
298	}
299	}
300	}
301	if (best > 0)
302	{
303	cost = 2 * c - 1;
304	return bestpos;
305	}
306	}
307	}
308	}
309
310	/** Compare in detail contiguous subsequences of the two files
311	which are known, as a whole, to match each other.
312
313	The results are recorded in the vectors filevec[N].changed_flag, by
314	storing a 1 in the element for each line that is an insertion or deletion.
315
316	The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
317
318	Note that XLIM, YLIM are exclusive bounds.
319	All line numbers are origin-0 and discarded lines are not counted. */
320
321	private void compareseq (int xoff, int xlim, int yoff, int ylim) {
322	/* Slide down the bottom initial diagonal. */
323	while (xoff < xlim && yoff < ylim && xvec[xoff] == yvec[yoff]) {
324	++xoff; ++yoff;
325	}
326	/* Slide up the top initial diagonal. */
327	while (xlim > xoff && ylim > yoff && xvec[xlim - 1] == yvec[ylim - 1]) {
328	--xlim; --ylim;
329	}
330
331	/* Handle simple cases. */
332	if (xoff == xlim) {
333	while (yoff < ylim) {
334	filevec[1].changed_flag[1+filevec[1].realindexes[yoff++]] = true;
335	}
336	} else if (yoff == ylim) {
337	while (xoff < xlim) {
338	filevec[0].changed_flag[1+filevec[0].realindexes[xoff++]] = true;
339	}
340	} else
341	{
342	/* Find a point of correspondence in the middle of the files. */
343
344	int d = diag (xoff, xlim, yoff, ylim);
345	int c = cost;
346	//int f = fdiag[fdiagoff + d];
347	int b = bdiag[bdiagoff + d];
348
349	if (c == 1)
350	/* This should be impossible, because it implies that
351	one of the two subsequences is empty,
352	and that case was handled above without calling `diag'.
353	Let's verify that this is true. */
354	throw new IllegalArgumentException("Empty subsequence");
355	else
356	{
357	/* Use that point to split this problem into two subproblems. */
358	compareseq (xoff, b, yoff, b - d);
359	/* This used to use f instead of b,
360	but that is incorrect!
361	It is not necessarily the case that diagonal d
362	has a snake from b to f. */
363	compareseq (b, xlim, b - d, ylim);
364	}
365	}
366	}
367
368	/** Discard lines from one file that have no matches in the other file.
369	*/
370
371	private void discard_confusing_lines() {
372	filevec[0].discard_confusing_lines(filevec[1]);
373	filevec[1].discard_confusing_lines(filevec[0]);
374	}
375
376	private boolean inhibit = false;
377
378	/** Adjust inserts/deletes of blank lines to join changes
379	as much as possible.
380	*/
381
382	private void shift_boundaries() {
383	if (inhibit)
384	return;
385	filevec[0].shift_boundaries(filevec[1]);
386	filevec[1].shift_boundaries(filevec[0]);
387	}
388
389	public interface ScriptBuilder {
390	/** Scan the tables of which lines are inserted and deleted,
391	producing an edit script.
392	@param changed0 true for lines in first file which do not match 2nd
393	@param len0 number of lines in first file
394	@param changed1 true for lines in 2nd file which do not match 1st
395	@param len1 number of lines in 2nd file
396	@return a linked list of changes - or null
397	*/
398	public change build_script(
399	boolean[] changed0,int len0,
400	boolean[] changed1,int len1
401	);
402	}
403
404	/** Scan the tables of which lines are inserted and deleted,
405	producing an edit script in reverse order. */
406
407	static class ReverseScript implements ScriptBuilder {
408	public change build_script(
409	final boolean[] changed0,int len0,
410	final boolean[] changed1,int len1)
411	{
412	change script = null;
413	int i0 = 0, i1 = 0;
414	while (i0 < len0 \|\| i1 < len1) {
415	if (changed0[1+i0] \|\| changed1[1+i1]) {
416	int line0 = i0, line1 = i1;
417
418	/* Find # lines changed here in each file. */
419	while (changed0[1+i0]) {
420	++i0;
421	}
422	while (changed1[1+i1]) {
423	++i1;
424	}
425
426	/* Record this change. */
427	script = new change(line0, line1, i0 - line0, i1 - line1, script);
428	}
429
430	/* We have reached lines in the two files that match each other. */
431	i0++; i1++;
432	}
433
434	return script;
435	}
436	}
437
438	static class ForwardScript implements ScriptBuilder {
439	/** Scan the tables of which lines are inserted and deleted,
440	producing an edit script in forward order. */
441	public change build_script(
442	final boolean[] changed0,int len0,
443	final boolean[] changed1,int len1)
444	{
445	change script = null;
446	int i0 = len0, i1 = len1;
447
448	while (i0 >= 0 \|\| i1 >= 0)
449	{
450	if (changed0[i0] \|\| changed1[i1])
451	{
452	int line0 = i0, line1 = i1;
453
454	/* Find # lines changed here in each file. */
455	while (changed0[i0]) {
456	--i0;
457	}
458	while (changed1[i1]) {
459	--i1;
460	}
461
462	/* Record this change. */
463	script = new change(i0, i1, line0 - i0, line1 - i1, script);
464	}
465
466	/* We have reached lines in the two files that match each other. */
467	i0--; i1--;
468	}
469
470	return script;
471	}
472	}
473
474	/** Standard ScriptBuilders. */
475	public final static ScriptBuilder
476	forwardScript = new ForwardScript(),
477	reverseScript = new ReverseScript();
478
479	/* Report the differences of two files. DEPTH is the current directory
480	depth. */
481	public final change diff_2(final boolean reverse) {
482	return diff(reverse ? reverseScript : forwardScript);
483	}
484
485	/** Get the results of comparison as an edit script. The script
486	is described by a list of changes. The standard ScriptBuilder
487	implementations provide for forward and reverse edit scripts.
488	Alternate implementations could, for instance, list common elements
489	instead of differences.
490	@param bld an object to build the script from change flags
491	@return the head of a list of changes
492	*/
493	public change diff(final ScriptBuilder bld) {
494
495	/* Some lines are obviously insertions or deletions
496	because they don't match anything. Detect them now,
497	and avoid even thinking about them in the main comparison algorithm. */
498
499	discard_confusing_lines ();
500
501	/* Now do the main comparison algorithm, considering just the
502	undiscarded lines. */
503
504	xvec = filevec[0].undiscarded;
505	yvec = filevec[1].undiscarded;
506
507	int diags =
508	filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
509	fdiag = new int[diags];
510	fdiagoff = filevec[1].nondiscarded_lines + 1;
511	bdiag = new int[diags];
512	bdiagoff = filevec[1].nondiscarded_lines + 1;
513
514	compareseq (0, filevec[0].nondiscarded_lines,
515	0, filevec[1].nondiscarded_lines);
516	fdiag = null;
517	bdiag = null;
518
519	/* Modify the results slightly to make them prettier
520	in cases where that can validly be done. */
521
522	shift_boundaries ();
523
524	/* Get the results of comparison in the form of a chain
525	of `struct change's -- an edit script. */
526	return bld.build_script(
527	filevec[0].changed_flag,
528	filevec[0].buffered_lines,
529	filevec[1].changed_flag,
530	filevec[1].buffered_lines
531	);
532
533	}
534
535	/** The result of comparison is an "edit script": a chain of change objects.
536	Each change represents one place where some lines are deleted
537	and some are inserted.
538
539	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
540	DELETED is the number of lines deleted here from file 0.
541	INSERTED is the number of lines inserted here in file 1.
542
543	If DELETED is 0 then LINE0 is the number of the line before
544	which the insertion was done; vice versa for INSERTED and LINE1. */
545
546	public static class change {
547	/** Previous or next edit command. */
548	public change link;
549	/** # lines of file 1 changed here. */
550	public final int inserted;
551	/** # lines of file 0 changed here. */
552	public final int deleted;
553	/** Line number of 1st deleted line. */
554	public final int line0;
555	/** Line number of 1st inserted line. */
556	public final int line1;
557
558	/** Cons an additional entry onto the front of an edit script OLD.
559	LINE0 and LINE1 are the first affected lines in the two files (origin 0).
560	DELETED is the number of lines deleted here from file 0.
561	INSERTED is the number of lines inserted here in file 1.
562
563	If DELETED is 0 then LINE0 is the number of the line before
564	which the insertion was done; vice versa for INSERTED and LINE1. */
565	public change(int line0, int line1, int deleted, int inserted, change old) {
566	this.line0 = line0;
567	this.line1 = line1;
568	this.inserted = inserted;
569	this.deleted = deleted;
570	this.link = old;
571	//System.err.println(line0+","+line1+","+inserted+","+deleted);
572	}
573	}
574
575	/** Data on one input file being compared.
576	*/
577
578	class file_data {
579
580	/** Allocate changed array for the results of comparison. */
581	void clear() {
582	/* Allocate a flag for each line of each file, saying whether that line
583	is an insertion or deletion.
584	Allocate an extra element, always zero, at each end of each vector.
585	*/
586	changed_flag = new boolean[buffered_lines + 2];
587	}
588
589	/** Return equiv_count[I] as the number of lines in this file
590	that fall in equivalence class I.
591	@return the array of equivalence class counts.
592	*/
593	int[] equivCount() {
594	int[] equiv_count = new int[equiv_max];
595	for (int i = 0; i < buffered_lines; ++i) {
596	++equiv_count[equivs[i]];
597	}
598	return equiv_count;
599	}
600
601	/** Discard lines that have no matches in another file.
602
603	A line which is discarded will not be considered by the actual
604	comparison algorithm; it will be as if that line were not in the file.
605	The file's `realindexes' table maps virtual line numbers
606	(which don't count the discarded lines) into real line numbers;
607	this is how the actual comparison algorithm produces results
608	that are comprehensible when the discarded lines are counted.
609	<p>
610	When we discard a line, we also mark it as a deletion or insertion
611	so that it will be printed in the output.
612	@param f the other file
613	*/
614	void discard_confusing_lines(file_data f) {
615	clear();
616	/* Set up table of which lines are going to be discarded. */
617	final byte[] discarded = discardable(f.equivCount());
618
619	/* Don't really discard the provisional lines except when they occur
620	in a run of discardables, with nonprovisionals at the beginning
621	and end. */
622	filterDiscards(discarded);
623
624	/* Actually discard the lines. */
625	discard(discarded);
626	}
627
628	/** Mark to be discarded each line that matches no line of another file.
629	If a line matches many lines, mark it as provisionally discardable.
630	@see equivCount()
631	@param counts The count of each equivalence number for the other file.
632	@return 0=nondiscardable, 1=discardable or 2=provisionally discardable
633	for each line
634	*/
635
636	private byte[] discardable(final int[] counts) {
637	final int end = buffered_lines;
638	final byte[] discards = new byte[end];
639	final int[] equivs = this.equivs;
640	int many = 5;
641	int tem = end / 64;
642
643	/* Multiply MANY by approximate square root of number of lines.
644	That is the threshold for provisionally discardable lines. */
645	while ((tem = tem >> 2) > 0) {
646	many *= 2;
647	}
648
649	for (int i = 0; i < end; i++)
650	{
651	int nmatch;
652	if (equivs[i] == 0) {
653	continue;
654	}
655	nmatch = counts[equivs[i]];
656	if (nmatch == 0) {
657	discards[i] = 1;
658	} else if (nmatch > many) {
659	discards[i] = 2;
660	}
661	}
662	return discards;
663	}
664
665	/** Don't really discard the provisional lines except when they occur
666	in a run of discardables, with nonprovisionals at the beginning
667	and end. */
668
669	private void filterDiscards(final byte[] discards) {
670	final int end = buffered_lines;
671
672	for (int i = 0; i < end; i++)
673	{
674	/* Cancel provisional discards not in middle of run of discards. */
675	if (discards[i] == 2) {
676	discards[i] = 0;
677	} else if (discards[i] != 0)
678	{
679	/* We have found a nonprovisional discard. */
680	int j;
681	int length;
682	int provisional = 0;
683
684	/* Find end of this run of discardable lines.
685	Count how many are provisionally discardable. */
686	for (j = i; j < end; j++)
687	{
688	if (discards[j] == 0) {
689	break;
690	}
691	if (discards[j] == 2) {
692	++provisional;
693	}
694	}
695
696	/* Cancel provisional discards at end, and shrink the run. */
697	while (j > i && discards[j - 1] == 2) {
698	discards[--j] = 0; --provisional;
699	}
700
701	/* Now we have the length of a run of discardable lines
702	whose first and last are not provisional. */
703	length = j - i;
704
705	/* If 1/4 of the lines in the run are provisional,
706	cancel discarding of all provisional lines in the run. */
707	if (provisional * 4 > length)
708	{
709	while (j > i)
710	if (discards[--j] == 2) {
711	discards[j] = 0;
712	}
713	}
714	else
715	{
716	int consec;
717	int minimum = 1;
718	int tem = length / 4;
719
720	/* MINIMUM is approximate square root of LENGTH/4.
721	A subrun of two or more provisionals can stand
722	when LENGTH is at least 16.
723	A subrun of 4 or more can stand when LENGTH >= 64. */
724	while ((tem = tem >> 2) > 0) {
725	minimum *= 2;
726	}
727	minimum++;
728
729	/* Cancel any subrun of MINIMUM or more provisionals
730	within the larger run. */
731	for (j = 0, consec = 0; j < length; j++)
732	if (discards[i + j] != 2) {
733	consec = 0;
734	} else if (minimum == ++consec) {
735	/* Back up to start of subrun, to cancel it all. */
736	j -= consec;
737	} else if (minimum < consec) {
738	discards[i + j] = 0;
739	}
740
741	/* Scan from beginning of run
742	until we find 3 or more nonprovisionals in a row
743	or until the first nonprovisional at least 8 lines in.
744	Until that point, cancel any provisionals. */
745	for (j = 0, consec = 0; j < length; j++)
746	{
747	if (j >= 8 && discards[i + j] == 1) {
748	break;
749	}
750	if (discards[i + j] == 2) {
751	consec = 0; discards[i + j] = 0;
752	}
753	else if (discards[i + j] == 0) {
754	consec = 0;
755	} else {
756	consec++;
757	}
758	if (consec == 3) {
759	break;
760	}
761	}
762
763	/* I advances to the last line of the run. */
764	i += length - 1;
765
766	/* Same thing, from end. */
767	for (j = 0, consec = 0; j < length; j++)
768	{
769	if (j >= 8 && discards[i - j] == 1) {
770	break;
771	}
772	if (discards[i - j] == 2) {
773	consec = 0; discards[i - j] = 0;
774	}
775	else if (discards[i - j] == 0) {
776	consec = 0;
777	} else {
778	consec++;
779	}
780	if (consec == 3) {
781	break;
782	}
783	}
784	}
785	}
786	}
787	}
788
789	/** Actually discard the lines.
790	@param discards flags lines to be discarded
791	*/
792	private void discard(final byte[] discards) {
793	final int end = buffered_lines;
794	int j = 0;
795	for (int i = 0; i < end; ++i)
796	if (no_discards \|\| discards[i] == 0)
797	{
798	undiscarded[j] = equivs[i];
799	realindexes[j++] = i;
800	} else {
801	changed_flag[1+i] = true;
802	}
803	nondiscarded_lines = j;
804	}
805
806	file_data(Object[] data,Hashtable<Object, Integer> h) {
807	buffered_lines = data.length;
808
809	equivs = new int[buffered_lines];
810	undiscarded = new int[buffered_lines];
811	realindexes = new int[buffered_lines];
812
813	for (int i = 0; i < data.length; ++i) {
814	Integer ir = (Integer)h.get(data[i]);
815	if (ir == null) {
816	h.put(data[i],new Integer(equivs[i] = equiv_max++));
817	} else {
818	equivs[i] = ir.intValue();
819	}
820	}
821	}
822
823	/** Adjust inserts/deletes of blank lines to join changes
824	as much as possible.
825
826	We do something when a run of changed lines include a blank
827	line at one end and have an excluded blank line at the other.
828	We are free to choose which blank line is included.
829	`compareseq' always chooses the one at the beginning,
830	but usually it is cleaner to consider the following blank line
831	to be the "change". The only exception is if the preceding blank line
832	would join this change to other changes.
833	@param f the file being compared against
834	*/
835
836	void shift_boundaries(file_data f) {
837	final boolean[] changed = changed_flag;
838	final boolean[] other_changed = f.changed_flag;
839	int i = 0;
840	int j = 0;
841	int i_end = buffered_lines;
842	int preceding = -1;
843	int other_preceding = -1;
844
845	for (;;)
846	{
847	int start, end, other_start;
848
849	/* Scan forwards to find beginning of another run of changes.
850	Also keep track of the corresponding point in the other file. */
851
852	while (i < i_end && !changed[1+i])
853	{
854	while (other_changed[1+j++]) {
855	/* Non-corresponding lines in the other file
856	will count as the preceding batch of changes. */
857	other_preceding = j;
858	}
859	i++;
860	}
861
862	if (i == i_end) {
863	break;
864	}
865
866	start = i;
867	other_start = j;
868
869	for (;;)
870	{
871	/* Now find the end of this run of changes. */
872
873	while (i < i_end && changed[1+i]) {
874	i++;
875	}
876	end = i;
877
878	/* If the first changed line matches the following unchanged one,
879	and this run does not follow right after a previous run,
880	and there are no lines deleted from the other file here,
881	then classify the first changed line as unchanged
882	and the following line as changed in its place. */
883
884	/* You might ask, how could this run follow right after another?
885	Only because the previous run was shifted here. */
886
887	if (end != i_end
888	&& equivs[start] == equivs[end]
889	&& !other_changed[1+j]
890	&& end != i_end
891	&& !((preceding >= 0 && start == preceding)
892	\|\| (other_preceding >= 0
893	&& other_start == other_preceding)))
894	{
895	changed[1+end++] = true;
896	changed[1+start++] = false;
897	++i;
898	/* Since one line-that-matches is now before this run
899	instead of after, we must advance in the other file
900	to keep in synch. */
901	++j;
902	} else {
903	break;
904	}
905	}
906
907	preceding = i;
908	other_preceding = j;
909	}
910	}
911
912	/** Number of elements (lines) in this file. */
913	final int buffered_lines;
914
915	/** Vector, indexed by line number, containing an equivalence code for
916	each line. It is this vector that is actually compared with that
917	of another file to generate differences. */
918	private final int[] equivs;
919
920	/** Vector, like the previous one except that
921	the elements for discarded lines have been squeezed out. */
922	final int[] undiscarded;
923
924	/** Vector mapping virtual line numbers (not counting discarded lines)
925	to real ones (counting those lines). Both are origin-0. */
926	final int[] realindexes;
927
928	/** Total number of nondiscarded lines. */
929	int nondiscarded_lines;
930
931	/** Array, indexed by real origin-1 line number,
932	containing true for a line that is an insertion or a deletion.
933	The results of comparison are stored here. */
934	boolean[] changed_flag;
935
936	}
937	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: