Context Navigation

source: josm/trunk/src/com/drew/metadata/iptc/IptcReader.java@ 8132

Last change on this file since 8132 was 8132, checked in by Don-vip, 11 years ago
fix #11162 - update to metadata-extractor 2.7.2
File size: 9.5 KB

Line
1	/*
2	* Copyright 2002-2015 Drew Noakes
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*
16	* More information about this project is available at:
17	*
18	* https://drewnoakes.com/code/exif/
19	* https://github.com/drewnoakes/metadata-extractor
20	*/
21	package com.drew.metadata.iptc;
22
23	import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
24	import com.drew.imaging.jpeg.JpegSegmentType;
25	import com.drew.lang.SequentialByteArrayReader;
26	import com.drew.lang.SequentialReader;
27	import com.drew.lang.annotations.NotNull;
28	import com.drew.metadata.Directory;
29	import com.drew.metadata.Metadata;
30
31	import java.io.IOException;
32	import java.util.Arrays;
33	import java.util.Date;
34
35	/**
36	* Decodes IPTC binary data, populating a {@link Metadata} object with tag values in an {@link IptcDirectory}.
37	* <p>
38	* http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
39	*
40	* @author Drew Noakes https://drewnoakes.com
41	*/
42	public class IptcReader implements JpegSegmentMetadataReader
43	{
44	// TODO consider breaking the IPTC section up into multiple directories and providing segregation of each IPTC directory
45	/*
46	public static final int DIRECTORY_IPTC = 2;
47
48	public static final int ENVELOPE_RECORD = 1;
49	public static final int APPLICATION_RECORD_2 = 2;
50	public static final int APPLICATION_RECORD_3 = 3;
51	public static final int APPLICATION_RECORD_4 = 4;
52	public static final int APPLICATION_RECORD_5 = 5;
53	public static final int APPLICATION_RECORD_6 = 6;
54	public static final int PRE_DATA_RECORD = 7;
55	public static final int DATA_RECORD = 8;
56	public static final int POST_DATA_RECORD = 9;
57	*/
58
59	@NotNull
60	public Iterable<JpegSegmentType> getSegmentTypes()
61	{
62	return Arrays.asList(JpegSegmentType.APPD);
63	}
64
65	public boolean canProcess(@NotNull byte[] segmentBytes, @NotNull JpegSegmentType segmentType)
66	{
67	// Check whether the first byte resembles
68	return segmentBytes.length != 0 && segmentBytes[0] == 0x1c;
69	}
70
71	public void extract(@NotNull byte[] segmentBytes, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
72	{
73	extract(new SequentialByteArrayReader(segmentBytes), metadata, segmentBytes.length);
74	}
75
76	/**
77	* Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
78	*/
79	public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length)
80	{
81	IptcDirectory directory = metadata.getOrCreateDirectory(IptcDirectory.class);
82
83	int offset = 0;
84
85	// for each tag
86	while (offset < length) {
87
88	// identifies start of a tag
89	short startByte;
90	try {
91	startByte = reader.getUInt8();
92	offset++;
93	} catch (IOException e) {
94	directory.addError("Unable to read starting byte of IPTC tag");
95	return;
96	}
97
98	if (startByte != 0x1c) {
99	// NOTE have seen images where there was one extra byte at the end, giving
100	// offset==length at this point, which is not worth logging as an error.
101	if (offset != length)
102	directory.addError("Invalid IPTC tag marker at offset " + (offset - 1) + ". Expected '0x1c' but got '0x" + Integer.toHexString(startByte) + "'.");
103	return;
104	}
105
106	// we need at least five bytes left to read a tag
107	if (offset + 5 >= length) {
108	directory.addError("Too few bytes remain for a valid IPTC tag");
109	return;
110	}
111
112	int directoryType;
113	int tagType;
114	int tagByteCount;
115	try {
116	directoryType = reader.getUInt8();
117	tagType = reader.getUInt8();
118	// TODO support Extended DataSet Tag (see 1.5(c), p14, IPTC-IIMV4.2.pdf)
119	tagByteCount = reader.getUInt16();
120	offset += 4;
121	} catch (IOException e) {
122	directory.addError("IPTC data segment ended mid-way through tag descriptor");
123	return;
124	}
125
126	if (offset + tagByteCount > length) {
127	directory.addError("Data for tag extends beyond end of IPTC segment");
128	return;
129	}
130
131	try {
132	processTag(reader, directory, directoryType, tagType, tagByteCount);
133	} catch (IOException e) {
134	directory.addError("Error processing IPTC tag");
135	return;
136	}
137
138	offset += tagByteCount;
139	}
140	}
141
142	private void processTag(@NotNull SequentialReader reader, @NotNull Directory directory, int directoryType, int tagType, int tagByteCount) throws IOException
143	{
144	int tagIdentifier = tagType \| (directoryType << 8);
145
146	// Some images have been seen that specify a zero byte tag, which cannot be of much use.
147	// We elect here to completely ignore the tag. The IPTC specification doesn't mention
148	// anything about the interpretation of this situation.
149	// https://raw.githubusercontent.com/wiki/drewnoakes/metadata-extractor/docs/IPTC-IIMV4.2.pdf
150	if (tagByteCount == 0) {
151	directory.setString(tagIdentifier, "");
152	return;
153	}
154
155	String string = null;
156
157	switch (tagIdentifier) {
158	case IptcDirectory.TAG_CODED_CHARACTER_SET:
159	byte[] bytes = reader.getBytes(tagByteCount);
160	String charset = Iso2022Converter.convertISO2022CharsetToJavaCharset(bytes);
161	if (charset == null) {
162	// Unable to determine the charset, so fall through and treat tag as a regular string
163	string = new String(bytes);
164	break;
165	}
166	directory.setString(tagIdentifier, charset);
167	return;
168	case IptcDirectory.TAG_ENVELOPE_RECORD_VERSION:
169	case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
170	case IptcDirectory.TAG_FILE_VERSION:
171	case IptcDirectory.TAG_ARM_VERSION:
172	case IptcDirectory.TAG_PROGRAM_VERSION:
173	// short
174	if (tagByteCount >= 2) {
175	int shortValue = reader.getUInt16();
176	reader.skip(tagByteCount - 2);
177	directory.setInt(tagIdentifier, shortValue);
178	return;
179	}
180	break;
181	case IptcDirectory.TAG_URGENCY:
182	// byte
183	directory.setInt(tagIdentifier, reader.getUInt8());
184	reader.skip(tagByteCount - 1);
185	return;
186	case IptcDirectory.TAG_RELEASE_DATE:
187	case IptcDirectory.TAG_DATE_CREATED:
188	// Date object
189	if (tagByteCount >= 8) {
190	string = reader.getString(tagByteCount);
191	try {
192	int year = Integer.parseInt(string.substring(0, 4));
193	int month = Integer.parseInt(string.substring(4, 6)) - 1;
194	int day = Integer.parseInt(string.substring(6, 8));
195	Date date = new java.util.GregorianCalendar(year, month, day).getTime();
196	directory.setDate(tagIdentifier, date);
197	return;
198	} catch (NumberFormatException e) {
199	// fall through and we'll process the 'string' value below
200	}
201	} else {
202	reader.skip(tagByteCount);
203	}
204	case IptcDirectory.TAG_RELEASE_TIME:
205	case IptcDirectory.TAG_TIME_CREATED:
206	// time...
207	default:
208	// fall through
209	}
210
211	// If we haven't returned yet, treat it as a string
212	// NOTE that there's a chance we've already loaded the value as a string above, but failed to parse the value
213	if (string == null) {
214	String encoding = directory.getString(IptcDirectory.TAG_CODED_CHARACTER_SET);
215	if (encoding != null) {
216	string = reader.getString(tagByteCount, encoding);
217	} else {
218	byte[] bytes = reader.getBytes(tagByteCount);
219	encoding = Iso2022Converter.guessEncoding(bytes);
220	string = encoding != null ? new String(bytes, encoding) : new String(bytes);
221	}
222	}
223
224	if (directory.containsTag(tagIdentifier)) {
225	// this fancy string[] business avoids using an ArrayList for performance reasons
226	String[] oldStrings = directory.getStringArray(tagIdentifier);
227	String[] newStrings;
228	if (oldStrings == null) {
229	newStrings = new String[1];
230	} else {
231	newStrings = new String[oldStrings.length + 1];
232	System.arraycopy(oldStrings, 0, newStrings, 0, oldStrings.length);
233	}
234	newStrings[newStrings.length - 1] = string;
235	directory.setStringArray(tagIdentifier, newStrings);
236	} else {
237	directory.setString(tagIdentifier, string);
238	}
239	}
240	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: