Context Navigation

source: josm/trunk/src/com/drew/metadata/iptc/IptcReader.java@ 15754

Last change on this file since 15754 was 15217, checked in by Don-vip, 7 years ago
see #17848 - update to metadata-extractor 2.12.0
File size: 9.6 KB

Line
1	/*
2	* Copyright 2002-2019 Drew Noakes and contributors
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*
16	* More information about this project is available at:
17	*
18	* https://drewnoakes.com/code/exif/
19	* https://github.com/drewnoakes/metadata-extractor
20	*/
21	package com.drew.metadata.iptc;
22
23	import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
24	import com.drew.imaging.jpeg.JpegSegmentType;
25	import com.drew.lang.SequentialByteArrayReader;
26	import com.drew.lang.SequentialReader;
27	import com.drew.lang.annotations.NotNull;
28	import com.drew.lang.annotations.Nullable;
29	import com.drew.metadata.Directory;
30	import com.drew.metadata.Metadata;
31	import com.drew.metadata.StringValue;
32
33	import java.io.IOException;
34	import java.nio.charset.Charset;
35	import java.util.Collections;
36
37	/**
38	* Decodes IPTC binary data, populating a {@link Metadata} object with tag values in an {@link IptcDirectory}.
39	* <p>
40	* http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
41	*
42	* @author Drew Noakes https://drewnoakes.com
43	*/
44	public class IptcReader implements JpegSegmentMetadataReader
45	{
46	// TODO consider breaking the IPTC section up into multiple directories and providing segregation of each IPTC directory
47	/*
48	public static final int DIRECTORY_IPTC = 2;
49
50	public static final int ENVELOPE_RECORD = 1;
51	public static final int APPLICATION_RECORD_2 = 2;
52	public static final int APPLICATION_RECORD_3 = 3;
53	public static final int APPLICATION_RECORD_4 = 4;
54	public static final int APPLICATION_RECORD_5 = 5;
55	public static final int APPLICATION_RECORD_6 = 6;
56	public static final int PRE_DATA_RECORD = 7;
57	public static final int DATA_RECORD = 8;
58	public static final int POST_DATA_RECORD = 9;
59	*/
60	private static final byte IptcMarkerByte = 0x1c;
61
62	@NotNull
63	public Iterable<JpegSegmentType> getSegmentTypes()
64	{
65	return Collections.singletonList(JpegSegmentType.APPD);
66	}
67
68	public void readJpegSegments(@NotNull Iterable<byte[]> segments, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
69	{
70	for (byte[] segmentBytes : segments) {
71	// Ensure data starts with the IPTC marker byte
72	if (segmentBytes.length != 0 && segmentBytes[0] == IptcMarkerByte) {
73	extract(new SequentialByteArrayReader(segmentBytes), metadata, segmentBytes.length);
74	}
75	}
76	}
77
78	/**
79	* Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
80	*/
81	public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length)
82	{
83	extract(reader, metadata, length, null);
84	}
85
86	/**
87	* Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
88	*/
89	public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length, @Nullable Directory parentDirectory)
90	{
91	IptcDirectory directory = new IptcDirectory();
92	metadata.addDirectory(directory);
93
94	if (parentDirectory != null)
95	directory.setParent(parentDirectory);
96
97	int offset = 0;
98
99	// for each tag
100	while (offset < length) {
101
102	// identifies start of a tag
103	short startByte;
104	try {
105	startByte = reader.getUInt8();
106	offset++;
107	} catch (IOException e) {
108	directory.addError("Unable to read starting byte of IPTC tag");
109	return;
110	}
111
112	if (startByte != IptcMarkerByte) {
113	// NOTE have seen images where there was one extra byte at the end, giving
114	// offset==length at this point, which is not worth logging as an error.
115	if (offset != length)
116	directory.addError("Invalid IPTC tag marker at offset " + (offset - 1) + ". Expected '0x" + Integer.toHexString(IptcMarkerByte) + "' but got '0x" + Integer.toHexString(startByte) + "'.");
117	return;
118	}
119
120	// we need at least four bytes left to read a tag
121	if (offset + 4 > length) {
122	directory.addError("Too few bytes remain for a valid IPTC tag");
123	return;
124	}
125
126	int directoryType;
127	int tagType;
128	int tagByteCount;
129	try {
130	directoryType = reader.getUInt8();
131	tagType = reader.getUInt8();
132	tagByteCount = reader.getUInt16();
133	if (tagByteCount > 32767) {
134	// Extended DataSet Tag (see 1.5(c), p14, IPTC-IIMV4.2.pdf)
135	tagByteCount = ((tagByteCount & 0x7FFF) << 16) \| reader.getUInt16();
136	offset += 2;
137	}
138	offset += 4;
139	} catch (IOException e) {
140	directory.addError("IPTC data segment ended mid-way through tag descriptor");
141	return;
142	}
143
144	if (offset + tagByteCount > length) {
145	directory.addError("Data for tag extends beyond end of IPTC segment");
146	return;
147	}
148
149	try {
150	processTag(reader, directory, directoryType, tagType, tagByteCount);
151	} catch (IOException e) {
152	directory.addError("Error processing IPTC tag");
153	return;
154	}
155
156	offset += tagByteCount;
157	}
158	}
159
160	private void processTag(@NotNull SequentialReader reader, @NotNull Directory directory, int directoryType, int tagType, int tagByteCount) throws IOException
161	{
162	int tagIdentifier = tagType \| (directoryType << 8);
163
164	// Some images have been seen that specify a zero byte tag, which cannot be of much use.
165	// We elect here to completely ignore the tag. The IPTC specification doesn't mention
166	// anything about the interpretation of this situation.
167	// https://raw.githubusercontent.com/wiki/drewnoakes/metadata-extractor/docs/IPTC-IIMV4.2.pdf
168	if (tagByteCount == 0) {
169	directory.setString(tagIdentifier, "");
170	return;
171	}
172
173	switch (tagIdentifier) {
174	case IptcDirectory.TAG_CODED_CHARACTER_SET:
175	byte[] bytes = reader.getBytes(tagByteCount);
176	String charsetName = Iso2022Converter.convertISO2022CharsetToJavaCharset(bytes);
177	if (charsetName == null) {
178	// Unable to determine the charset, so fall through and treat tag as a regular string
179	charsetName = new String(bytes);
180	}
181	directory.setString(tagIdentifier, charsetName);
182	return;
183	case IptcDirectory.TAG_ENVELOPE_RECORD_VERSION:
184	case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
185	case IptcDirectory.TAG_FILE_VERSION:
186	case IptcDirectory.TAG_ARM_VERSION:
187	case IptcDirectory.TAG_PROGRAM_VERSION:
188	// short
189	if (tagByteCount >= 2) {
190	int shortValue = reader.getUInt16();
191	reader.skip(tagByteCount - 2);
192	directory.setInt(tagIdentifier, shortValue);
193	return;
194	}
195	break;
196	case IptcDirectory.TAG_URGENCY:
197	// byte
198	directory.setInt(tagIdentifier, reader.getUInt8());
199	reader.skip(tagByteCount - 1);
200	return;
201	default:
202	// fall through
203	}
204
205	// If we haven't returned yet, treat it as a string
206	// NOTE that there's a chance we've already loaded the value as a string above, but failed to parse the value
207	String charSetName = directory.getString(IptcDirectory.TAG_CODED_CHARACTER_SET);
208	Charset charset = null;
209	try {
210	if (charSetName != null)
211	charset = Charset.forName(charSetName);
212	} catch (Throwable ignored) {
213	}
214
215	StringValue string;
216	if (charSetName != null) {
217	string = reader.getStringValue(tagByteCount, charset);
218	} else {
219	byte[] bytes = reader.getBytes(tagByteCount);
220	Charset charSet = Iso2022Converter.guessCharSet(bytes);
221	string = charSet != null ? new StringValue(bytes, charSet) : new StringValue(bytes, null);
222	}
223
224	if (directory.containsTag(tagIdentifier)) {
225	// this fancy StringValue[] business avoids using an ArrayList for performance reasons
226	StringValue[] oldStrings = directory.getStringValueArray(tagIdentifier);
227	StringValue[] newStrings;
228	if (oldStrings == null) {
229	// TODO hitting this block means any prior value(s) are discarded
230	newStrings = new StringValue[1];
231	} else {
232	newStrings = new StringValue[oldStrings.length + 1];
233	System.arraycopy(oldStrings, 0, newStrings, 0, oldStrings.length);
234	}
235	newStrings[newStrings.length - 1] = string;
236	directory.setStringValueArray(tagIdentifier, newStrings);
237	} else {
238	directory.setStringValue(tagIdentifier, string);
239	}
240	}
241	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: