Context Navigation

source: josm/trunk/src/com/drew/metadata/iptc/Iso2022Converter.java@ 13500

Last change on this file since 13500 was 13061, checked in by Don-vip, 6 years ago
fix #15505 - update to metadata-extractor 2.10.1
File size: 3.7 KB

Line
1	/*
2	* Copyright 2002-2017 Drew Noakes
3	*
4	* Licensed under the Apache License, Version 2.0 (the "License");
5	* you may not use this file except in compliance with the License.
6	* You may obtain a copy of the License at
7	*
8	* http://www.apache.org/licenses/LICENSE-2.0
9	*
10	* Unless required by applicable law or agreed to in writing, software
11	* distributed under the License is distributed on an "AS IS" BASIS,
12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13	* See the License for the specific language governing permissions and
14	* limitations under the License.
15	*
16	* More information about this project is available at:
17	*
18	* https://drewnoakes.com/code/exif/
19	* https://github.com/drewnoakes/metadata-extractor
20	*/
21	package com.drew.metadata.iptc;
22
23	import com.drew.lang.annotations.NotNull;
24	import com.drew.lang.annotations.Nullable;
25
26	import java.nio.ByteBuffer;
27	import java.nio.charset.CharacterCodingException;
28	import java.nio.charset.Charset;
29	import java.nio.charset.CharsetDecoder;
30
31	public final class Iso2022Converter
32	{
33	private static final String ISO_8859_1 = "ISO-8859-1";
34	private static final String UTF_8 = "UTF-8";
35
36	private static final byte LATIN_CAPITAL_A = 0x41;
37	private static final int DOT = 0xe280a2;
38	private static final byte LATIN_CAPITAL_G = 0x47;
39	private static final byte PERCENT_SIGN = 0x25;
40	private static final byte ESC = 0x1B;
41
42	/**
43	* Converts the given ISO2022 char set to a Java charset name.
44	*
45	* @param bytes string data encoded using ISO2022
46	* @return the Java charset name as a string, or <code>null</code> if the conversion was not possible
47	*/
48	@Nullable
49	public static String convertISO2022CharsetToJavaCharset(@NotNull final byte[] bytes)
50	{
51	if (bytes.length > 2 && bytes[0] == ESC && bytes[1] == PERCENT_SIGN && bytes[2] == LATIN_CAPITAL_G)
52	return UTF_8;
53
54	if (bytes.length > 3 && bytes[0] == ESC && (bytes[3] & 0xFF \| ((bytes[2] & 0xFF) << 8) \| ((bytes[1] & 0xFF) << 16)) == DOT && bytes[4] == LATIN_CAPITAL_A)
55	return ISO_8859_1;
56
57	return null;
58	}
59
60	/**
61	* Attempts to guess the {@link Charset} of a string provided as a byte array.
62	* <p>
63	* Charsets trialled are, in order:
64	* <ul>
65	* <li>UTF-8</li>
66	* <li><code>System.getProperty("file.encoding")</code></li>
67	* <li>ISO-8859-1</li>
68	* </ul>
69	* <p>
70	* Its only purpose is to guess the Charset if and only if IPTC tag coded character set is not set. If the
71	* encoding is not UTF-8, the tag should be set. Otherwise it is bad practice. This method tries to
72	* workaround this issue since some metadata manipulating tools do not prevent such bad practice.
73	* <p>
74	* About the reliability of this method: The check if some bytes are UTF-8 or not has a very high reliability.
75	* The two other checks are less reliable.
76	*
77	* @param bytes some text as bytes
78	* @return the name of the encoding or null if none could be guessed
79	*/
80	@Nullable
81	static Charset guessCharSet(@NotNull final byte[] bytes)
82	{
83	String[] encodings = { UTF_8, System.getProperty("file.encoding"), ISO_8859_1 };
84
85	for (String encoding : encodings)
86	{
87	Charset charset = Charset.forName(encoding);
88	CharsetDecoder cs = charset.newDecoder();
89
90	try {
91	cs.decode(ByteBuffer.wrap(bytes));
92	return charset;
93	} catch (CharacterCodingException e) {
94	// fall through...
95	}
96	}
97
98	// No encodings succeeded. Return null.
99	return null;
100	}
101
102	private Iso2022Converter()
103	{}
104	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: