Index: trunk/src/com/drew/metadata/iptc/IptcDescriptor.java
===================================================================
--- trunk/src/com/drew/metadata/iptc/IptcDescriptor.java	(revision 6127)
+++ trunk/src/com/drew/metadata/iptc/IptcDescriptor.java	(revision 8132)
@@ -1,4 +1,4 @@
 /*
- * Copyright 2002-2012 Drew Noakes
+ * Copyright 2002-2015 Drew Noakes
  *
  *    Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +16,6 @@
  * More information about this project is available at:
  *
- *    http://drewnoakes.com/code/exif/
- *    http://code.google.com/p/metadata-extractor/
+ *    https://drewnoakes.com/code/exif/
+ *    https://github.com/drewnoakes/metadata-extractor
  */
 package com.drew.metadata.iptc;
@@ -27,9 +27,9 @@
 
 /**
- * Provides human-readable string representations of tag values stored in a <code>IptcDirectory</code>.
- * <p/>
+ * Provides human-readable string representations of tag values stored in a {@link IptcDirectory}.
+ * <p>
  * As the IPTC directory already stores values as strings, this class simply returns the tag's value.
  *
- * @author Drew Noakes http://drewnoakes.com
+ * @author Drew Noakes https://drewnoakes.com
  */
 public class IptcDescriptor extends TagDescriptor<IptcDirectory>
@@ -40,4 +40,5 @@
     }
 
+    @Override
     @Nullable
     public String getDescription(int tagType)
Index: trunk/src/com/drew/metadata/iptc/IptcDirectory.java
===================================================================
--- trunk/src/com/drew/metadata/iptc/IptcDirectory.java	(revision 6127)
+++ trunk/src/com/drew/metadata/iptc/IptcDirectory.java	(revision 8132)
@@ -1,4 +1,4 @@
 /*
- * Copyright 2002-2012 Drew Noakes
+ * Copyright 2002-2015 Drew Noakes
  *
  *    Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,6 +16,6 @@
  * More information about this project is available at:
  *
- *    http://drewnoakes.com/code/exif/
- *    http://code.google.com/p/metadata-extractor/
+ *    https://drewnoakes.com/code/exif/
+ *    https://github.com/drewnoakes/metadata-extractor
  */
 package com.drew.metadata.iptc;
@@ -32,5 +32,5 @@
  * Describes tags used by the International Press Telecommunications Council (IPTC) metadata format.
  *
- * @author Drew Noakes http://drewnoakes.com
+ * @author Drew Noakes https://drewnoakes.com
  */
 public class IptcDirectory extends Directory
@@ -211,10 +211,12 @@
     }
 
+    @Override
     @NotNull
     public String getName()
     {
-        return "Iptc";
-    }
-
+        return "IPTC";
+    }
+
+    @Override
     @NotNull
     protected HashMap<Integer, String> getTagNameMap()
Index: trunk/src/com/drew/metadata/iptc/IptcReader.java
===================================================================
--- trunk/src/com/drew/metadata/iptc/IptcReader.java	(revision 6127)
+++ trunk/src/com/drew/metadata/iptc/IptcReader.java	(revision 8132)
@@ -1,4 +1,4 @@
 /*
- * Copyright 2002-2012 Drew Noakes
+ * Copyright 2002-2015 Drew Noakes
  *
  *    Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,24 +16,29 @@
  * More information about this project is available at:
  *
- *    http://drewnoakes.com/code/exif/
- *    http://code.google.com/p/metadata-extractor/
+ *    https://drewnoakes.com/code/exif/
+ *    https://github.com/drewnoakes/metadata-extractor
  */
 package com.drew.metadata.iptc;
 
-import com.drew.lang.BufferBoundsException;
-import com.drew.lang.BufferReader;
+import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
+import com.drew.imaging.jpeg.JpegSegmentType;
+import com.drew.lang.SequentialByteArrayReader;
+import com.drew.lang.SequentialReader;
 import com.drew.lang.annotations.NotNull;
 import com.drew.metadata.Directory;
 import com.drew.metadata.Metadata;
-import com.drew.metadata.MetadataReader;
-
+
+import java.io.IOException;
+import java.util.Arrays;
 import java.util.Date;
 
 /**
- * Decodes IPTC binary data, populating a <code>Metadata</code> object with tag values in an <code>IptcDirectory</code>.
- *
- * @author Drew Noakes http://drewnoakes.com
+ * Decodes IPTC binary data, populating a {@link Metadata} object with tag values in an {@link IptcDirectory}.
+ * <p>
+ * http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
+ *
+ * @author Drew Noakes https://drewnoakes.com
  */
-public class IptcReader implements MetadataReader
+public class IptcReader implements JpegSegmentMetadataReader
 {
     // TODO consider breaking the IPTC section up into multiple directories and providing segregation of each IPTC directory
@@ -52,6 +57,25 @@
 */
 
-    /** Performs the IPTC data extraction, adding found values to the specified instance of <code>Metadata</code>. */
-    public void extract(@NotNull final BufferReader reader, @NotNull final Metadata metadata)
+    @NotNull
+    public Iterable<JpegSegmentType> getSegmentTypes()
+    {
+        return Arrays.asList(JpegSegmentType.APPD); // APPD is the only JPEG segment type this reader asks for
+    }
+
+    public boolean canProcess(@NotNull byte[] segmentBytes, @NotNull JpegSegmentType segmentType)
+    {
+        // Check whether the first byte resembles the IPTC tag marker (0x1c); an empty segment is never processed
+        return segmentBytes.length != 0 && segmentBytes[0] == 0x1c;
+    }
+
+    public void extract(@NotNull byte[] segmentBytes, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
+    { // Delegates to the SequentialReader overload over the whole segment; segmentType is not consulted here
+        extract(new SequentialByteArrayReader(segmentBytes), metadata, segmentBytes.length);
+    }
+
+    /**
+     * Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
+     */
+    public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length)
     {
         IptcDirectory directory = metadata.getOrCreateDirectory(IptcDirectory.class);
@@ -59,39 +83,30 @@
         int offset = 0;
 
-/*
-        // find start-of-segment marker (potentially need to skip some ASCII photoshop header info)
-        try {
-            while (offset < data.length - 1 && reader.getUInt16(offset) != 0x1c01 && reader.getUInt16(offset) != 0x1c02)
-                offset++;
-        } catch (BufferBoundsException e) {
-            directory.addError("Couldn't find start of IPTC data (invalid segment)");
-            return;
-        }
-*/
-
         // for each tag
-        while (offset < reader.getLength()) {
+        while (offset < length) {
 
             // identifies start of a tag
             short startByte;
             try {
-                startByte = reader.getUInt8(offset);
-            } catch (BufferBoundsException e) {
+                startByte = reader.getUInt8();
+                offset++;
+            } catch (IOException e) {
                 directory.addError("Unable to read starting byte of IPTC tag");
-                break;
+                return;
             }
 
             if (startByte != 0x1c) {
-                directory.addError("Invalid start to IPTC tag");
-                break;
+                // NOTE have seen images where there was one extra byte at the end, giving
+                // offset==length at this point, which is not worth logging as an error.
+                if (offset != length)
+                    directory.addError("Invalid IPTC tag marker at offset " + (offset - 1) + ". Expected '0x1c' but got '0x" + Integer.toHexString(startByte) + "'.");
+                return;
             }
 
             // we need at least five bytes left to read a tag
-            if (offset + 5 >= reader.getLength()) {
+            if (offset + 5 >= length) {
                 directory.addError("Too few bytes remain for a valid IPTC tag");
-                break;
-            }
-
-            offset++;
+                return;
+            }
 
             int directoryType;
@@ -99,42 +114,73 @@
             int tagByteCount;
             try {
-                directoryType = reader.getUInt8(offset++);
-                tagType = reader.getUInt8(offset++);
-                tagByteCount = reader.getUInt16(offset);
-                offset += 2;
-            } catch (BufferBoundsException e) {
+                directoryType = reader.getUInt8();
+                tagType = reader.getUInt8();
+                // TODO support Extended DataSet Tag (see 1.5(c), p14, IPTC-IIMV4.2.pdf)
+                tagByteCount = reader.getUInt16();
+                offset += 4;
+            } catch (IOException e) {
                 directory.addError("IPTC data segment ended mid-way through tag descriptor");
                 return;
             }
 
-            if (offset + tagByteCount > reader.getLength()) {
+            if (offset + tagByteCount > length) {
                 directory.addError("Data for tag extends beyond end of IPTC segment");
+                return;
+            }
+
+            try {
+                processTag(reader, directory, directoryType, tagType, tagByteCount);
+            } catch (IOException e) {
+                directory.addError("Error processing IPTC tag");
+                return;
+            }
+
+            offset += tagByteCount;
+        }
+    }
+
+    private void processTag(@NotNull SequentialReader reader, @NotNull Directory directory, int directoryType, int tagType, int tagByteCount) throws IOException
+    {
+        int tagIdentifier = tagType | (directoryType << 8);
+
+        // Some images have been seen that specify a zero byte tag, which cannot be of much use.
+        // We elect here to record the tag with an empty string value and read no data for it. The IPTC
+        // specification doesn't mention anything about the interpretation of this situation.
+        // https://raw.githubusercontent.com/wiki/drewnoakes/metadata-extractor/docs/IPTC-IIMV4.2.pdf
+        if (tagByteCount == 0) {
+            directory.setString(tagIdentifier, "");
+            return;
+        }
+
+        String string = null;
+
+        switch (tagIdentifier) {
+            case IptcDirectory.TAG_CODED_CHARACTER_SET:
+                byte[] bytes = reader.getBytes(tagByteCount);
+                String charset = Iso2022Converter.convertISO2022CharsetToJavaCharset(bytes);
+                if (charset == null) {
+                    // Unable to determine the charset, so fall through and treat tag as a regular string
+                    string = new String(bytes);
+                    break;
+                }
+                directory.setString(tagIdentifier, charset);
+                return;
+            case IptcDirectory.TAG_ENVELOPE_RECORD_VERSION:
+            case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
+            case IptcDirectory.TAG_FILE_VERSION:
+            case IptcDirectory.TAG_ARM_VERSION:
+            case IptcDirectory.TAG_PROGRAM_VERSION:
+                // short
+                if (tagByteCount >= 2) {
+                    int shortValue = reader.getUInt16();
+                    reader.skip(tagByteCount - 2);
+                    directory.setInt(tagIdentifier, shortValue);
+                    return;
+                }
                 break;
-            }
-
-            try {
-                processTag(reader, directory, directoryType, tagType, offset, tagByteCount);
-            } catch (BufferBoundsException e) {
-                directory.addError("Error processing IPTC tag");
-                break;
-            }
-
-            offset += tagByteCount;
-        }
-    }
-
-    private void processTag(@NotNull BufferReader reader, @NotNull Directory directory, int directoryType, int tagType, int offset, int tagByteCount) throws BufferBoundsException
-    {
-        int tagIdentifier = tagType | (directoryType << 8);
-
-        switch (tagIdentifier) {
-            case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
-                // short
-                int shortValue = reader.getUInt16(offset);
-                directory.setInt(tagIdentifier, shortValue);
-                return;
             case IptcDirectory.TAG_URGENCY:
                 // byte
-                directory.setInt(tagIdentifier, reader.getUInt8(offset));
+                directory.setInt(tagIdentifier, reader.getUInt8());
+                reader.skip(tagByteCount - 1);
                 return;
             case IptcDirectory.TAG_RELEASE_DATE:
@@ -142,15 +188,17 @@
                 // Date object
                 if (tagByteCount >= 8) {
-                    String dateStr = reader.getString(offset, tagByteCount);
+                    string = reader.getString(tagByteCount);
                     try {
-                        int year = Integer.parseInt(dateStr.substring(0, 4));
-                        int month = Integer.parseInt(dateStr.substring(4, 6)) - 1;
-                        int day = Integer.parseInt(dateStr.substring(6, 8));
+                        int year = Integer.parseInt(string.substring(0, 4));
+                        int month = Integer.parseInt(string.substring(4, 6)) - 1;
+                        int day = Integer.parseInt(string.substring(6, 8));
                         Date date = new java.util.GregorianCalendar(year, month, day).getTime();
                         directory.setDate(tagIdentifier, date);
                         return;
                     } catch (NumberFormatException e) {
-                        // fall through and we'll store whatever was there as a String
+                        // fall through and we'll process the 'string' value below
                     }
+                } else {
+                    reader.skip(tagByteCount);
                 }
             case IptcDirectory.TAG_RELEASE_TIME:
@@ -162,9 +210,14 @@
 
         // If we haven't returned yet, treat it as a string
-        String str;
-        if (tagByteCount < 1) {
-            str = "";
-        } else {
-            str = reader.getString(offset, tagByteCount, System.getProperty("file.encoding")); // "ISO-8859-1"
+        // NOTE that there's a chance we've already loaded the value as a string above, but failed to parse the value
+        if (string == null) {
+            String encoding = directory.getString(IptcDirectory.TAG_CODED_CHARACTER_SET);
+            if (encoding != null) {
+                string = reader.getString(tagByteCount, encoding);
+            } else {
+                byte[] bytes = reader.getBytes(tagByteCount);
+                encoding = Iso2022Converter.guessEncoding(bytes);
+                string = encoding != null ? new String(bytes, encoding) : new String(bytes);
+            }
         }
 
@@ -179,8 +232,8 @@
                 System.arraycopy(oldStrings, 0, newStrings, 0, oldStrings.length);
             }
-            newStrings[newStrings.length - 1] = str;
+            newStrings[newStrings.length - 1] = string;
             directory.setStringArray(tagIdentifier, newStrings);
         } else {
-            directory.setString(tagIdentifier, str);
+            directory.setString(tagIdentifier, string);
         }
     }
Index: trunk/src/com/drew/metadata/iptc/Iso2022Converter.java
===================================================================
--- trunk/src/com/drew/metadata/iptc/Iso2022Converter.java	(revision 8132)
+++ trunk/src/com/drew/metadata/iptc/Iso2022Converter.java	(revision 8132)
@@ -0,0 +1,83 @@
+package com.drew.metadata.iptc;
+
+import com.drew.lang.annotations.NotNull;
+import com.drew.lang.annotations.Nullable;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+
+public final class Iso2022Converter
+{
+    private static final String ISO_8859_1 = "ISO-8859-1"; // Java charset names handed back to callers of this class
+    private static final String UTF_8 = "UTF-8";
+
+    private static final byte LATIN_CAPITAL_A = 0x41; // ASCII 'A'
+    private static final int DOT = 0xe280a2; // bytes 0xE2 0x80 0xA2 packed big-endian. NOTE(review): that is UTF-8 for U+2022 BULLET, not ASCII '.' (0x2E) - confirm intended
+    private static final byte LATIN_CAPITAL_G = 0x47; // ASCII 'G'
+    private static final byte PERCENT_SIGN = 0x25; // ASCII '%'
+    private static final byte ESC = 0x1B; // ASCII escape, the first byte of every sequence recognised below
+
+    /**
+     * Converts the given ISO2022 char set escape sequence to a Java charset name.
+     *
+     * @param bytes the escape sequence: <code>ESC % G</code> selects UTF-8; ESC, 0xE2 0x80 0xA2, <code>A</code> selects ISO-8859-1
+     * @return the Java charset name as a string, or <code>null</code> if the sequence is too short or not recognised
+     */
+    @Nullable
+    public static String convertISO2022CharsetToJavaCharset(@NotNull final byte[] bytes)
+    {
+        if (bytes.length > 2 && bytes[0] == ESC && bytes[1] == PERCENT_SIGN && bytes[2] == LATIN_CAPITAL_G)
+            return UTF_8;
+        // This sequence is five bytes long (ESC, the three DOT bytes, 'A'), so reading index 4 requires length > 4
+        if (bytes.length > 4 && bytes[0] == ESC && (bytes[3] & 0xFF | ((bytes[2] & 0xFF) << 8) | ((bytes[1] & 0xFF) << 16)) == DOT && bytes[4] == LATIN_CAPITAL_A)
+            return ISO_8859_1;
+
+        return null;
+    }
+
+    /**
+     * Attempts to guess the encoding of a string provided as a byte array.
+     * <p>
+     * Encodings trialled are, in order:
+     * <ul>
+     *     <li>UTF-8</li>
+     *     <li><code>System.getProperty("file.encoding")</code></li>
+     *     <li>ISO-8859-1</li>
+     * </ul>
+     * <p>
+     * Its only purpose is to guess the encoding when the IPTC coded character set tag is not set. If the
+     * encoding is not UTF-8, that tag should be set; omitting it is bad practice. This method tries to
+     * work around the issue, since some metadata manipulating tools do not prevent such bad practice.
+     * <p>
+     * About the reliability of this method: the check for whether the bytes are UTF-8 is highly reliable.
+     * The two other checks are less reliable. A candidate whose charset name is missing or unsupported is skipped.
+     *
+     * @param bytes some text as bytes
+     * @return the name of the encoding or null if none could be guessed
+     */
+    @Nullable
+    static String guessEncoding(@NotNull final byte[] bytes)
+    {
+        String[] encodings = { UTF_8, System.getProperty("file.encoding"), ISO_8859_1 };
+        for (String encoding : encodings)
+        {
+            try {
+                CharsetDecoder cs = Charset.forName(encoding).newDecoder();
+                cs.decode(ByteBuffer.wrap(bytes));
+                return encoding;
+            } catch (CharacterCodingException e) {
+                // bytes are not valid in this encoding, so try the next candidate
+            } catch (IllegalArgumentException e) {
+                // charset name was null, malformed or unsupported on this JVM, so try the next candidate
+            }
+        }
+
+        // No encodings succeeded. Return null.
+        return null;
+    }
+
+    private Iso2022Converter()
+    {} // private so the class cannot be instantiated; its methods are all static
+}
Index: trunk/src/com/drew/metadata/iptc/package.html
===================================================================
--- trunk/src/com/drew/metadata/iptc/package.html	(revision 8132)
+++ trunk/src/com/drew/metadata/iptc/package.html	(revision 8132)
@@ -0,0 +1,33 @@
+<!--
+  ~ Copyright 2002-2015 Drew Noakes
+  ~
+  ~    Licensed under the Apache License, Version 2.0 (the "License");
+  ~    you may not use this file except in compliance with the License.
+  ~    You may obtain a copy of the License at
+  ~
+  ~        http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~    Unless required by applicable law or agreed to in writing, software
+  ~    distributed under the License is distributed on an "AS IS" BASIS,
+  ~    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~    See the License for the specific language governing permissions and
+  ~    limitations under the License.
+  ~
+  ~ More information about this project is available at:
+  ~
+  ~    https://drewnoakes.com/code/exif/
+  ~    https://github.com/drewnoakes/metadata-extractor
+  -->
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head>
+</head>
+<body bgcolor="white">
+
+Contains classes for the extraction and modelling of IPTC metadata.
+
+<!-- Put @see and @since tags down here. -->
+
+</body>
+</html>
