Ignore:
Timestamp:
2015-03-10T01:17:39+01:00 (9 years ago)
Author:
Don-vip
Message:

fix #11162 - update to metadata-extractor 2.7.2

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/com/drew/metadata/iptc/IptcReader.java

    r6127 r8132  
    11/*
    2  * Copyright 2002-2012 Drew Noakes
     2 * Copyright 2002-2015 Drew Noakes
    33 *
    44 *    Licensed under the Apache License, Version 2.0 (the "License");
     
    1616 * More information about this project is available at:
    1717 *
    18  *    http://drewnoakes.com/code/exif/
    19  *    http://code.google.com/p/metadata-extractor/
     18 *    https://drewnoakes.com/code/exif/
     19 *    https://github.com/drewnoakes/metadata-extractor
    2020 */
    2121package com.drew.metadata.iptc;
    2222
    23 import com.drew.lang.BufferBoundsException;
    24 import com.drew.lang.BufferReader;
     23import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
     24import com.drew.imaging.jpeg.JpegSegmentType;
     25import com.drew.lang.SequentialByteArrayReader;
     26import com.drew.lang.SequentialReader;
    2527import com.drew.lang.annotations.NotNull;
    2628import com.drew.metadata.Directory;
    2729import com.drew.metadata.Metadata;
    28 import com.drew.metadata.MetadataReader;
    29 
     30
     31import java.io.IOException;
     32import java.util.Arrays;
    3033import java.util.Date;
    3134
    3235/**
    33  * Decodes IPTC binary data, populating a <code>Metadata</code> object with tag values in an <code>IptcDirectory</code>.
    34  *
    35  * @author Drew Noakes http://drewnoakes.com
     36 * Decodes IPTC binary data, populating a {@link Metadata} object with tag values in an {@link IptcDirectory}.
     37 * <p>
     38 * http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
     39 *
     40 * @author Drew Noakes https://drewnoakes.com
    3641 */
    37 public class IptcReader implements MetadataReader
     42public class IptcReader implements JpegSegmentMetadataReader
    3843{
    3944    // TODO consider breaking the IPTC section up into multiple directories and providing segregation of each IPTC directory
     
    5257*/
    5358
    54     /** Performs the IPTC data extraction, adding found values to the specified instance of <code>Metadata</code>. */
    55     public void extract(@NotNull final BufferReader reader, @NotNull final Metadata metadata)
     59    @NotNull
     60    public Iterable<JpegSegmentType> getSegmentTypes()
     61    {
     62        return Arrays.asList(JpegSegmentType.APPD);
     63    }
     64
     65    public boolean canProcess(@NotNull byte[] segmentBytes, @NotNull JpegSegmentType segmentType)
     66    {
     67        // Check whether the first byte resembles the IPTC tag marker (0x1c)
     68        return segmentBytes.length != 0 && segmentBytes[0] == 0x1c;
     69    }
     70
     71    public void extract(@NotNull byte[] segmentBytes, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
     72    {
     73        extract(new SequentialByteArrayReader(segmentBytes), metadata, segmentBytes.length);
     74    }
     75
     76    /**
     77     * Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
     78     */
     79    public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length)
    5680    {
    5781        IptcDirectory directory = metadata.getOrCreateDirectory(IptcDirectory.class);
     
    5983        int offset = 0;
    6084
    61 /*
    62         // find start-of-segment marker (potentially need to skip some ASCII photoshop header info)
    63         try {
    64             while (offset < data.length - 1 && reader.getUInt16(offset) != 0x1c01 && reader.getUInt16(offset) != 0x1c02)
    65                 offset++;
    66         } catch (BufferBoundsException e) {
    67             directory.addError("Couldn't find start of IPTC data (invalid segment)");
    68             return;
    69         }
    70 */
    71 
    7285        // for each tag
    73         while (offset < reader.getLength()) {
     86        while (offset < length) {
    7487
    7588            // identifies start of a tag
    7689            short startByte;
    7790            try {
    78                 startByte = reader.getUInt8(offset);
    79             } catch (BufferBoundsException e) {
     91                startByte = reader.getUInt8();
     92                offset++;
     93            } catch (IOException e) {
    8094                directory.addError("Unable to read starting byte of IPTC tag");
    81                 break;
     95                return;
    8296            }
    8397
    8498            if (startByte != 0x1c) {
    85                 directory.addError("Invalid start to IPTC tag");
    86                 break;
     99                // NOTE have seen images where there was one extra byte at the end, giving
     100                // offset==length at this point, which is not worth logging as an error.
     101                if (offset != length)
     102                    directory.addError("Invalid IPTC tag marker at offset " + (offset - 1) + ". Expected '0x1c' but got '0x" + Integer.toHexString(startByte) + "'.");
     103                return;
    87104            }
    88105
    89106            // we need at least five bytes left to read a tag
    90             if (offset + 5 >= reader.getLength()) {
     107            if (offset + 5 >= length) {
    91108                directory.addError("Too few bytes remain for a valid IPTC tag");
    92                 break;
    93             }
    94 
    95             offset++;
     109                return;
     110            }
    96111
    97112            int directoryType;
     
    99114            int tagByteCount;
    100115            try {
    101                 directoryType = reader.getUInt8(offset++);
    102                 tagType = reader.getUInt8(offset++);
    103                 tagByteCount = reader.getUInt16(offset);
    104                 offset += 2;
    105             } catch (BufferBoundsException e) {
     116                directoryType = reader.getUInt8();
     117                tagType = reader.getUInt8();
     118                // TODO support Extended DataSet Tag (see 1.5(c), p14, IPTC-IIMV4.2.pdf)
     119                tagByteCount = reader.getUInt16();
     120                offset += 4;
     121            } catch (IOException e) {
    106122                directory.addError("IPTC data segment ended mid-way through tag descriptor");
    107123                return;
    108124            }
    109125
    110             if (offset + tagByteCount > reader.getLength()) {
     126            if (offset + tagByteCount > length) {
    111127                directory.addError("Data for tag extends beyond end of IPTC segment");
     128                return;
     129            }
     130
     131            try {
     132                processTag(reader, directory, directoryType, tagType, tagByteCount);
     133            } catch (IOException e) {
     134                directory.addError("Error processing IPTC tag");
     135                return;
     136            }
     137
     138            offset += tagByteCount;
     139        }
     140    }
     141
     142    private void processTag(@NotNull SequentialReader reader, @NotNull Directory directory, int directoryType, int tagType, int tagByteCount) throws IOException
     143    {
     144        int tagIdentifier = tagType | (directoryType << 8);
     145
     146        // Some images have been seen that specify a zero byte tag, which cannot be of much use.
     147        // We elect here to store an empty string for the tag without reading any data. The IPTC
     148        // specification doesn't mention anything about the interpretation of this situation.
     149        // https://raw.githubusercontent.com/wiki/drewnoakes/metadata-extractor/docs/IPTC-IIMV4.2.pdf
     150        if (tagByteCount == 0) {
     151            directory.setString(tagIdentifier, "");
     152            return;
     153        }
     154
     155        String string = null;
     156
     157        switch (tagIdentifier) {
     158            case IptcDirectory.TAG_CODED_CHARACTER_SET:
     159                byte[] bytes = reader.getBytes(tagByteCount);
     160                String charset = Iso2022Converter.convertISO2022CharsetToJavaCharset(bytes);
     161                if (charset == null) {
     162                    // Unable to determine the charset, so fall through and treat tag as a regular string
     163                    string = new String(bytes);
     164                    break;
     165                }
     166                directory.setString(tagIdentifier, charset);
     167                return;
     168            case IptcDirectory.TAG_ENVELOPE_RECORD_VERSION:
     169            case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
     170            case IptcDirectory.TAG_FILE_VERSION:
     171            case IptcDirectory.TAG_ARM_VERSION:
     172            case IptcDirectory.TAG_PROGRAM_VERSION:
     173                // short
     174                if (tagByteCount >= 2) {
     175                    int shortValue = reader.getUInt16();
     176                    reader.skip(tagByteCount - 2);
     177                    directory.setInt(tagIdentifier, shortValue);
     178                    return;
     179                }
    112180                break;
    113             }
    114 
    115             try {
    116                 processTag(reader, directory, directoryType, tagType, offset, tagByteCount);
    117             } catch (BufferBoundsException e) {
    118                 directory.addError("Error processing IPTC tag");
    119                 break;
    120             }
    121 
    122             offset += tagByteCount;
    123         }
    124     }
    125 
    126     private void processTag(@NotNull BufferReader reader, @NotNull Directory directory, int directoryType, int tagType, int offset, int tagByteCount) throws BufferBoundsException
    127     {
    128         int tagIdentifier = tagType | (directoryType << 8);
    129 
    130         switch (tagIdentifier) {
    131             case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
    132                 // short
    133                 int shortValue = reader.getUInt16(offset);
    134                 directory.setInt(tagIdentifier, shortValue);
    135                 return;
    136181            case IptcDirectory.TAG_URGENCY:
    137182                // byte
    138                 directory.setInt(tagIdentifier, reader.getUInt8(offset));
     183                directory.setInt(tagIdentifier, reader.getUInt8());
     184                reader.skip(tagByteCount - 1);
    139185                return;
    140186            case IptcDirectory.TAG_RELEASE_DATE:
     
    142188                // Date object
    143189                if (tagByteCount >= 8) {
    144                     String dateStr = reader.getString(offset, tagByteCount);
     190                    string = reader.getString(tagByteCount);
    145191                    try {
    146                         int year = Integer.parseInt(dateStr.substring(0, 4));
    147                         int month = Integer.parseInt(dateStr.substring(4, 6)) - 1;
    148                         int day = Integer.parseInt(dateStr.substring(6, 8));
     192                        int year = Integer.parseInt(string.substring(0, 4));
     193                        int month = Integer.parseInt(string.substring(4, 6)) - 1;
     194                        int day = Integer.parseInt(string.substring(6, 8));
    149195                        Date date = new java.util.GregorianCalendar(year, month, day).getTime();
    150196                        directory.setDate(tagIdentifier, date);
    151197                        return;
    152198                    } catch (NumberFormatException e) {
    153                         // fall through and we'll store whatever was there as a String
     199                        // fall through and we'll process the 'string' value below
    154200                    }
     201                } else {
     202                    reader.skip(tagByteCount);
    155203                }
    156204            case IptcDirectory.TAG_RELEASE_TIME:
     
    162210
    163211        // If we haven't returned yet, treat it as a string
    164         String str;
    165         if (tagByteCount < 1) {
    166             str = "";
    167         } else {
    168             str = reader.getString(offset, tagByteCount, System.getProperty("file.encoding")); // "ISO-8859-1"
     212        // NOTE that there's a chance we've already loaded the value as a string above, but failed to parse the value
     213        if (string == null) {
     214            String encoding = directory.getString(IptcDirectory.TAG_CODED_CHARACTER_SET);
     215            if (encoding != null) {
     216                string = reader.getString(tagByteCount, encoding);
     217            } else {
     218                byte[] bytes = reader.getBytes(tagByteCount);
     219                encoding = Iso2022Converter.guessEncoding(bytes);
     220                string = encoding != null ? new String(bytes, encoding) : new String(bytes);
     221            }
    169222        }
    170223
     
    179232                System.arraycopy(oldStrings, 0, newStrings, 0, oldStrings.length);
    180233            }
    181             newStrings[newStrings.length - 1] = str;
     234            newStrings[newStrings.length - 1] = string;
    182235            directory.setStringArray(tagIdentifier, newStrings);
    183236        } else {
    184             directory.setString(tagIdentifier, str);
     237            directory.setString(tagIdentifier, string);
    185238        }
    186239    }
Note: See TracChangeset for help on using the changeset viewer.