Index: applications/editors/josm/plugins/pbf/src/crosby/binary/BinaryParser.java
===================================================================
--- applications/editors/josm/plugins/pbf/src/crosby/binary/BinaryParser.java	(revision 30341)
+++ applications/editors/josm/plugins/pbf/src/crosby/binary/BinaryParser.java	(revision 30490)
@@ -55,5 +55,5 @@
     }
     
-    //@Override
+    @Override
     public void handleBlock(FileBlock message) {
         // TODO Auto-generated method stub
@@ -77,5 +77,5 @@
 
 
-    //@Override
+    @Override
     public boolean skipBlock(FileBlockPosition block) {
         // System.out.println("Seeing block of type: "+block.getType());
Index: applications/editors/josm/plugins/pbf/src/crosby/binary/StringTable.java
===================================================================
--- applications/editors/josm/plugins/pbf/src/crosby/binary/StringTable.java	(revision 30341)
+++ applications/editors/josm/plugins/pbf/src/crosby/binary/StringTable.java	(revision 30490)
@@ -2,6 +2,6 @@
 
    This program is free software: you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as 
-   published by the Free Software Foundation, either version 3 of the 
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.
 
@@ -46,5 +46,5 @@
 
     /** After the stringtable has been built, return the offset of a string in it.
-     * 
+     *
      * Note, value '0' is reserved for use as a delimiter and will not be returned.
      * @param s
@@ -57,5 +57,5 @@
     public void finish() {
         Comparator<String> comparator = new Comparator<String>() {
-            //@Override
+            @Override
             public int compare(final String s1, String s2) {
                 int diff = counts.get(s2) - counts.get(s1);
@@ -63,4 +63,39 @@
             }
         };
+
+        /* Sort the stringtable */
+
+        /*
+        When a string is referenced, strings in the stringtable with indices:
+               0                : Is reserved (used as a delimiter in tags)
+         A:  1 to 127          : Uses can be represented with 1 byte
+         B: 128 to 128**2-1 : Uses can be represented with 2 bytes,
+         C: 128*128  to X    : Uses can be represented with 3 bytes in the unlikely case we have >16k strings in a block. No block will contain enough strings that we'll need 4 bytes.
+
+        There are goals that will improve compression:
+          1. I want to use 1 byte for the most frequently occurring strings, then 2 bytes, then 3 bytes.
+          2. I want to use low integers as frequently as possible (for better
+             entropy encoding out of deflate)
+          3. I want the stringtable to compress as small as possible.
+
+        Condition 1 is obvious. Condition 2 makes deflate compress stringtable references more effectively.
+        When compressing entities, delta coding causes small positive integers to occur more frequently
+        than larger integers. Even though stringtable references to indices 1 and 127 both use one
+        byte in a decompressed file, the small integer bias causes deflate to use fewer bits to represent
+        the smaller index when compressed. Condition 3 is most effective when adjacent strings in the
+        stringtable have a lot of common substrings.
+
+        So, when I decide on the master stringtable to use, I put the 127 most frequently occurring
+        strings into A (accomplishing goal 1), and sort them by frequency (to accomplish goal 2), but
+        for B and C, which contain the progressively less frequently encountered strings, I sort
+        them lexicographically, to maximize goal 3 while ignoring goal 2.
+
+        Goal 1 is the most important. Goal 2 helped enough to be worth it, and goal 3 was pretty minor,
+        but all should be re-benchmarked.
+
+
+        */
+
+
 
         set = counts.keySet().toArray(new String[0]);
@@ -71,8 +106,8 @@
           // sorted lexiconographically.
           // to maximize deflate compression.
-          
+
           // Don't sort the first array. There's not likely to be much benefit, and we want frequent values to be small.
           //Arrays.sort(set, Math.min(0, set.length-1), Math.min(1 << 7, set.length-1));
-          
+
           Arrays.sort(set, Math.min(1 << 7, set.length-1), Math.min(1 << 14,
               set.length-1));
Index: applications/editors/josm/plugins/pbf/src/crosby/binary/fileformat.proto
===================================================================
--- applications/editors/josm/plugins/pbf/src/crosby/binary/fileformat.proto	(revision 30490)
+++ applications/editors/josm/plugins/pbf/src/crosby/binary/fileformat.proto	(revision 30490)
@@ -0,0 +1,54 @@
+/** Copyright (c) 2010 Scott A. Crosby. <scott@sacrosby.com>
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as 
+   published by the Free Software Foundation, either version 3 of the 
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+option optimize_for = LITE_RUNTIME;
+option java_package = "crosby.binary";
+package OSMPBF;
+
+//protoc --java_out=../.. fileformat.proto
+
+
+//
+//  STORAGE LAYER: Storing primitives.
+//
+
+message Blob {
+  optional bytes raw = 1; // No compression
+  optional int32 raw_size = 2; // When compressed, the uncompressed size
+
+  // Possible compressed versions of the data.
+  optional bytes zlib_data = 3;
+
+  // PROPOSED feature for LZMA compressed data. SUPPORT IS NOT REQUIRED.
+  optional bytes lzma_data = 4;
+
+  // Formerly used for bzip2 compressed data. Deprecated in 2010.
+  optional bytes OBSOLETE_bzip2_data = 5 [deprecated=true]; // Don't reuse this tag number.
+}
+
+/* A file contains a sequence of fileblock headers, each prefixed by
+their length in network byte order, followed by a data block
+containing the actual data. Types starting with a "_" are reserved.
+*/
+
+message BlobHeader {
+  required string type = 1;
+  optional bytes indexdata = 2;
+  required int32 datasize = 3;
+}
+
+
Index: applications/editors/josm/plugins/pbf/src/crosby/binary/osmformat.proto
===================================================================
--- applications/editors/josm/plugins/pbf/src/crosby/binary/osmformat.proto	(revision 30490)
+++ applications/editors/josm/plugins/pbf/src/crosby/binary/osmformat.proto	(revision 30490)
@@ -0,0 +1,260 @@
+/** Copyright (c) 2010 Scott A. Crosby. <scott@sacrosby.com>
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as 
+   published by the Free Software Foundation, either version 3 of the 
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+*/
+
+option optimize_for = LITE_RUNTIME;
+option java_package = "crosby.binary";
+package OSMPBF;
+
+/* OSM Binary file format 
+
+This is the master schema file of the OSM binary file format. This
+file is designed to support limited random-access and future
+extendability.
+
+A binary OSM file consists of a sequence of FileBlocks (please see
+fileformat.proto). The first fileblock contains a serialized instance
+of HeaderBlock, followed by a sequence of PrimitiveBlock blocks that
+contain the primitives.
+
+Each primitiveblock is designed to be independently parsable. It
+contains a string table storing all strings in that block (keys and
+values in tags, roles in relations, usernames, etc.) as well as
+metadata containing the precision of coordinates or timestamps in that
+block.
+
+A primitiveblock contains a sequence of primitive groups, each
+containing primitives of the same type (nodes, densenodes, ways,
+relations). Coordinates are stored in signed 64-bit integers. Lat&lon
+are measured in units <granularity> nanodegrees. The default of
+granularity of 100 nanodegrees corresponds to about 1cm on the ground,
+and a full lat or lon fits into 32 bits.
+
+Converting an integer to a latitude or longitude uses the formula:
+$OUT = IN * granularity / 10**9$. Many encoding schemes use delta
+coding when representing nodes and relations.
+
+*/
+
+//////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////
+
+/* Contains the file header. */
+
+message HeaderBlock {
+  optional HeaderBBox bbox = 1;
+  /* Additional tags to aid in parsing this dataset */
+  repeated string required_features = 4;
+  repeated string optional_features = 5;
+
+  optional string writingprogram = 16; 
+  optional string source = 17; // From the bbox field.
+
+  /* Tags that allow continuing an Osmosis replication */
+
+  // replication timestamp, expressed in seconds since the epoch, 
+  // otherwise the same value as in the "timestamp=..." field
+  // in the state.txt file used by Osmosis
+  optional int64 osmosis_replication_timestamp = 32;
+
+  // replication sequence number (sequenceNumber in state.txt)
+  optional int64 osmosis_replication_sequence_number = 33;
+
+  // replication base URL (from Osmosis' configuration.txt file)
+  optional string osmosis_replication_base_url = 34;
+}
+
+
+/** The bounding box field in the OSM header. BBOX, as used in the OSM
+header. Units are always in nanodegrees -- they do not obey
+granularity rules. */
+
+message HeaderBBox {
+   required sint64 left = 1;
+   required sint64 right = 2;
+   required sint64 top = 3;
+   required sint64 bottom = 4;
+}
+
+
+///////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////
+
+
+message PrimitiveBlock {
+  required StringTable stringtable = 1;
+  repeated PrimitiveGroup primitivegroup = 2;
+
+  // Granularity, units of nanodegrees, used to store coordinates in this block
+  optional int32 granularity = 17 [default=100]; 
+  // Offset value between the output coordinates and the granularity grid in units of nanodegrees.
+  optional int64 lat_offset = 19 [default=0];
+  optional int64 lon_offset = 20 [default=0]; 
+
+// Granularity of dates, normally represented in units of milliseconds since the 1970 epoch.
+  optional int32 date_granularity = 18 [default=1000]; 
+
+
+  // Proposed extension:
+  //optional BBox bbox = XX;
+}
+
+// Group of OSMPrimitives. All primitives in a group must be the same type.
+message PrimitiveGroup {
+  repeated Node     nodes = 1;
+  optional DenseNodes dense = 2;
+  repeated Way      ways = 3;
+  repeated Relation relations = 4;
+  repeated ChangeSet changesets = 5;
+}
+
+
+/** String table, contains the common strings in each block.
+
+ Note that we reserve index '0' as a delimiter, so the entry at that
+ index in the table is ALWAYS blank and unused.
+
+ */
+message StringTable {
+   repeated bytes s = 1;
+}
+
+/* Optional metadata that may be included into each primitive. */
+message Info {
+   optional int32 version = 1 [default = -1];
+   optional int64 timestamp = 2;
+   optional int64 changeset = 3;
+   optional int32 uid = 4;
+   optional uint32 user_sid = 5; // String IDs
+
+   // The visible flag is used to store history information. It indicates that
+   // the current object version has been created by a delete operation on the
+   // OSM API.
+   // When a writer sets this flag, it MUST add a required_features tag with
+   // value "HistoricalInformation" to the HeaderBlock.
+   // If this flag is not available for some object it MUST be assumed to be
+   // true if the file has the required_features tag "HistoricalInformation"
+   // set.
+   optional bool visible = 6;
+}
+
+/** Optional metadata that may be included into each primitive. Special dense format used in DenseNodes. */
+message DenseInfo {
+   repeated int32 version = 1 [packed = true]; 
+   repeated sint64 timestamp = 2 [packed = true]; // DELTA coded
+   repeated sint64 changeset = 3 [packed = true]; // DELTA coded
+   repeated sint32 uid = 4 [packed = true]; // DELTA coded
+   repeated sint32 user_sid = 5 [packed = true]; // String IDs for usernames. DELTA coded
+
+   // The visible flag is used to store history information. It indicates that
+   // the current object version has been created by a delete operation on the
+   // OSM API.
+   // When a writer sets this flag, it MUST add a required_features tag with
+   // value "HistoricalInformation" to the HeaderBlock.
+   // If this flag is not available for some object it MUST be assumed to be
+   // true if the file has the required_features tag "HistoricalInformation"
+   // set.
+   repeated bool visible = 6 [packed = true];
+}
+
+
+// THIS IS STUB DESIGN FOR CHANGESETS. NOT USED RIGHT NOW.
+// TODO:    REMOVE THIS?
+message ChangeSet {
+   required int64 id = 1;
+//   
+//   // Parallel arrays.
+//   repeated uint32 keys = 2 [packed = true]; // String IDs.
+//   repeated uint32 vals = 3 [packed = true]; // String IDs.
+//
+//   optional Info info = 4;
+
+//   optional int64 created_at = 8;
+//   optional int64 closetime_delta = 9;
+//   optional bool open = 10;
+//   optional HeaderBBox bbox = 11;
+}
+
+
+message Node {
+   required sint64 id = 1;
+   // Parallel arrays.
+   repeated uint32 keys = 2 [packed = true]; // String IDs.
+   repeated uint32 vals = 3 [packed = true]; // String IDs.
+
+   optional Info info = 4; // May be omitted in omitmeta
+
+   required sint64 lat = 8;
+   required sint64 lon = 9;
+}
+
+/* Used to densely represent a sequence of nodes that do not have any tags.
+
+We represent these nodes columnwise as five columns: ID's, lats, and
+lons, all delta coded. When metadata is not omitted, 
+
+We encode keys & vals for all nodes as a single array of integers
+containing key-stringid and val-stringid, using a stringid of 0 as a
+delimiter between nodes.
+
+   ( (<keyid> <valid>)* '0' )*
+ */
+
+message DenseNodes {
+   repeated sint64 id = 1 [packed = true]; // DELTA coded
+
+   //repeated Info info = 4;
+   optional DenseInfo denseinfo = 5;
+
+   repeated sint64 lat = 8 [packed = true]; // DELTA coded
+   repeated sint64 lon = 9 [packed = true]; // DELTA coded
+
+   // Special packing of keys and vals into one array. May be empty if all nodes in this block are tagless.
+   repeated int32 keys_vals = 10 [packed = true]; 
+}
+
+
+message Way {
+   required int64 id = 1;
+   // Parallel arrays.
+   repeated uint32 keys = 2 [packed = true];
+   repeated uint32 vals = 3 [packed = true];
+
+   optional Info info = 4;
+
+   repeated sint64 refs = 8 [packed = true];  // DELTA coded
+}
+
+message Relation {
+  enum MemberType {
+    NODE = 0;
+    WAY = 1;
+    RELATION = 2;
+  } 
+   required int64 id = 1;
+
+   // Parallel arrays.
+   repeated uint32 keys = 2 [packed = true];
+   repeated uint32 vals = 3 [packed = true];
+
+   optional Info info = 4;
+
+   // Parallel arrays
+   repeated int32 roles_sid = 8 [packed = true];
+   repeated sint64 memids = 9 [packed = true]; // DELTA encoded
+   repeated MemberType types = 10 [packed = true];
+}
+
