Opened 7 years ago

Last modified 21 months ago

#16944 closed enhancement

svn -> git migration script — at Version 2

Reported by: wiktorn Owned by: team
Priority: normal Milestone:
Component: unspecified Version:
Keywords: hack-weekend-2018-10 svn git migration script Cc:

Description (last modified by wiktorn)

Split into 4 scripts to make it easier to restart after an error.

  • git_migrate-1-init.sh
    #!/bin/sh
    #
    # git_migrate-1-init.sh -- step 1 of 4.
    # Creates the josm-tests repository and registers every upstream in it
    # (four git-svn remotes and one plain git remote). Nothing is fetched here.
    
    set -e
    
    # svn_remote NAME PREFIX URL [INCLUDE_REGEX]
    # Registers URL as git-svn remote NAME, with its refs stored under PREFIX.
    # The URL itself is treated as the trunk. When INCLUDE_REGEX is given it is
    # passed to --include-paths.
    svn_remote() {
      if [ -n "${4:-}" ] ; then
        git svn init --trunk=. --include-paths="$4" --prefix="$2" -R "$1" "$3"
      else
        git svn init --trunk=. --prefix="$2" -R "$1" "$3"
      fi
    }
    
    git init josm-tests
    cd josm-tests
    # Seed master with one empty commit so the branch itself stays clear.
    git commit -m 'init' --allow-empty
    
    ###
    # define remotes
    ###
    
    svn_remote jmapviewer svn_jmapviewer \
      https://svn.openstreetmap.org/applications/viewer/jmapviewer
    svn_remote josm svn_josm \
      https://josm.openstreetmap.de/svn/trunk
    # Two separate remotes for Apache Commons: per the original author this is
    # 2x ~10 minutes to fetch instead of 2+ hours.
    svn_remote svn_commons_jcs svn_commons_jcs \
      https://svn.apache.org/repos/asf/commons/proper/jcs/ \
      'commons/proper/jcs/trunk/.*src/main/java/org/apache/commons/jcs'
    svn_remote svn_commons_logging svn_commons_logging \
      https://svn.apache.org/repos/asf/commons/proper/logging/trunk/ \
      'commons/proper/logging/trunk/src/main/java/org/apache/commons/logging'
    # Commons Compress already lives in git, so it is a regular remote.
    git remote add commons_compress https://github.com/apache/commons-compress.git
    
    echo Done
    
  • git_migrate-2-fetch.sh
    #!/bin/sh
    #
    # git_migrate-2-fetch.sh -- step 2 of 4.
    # Downloads the history of every remote registered in josm-tests.
    
    set -e
    
    cd josm-tests
    
    ###
    # fetch from remotes
    ###
    for svn_remote in jmapviewer josm ; do
      git svn fetch "${svn_remote}"
    done
    
    # The Apache Commons remotes are fetched twice: first with a large log
    # window, then once more with default settings.
    # NOTE(review): the reason for the second pass is not stated; presumably it
    # picks up anything the first pass missed -- confirm.
    for svn_remote in svn_commons_jcs svn_commons_logging ; do
      git svn fetch --log-window-size=100000 "${svn_remote}"
    done
    for svn_remote in svn_commons_jcs svn_commons_logging ; do
      git svn fetch "${svn_remote}"
    done
    
    git fetch commons_compress
    
    echo Done
    
  • git_migrate-3-prepare.sh
    #!/bin/sh
    #
    # git_migrate-3-prepare.sh -- step 3 of 4.
    # Rewrites the fetched histories on dedicated *_rewrite / *_clean branches.
    
    set -e
    
    cd josm-tests
    ###
    # Reorganize externals
    ###
    
    # JMapViewer
    # Work on a copy of the git-svn trunk ref so the fetched history stays intact.
    git branch -f svn_jmapviewer_rewrite remotes/svn_jmapviewertrunk
    # Per commit:
    #  - if src/org/openstreetmap/gui/jmapviewer exists: park src as src.new,
    #    delete every other top-level entry, recreate src/org/openstreetmap/gui,
    #    move the jmapviewer package back into it and drop what is left of src.new;
    #  - otherwise: delete every top-level entry (the commit ends up empty).
    # "find . -mindepth 1 -maxdepth 1" lists the direct children of the tree.
    # The previous "find . -depth 1" is BSD-only syntax; GNU find rejects the
    # stray "1" argument.
    git filter-branch -f --tree-filter '
      if [ -d src/org/openstreetmap/gui/jmapviewer ] ; then
          git mv src src.new &&
          find . -mindepth 1 -maxdepth 1 |
          grep -v ^./src.new | grep -v ^./.git$ |
          xargs -r git rm -fr &&
          mkdir -p src/org/openstreetmap/gui &&
          git add src/org/openstreetmap/gui &&
          git mv src.new/org/openstreetmap/gui/jmapviewer src/org/openstreetmap/gui &&
          git rm -rf --ignore-unmatch src.new
      else
          find . -mindepth 1 -maxdepth 1 |
          grep -v ^./.git$ |
          xargs -r git rm -fr
      fi' \
      svn_jmapviewer_rewrite
    
    # Apache Commons Compress
    # compress is already on git, need first to somehow mix with other repositories (using commit date, not author date)
    # as commits sorted by date do not constitute the history.
    # first - create subdirectory where all commons-compress files will go
    git branch -f svn_commons_compress_rewrite remotes/commons_compress/master
    # A literal TAB is expanded here, by this script, and embedded in the filter
    # text. The filter is evaluated by the /bin/sh that runs git-filter-branch,
    # so it must not rely on $'...' quoting (not available in every sh) nor on
    # sed understanding "\t" (GNU-only).
    TAB=$(printf '\t')
    # Per commit: prefix every index entry path with commons-compress/ and swap
    # the rewritten index in. The "* after the TAB keeps the prefix inside the
    # opening quote that ls-files puts around paths with special characters.
    # The -f test covers commits for which update-index produced no new index file.
    git filter-branch -f --index-filter "
      git ls-files -s | sed -e 's#${TAB}\"*#&commons-compress/#' |
          GIT_INDEX_FILE=\"\$GIT_INDEX_FILE.new\" git update-index --index-info &&
          if [ -f \"\$GIT_INDEX_FILE.new\" ] ; then mv \"\$GIT_INDEX_FILE.new\" \"\$GIT_INDEX_FILE\" ; fi" \
      svn_commons_compress_rewrite
    
    # Apache Commons JCS
    # Work on a copy of the git-svn trunk ref so the fetched history stays intact.
    git branch -f svn_commons_jcstrunk_rewrite remotes/svn_commons_jcstrunk
    # Per commit:
    #  - if trunk/commons-jcs-core/src/main/java exists: drop any top-level src
    #    (a failure of that removal is deliberately ignored, hence the ";"),
    #    move the java sources to src and delete every other top-level entry;
    #  - otherwise: delete every top-level entry (the commit ends up empty).
    # "find . -mindepth 1 -maxdepth 1" lists the direct children of the tree.
    # The previous "find . -depth 1" is BSD-only syntax; GNU find rejects the
    # stray "1" argument.
    git filter-branch -f --tree-filter '
      if [ -d trunk/commons-jcs-core/src/main/java/ ] ; then
          git rm -rf src ;
          git mv trunk/commons-jcs-core/src/main/java src &&
          find . -mindepth 1 -maxdepth 1 |
          grep -v ^./src$ | grep -v ^./.git$ |
          xargs -r git rm -fr
      else
          find . -mindepth 1 -maxdepth 1 |
          grep -v ^./.git$ |
          xargs -r git rm -fr
      fi' \
      svn_commons_jcstrunk_rewrite
    
    # Apache Commons Logging
    # Work on a copy of the git-svn trunk ref so the fetched history stays intact.
    git branch -f svn_commons_loggingtrunk_rewrite remotes/svn_commons_loggingtrunk
    # Per commit:
    #  - if src/main/java exists: park src as src.new, delete every other
    #    top-level entry, move src.new/main/java to src and drop what is left
    #    of src.new;
    #  - otherwise: delete every top-level entry (the commit ends up empty).
    # "find . -mindepth 1 -maxdepth 1" lists the direct children of the tree.
    # The previous "find . -depth 1" is BSD-only syntax; GNU find rejects the
    # stray "1" argument.
    git filter-branch -f --tree-filter '
      if [ -d src/main/java ] ; then
          git mv src src.new &&
          find . -mindepth 1 -maxdepth 1 |
          grep -v ^./src.new | grep -v ^./.git$ |
          xargs -r git rm -fr &&
          git mv src.new/main/java src &&
          git rm -rf --ignore-unmatch src.new
      else
          find . -mindepth 1 -maxdepth 1 |
          grep -v ^./.git$ |
          xargs -r git rm -fr
      fi' \
      svn_commons_loggingtrunk_rewrite
    
    ###
    # Linearize commons_compress history
    ###
    
    # Work on a separate *_clean branch so svn_commons_compress_rewrite is kept.
    git branch -f svn_commons_compress_clean svn_commons_compress_rewrite
    # The parent filter keeps only space-delimited fields 2 and 3 of the parent
    # string it receives on stdin.
    # NOTE(review): presumably that string looks like " -p <sha> -p <sha>" (with a
    # leading space), so fields 2,3 are the first "-p <sha>" pair and every merge
    # is reduced to its first parent -- confirm against git-filter-branch.
    git filter-branch -f --parent-filter 'cut -f 2,3 -d " "' svn_commons_compress_clean
    
    ###
    # Remove empty commits
    ###
    
    # The tree filters above leave commits that change nothing. They are pruned
    # on a separate *_clean branch so the *_rewrite branch is kept as-is.
    #
    # filter-branch --prune-empty is used instead of "git rebase <root> <branch>":
    # for a linear branch that already descends from <root>, rebase reports
    # "up to date" and rewrites nothing, so no empty commit was ever dropped.
    # Note that --prune-empty also drops a root commit whose tree is empty.
    # filter-branch leaves HEAD where it is, which keeps "git branch -f" working
    # when this script is restarted.
    for i in svn_jmapviewer svn_commons_jcstrunk svn_commons_loggingtrunk ; do
      git branch -f "${i}_clean" "${i}_rewrite"
      git filter-branch -f --prune-empty "${i}_clean"
    done
    
  • git_migrate-4-merge.sh
    #!/bin/sh
    #
    # git_migrate-4-merge.sh -- step 4 of 4.
    # Builds a date-ordered pick list from all prepared branches and starts an
    # interactive rebase of master.
    
    set -e
    
    cd josm-tests
    
    ###
    # Merge JOSM repository with externals, commits ordered by date
    ###
    
    REBASE_CONF=$(mktemp)
    # Remove the scratch file on every exit path, including a "set -e" abort.
    trap 'rm -f "${REBASE_CONF}"' EXIT
    
    # One line per commit: "<author timestamp> <sha> <subject>".
    # tformat: terminates EVERY line with a newline. With format: the last line
    # of one "git log" has no newline, so the first line of the next "git log"
    # is glued onto it and that commit vanishes from the pick list.
    LOG_FORMAT='tformat:%at %H %s'
    
    # NOTE(review): JMapViewer is read from svn_jmapviewer_rewrite while the other
    # externals below are read from their *_clean branches -- confirm intended.
    (
      git log --pretty="${LOG_FORMAT}" remotes/svn_josmtrunk
      git log --pretty="${LOG_FORMAT}" svn_jmapviewer_rewrite
    ) | sort -n > "${REBASE_CONF}"
    
    # Timestamp of the oldest JOSM/JMapViewer commit. Only referenced by the
    # commented-out awk filter below.
    JOSM_EPOCH=$(head -n 1 "${REBASE_CONF}" | cut -d ' ' -f1 )
    (
      #git log --ancestry-path  --pretty="format:%ct %H %s" \
      #   $(git log --reverse --oneline --no-abbrev-commit svn_commons_compress_rewrite |
      #   head -n 1 | cut -d ' ' -f 1)..svn_commons_compress_rewrite # we are not rebasing this, to preserve commit timestamps which we use here
      # NOTE(review): the commented-out variant above sorts by %ct (committer date),
      # the live lines use %at (author date) -- confirm which one is wanted.
      git log --first-parent --pretty="${LOG_FORMAT}" svn_commons_compress_clean
      git log --pretty="${LOG_FORMAT}" svn_commons_jcstrunk_clean
      git log --pretty="${LOG_FORMAT}" svn_commons_loggingtrunk_clean
      # could use it:
      # | awk -F ' ' "\$1 > ${JOSM_EPOCH} {print \$1 \" \" \$2} {}" >> "${REBASE_CONF}"
      # to keep only history after first JOSM commit, but we would need to squash all the previous history into one commit
    ) >> "${REBASE_CONF}"
    
    # Final todo list, oldest commit first, one "p <sha>" line per commit. It is
    # written next to the repository, not inside it.
    sort -n "${REBASE_CONF}" | cut -d ' ' -f2 | sed -e 's/^/p /' > ../rebase_config
    rm -f "${REBASE_CONF}"
    # Raise the rename detection limit used while merging.
    git config merge.renamelimit 5000
    git checkout master
    # First commit of master, used as the base of the rebase.
    root_commit=$(git log --reverse --oneline --no-abbrev-commit | head -n 1 | cut -d ' ' -f 1)
    # NOTE(review): the rebase is interactive; presumably the todo list shown in
    # the editor is meant to be replaced by the contents of ../rebase_config.
    # This could be automated through GIT_SEQUENCE_EDITOR -- TODO confirm.
    git rebase --keep-empty -i "${root_commit}"
    
    # fix Apache Commons Compress paths
    # Per commit of master:
    #  - if commons-compress/src/main/java exists: move its
    #    org/apache/commons/compress package to src/java/org/apache/commons/
    #    and delete everything directly below commons-compress;
    #  - otherwise: just delete everything directly below commons-compress.
    # "find DIR -mindepth 1 -maxdepth 1" lists the direct children of DIR.
    # The previous "find DIR -depth 1" is BSD-only syntax; GNU find rejects the
    # stray "1" argument.
    git filter-branch -f --tree-filter '
      if [ -d commons-compress/src/main/java ] ; then
          mkdir -p src/java/org/apache/commons/
          git mv commons-compress/src/main/java/org/apache/commons/compress src/java/org/apache/commons/ &&
          find commons-compress -mindepth 1 -maxdepth 1 |
          xargs -r git rm -fr
      else
          find commons-compress -mindepth 1 -maxdepth 1 |
          xargs -r git rm -fr
      fi' \
      master
    
    # helpful one-liner: git diff --name-only --diff-filter=U | xargs git checkout  ...
    
    # remove empty commits
    # filter-branch --prune-empty is used instead of "git rebase <root>": master
    # is linear and already descends from its own root, so rebase reports
    # "up to date" and rewrites nothing, leaving every empty commit in place.
    # Note that --prune-empty also drops a root commit whose tree is empty,
    # i.e. the initial empty commit of master.
    git filter-branch -f --prune-empty master
    
    ###
    # Move jar files to Git LFS
    ###
    # Only refs/heads/master is included, and only paths matching *.jar.
    git lfs migrate import --include="*.jar" --include-ref=refs/heads/master
    # Expire unreachable reflog entries right away, for all refs ...
    git reflog expire --expire-unreachable=now --all
    # ... and collect garbage, pruning loose objects regardless of their age.
    git gc --prune=now
    

Change History (2)

comment:1 by wiktorn, 7 years ago

This script merges all (?) of our externals into a single git repository, ordering commits by date. Apache Commons Compress still needs some work, because ordering its commits by date does not always give the correct history.

Result of this script is downloadable from: http://gitlab.vink.pl/w/josm-tests.git
Repository has size of: 157MB
LFS objects: 918MB
Full clone (including current version of jar files): 233MB

comment:2 by wiktorn, 7 years ago

Description: modified (diff)

Split into 4 scripts. All of them except the first one can be restarted.

I'll also try to check whether JOSM actually builds from this (e.g. at every 100th commit or so).

Note: See TracTickets for help on using tickets.