Ticket #21881: josm21881_loop_detector_wip.patch

File josm21881_loop_detector_wip.patch, 17.8 KB (added by gaben, 3 years ago)

work in progress patch: the algorithm is correct but insanely slow on a large dataset

  • src/org/openstreetmap/josm/data/validation/tests/LoopDetector.java

     
     1package org.openstreetmap.josm.data.validation.tests;
     2
     3import static org.openstreetmap.josm.gui.MainApplication.getLayerManager;
     4import static org.openstreetmap.josm.tools.I18n.tr;
     5
     6import java.util.ArrayDeque;
     7import java.util.ArrayList;
     8import java.util.Arrays;
     9import java.util.Collection;
     10import java.util.Deque;
     11import java.util.HashMap;
     12import java.util.HashSet;
     13import java.util.Map;
     14import java.util.Set;
     15import java.util.stream.Collectors;
     16
     17import org.openstreetmap.josm.data.osm.IPrimitive;
     18import org.openstreetmap.josm.data.osm.Node;
     19import org.openstreetmap.josm.data.osm.NodeGraph;
     20import org.openstreetmap.josm.data.osm.NodePair;
     21import org.openstreetmap.josm.data.osm.OsmPrimitive;
     22import org.openstreetmap.josm.data.osm.Way;
     23import org.openstreetmap.josm.data.validation.Severity;
     24import org.openstreetmap.josm.data.validation.Test;
     25import org.openstreetmap.josm.data.validation.TestError;
     26import org.openstreetmap.josm.gui.progress.ProgressMonitor;
     27import org.openstreetmap.josm.tools.Logging;
     28
     29
     30/**
     31 * Test for detecting cycles in a directed graph, currently used for waterwoys only.
     32 * The graph consist of OSM dataset ways.
     33 *
     34 * @since xxx
     35 */
     36public class LoopDetector extends Test {
     37    public static final int LOOP_DETECTED = 4100;
     38
     39    /**
     40     * Currently used directional waterways from the OSM wiki
     41     */
     42    private static final Set<String> directionalWaterways = new HashSet<>(
     43            Arrays.asList("river", "stream", "tidal_channel", "drain", "ditch", "fish_pass", "fairway"));
     44
     45    private final Set<Way> visitedWays = new HashSet<>();
     46
     47    public LoopDetector() {
     48        super(tr("Loop detector"), tr("Detects loops in connected directional ways."));
     49    }
     50
     51    @Override
     52    public boolean isPrimitiveUsable(OsmPrimitive p) {
     53        return p.isUsable() && (p instanceof Way) && ((Way) p).getNodesCount() > 1 && p.hasTag("waterway", directionalWaterways);
     54    }
     55
     56    @Override
     57    public void startTest(ProgressMonitor progressMonitor) {
     58        super.startTest(progressMonitor);
     59        setShowElements(true);
     60        // TODO: osm partial selection validator doesn't work
     61
     62        Collection<Collection<Way>> graphs = getGraphs();
     63
     64        // FIXME debug variables
     65        int i =0, g = 0;
     66
     67        for (Collection<Way> graph : graphs) {
     68            NodeGraph nodeGraph = NodeGraph.createDirectedGraphFromWays(graph);
     69            Tarjan tarjan = new Tarjan(nodeGraph);
     70            Logging.debug("Graph looping... " + ++g);
     71            Collection<Collection<Node>> scc = tarjan.getSCC();
     72            for (Collection<Node> possibleCycle : scc) {
     73                // contains a cycle (or loop) if the strongly connected components set size is larger than 1
     74                if (possibleCycle.size() > 1) {
     75                    Logging.debug("Cycle detected! " + ++i);
     76                    errors.add(
     77                            TestError.builder(this, Severity.ERROR, LOOP_DETECTED)
     78                                    .message(tr("Cycle in directional waterway network"))
     79                                    .primitives(possibleCycle)
     80                                    .build()
     81                    );
     82                }
     83            }
     84            progressMonitor.worked(1);
     85        }
     86    }
     87
     88    @Override
     89    public void clear() {
     90        super.clear();
     91        visitedWays.clear();
     92    }
     93
     94    /**
     95     * Returns all directional waterways which connects to at least one other usable way.
     96     *
     97     * @return all directional waterways which connects to at least one other usable way
     98     */
     99    private Collection<Collection<Way>> getGraphs() {
     100        // 70127 waterway
     101        Set<Way> usableWaterways = getLayerManager()
     102                .getActiveDataSet()
     103                .getWays()
     104                .stream()
     105                .filter(this::isPrimitiveUsable)
     106                .collect(Collectors.toSet());
     107
     108        // HashSet doesn't make a difference here
     109        Collection<Collection<Way>> graphs = new ArrayList<>();
     110
     111        for (Way current : usableWaterways) {
     112            Collection<Way> graph = buildGraph(current);
     113
     114            if (!graph.isEmpty())
     115                graphs.add(graph);
     116        }
     117
     118        Logging.debug("All usable waterways " + usableWaterways.size() + ", visited ways " + visitedWays.size() + ", graphs size " + graphs.size());
     119        usableWaterways.removeAll(visitedWays);
     120        Logging.debug(String.format("Ignored ways (%s): %s", usableWaterways.size(),
     121                usableWaterways.stream().map(IPrimitive::getId).collect(Collectors.toList())));
     122
     123        return graphs;
     124    }
     125
     126    /**
     127     * Returns a collection of ways which belongs to the same graph.
     128     *
     129     * @param way starting way to extend the graph from
     130     * @return a collection of ways which belongs to the same graph
     131     */
     132    private Collection<Way> buildGraph(Way way) {
     133        if (visitedWays.contains(way))
     134            return new HashSet<>();
     135
     136        final Set<Way> graph = new HashSet<>();
     137
     138        for (Node node : way.getNodes()) {
     139            final Collection<Way> referrers = node.referrers(Way.class).filter(this::isPrimitiveUsable).collect(Collectors.toSet());
     140            referrers.remove(way);
     141
     142            if (!referrers.isEmpty()) {
     143                visitedWays.add(way);
     144                for (Way referrer : referrers) {
     145                    graph.addAll(buildGraph(referrer));
     146                }
     147            }
     148            graph.addAll(referrers);
     149        }
     150        return graph;
     151    }
     152
     153    /**
     154     * Tarjan's algorithm implementation for JOSM.
     155     *
     156     * @see <a href="https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm">
     157     * Tarjan's strongly connected components algorithm</a>
     158     */
     159    public static class Tarjan {
     160
     161        /**
     162         * Helper class for storing algorithm runtime metadata.
     163         */
     164        private static class TarjanHelper {
     165            private final int index;
     166            private int lowlink;
     167
     168            public TarjanHelper(int index, int lowlink) {
     169                this.index = index;
     170                this.lowlink = lowlink;
     171            }
     172        }
     173
     174        /**
     175         * A simple key-value registry to store visited nodes and its metadata
     176         */
     177        private final Map<Node, TarjanHelper> registry = new HashMap<>();
     178
     179        private int index = 0;
     180        private final Collection<Collection<Node>> scc = new HashSet<>();
     181        private final Deque<Node> stack = new ArrayDeque<>();
     182        private final NodeGraph graph;
     183
     184        public Tarjan(NodeGraph graph) {
     185            this.graph = graph;
     186        }
     187
     188        /**
     189         * Returns the strongly connected components in the current graph.
     190         *
     191         * @return the strongly connected components in the current graph
     192         */
     193        public Collection<Collection<Node>> getSCC() {
     194            for (Node node : graph.getNodes()) {
     195                if (!registry.containsKey(node)) {
     196                    Logging.debug("Started SCC for n" + node.getId());
     197                    strongConnect(node);
     198                    Logging.debug("...done");
     199                }
     200            }
     201            return scc;
     202        }
     203
     204        /**
     205         * Calculates strongly connected components available from the given node.
     206         *
     207         * @param v
     208         */
     209        private void strongConnect(Node v) {
     210            registry.put(v, new TarjanHelper(index, index));
     211            index++;
     212            stack.push(v);
     213
     214            // FIXME: performance issue here, possibly because of constant getSuccessors() call
     215            for (Node w : getSuccessors(v)) {
     216                if (!registry.containsKey(w)) {
     217                    strongConnect(w);
     218                    TarjanHelper vHelper = registry.get(v);
     219                    TarjanHelper wHelper = registry.get(w);
     220                    vHelper.lowlink = Math.min(vHelper.lowlink, wHelper.lowlink);
     221                } else if (stack.contains(w)) {
     222                    TarjanHelper vHelper = registry.get(v);
     223                    TarjanHelper wHelper = registry.get(w);
     224                    vHelper.lowlink = Math.min(vHelper.lowlink, wHelper.index);
     225                }
     226            }
     227
     228            final TarjanHelper vHelper = registry.get(v);
     229            if (vHelper.lowlink == vHelper.index) {
     230                Collection<Node> currentSCC = new HashSet<>();
     231                Node w;
     232                do {
     233                    w = stack.remove();
     234                    currentSCC.add(w);
     235                } while (!w.equals(v));
     236                scc.add(currentSCC);
     237            }
     238        }
     239
     240        /**
     241         * Returns direct successors from the graph of the given node.
     242         *
     243         * @param node a node to start search from
     244         * @return a collection of nodes from the graph which are direct neighbors of the given node
     245         */
     246        private Collection<Node> getSuccessors(Node node) {
     247            final Collection<Node> successors = new HashSet<>();
     248            for (NodePair pair : graph.getEdges()) {
     249                if (pair.getA().equals(node)) {
     250                    successors.add(pair.getB());
     251                }
     252            }
     253            return successors;
     254        }
     255    }
     256}
  • src/org/openstreetmap/josm/data/validation/OsmValidator.java

     
    5353import org.openstreetmap.josm.data.validation.tests.InternetTags;
    5454import org.openstreetmap.josm.data.validation.tests.Lanes;
    5555import org.openstreetmap.josm.data.validation.tests.LongSegment;
     56import org.openstreetmap.josm.data.validation.tests.LoopDetector;
    5657import org.openstreetmap.josm.data.validation.tests.MapCSSTagChecker;
    5758import org.openstreetmap.josm.data.validation.tests.MultipolygonTest;
    5859import org.openstreetmap.josm.data.validation.tests.NameMismatch;
     
    154155        SharpAngles.class, // 3800 .. 3899
    155156        ConnectivityRelations.class, // 3900 .. 3999
    156157        DirectionNodes.class, // 4000-4099
     158        LoopDetector.class, // 4100-4199
    157159    };
    158160
    159161    /**
     
    365367    public static JTree buildJTreeList() {
    366368        DefaultMutableTreeNode root = new DefaultMutableTreeNode(tr("Ignore list"));
    367369        final Pattern elemId1Pattern = Pattern.compile(":([rwn])_");
    368         final Pattern elemId2Pattern = Pattern.compile("^[0-9]+$");
     370        final Pattern elemId2Pattern = Pattern.compile("^\\d+$");
    369371        for (Entry<String, String> e: ignoredErrors.entrySet()) {
    370372            String key = e.getKey();
    371373            // key starts with a code, it maybe followed by a string (eg. a MapCSS rule) and
     
    457459                }
    458460                if (tr("Ignore list").equals(description))
    459461                    description = "";
    460                 if (!key.matches("^[0-9]+(_.*|$)")) {
     462                if (!key.matches("^\\d+(_.*|$)")) {
    461463                    description = key;
    462464                    key = "";
    463465                }
     
    470472                } else if (item.matches("^([rwn])_.*")) {
    471473                    // single element
    472474                    entry = key + ":" + item;
    473                 } else if (item.matches("^[0-9]+(_.*|)$")) {
     475                } else if (item.matches("^\\d+(_.*|)$")) {
    474476                    // no element ids
    475477                    entry = item;
    476478                }
  • src/org/openstreetmap/josm/data/osm/NodeGraph.java

     
    3232     * Builds a list of pair of nodes from the given way.
    3333     * @param way way
    3434     * @param directed if {@code true} each pair of nodes will occur once, in the way nodes order.
    35      *                 if {@code false} each pair of nodes will occur twice (the pair and its inversed copy)
     35     *                 if {@code false} each pair of nodes will occur twice (the pair and its inverse copy)
    3636     * @return a list of pair of nodes from the given way
    3737     */
    3838    public static List<NodePair> buildNodePairs(Way way, boolean directed) {
    3939        List<NodePair> pairs = new ArrayList<>();
    40         for (Pair<Node, Node> pair: way.getNodePairs(false /* don't sort */)) {
     40        for (Pair<Node, Node> pair: way.getNodePairs(false)) {
    4141            pairs.add(new NodePair(pair));
    4242            if (!directed) {
    4343                pairs.add(new NodePair(pair).swap());
     
    5050     * Builds a list of pair of nodes from the given ways.
    5151     * @param ways ways
    5252     * @param directed if {@code true} each pair of nodes will occur once, in the way nodes order.
    53      *                 if {@code false} each pair of nodes will occur twice (the pair and its inversed copy)
     53     *                 if {@code false} each pair of nodes will occur twice (the pair and its inverse copy)
    5454     * @return a list of pair of nodes from the given ways
    5555     */
    5656    public static List<NodePair> buildNodePairs(List<Way> ways, boolean directed) {
     
    6262    }
    6363
    6464    /**
    65      * Builds a new list of pair nodes without the duplicated pairs (including inversed copies).
     65     * Builds a new list of pair nodes without the duplicated pairs (including inverse copies).
    6666     * @param pairs existing list of pairs
    6767     * @return a new list of pair nodes without the duplicated pairs
    6868     */
     
    7676        return cleaned;
    7777    }
    7878
     79    /**
     80     * Create a directed graph from the given node pairs.
     81     * @param pairs Node pairs to build the graph from
     82     * @return node graph structure
     83     */
    7984    public static NodeGraph createDirectedGraphFromNodePairs(List<NodePair> pairs) {
    8085        NodeGraph graph = new NodeGraph();
    8186        for (NodePair pair: pairs) {
     
    8489        return graph;
    8590    }
    8691
     92    /**
     93     * Create a directed graph from the given ways.
     94     * @param ways ways to build the graph from
     95     * @return node graph structure
     96     */
    8797    public static NodeGraph createDirectedGraphFromWays(Collection<Way> ways) {
    8898        NodeGraph graph = new NodeGraph();
    8999        for (Way w: ways) {
    90             graph.add(buildNodePairs(w, true /* directed */));
     100            graph.add(buildNodePairs(w, true));
    91101        }
    92102        return graph;
    93103    }
     
    116126    public static NodeGraph createUndirectedGraphFromNodeWays(Collection<Way> ways) {
    117127        NodeGraph graph = new NodeGraph();
    118128        for (Way w: ways) {
    119             graph.add(buildNodePairs(w, false /* undirected */));
     129            graph.add(buildNodePairs(w, false));
    120130        }
    121131        return graph;
    122132    }
     
    130140                graph.add(buildNodePairs(w, dir));
    131141                dir = false;
    132142            } else {
    133                 graph.add(buildNodePairs(w, false /* undirected */));
     143                graph.add(buildNodePairs(w, false));
    134144            }
    135145        }
    136146        return graph;
    137147    }
    138148
    139     private final Set<NodePair> edges;
    140     private int numUndirectedEges;
     149    public Set<NodePair> getEdges() {
     150        return edges;
     151    }
     152    private int numUndirectedEdges;
     153
    141154    /** counts the number of edges that were added */
    142155    private int addedEdges;
     156    private final Set<NodePair> edges;
    143157    private final Map<Node, List<NodePair>> successors = new LinkedHashMap<>();
    144158    private final Map<Node, List<NodePair>> predecessors = new LinkedHashMap<>();
    145159
     
    181195            rememberSuccessor(pair);
    182196            rememberPredecessors(pair);
    183197        }
    184         numUndirectedEges = undirectedEdges.size();
     198        numUndirectedEdges = undirectedEdges.size();
    185199    }
    186200
    187201    /**
     
    202216
    203217    /**
    204218     * Add a list of node pairs.
    205      * @param pairs list of node pairs
     219     * @param pairs collection of node pairs
    206220     */
    207221    public void add(Collection<NodePair> pairs) {
    208222        for (NodePair pair: pairs) {
     
    229243        return Optional.ofNullable(successors.get(node)).orElseGet(Collections::emptyList);
    230244    }
    231245
    232     protected Set<Node> getNodes() {
     246    public Set<Node> getNodes() {
    233247        Set<Node> nodes = new LinkedHashSet<>(2 * edges.size());
    234248        for (NodePair pair: edges) {
    235249            nodes.add(pair.getA());
     
    239253    }
    240254
    241255    protected boolean isSpanningWay(Collection<NodePair> way) {
    242         return numUndirectedEges == way.size();
     256        return numUndirectedEdges == way.size();
    243257    }
    244258
    245259    protected List<Node> buildPathFromNodePairs(Deque<NodePair> path) {
     
    287301     */
    288302    public List<Node> buildSpanningPath() {
    289303        prepare();
    290         if (numUndirectedEges > 0 && isConnected()) {
     304        if (numUndirectedEdges > 0 && isConnected()) {
    291305            // try to find a path from each "terminal node", i.e. from a
    292306            // node which is connected by exactly one undirected edges (or
    293307            // two directed edges in opposite direction) to the graph. A
     
    324338
    325339    /**
    326340     * Find out if the graph is connected.
    327      * @return true if it is connected.
     341     * @return {@code true} if it is connected
    328342     */
    329343    private boolean isConnected() {
    330344        Set<Node> nodes = getNodes();
     
    350364
    351365    /**
    352366     * Sort the nodes by number of appearances in the edges.
    353      * @return set of nodes which can be start nodes in a spanning way.
     367     * @return set of nodes which can be start nodes in a spanning way
    354368     */
    355369    private Set<Node> getMostFrequentVisitedNodesFirst() {
    356370        if (edges.isEmpty())