1 | // License: GPL. For details, see LICENSE file.
|
---|
2 | package org.openstreetmap.josm.tools;
|
---|
3 |
|
---|
4 | import java.io.IOException;
|
---|
5 | import java.io.InputStream;
|
---|
6 | import java.net.URL;
|
---|
7 | import java.util.List;
|
---|
8 | import java.util.Optional;
|
---|
9 | import java.util.function.BiConsumer;
|
---|
10 | import java.util.stream.Collectors;
|
---|
11 |
|
---|
12 | import javax.xml.parsers.ParserConfigurationException;
|
---|
13 | import javax.xml.xpath.XPath;
|
---|
14 | import javax.xml.xpath.XPathConstants;
|
---|
15 | import javax.xml.xpath.XPathExpressionException;
|
---|
16 | import javax.xml.xpath.XPathFactory;
|
---|
17 |
|
---|
18 | import org.openstreetmap.josm.data.Bounds;
|
---|
19 | import org.openstreetmap.josm.data.coor.LatLon;
|
---|
20 | import org.w3c.dom.Document;
|
---|
21 | import org.w3c.dom.NamedNodeMap;
|
---|
22 | import org.w3c.dom.Node;
|
---|
23 | import org.w3c.dom.NodeList;
|
---|
24 | import org.xml.sax.SAXException;
|
---|
25 |
|
---|
26 | /**
|
---|
27 | * Interaction with Mediawiki instances, such as the OSM wiki.
|
---|
28 | * @since 14641
|
---|
29 | */
|
---|
30 | public class Mediawiki {
|
---|
31 |
|
---|
32 | private final String baseUrl;
|
---|
33 |
|
---|
34 | /**
|
---|
35 | * Constructs a new {@code Mediawiki} for the given base URL.
|
---|
36 | * @param baseUrl The wiki base URL
|
---|
37 | */
|
---|
38 | public Mediawiki(String baseUrl) {
|
---|
39 | this.baseUrl = baseUrl;
|
---|
40 | }
|
---|
41 |
|
---|
42 | /**
|
---|
43 | * Determines which page exists on the Mediawiki instance.
|
---|
44 | * @param pages the pages to check
|
---|
45 | * @return the first existing page
|
---|
46 | * @throws IOException if any I/O error occurs
|
---|
47 | * @throws ParserConfigurationException if a parser cannot be created
|
---|
48 | * @throws SAXException if any XML error occurs
|
---|
49 | * @throws XPathExpressionException if any error in an XPath expression occurs
|
---|
50 | */
|
---|
51 | public Optional<String> findExistingPage(List<String> pages)
|
---|
52 | throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
|
---|
53 | List<String> distinctPages = pages.stream().distinct().collect(Collectors.toList());
|
---|
54 | // find a page that actually exists in the wiki
|
---|
55 | // API documentation: https://wiki.openstreetmap.org/w/api.php?action=help&modules=query
|
---|
56 | final URL url = new URL(baseUrl + "/w/api.php?action=query&format=xml&titles=" + distinctPages.stream()
|
---|
57 | .map(Utils::encodeUrl)
|
---|
58 | .collect(Collectors.joining(Utils.encodeUrl("|")))
|
---|
59 | );
|
---|
60 | final Document document = getDocument(url);
|
---|
61 | final XPath xPath = XPathFactory.newInstance().newXPath();
|
---|
62 | for (String page : distinctPages) {
|
---|
63 | String normalized = xPath.evaluate("/api/query/normalized/n[@from='" + page + "']/@to", document);
|
---|
64 | if (Utils.isEmpty(normalized)) {
|
---|
65 | normalized = page;
|
---|
66 | }
|
---|
67 | final Node node = (Node) xPath.evaluate("/api/query/pages/page[@title='" + normalized + "']", document, XPathConstants.NODE);
|
---|
68 | if (node != null
|
---|
69 | && node.getAttributes().getNamedItem("missing") == null
|
---|
70 | && node.getAttributes().getNamedItem("invalid") == null) {
|
---|
71 | return Optional.of(page);
|
---|
72 | }
|
---|
73 | }
|
---|
74 | return Optional.empty();
|
---|
75 | }
|
---|
76 |
|
---|
77 | private Document getDocument(URL url) throws IOException, ParserConfigurationException, SAXException {
|
---|
78 | final HttpClient.Response conn = HttpClient.create(url).connect();
|
---|
79 | try (InputStream content = conn.getContent()) {
|
---|
80 | return XmlUtils.parseSafeDOM(content);
|
---|
81 | } finally {
|
---|
82 | conn.disconnect();
|
---|
83 | }
|
---|
84 | }
|
---|
85 |
|
---|
86 | /**
|
---|
87 | * Searches geocoded images from <a href="https://commons.wikimedia.org/">Wikimedia Commons</a> for the given bounding box.
|
---|
88 | * @param bounds the bounds to load
|
---|
89 | * @param imageConsumer a consumer to receive the file title and the coordinates for every geocoded image
|
---|
90 | * @throws IOException if any I/O error occurs
|
---|
91 | * @throws ParserConfigurationException if a parser cannot be created
|
---|
92 | * @throws SAXException if any XML error occurs
|
---|
93 | * @throws XPathExpressionException if any error in an XPath expression occurs
|
---|
94 | */
|
---|
95 | public void searchGeoImages(Bounds bounds, BiConsumer<String, LatLon> imageConsumer)
|
---|
96 | throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
|
---|
97 | final URL url = new URL(getGeoImagesUrl(baseUrl, bounds));
|
---|
98 | final Document document = getDocument(url);
|
---|
99 | final XPath xPath = XPathFactory.newInstance().newXPath();
|
---|
100 | NodeList nodes = (NodeList) xPath.evaluate("/api/query/geosearch/gs", document, XPathConstants.NODESET);
|
---|
101 | for (int i = 0; i < nodes.getLength(); i++) {
|
---|
102 | NamedNodeMap attributes = nodes.item(i).getAttributes();
|
---|
103 | String title = attributes.getNamedItem("title").getNodeValue();
|
---|
104 | double lat = Double.parseDouble(attributes.getNamedItem("lat").getNodeValue());
|
---|
105 | double lon = Double.parseDouble(attributes.getNamedItem("lon").getNodeValue());
|
---|
106 | imageConsumer.accept(title, new LatLon(lat, lon));
|
---|
107 | }
|
---|
108 | }
|
---|
109 |
|
---|
110 | /**
|
---|
111 | * Returns the URL for searching geolocated images in given bounds.
|
---|
112 | * @param baseUrl The wiki base URL
|
---|
113 | * @param bounds the bounds of the search area
|
---|
114 | * @return the URL for searching geolocated images in given bounds
|
---|
115 | * @since 18046
|
---|
116 | */
|
---|
117 | public static String getGeoImagesUrl(String baseUrl, Bounds bounds) {
|
---|
118 | String sep = Utils.encodeUrl("|");
|
---|
119 | return baseUrl +
|
---|
120 | "?format=xml" +
|
---|
121 | "&action=query" +
|
---|
122 | "&list=geosearch" +
|
---|
123 | "&gsnamespace=6" +
|
---|
124 | "&gslimit=500" +
|
---|
125 | "&gsprop=type" + sep + "name" +
|
---|
126 | "&gsbbox=" + bounds.getMaxLat() + sep + bounds.getMinLon() + sep + bounds.getMinLat() + sep + bounds.getMaxLon();
|
---|
127 | }
|
---|
128 |
|
---|
129 | /**
|
---|
130 | * Computes the URL for the given filename on the MediaWiki server
|
---|
131 | * @param fileBaseUrl the base URL of the file MediaWiki storage, such as {@code "https://upload.wikimedia.org/wikipedia/commons/"}
|
---|
132 | * @param filename the filename
|
---|
133 | * @return the URL for the given filename on the MediaWiki server
|
---|
134 | * @see <a href="https://www.mediawiki.org/wiki/Manual:$wgHashedUploadDirectory">MediaWiki $wgHashedUploadDirectory</a>
|
---|
135 | */
|
---|
136 | public static String getImageUrl(String fileBaseUrl, String filename) {
|
---|
137 | final String md5 = Utils.md5Hex(filename);
|
---|
138 | return String.join("/", Utils.strip(fileBaseUrl, "/"), md5.substring(0, 1), md5.substring(0, 2), filename);
|
---|
139 | }
|
---|
140 | }
|
---|