source: josm/trunk/src/org/openstreetmap/josm/tools/Mediawiki.java@ 18208

Last change on this file since 18208 was 18208, checked in by Don-vip, 3 years ago

global use of Utils.isEmpty/isBlank

File size: 6.1 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4import java.io.IOException;
5import java.io.InputStream;
6import java.net.URL;
7import java.util.List;
8import java.util.Optional;
9import java.util.function.BiConsumer;
10import java.util.stream.Collectors;
11
12import javax.xml.parsers.ParserConfigurationException;
13import javax.xml.xpath.XPath;
14import javax.xml.xpath.XPathConstants;
15import javax.xml.xpath.XPathExpressionException;
16import javax.xml.xpath.XPathFactory;
17
18import org.openstreetmap.josm.data.Bounds;
19import org.openstreetmap.josm.data.coor.LatLon;
20import org.w3c.dom.Document;
21import org.w3c.dom.NamedNodeMap;
22import org.w3c.dom.Node;
23import org.w3c.dom.NodeList;
24import org.xml.sax.SAXException;
25
26/**
27 * Interaction with Mediawiki instances, such as the OSM wiki.
28 * @since 14641
29 */
30public class Mediawiki {
31
32 private final String baseUrl;
33
34 /**
35 * Constructs a new {@code Mediawiki} for the given base URL.
36 * @param baseUrl The wiki base URL
37 */
38 public Mediawiki(String baseUrl) {
39 this.baseUrl = baseUrl;
40 }
41
42 /**
43 * Determines which page exists on the Mediawiki instance.
44 * @param pages the pages to check
45 * @return the first existing page
46 * @throws IOException if any I/O error occurs
47 * @throws ParserConfigurationException if a parser cannot be created
48 * @throws SAXException if any XML error occurs
49 * @throws XPathExpressionException if any error in an XPath expression occurs
50 */
51 public Optional<String> findExistingPage(List<String> pages)
52 throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
53 List<String> distinctPages = pages.stream().distinct().collect(Collectors.toList());
54 // find a page that actually exists in the wiki
55 // API documentation: https://wiki.openstreetmap.org/w/api.php?action=help&modules=query
56 final URL url = new URL(baseUrl + "/w/api.php?action=query&format=xml&titles=" + distinctPages.stream()
57 .map(Utils::encodeUrl)
58 .collect(Collectors.joining(Utils.encodeUrl("|")))
59 );
60 final Document document = getDocument(url);
61 final XPath xPath = XPathFactory.newInstance().newXPath();
62 for (String page : distinctPages) {
63 String normalized = xPath.evaluate("/api/query/normalized/n[@from='" + page + "']/@to", document);
64 if (Utils.isEmpty(normalized)) {
65 normalized = page;
66 }
67 final Node node = (Node) xPath.evaluate("/api/query/pages/page[@title='" + normalized + "']", document, XPathConstants.NODE);
68 if (node != null
69 && node.getAttributes().getNamedItem("missing") == null
70 && node.getAttributes().getNamedItem("invalid") == null) {
71 return Optional.of(page);
72 }
73 }
74 return Optional.empty();
75 }
76
77 private Document getDocument(URL url) throws IOException, ParserConfigurationException, SAXException {
78 final HttpClient.Response conn = HttpClient.create(url).connect();
79 try (InputStream content = conn.getContent()) {
80 return XmlUtils.parseSafeDOM(content);
81 } finally {
82 conn.disconnect();
83 }
84 }
85
86 /**
87 * Searches geocoded images from <a href="https://commons.wikimedia.org/">Wikimedia Commons</a> for the given bounding box.
88 * @param bounds the bounds to load
89 * @param imageConsumer a consumer to receive the file title and the coordinates for every geocoded image
90 * @throws IOException if any I/O error occurs
91 * @throws ParserConfigurationException if a parser cannot be created
92 * @throws SAXException if any XML error occurs
93 * @throws XPathExpressionException if any error in an XPath expression occurs
94 */
95 public void searchGeoImages(Bounds bounds, BiConsumer<String, LatLon> imageConsumer)
96 throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
97 final URL url = new URL(getGeoImagesUrl(baseUrl, bounds));
98 final Document document = getDocument(url);
99 final XPath xPath = XPathFactory.newInstance().newXPath();
100 NodeList nodes = (NodeList) xPath.evaluate("/api/query/geosearch/gs", document, XPathConstants.NODESET);
101 for (int i = 0; i < nodes.getLength(); i++) {
102 NamedNodeMap attributes = nodes.item(i).getAttributes();
103 String title = attributes.getNamedItem("title").getNodeValue();
104 double lat = Double.parseDouble(attributes.getNamedItem("lat").getNodeValue());
105 double lon = Double.parseDouble(attributes.getNamedItem("lon").getNodeValue());
106 imageConsumer.accept(title, new LatLon(lat, lon));
107 }
108 }
109
110 /**
111 * Returns the URL for searching geolocated images in given bounds.
112 * @param baseUrl The wiki base URL
113 * @param bounds the bounds of the search area
114 * @return the URL for searching geolocated images in given bounds
115 * @since 18046
116 */
117 public static String getGeoImagesUrl(String baseUrl, Bounds bounds) {
118 String sep = Utils.encodeUrl("|");
119 return baseUrl +
120 "?format=xml" +
121 "&action=query" +
122 "&list=geosearch" +
123 "&gsnamespace=6" +
124 "&gslimit=500" +
125 "&gsprop=type" + sep + "name" +
126 "&gsbbox=" + bounds.getMaxLat() + sep + bounds.getMinLon() + sep + bounds.getMinLat() + sep + bounds.getMaxLon();
127 }
128
129 /**
130 * Computes the URL for the given filename on the MediaWiki server
131 * @param fileBaseUrl the base URL of the file MediaWiki storage, such as {@code "https://upload.wikimedia.org/wikipedia/commons/"}
132 * @param filename the filename
133 * @return the URL for the given filename on the MediaWiki server
134 * @see <a href="https://www.mediawiki.org/wiki/Manual:$wgHashedUploadDirectory">MediaWiki $wgHashedUploadDirectory</a>
135 */
136 public static String getImageUrl(String fileBaseUrl, String filename) {
137 final String md5 = Utils.md5Hex(filename);
138 return String.join("/", Utils.strip(fileBaseUrl, "/"), md5.substring(0, 1), md5.substring(0, 2), filename);
139 }
140}
Note: See TracBrowser for help on using the repository browser.