source: josm/trunk/resources/data/validator/wikipedia.mapcss@ 17933

Last change on this file since 17933 was 16861, checked in by Klumbumbus, 4 years ago

see #11541, see #19655 - Add wikidata textbox below every wikipedia textbox in all internal presets and warn if a wikipedia value looks like a wikidata value

  • Property svn:eol-style set to native
File size: 9.1 KB
Line 
1/* validation for the wikipedia=* tag - see tickets #8383, #14425, #18251 */
2
3/* A wikipedia value must start with a language prefix: 2 to 12 letters or hyphens (matched case-insensitively) followed by a colon. If there is no such prefix at all, this is broken. Also catches 'wikipedia' used as 'email', 'website', 'ele' [sic!] ... */
4*[wikipedia][wikipedia !~ /(?i)^[-a-z]{2,12}:/] {
5 throwError: tr("wikipedia tag has no language given, use ''wikipedia''=''language:page title''");
6 assertMatch: "node wikipedia=Foobar";
7 assertNoMatch: "node wikipedia=en:Foobar";
8 assertNoMatch: "node wikipedia=en-GB:Foobar";
9}
10
11/* Valid languages are extracted from <https://www.wikidata.org/w/api.php?action=sitematrix&formatversion=2>, which may change, so this is a warning only. Values that look like URLs are skipped here. be-tarask is listed because the be-x-old rule in this file rewrites values to that prefix; without it the auto-fixed value would be reported as unknown. The list is matched case-sensitively. */
12*[wikipedia =~ /(?i)^[-a-z]{2,12}:/][wikipedia !~ /^https?:\/\//][wikipedia !~ /^(ab|ace|ady|af|ak|als|am|an|ang|ar|arc|arz|as|ast|atj|av|ay|az|azb|ba|ban|bar|bat-smg|bcl|be|be-tarask|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|da|de|din|diq|dsb|dty|dv|dz|ee|el|eml|en|eo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gcr|gd|gl|glk|gn|gom|gor|got|gu|gv|ha|hak|haw|he|hi|hif|hr|hsb|ht|hu|hy|hyw|ia|id|ie|ig|ik|ilo|inh|io|is|it|iu|ja|jam|jbo|jv|ka|kaa|kab|kbd|kbp|kg|ki|kk|kl|km|kn|ko|koi|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lfn|lg|li|lij|lmo|ln|lo|lrc|lt|ltg|lv|mai|map-bms|mdf|mg|mhr|mi|min|mk|ml|mn|mnw|mr|mrj|ms|mt|mwl|my|myv|mzn|na|nah|nap|nds|nds-nl|ne|new|nl|nn|no|nov|nqo|nrm|nso|nv|ny|oc|olo|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rw|sa|sah|sat|sc|scn|sco|sd|se|sg|sh|shn|si|simple|sk|sl|sm|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|szy|ta|tcy|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|ts|tt|tum|tw|ty|tyv|udm|ug|uk|ur|uz|ve|vec|vep|vi|vls|vo|wa|war|wo|wuu|xal|xh|xmf|yi|yo|za|zea|zh|zh-classical|zh-min-nan|zh-yue|zu):/] {
13 throwWarning: tr("wikipedia tag has an unknown language prefix");
14 assertMatch: "node wikipedia=X-Y-Z:Foobar";
15 assertNoMatch: "node wikipedia=en:Foobar";
 assertNoMatch: "node wikipedia=be-tarask:Foobar";
16}
17
/* A full http(s) URL as the value, either bare or placed after a language prefix, is reported as deprecated tagging. Only an alternative format is suggested; no automatic fix is defined. */
18*[wikipedia =~ /^https?:\/\//],
19*[wikipedia =~ /(?i)^[-a-z]{2,12}:https?:\/\//] {
20 throwWarning: tr("wikipedia tag format is deprecated");
21 suggestAlternative: tr("''wikipedia''=''language:page title''");
22 group: tr("deprecated tagging");
23 assertMatch: "node wikipedia=http://en.wikipedia.org/wiki/OpenStreetMap";
24 assertNoMatch: "node wikipedia=en:OpenStreetMap";
25}
26
/* The be-x-old prefix is reported as obsolete. The fix swaps it for be-tarask and keeps the rest of the value (capture group 1) unchanged. The pattern is anchored at the start, so abe-x-old: does not match. */
27*[wikipedia =~ /^be-x-old:/] {
28 throwWarning: tr("wikipedia ''{0}'' language is obsolete, use ''{1}'' instead", "be-x-old", "be-tarask");
29 fixAdd: concat("wikipedia=be-tarask:", get(regexp_match("^be-x-old:(.+)$", tag("wikipedia")),1));
30 assertMatch: "node wikipedia=be-x-old:foo";
31 assertNoMatch: "node wikipedia=abe-x-old:foo";
32}
33
/* The cz prefix is reported as invalid. The fix swaps it for cs and keeps the rest of the value (capture group 1) unchanged. Only a leading cz: matches, so en:cz:foo is left alone. */
34*[wikipedia =~ /^cz:/] {
35 throwWarning: tr("wikipedia ''{0}'' language is invalid, use ''{1}'' instead", "cz", "cs");
36 fixAdd: concat("wikipedia=cs:", get(regexp_match("^cz:(.+)$", tag("wikipedia")),1));
37 assertMatch: "node wikipedia=cz:foo";
38 assertNoMatch: "node wikipedia=en:cz:foo";
39}
40
/* Percent-encoded sequences in wikimedia_commons: a % followed by two hex digits. This pattern is case-sensitive, so only the digits 0-9 and upper-case A-F match. The fix URL-decodes the whole value, replaces underscores with spaces and trims the result. */
41*[wikimedia_commons =~ /%[0-9A-F][0-9A-F]/] {
42 throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.key}");
43 fixAdd: concat("wikimedia_commons=", trim(replace(URL_decode(tag("wikimedia_commons")), "_", " ")));
44 assertMatch: "node wikimedia_commons=File:Foo%27s";
45 assertNoMatch: "node wikimedia_commons=File:Foo";
46}
47
/* Percent-encoded sequences after the language prefix of a wikipedia value (matched case-insensitively). The fix keeps the prefix including its colon (group 1) as is; only the title (group 2) is URL-decoded, has underscores replaced with spaces and is trimmed. */
48*[wikipedia =~ /(?i)^[-a-z]{2,12}:.*%[0-9A-F][0-9A-F]/] {
49 throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.key}");
50 fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(replace(URL_decode(get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),2)), "_", " ")));
51 assertMatch: "node wikipedia=en:Foo%27s";
52 assertNoMatch: "node wikipedia=en:Foo";
53}
54
/* Percent-encoded sequences in the value of any key of the form wikipedia:<lang> (lower-case letters or hyphens, 2 to 12 characters). This rule only reports; the automatic fix is kept commented out below for the reason given there. */
55*[/^wikipedia:[-a-z]{2,12}$/][/^wikipedia:[-a-z]{2,12}$/ =~ /(?i).*%[0-9A-F][0-9A-F]/] {
56 throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.key}");
57 /* fixAdd: concat("{0.key}", "=", get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("{0.key}")),1), trim(replace(URL_decode(get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("{0.key}")),2)), "_", " "))); */
58 /* tag("{0.key}") is not yet supported */
59 assertMatch: "node wikipedia:de=Foo%27s";
60 assertNoMatch: "node wikipedia:de=Foo";
61}
62
/* A space directly after the colon of the language prefix. The fix joins the prefix (group 1) with the trimmed remainder (group 2). */
63*[wikipedia =~ /(?i)^[-a-z]{2,12}: /] {
64 throwWarning: tr("wikipedia title should not start with a space after language code");
65 fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),2)));
66 assertMatch: "node wikipedia=en: foo";
67 assertNoMatch: "node wikipedia=en:foo";
68}
69
/* A title that starts with the URL path fragment wiki/ right after the language prefix. The fix drops that fragment: it joins the prefix (group 1) with the trimmed text that follows wiki/ (group 2). */
70*[wikipedia =~ /(?i)^[-a-z]{2,12}:wiki\//] {
71 throwWarning: tr("wikipedia title should not have ''{0}'' prefix", "wiki/");
72 fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),1), trim(get(regexp_match("(?i)^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),2)));
73 assertMatch: "node wikipedia=en:wiki/foo";
74 assertNoMatch: "node wikipedia=en:foo";
75}
76
77/* All wikipedias except "jbo" automatically capitalize the first letter of the page title, so a title whose first character is a lower-case letter (\p{Ll}, any script) is flagged.
78 For the latest list, see <https://noc.wikimedia.org/conf/highlight.php?file=InitialiseSettings.php>
79 and look for the 'wgCapitalLinks' setting. Values with a URL after the prefix are excluded. The fix upper-cases only the first title character (group 2) and keeps the prefix (group 1) and the rest (group 3) as they are. */
80*[wikipedia =~ /^[-a-zA-Z]{2,12}:\p{Ll}/][wikipedia !~ /^jbo:/][wikipedia !~ /(?i)^[-a-z]{2,12}:https?:/] {
81 throwWarning: tr("wikipedia page title should have first letter capitalized");
82 fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),1), upper(get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),2)), get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),3));
83 assertMatch: "node wikipedia=en:foo";
84 assertNoMatch: "node wikipedia=en:Foo";
85 assertMatch: "node wikipedia=ru:абв";
86 assertNoMatch: "node wikipedia=ru:Абв";
87}
88
/* An underscore anywhere after the language prefix; values with a URL after the prefix are excluded. The fix keeps the prefix (group 1), replaces every underscore in the title (group 2) with a space and trims the result. */
89*[wikipedia =~ /(?i)^[-a-z]{2,12}:.*_/][wikipedia !~ /(?i)^[-a-z]{2,12}:https?:/] {
90 throwWarning: tr("wikipedia page title should have spaces instead of underscores (''_''→'' '')");
91 fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),1), trim(replace(get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),2), "_", " ")));
92 assertMatch: "node wikipedia=en:foo_bar";
93 assertNoMatch: "node wikipedia=en:foo bar";
94}
95
/* Values that start with one of the listed pairs of language-like prefixes, either the same code twice or two different codes. The fix drops the first prefix and keeps the second one (group 2) followed by the trimmed title (group 3), e.g. en:fr:Foo becomes fr:Foo. */
96*[wikipedia ^= "da:da:"],
97*[wikipedia ^= "da:dk:"],
98*[wikipedia ^= "de:de:"],
99*[wikipedia ^= "dk:dk:"],
100*[wikipedia ^= "en:de:"],
101*[wikipedia ^= "en:en:"],
102*[wikipedia ^= "en:es:"],
103*[wikipedia ^= "en:eu:"],
104*[wikipedia ^= "en:fr:"],
105*[wikipedia ^= "en:ja:"],
106*[wikipedia ^= "en:pl:"],
107*[wikipedia ^= "en:pt:"],
108*[wikipedia ^= "en:zh:"],
109*[wikipedia ^= "es:es:"],
110*[wikipedia ^= "eu:eu:"],
111*[wikipedia ^= "fr:fr:"],
112*[wikipedia ^= "ja:ja:"],
113*[wikipedia ^= "pl:en:"],
114*[wikipedia ^= "pl:pl:"],
115*[wikipedia ^= "pt:pt:"],
116*[wikipedia ^= "ru:fr:"],
117*[wikipedia ^= "ru:ru:"],
118*[wikipedia ^= "zh:zh:"] {
119 throwWarning: tr("wikipedia language seems to be duplicated, e.g. en:en:Foo");
120 fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),2), trim(get(regexp_match("(?i)^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),3)));
121 assertMatch: "node wikipedia=en:en:Foo";
122 assertMatch: "node wikipedia=en:fr:Foo";
123 assertNoMatch: "node wikipedia=en:Bar";
124}
125
126/* Detect invalid wikidata tags: the value must be an upper-case Q followed by 1 to 9 digits, the first of which is not 0 */
127*[wikidata][wikidata !~ /^Q[1-9][0-9]{0,8}$/] {
128 throwError: tr("wikidata tag must be in Qnnnn format, where n is a digit");
129 assertMatch: "node wikidata=a";
130 assertMatch: "node wikidata=Q";
131 assertMatch: "node wikidata=Q0";
132 assertMatch: "node wikidata=Q0123";
133 assertNoMatch: "node wikidata=Q123";
134 assertNoMatch: "node wikidata=Q1";
135}
136
137/* Wikipedia without wikidata: raised with throwOther (not as a warning or error) and grouped under "missing tag"; no automatic fix is defined */
138*[wikipedia][!wikidata] {
139 throwOther: tr("wikipedia tag is set, but there is no wikidata tag. Wikipedia plugin might help with wikidata id lookups");
140 group: tr("missing tag");
141 assertMatch: "node wikipedia=a";
142 assertNoMatch: "node wikipedia=a wikidata=Q123";
143 assertNoMatch: "node wikidata=Q1";
144 assertNoMatch: "node foo=bar";
145}
146
147/* Detect a wikidata value wrongly placed in the wikipedia key: a language prefix followed by nothing but a Q-id (Q plus 1 to 9 digits, no leading 0). Not 100% safe, as there might be wikipedia articles matching the regexp; therefore this is a warning only and there is no fixChangeKey */
148*[wikipedia][wikipedia =~ /^[-a-zA-Z]{2,12}:Q[1-9][0-9]{0,8}$/] {
149 throwWarning: tr("{0} value looks like a {1} value", "{0.key}", "wikidata");
150 assertNoMatch: "node wikipedia=a";
151 assertNoMatch: "node wikipedia=de:a";
152 assertNoMatch: "node wikipedia=de:Q";
153 assertNoMatch: "node wikipedia=de:Q0";
154 assertNoMatch: "node wikipedia=de:Q0123";
155 assertNoMatch: "node wikipedia=en-GB:Q0123";
156 assertMatch: "node wikipedia=de:Q123";
157 assertMatch: "node wikipedia=de:Q1";
158 assertMatch: "node wikipedia=en-GB:Q123";
159 assertMatch: "node wikipedia=en-GB:Q1";
160}
161
162/* Wikipedia:lang without wikipedia: a key starting with "wikipedia:" is present but there is no plain wikipedia key. In the message, {1.key} stands for the key of the second condition (the wikipedia:... key) and {0.key} for the key of the first one (wikipedia) */
163*[!wikipedia][/^wikipedia:/] {
164 throwWarning: tr("''{0}'' tag is set, but no ''{1}'' tag. Make sure to set ''wikipedia=language:value'' for the main article and optional ''wikipedia:language=value'' only for additional articles that are not just other language variants of the main article.", "{1.key}", "{0.key}");
165 assertMatch: "node wikipedia:en=a";
166 assertNoMatch: "node wikipedia=a wikipedia:en=b";
167 assertNoMatch: "node wikipedia=Foo";
168}
Note: See TracBrowser for help on using the repository browser.