1 | /* validation for the wikipedia=* tag - see tickets #8383, #14425 */ |
---|
2 | |
---|
/* A wikipedia value without any "language:" prefix is broken.
   This also catches 'wikipedia' being used like 'email', 'website', 'ele' [sic!] ... */
*[wikipedia][wikipedia !~ /(?i)^[-a-z]{2,12}:/] {
  throwError: tr("wikipedia tag has no language given, use ''wikipedia''=''language:page title''");
  /* values that carry a prefix (any letter case) are accepted by this rule */
  assertNoMatch: "node wikipedia=en-GB:Foobar";
  assertNoMatch: "node wikipedia=en:Foobar";
  /* a bare page title is reported */
  assertMatch: "node wikipedia=Foobar";
}
---|
10 | |
---|
/* The list of valid languages is taken from the "languages" field documented at
   <https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities>.
   That list may change, so an unknown prefix is reported as a warning only.
   "cz" and "be-x-old" are listed as well because they can be auto-fixed.
   Values that are plain http(s) URLs are excluded here. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:/][wikipedia !~ /^https?:\/\//][wikipedia !~ /^(aa|ab|ace|ady|ady-cyrl|aeb|aeb-arab|aeb-latn|af|ak|aln|als|am|an|ang|anp|ar|arc|arn|arq|ary|arz|as|ase|ast|av|avk|awa|ay|az|azb|ba|ban|bar|bat-smg|bbc|bbc-latn|bcc|bcl|be|be-tarask|be-x-old|bg|bgn|bh|bho|bi|bjn|bm|bn|bo|bpy|bqi|br|brh|bs|bto|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cps|cr|crh|crh-cyrl|crh-latn|cs|csb|cu|cv|cy|cz|da|de|de-at|de-ch|de-formal|din|diq|dsb|dtp|dty|dv|dz|ee|egl|el|eml|en|en-ca|en-gb|eo|es|et|eu|ext|fa|ff|fi|fit|fiu-vro|fj|fo|fr|frc|frp|frr|fur|fy|ga|gag|gan|gan-hans|gan-hant|gd|gl|glk|gn|gom|gom-deva|gom-latn|gor|got|grc|gsw|gu|gv|ha|hak|haw|he|hi|hif|hif-latn|hil|ho|hr|hrx|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ike-cans|ike-latn|ilo|inh|io|is|it|iu|ja|jam|jbo|jut|jv|ka|kaa|kab|kbd|kbd-cyrl|kea|kg|khw|ki|kiu|kj|kk|kk-arab|kk-cn|kk-cyrl|kk-kz|kk-latn|kk-tr|kl|km|kn|ko|ko-kp|koi|kr|krc|kri|krj|krl|ks|ks-arab|ks-deva|ksh|ku|ku-arab|ku-latn|kv|kw|ky|la|lad|lb|lbe|lez|lfn|lg|li|lij|liv|lki|lmo|ln|lo|loz|lrc|lt|ltg|lus|luz|lv|lzh|lzz|mai|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nan|nap|nb|nds|nds-nl|ne|new|ng|niu|nl|nl-informal|nn|no|nod|nov|nrm|nso|nv|ny|nys|oc|olo|om|or|os|ota|pa|pag|pam|pap|pcd|pdc|pdt|pfl|pi|pih|pl|pms|pnb|pnt|prg|ps|pt|pt-br|qu|qug|rgn|rif|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rup|ruq|ruq-cyrl|ruq-latn|rw|rwr|sa|sah|sat|sc|scn|sco|sd|sdc|sdh|se|sei|ses|sg|sgs|sh|shi|shi-latn|shi-tfng|shn|si|simple|sje|sk|sl|sli|sm|sma|smj|sn|so|sq|sr|sr-ec|sr-el|srn|srq|ss|st|stq|su|sv|sw|szl|ta|tcy|te|tet|tg|tg-cyrl|tg-latn|th|ti|tk|tl|tly|tn|to|tokipona|tpi|tr|tru|ts|tt|tt-cyrl|tt-latn|tum|tw|ty|tyv|tzm|udm|ug|ug-arab|ug-latn|uk|ur|uz|uz-cyrl|uz-latn|ve|vec|vep|vi|vls|vmf|vo|vot|vro|wa|war|wo|wuu|xal|xh|xmf|yi|yo|yue|za|zea|zh|zh-classical|zh-cn|zh-hans|zh-hant|zh-hk|zh-min-nan|zh-mo|zh-my|zh-sg|zh-tw|zh-yue|zu):/] {
  throwWarning: tr("wikipedia tag has an unknown language prefix");
  /* a listed prefix is accepted */
  assertNoMatch: "node wikipedia=en:Foobar";
  /* a prefix-shaped string that is not in the list is reported */
  assertMatch: "node wikipedia=X-Y-Z:Foobar";
}
---|
18 | |
---|
/* A full http(s) URL as the value, with or without a leading language prefix,
   is the deprecated tag format. */
*[wikipedia =~ /^https?:\/\//],
*[wikipedia =~ /(?i)^[-a-z]{2,12}:https?:\/\//] {
  group: tr("deprecated tagging");
  throwWarning: tr("wikipedia tag format is deprecated");
  suggestAlternative: tr("''wikipedia''=''language:page title''");
  assertNoMatch: "node wikipedia=en:OpenStreetMap";
  assertMatch: "node wikipedia=http://en.wikipedia.org/wiki/OpenStreetMap";
}
---|
27 | |
---|
/* "be-x-old" is an obsolete language code; the fix rewrites the prefix to
   "be-tarask" and keeps the page title unchanged. */
*[wikipedia =~ /^be-x-old:/] {
  throwWarning: tr("wikipedia ''{0}'' language is obsolete, use ''{1}'' instead", "be-x-old", "be-tarask");
  fixAdd: concat("wikipedia=be-tarask:", get(regexp_match("^be-x-old:(.+)$", tag("wikipedia")),1));
  /* the prefix has to sit at the very start of the value */
  assertNoMatch: "node wikipedia=abe-x-old:foo";
  assertMatch: "node wikipedia=be-x-old:foo";
}
---|
34 | |
---|
/* "cz" is not a valid language code; the fix rewrites the prefix to "cs"
   and keeps the page title unchanged. */
*[wikipedia =~ /^cz:/] {
  throwWarning: tr("wikipedia ''{0}'' language is invalid, use ''{1}'' instead", "cz", "cs");
  fixAdd: concat("wikipedia=cs:", get(regexp_match("^cz:(.+)$", tag("wikipedia")),1));
  /* "cz:" appearing after another prefix is not handled by this rule */
  assertNoMatch: "node wikipedia=en:cz:foo";
  assertMatch: "node wikipedia=cz:foo";
}
---|
41 | |
---|
/* wikimedia_commons values must not contain URL-encoded sequences (%XX with
   upper-case hex digits). The fix URL-decodes the whole value, replaces
   underscores with spaces and trims the result. */
*[wikimedia_commons =~ /%[0-9A-F][0-9A-F]/] {
  throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.key}");
  fixAdd: concat("wikimedia_commons=", trim(replace(URL_decode(tag("wikimedia_commons")), "_", " ")));
  assertNoMatch: "node wikimedia_commons=File:Foo";
  assertMatch: "node wikimedia_commons=File:Foo%27s";
}
---|
48 | |
---|
/* wikipedia page titles must not contain URL-encoded sequences (%XX with
   upper-case hex digits). The fix keeps the language prefix, URL-decodes the
   title, replaces underscores with spaces and trims the result.
   The selector accepts the language prefix in any letter case, so the
   regexp_match() patterns of the fix carry (?i) too: without it a value such
   as "EN:Foo%27s" is reported, but neither pattern matches and the fix
   cannot be built from the value. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:.*%[0-9A-F][0-9A-F]/] {
  throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.tag}");
  fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(replace(URL_decode(get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),2)), "_", " ")));
  assertMatch: "node wikipedia=en:Foo%27s";
  assertMatch: "node wikipedia=EN:Foo%27s";
  assertNoMatch: "node wikipedia=en:Foo";
}
---|
55 | |
---|
/* The page title must follow the language prefix directly, without a space.
   The fix keeps the prefix and trims the title.
   The selector accepts the language prefix in any letter case, so the
   regexp_match() patterns of the fix carry (?i) too: without it a value such
   as "EN: foo" is reported, but neither pattern matches and the fix cannot
   be built from the value. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}: /] {
  throwWarning: tr("wikipedia title should not start with a space after language code");
  fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),2)));
  assertMatch: "node wikipedia=en: foo";
  assertMatch: "node wikipedia=EN: foo";
  assertNoMatch: "node wikipedia=en:foo";
}
---|
62 | |
---|
/* The page title must not start with the URL path fragment "wiki/".
   The fix keeps the language prefix, drops "wiki/" and trims the title.
   The selector is case-insensitive as a whole, so the regexp_match()
   patterns of the fix carry (?i) too: without it values such as
   "EN:wiki/foo" or "en:Wiki/foo" are reported, but the patterns do not match
   and the fix cannot be built from the value. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:wiki\//] {
  throwWarning: tr("wikipedia title should not have ''{0}'' prefix", "wiki/");
  fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),1), trim(get(regexp_match("(?i)^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),2)));
  assertMatch: "node wikipedia=en:wiki/foo";
  assertMatch: "node wikipedia=EN:wiki/foo";
  assertNoMatch: "node wikipedia=en:foo";
}
---|
69 | |
---|
/* All wikipedias except "jbo" automatically capitalize first letter of the page title.
   To see the latest list, see <https://noc.wikimedia.org/conf/highlight.php?file=InitialiseSettings.php>
   and look for 'wgCapitalLinks' setting.
   Values holding a URL after the language prefix are left to the deprecated-format rule.
   The selector accepts upper-case letters in the language prefix, so the
   regexp_match() patterns of the fix carry (?i): without it a value such as
   "EN:foo" is reported, but the patterns do not match and the fix cannot be
   built from the value. */
*[wikipedia =~ /^[-a-zA-Z]{2,12}:\p{Ll}/][wikipedia !~ /^jbo:/][wikipedia !~ /(?i)^[-a-z]{2,12}:https?:/] {
  throwWarning: tr("wikipedia page title should have first letter capitalized");
  /* groups: 1 = language prefix, 2 = first character of the title, 3 = rest of the title */
  fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),1), upper(get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),2)), get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),3));
  assertMatch: "node wikipedia=en:foo";
  assertMatch: "node wikipedia=EN:foo";
  assertNoMatch: "node wikipedia=en:Foo";
  assertMatch: "node wikipedia=ru:абв";
  assertNoMatch: "node wikipedia=ru:Абв";
}
---|
81 | |
---|
/* Page titles use spaces, not underscores. The fix keeps the language prefix,
   replaces every underscore in the title with a space and trims the result.
   Values holding a URL after the language prefix are left to the deprecated-format rule.
   The selector accepts the language prefix in any letter case, so the
   regexp_match() patterns of the fix carry (?i) too: without it a value such
   as "EN:foo_bar" is reported, but the patterns do not match and the fix
   cannot be built from the value. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:.*_/][wikipedia !~ /(?i)^[-a-z]{2,12}:https?:/] {
  throwWarning: tr("wikipedia page title should have spaces instead of underscores (''_''→'' '')");
  fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),1), trim(replace(get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),2), "_", " ")));
  assertMatch: "node wikipedia=en:foo_bar";
  assertMatch: "node wikipedia=EN:foo_bar";
  assertNoMatch: "node wikipedia=en:foo bar";
}
---|
88 | |
---|
/* Values that start with two language prefixes in a row. Only the listed
   prefix pairs are checked. The fix drops the first prefix, keeps the second
   one and trims the page title. */
*[wikipedia ^= "da:da:"],
*[wikipedia ^= "da:dk:"],
*[wikipedia ^= "de:de:"],
*[wikipedia ^= "dk:dk:"],
*[wikipedia ^= "en:de:"],
*[wikipedia ^= "en:en:"],
*[wikipedia ^= "en:es:"],
*[wikipedia ^= "en:eu:"],
*[wikipedia ^= "en:fr:"],
*[wikipedia ^= "en:ja:"],
*[wikipedia ^= "en:pl:"],
*[wikipedia ^= "en:pt:"],
*[wikipedia ^= "en:zh:"],
*[wikipedia ^= "es:es:"],
*[wikipedia ^= "eu:eu:"],
*[wikipedia ^= "fr:fr:"],
*[wikipedia ^= "ja:ja:"],
*[wikipedia ^= "pl:en:"],
*[wikipedia ^= "pl:pl:"],
*[wikipedia ^= "pt:pt:"],
*[wikipedia ^= "ru:fr:"],
*[wikipedia ^= "ru:ru:"],
*[wikipedia ^= "zh:zh:"] {
  throwWarning: tr("wikipedia language seems to be duplicated, e.g. en:en:Foo");
  /* groups: 1 = first prefix (dropped), 2 = second prefix (kept), 3 = page title */
  fixAdd: concat("wikipedia=", get(regexp_match("^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),2), trim(get(regexp_match("^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),3)));
  assertNoMatch: "node wikipedia=en:Bar";
  assertMatch: "node wikipedia=en:fr:Foo";
  assertMatch: "node wikipedia=en:en:Foo";
}
---|
118 | |
---|
/* Detect invalid wikidata tags: the value has to be "Q" followed by one to
   nine digits, the first of which is not zero. */
*[wikidata][wikidata !~ /^Q[1-9][0-9]{0,8}$/] {
  throwError: tr("wikidata tag must be in Qnnnn format, where n is a digit");
  /* well-formed identifiers */
  assertNoMatch: "node wikidata=Q1";
  assertNoMatch: "node wikidata=Q123";
  /* malformed identifiers: no Q, no digits, leading zero */
  assertMatch: "node wikidata=a";
  assertMatch: "node wikidata=Q";
  assertMatch: "node wikidata=Q0";
  assertMatch: "node wikidata=Q0123";
}
---|
129 | |
---|
/* A wikipedia tag is present but no wikidata tag accompanies it.
   Reported at the lowest severity (throwOther). */
*[wikipedia][!wikidata] {
  throwOther: tr("wikipedia tag is set, but there is no wikidata tag. Wikipedia plugin might help with wikidata id lookups");
  assertMatch: "node wikipedia=a";
  /* both tags present, or no wikipedia tag at all */
  assertNoMatch: "node wikipedia=a wikidata=Q123";
  assertNoMatch: "node foo=bar";
  assertNoMatch: "node wikidata=Q1";
}
---|
138 | |
---|
/* A language-suffixed key such as wikipedia:en is present, but the plain
   wikipedia key is missing.
   The placeholder refers to the selector condition by index: condition 0 is
   "!wikipedia", condition 1 is the /^wikipedia:/ key match. The message has to
   name the key that was actually found, hence {1.key}. */
*[!wikipedia][/^wikipedia:/] {
  throwWarning: tr("wikipedia tag is not set, but a ''{0}'' tag is. Make sure to use wikipedia=language:value together with wikidata tag.", "{1.key}");
  assertMatch: "node wikipedia:en=a";
  assertNoMatch: "node wikipedia=a wikipedia:en=b";
  assertNoMatch: "node wikipedia=Foo";
}
---|