[15473] | 1 | /* validation for the wikipedia=* tag - see tickets #8383, #14425, #18251 */
|
---|
[6548] | 2 |
|
---|
/* No language prefix at all: the value does not start with 2-12 letters/hyphens
   followed by a colon. This also catches 'wikipedia' misused as 'email',
   'website', 'ele' [sic!] and similar. */
*[wikipedia][wikipedia !~ /(?i)^[-a-z]{2,12}:/] {
    throwError: tr("wikipedia tag has no language given, use ''wikipedia''=''language:page title''");
    /* Self-tests: plain and hyphenated language prefixes are accepted, a bare title is not. */
    assertNoMatch: "node wikipedia=en:Foobar";
    assertNoMatch: "node wikipedia=en-GB:Foobar";
    assertMatch: "node wikipedia=Foobar";
}
| 10 |
|
---|
/* Unknown language prefix. The list of valid languages is extracted from
   https://www.wikidata.org/w/api.php?action=sitematrix&formatversion=2 and may
   change over time, so this is reported as a warning only.
   Conditions, in order: the value has a prefix-like start, it is not a plain
   http(s) URL, and the prefix is not one of the listed codes. The list
   comparison carries no case-insensitive flag. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:/][wikipedia !~ /^https?:\/\//][wikipedia !~ /^(aa|ab|ace|ady|af|ak|als|alt|am|ami|an|ang|ar|arc|ary|arz|as|ast|atj|av|avk|awa|ay|az|azb|ba|ban|bar|bat-smg|bcl|be|be-tarask|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|da|dag|de|din|diq|dsb|dty|dv|dz|ee|el|eml|en|eo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gcr|gd|gl|glk|gn|gom|gor|got|gu|guw|gv|ha|hak|haw|he|hi|hif|ho|hr|hsb|ht|hu|hy|hyw|hz|ia|id|ie|ig|ii|ik|ilo|inh|io|is|it|iu|ja|jam|jbo|jv|ka|kaa|kab|kbd|kbp|kcg|kg|ki|kj|kk|kl|km|kn|ko|koi|kr|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lfn|lg|li|lij|lld|lmo|ln|lo|lrc|lt|ltg|lv|mad|mai|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mni|mnw|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nap|nds|nds-nl|ne|new|ng|nia|nl|nn|no|nov|nqo|nrm|nso|nv|ny|oc|olo|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|pwn|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rw|sa|sah|sat|sc|scn|sco|sd|se|sg|sh|shi|shn|shy|si|simple|sk|skr|sl|sm|smn|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|szy|ta|tay|tcy|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|trv|ts|tt|tum|tw|ty|tyv|udm|ug|uk|ur|uz|ve|vec|vep|vi|vls|vo|wa|war|wo|wuu|xal|xh|xmf|yi|yo|yue|za|zea|zh|zh-classical|zh-min-nan|zh-yue|zu):/] {
    throwWarning: tr("wikipedia tag has an unknown language prefix");
    /* Self-tests: a listed code passes, an unlisted prefix is reported. */
    assertNoMatch: "node wikipedia=en:Foobar";
    assertMatch: "node wikipedia=X-Y-Z:Foobar";
}
[7035] | 17 |
|
---|
/* Deprecated format: the value is an http(s) URL, either on its own or placed
   directly after a language prefix. */
*[wikipedia =~ /^https?:\/\//],
*[wikipedia =~ /(?i)^[-a-z]{2,12}:https?:\/\//] {
    throwWarning: tr("wikipedia tag format is deprecated");
    suggestAlternative: tr("''wikipedia''=''language:page title''");
    group: tr("deprecated tagging");
    /* Self-tests: a full article URL is reported, the language:title form is not. */
    assertNoMatch: "node wikipedia=en:OpenStreetMap";
    assertMatch: "node wikipedia=http://en.wikipedia.org/wiki/OpenStreetMap";
}
[11756] | 26 |
|
---|
/* Obsolete language code 'be-x-old' at the very start of the value. */
*[wikipedia =~ /^be-x-old:/] {
    throwWarning: tr("wikipedia ''{0}'' language is obsolete, use ''{1}'' instead", "be-x-old", "be-tarask");
    /* Fix: keep everything after the old prefix and put 'be-tarask:' in front of it. */
    fixAdd: concat("wikipedia=be-tarask:", get(regexp_match("^be-x-old:(.+)$", tag("wikipedia")),1));
    /* Self-tests: the pattern is anchored, so a longer prefix ending in the same text is ignored. */
    assertNoMatch: "node wikipedia=abe-x-old:foo";
    assertMatch: "node wikipedia=be-x-old:foo";
}
| 33 |
|
---|
/* Invalid language code 'cz' at the very start of the value; the message points to 'cs'. */
*[wikipedia =~ /^cz:/] {
    throwWarning: tr("wikipedia ''{0}'' language is invalid, use ''{1}'' instead", "cz", "cs");
    /* Fix: keep everything after 'cz:' and put 'cs:' in front of it. */
    fixAdd: concat("wikipedia=cs:", get(regexp_match("^cz:(.+)$", tag("wikipedia")),1));
    /* Self-tests: 'cz:' appearing after another prefix is not reported by this rule. */
    assertNoMatch: "node wikipedia=en:cz:foo";
    assertMatch: "node wikipedia=cz:foo";
}
| 40 |
|
---|
/* URL-encoded (percent-escaped) characters in wikimedia_commons.
   The hex digits are matched case-insensitively, in line with the wikipedia
   rules of this file, so lower-case escapes such as %c3%a9 are caught too. */
*[wikimedia_commons =~ /(?i)%[0-9A-F][0-9A-F]/] {
    throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.key}");
    /* Fix: URL-decode the whole value, turn underscores into spaces, then trim. */
    fixAdd: concat("wikimedia_commons=", trim(replace(URL_decode(tag("wikimedia_commons")), "_", " ")));
    /* Self-tests: upper- and lower-case escapes are reported, a plain value is not. */
    assertMatch: "node wikimedia_commons=File:Foo%27s";
    assertMatch: "node wikimedia_commons=File:Foo%c3%a9";
    assertNoMatch: "node wikimedia_commons=File:Foo";
}
| 47 |
|
---|
/* URL-encoded (percent-escaped) characters anywhere after the language prefix. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:.*%[0-9A-F][0-9A-F]/] {
    throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.key}");
    /* Fix: keep the prefix (group 1) untouched; URL-decode the title (group 2),
       turn underscores into spaces, then trim. */
    fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(replace(URL_decode(get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),2)), "_", " ")));
    /* Self-tests */
    assertNoMatch: "node wikipedia=en:Foo";
    assertMatch: "node wikipedia=en:Foo%27s";
}
| 54 |
|
---|
/* URL-encoded (percent-escaped) characters in the value of a wikipedia:LANG key.
   The first condition selects the key by regular expression, the second tests
   the value of that same key. */
*[/^wikipedia:[-a-z]{2,12}$/][/^wikipedia:[-a-z]{2,12}$/ =~ /(?i).*%[0-9A-F][0-9A-F]/] {
    throwError: tr("{0} tag should not have URL-encoded values like ''%27''", "{0.key}");
    /* Fix: write back to the matched key. Group 1 is an optional leading
       'letters/hyphens:' prefix, kept as-is; group 2 is the remainder, which is
       URL-decoded, has underscores turned into spaces, and is trimmed.
       The regexp_match result is handed straight to get(), with no console output. */
    fixAdd: concat("{0.key}", "=", get(regexp_match("(?i)^([-a-z]+:)?(.*)$", tag("{0.key}")),1), trim(replace(URL_decode(get(regexp_match("(?i)^([-a-z]+:)?(.+)$", tag("{0.key}")),2)), "_", " ")));
    /* Self-tests */
    assertMatch: "node wikipedia:de=Foo%27s";
    assertNoMatch: "node wikipedia:de=Foo";
}
| 61 |
|
---|
/* A space directly after the colon of the language prefix. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}: /] {
    throwWarning: tr("wikipedia title should not start with a space after language code");
    /* Fix: prefix (group 1) followed by the trimmed remainder (group 2). */
    fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(get(regexp_match("(?i)^([-a-z]+:)(.*)$", tag("wikipedia")),2)));
    /* Self-tests */
    assertNoMatch: "node wikipedia=en:foo";
    assertMatch: "node wikipedia=en: foo";
}
| 68 |
|
---|
/* The title starts with a stray 'wiki/' path segment right after the language prefix. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:wiki\//] {
    throwWarning: tr("wikipedia title should not have ''{0}'' prefix", "wiki/");
    /* Fix: prefix (group 1) followed by the trimmed text after 'wiki/' (group 2). */
    fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),1), trim(get(regexp_match("(?i)^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),2)));
    /* Self-tests */
    assertNoMatch: "node wikipedia=en:foo";
    assertMatch: "node wikipedia=en:wiki/foo";
}
| 75 |
|
---|
/* Lower-case first letter of the page title.
   All wikipedias except "jbo" automatically capitalize the first letter of the
   page title. For the latest list, see
   https://noc.wikimedia.org/conf/highlight.php?file=InitialiseSettings.php
   and look for the 'wgCapitalLinks' setting.
   Values whose title part is an http(s) URL are excluded from this rule. */
*[wikipedia =~ /^[-a-zA-Z]{2,12}:\p{Ll}/][wikipedia !~ /^jbo:/][wikipedia !~ /(?i)^[-a-z]{2,12}:https?:/] {
    throwWarning: tr("wikipedia page title should have first letter capitalized");
    /* Fix: prefix (group 1), upper-cased first character (group 2), unchanged rest (group 3). */
    fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),1), upper(get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),2)), get(regexp_match("(?i)^([-a-z]+:)(.)(.*)$", tag("wikipedia")),3));
    /* Self-tests: Latin titles */
    assertMatch: "node wikipedia=en:foo";
    assertNoMatch: "node wikipedia=en:Foo";
    /* Self-tests: non-Latin (Cyrillic) titles */
    assertMatch: "node wikipedia=ru:абв";
    assertNoMatch: "node wikipedia=ru:Абв";
}
| 87 |
|
---|
/* Underscore somewhere after the language prefix; values whose title part is an
   http(s) URL are excluded from this rule. */
*[wikipedia =~ /(?i)^[-a-z]{2,12}:.*_/][wikipedia !~ /(?i)^[-a-z]{2,12}:https?:/] {
    throwWarning: tr("wikipedia page title should have spaces instead of underscores (''_''→'' '')");
    /* Fix: prefix (group 1) followed by the title (group 2) with underscores
       replaced by spaces and then trimmed. */
    fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),1), trim(replace(get(regexp_match("(?i)^([-a-z]+:)(.+)$", tag("wikipedia")),2), "_", " ")));
    /* Self-tests */
    assertNoMatch: "node wikipedia=en:foo bar";
    assertMatch: "node wikipedia=en:foo_bar";
}
| 94 |
|
---|
/* Two language prefixes in a row. Only the explicitly listed combinations are
   reported; each selector tests the start of the value with a prefix match. */
*[wikipedia ^= "da:da:"],
*[wikipedia ^= "da:dk:"],
*[wikipedia ^= "de:de:"],
*[wikipedia ^= "dk:dk:"],
*[wikipedia ^= "en:de:"],
*[wikipedia ^= "en:en:"],
*[wikipedia ^= "en:es:"],
*[wikipedia ^= "en:eu:"],
*[wikipedia ^= "en:fr:"],
*[wikipedia ^= "en:ja:"],
*[wikipedia ^= "en:pl:"],
*[wikipedia ^= "en:pt:"],
*[wikipedia ^= "en:zh:"],
*[wikipedia ^= "es:es:"],
*[wikipedia ^= "eu:eu:"],
*[wikipedia ^= "fr:fr:"],
*[wikipedia ^= "ja:ja:"],
*[wikipedia ^= "pl:en:"],
*[wikipedia ^= "pl:pl:"],
*[wikipedia ^= "pt:pt:"],
*[wikipedia ^= "ru:fr:"],
*[wikipedia ^= "ru:ru:"],
*[wikipedia ^= "zh:zh:"] {
    throwWarning: tr("wikipedia language seems to be duplicated, e.g. en:en:Foo");
    /* Fix: drop the first prefix (group 1); keep the second prefix (group 2)
       followed by the trimmed title (group 3). */
    fixAdd: concat("wikipedia=", get(regexp_match("(?i)^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),2), trim(get(regexp_match("(?i)^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),3)));
    /* Self-tests: identical and differing prefix pairs are reported, a single prefix is not. */
    assertNoMatch: "node wikipedia=en:Bar";
    assertMatch: "node wikipedia=en:en:Foo";
    assertMatch: "node wikipedia=en:fr:Foo";
}
| 124 |
|
---|
/* Invalid wikidata value: anything other than 'Q' followed by 1 to 9 digits,
   the first of which must not be zero. */
*[wikidata][wikidata !~ /^Q[1-9][0-9]{0,8}$/] {
    throwError: tr("wikidata tag must be in Qnnnn format, where n is a digit");
    /* Self-tests: well-formed ids */
    assertNoMatch: "node wikidata=Q1";
    assertNoMatch: "node wikidata=Q123";
    /* Self-tests: missing 'Q', missing digits, leading zero */
    assertMatch: "node wikidata=a";
    assertMatch: "node wikidata=Q";
    assertMatch: "node wikidata=Q0";
    assertMatch: "node wikidata=Q0123";
}
| 135 |
|
---|
/* A wikipedia tag is present but no wikidata tag accompanies it. */
*[wikipedia][!wikidata] {
    throwOther: tr("wikipedia tag is set, but there is no wikidata tag. Wikipedia plugin might help with wikidata id lookups");
    group: tr("missing tag");
    /* Self-tests: only the combination 'wikipedia without wikidata' is reported. */
    assertNoMatch: "node foo=bar";
    assertNoMatch: "node wikidata=Q1";
    assertNoMatch: "node wikipedia=a wikidata=Q123";
    assertMatch: "node wikipedia=a";
}
| 145 |
|
---|
/* A wikidata-style id (Q plus digits, no leading zero) used as the page title
   of a wikipedia tag. This is not 100% safe because real wikipedia articles may
   match the pattern, therefore no fixChangeKey is offered. */
*[wikipedia][wikipedia =~ /^[-a-zA-Z]{2,12}:Q[1-9][0-9]{0,8}$/] {
    throwWarning: tr("{0} value looks like a {1} value", "{0.key}", "wikidata");
    /* Self-tests: values that look like a wikidata id behind a language prefix */
    assertMatch: "node wikipedia=de:Q1";
    assertMatch: "node wikipedia=de:Q123";
    assertMatch: "node wikipedia=en-GB:Q1";
    assertMatch: "node wikipedia=en-GB:Q123";
    /* Self-tests: no prefix, no digits, or a leading zero */
    assertNoMatch: "node wikipedia=a";
    assertNoMatch: "node wikipedia=de:a";
    assertNoMatch: "node wikipedia=de:Q";
    assertNoMatch: "node wikipedia=de:Q0";
    assertNoMatch: "node wikipedia=de:Q0123";
    assertNoMatch: "node wikipedia=en-GB:Q0123";
}
| 160 |
|
---|
/* A key starting with 'wikipedia:' is present although there is no plain wikipedia tag. */
*[!wikipedia][/^wikipedia:/] {
    /* Placeholder {1.key} comes from the second condition, {0.key} from the first. */
    throwWarning: tr("''{0}'' tag is set, but no ''{1}'' tag. Make sure to set ''wikipedia=language:value'' for the main article and optional ''wikipedia:language=value'' only for additional articles that are not just other language variants of the main article.", "{1.key}", "{0.key}");
    /* Self-tests: reported only when the plain wikipedia tag is absent. */
    assertNoMatch: "node wikipedia=Foo";
    assertNoMatch: "node wikipedia=a wikipedia:en=b";
    assertMatch: "node wikipedia:en=a";
}