Ticket #14425: wikipedia-validations.3.patch

File wikipedia-validations.3.patch, 10.9 KB (added by nyurik, 7 years ago)
  • data/validator/wikipedia.mapcss

     
    11/* validation for the wikipedia=* tag - see ticket #8383 */
    22
    33/* If there is no language at all, this is broken. Also catches 'wikipedia' used as 'email', 'website', 'ele' [sic!] ... */
    4 *[wikipedia][wikipedia !~ /^[a-zA-Z_-]{2,12}:/] {
    5   throwError: tr("no wikipedia-language given, use ''wikipedia''=''language:page title''");
     4*[wikipedia][wikipedia !~ /^[-a-z]{2,12}:/] {
     5  throwError: tr("wikipedia tag has no language given, use ''wikipedia''=''language:page title''");
    66  assertMatch: "node wikipedia=Foobar";
    77  assertNoMatch: "node wikipedia=en:Foobar";
    88  assertNoMatch: "node wikipedia=en-GB:Foobar";
    99}
    1010
    11 /* Valid languages are extracted from <http://de.wikipedia.org/w/api.php?action=sitematrix&format=xml>, which may change, so this is a warning only. */
    12 *[wikipedia =~ /^[a-zA-Z_-]{2,12}:/][wikipedia !~ /^https?:\/\//][wikipedia !~ /^(aa|ab|ace|af|ak|als|am|an|ang|ar|arc|arz|as|ast|av|ay|az|ba|bar|bat-smg|bcl|be|be-x-old|bg|bh|bi|bjn|bm|bn|bo|bpy|br|bs|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cr|crh|cs|csb|cu|cv|cy|cz|da|de|diq|dk|dsb|dv|dz|ee|el|eml|en|eo|epo|es|et|eu|ext|fa|ff|fi|fiu-vro|fj|fo|fr|frp|frr|fur|fy|ga|gag|gan|gd|gl|glk|gn|got|gu|gv|ha|hak|haw|he|hi|hif|ho|hr|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ilo|io|is|it|iu|ja|jbo|jp|jv|ka|kaa|kab|kbd|kg|ki|kj|kk|kl|km|kn|ko|koi|kr|krc|ks|ksh|ku|kv|kw|ky|la|lad|lb|lbe|lez|lg|li|lij|lmo|ln|lo|lt|ltg|lv|map-bms|mdf|mg|mh|mhr|mi|minnan|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nan|nap|nb|nds|nds-nl|ne|new|ng|nl|nn|no|nov|nrm|nso|nv|ny|oc|om|or|os|pa|pag|pam|pap|pcd|pdc|pfl|pi|pih|pl|pms|pnb|pnt|ps|pt|qu|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rw|sa|sah|sc|scn|sco|sd|se|sg|sh|si|simple|sk|sl|sm|sn|so|sq|sr|srn|ss|st|stq|su|sv|sw|szl|ta|te|tet|tg|th|ti|tk|tl|tn|to|tpi|tr|ts|tt|tum|tw|ty|udm|ug|uk|ur|uz|ve|vec|vep|vi|vls|vo|wa|war|wo|wuu|xal|xh|xmf|yi|yo|za|zea|zh|zh-cfr|zh-classical|zh-min-nan|zh-yue|zu):/] {
    13   throwWarning: tr("unknown language prefix in wikipedia tag");
     11/* Valid languages are extracted from <https://www.wikidata.org/w/api.php?action=help&modules=wbgetentities> (languages field), which may change, so this is a warning only.
     12   Also includes "cz" and "be-x-old" because they can be auto-fixed */
     13*[wikipedia =~ /^[-a-z]{2,12}:/][wikipedia !~ /^https?:\/\//][wikipedia !~ /^(aa|ab|ace|ady|ady-cyrl|aeb|aeb-arab|aeb-latn|af|ak|aln|als|am|an|ang|anp|ar|arc|arn|arq|ary|arz|as|ase|ast|av|avk|awa|ay|az|azb|ba|ban|bar|bat-smg|bbc|bbc-latn|bcc|bcl|be|be-tarask|be-x-old|bg|bgn|bh|bho|bi|bjn|bm|bn|bo|bpy|bqi|br|brh|bs|bto|bug|bxr|ca|cbk-zam|cdo|ce|ceb|ch|cho|chr|chy|ckb|co|cps|cr|crh|crh-cyrl|crh-latn|cs|csb|cu|cv|cy|cz|da|de|de-at|de-ch|de-formal|din|diq|dsb|dtp|dty|dv|dz|ee|egl|el|eml|en|en-ca|en-gb|eo|es|et|eu|ext|fa|ff|fi|fit|fiu-vro|fj|fo|fr|frc|frp|frr|fur|fy|ga|gag|gan|gan-hans|gan-hant|gd|gl|glk|gn|gom|gom-deva|gom-latn|gor|got|grc|gsw|gu|gv|ha|hak|haw|he|hi|hif|hif-latn|hil|ho|hr|hrx|hsb|ht|hu|hy|hz|ia|id|ie|ig|ii|ik|ike-cans|ike-latn|ilo|inh|io|is|it|iu|ja|jam|jbo|jut|jv|ka|kaa|kab|kbd|kbd-cyrl|kea|kg|khw|ki|kiu|kj|kk|kk-arab|kk-cn|kk-cyrl|kk-kz|kk-latn|kk-tr|kl|km|kn|ko|ko-kp|koi|kr|krc|kri|krj|krl|ks|ks-arab|ks-deva|ksh|ku|ku-arab|ku-latn|kv|kw|ky|la|lad|lb|lbe|lez|lfn|lg|li|lij|liv|lki|lmo|ln|lo|loz|lrc|lt|ltg|lus|luz|lv|lzh|lzz|mai|map-bms|mdf|mg|mh|mhr|mi|min|mk|ml|mn|mo|mr|mrj|ms|mt|mus|mwl|my|myv|mzn|na|nah|nan|nap|nb|nds|nds-nl|ne|new|ng|niu|nl|nl-informal|nn|no|nod|nov|nrm|nso|nv|ny|nys|oc|olo|om|or|os|ota|pa|pag|pam|pap|pcd|pdc|pdt|pfl|pi|pih|pl|pms|pnb|pnt|prg|ps|pt|pt-br|qu|qug|rgn|rif|rm|rmy|rn|ro|roa-rup|roa-tara|ru|rue|rup|ruq|ruq-cyrl|ruq-latn|rw|rwr|sa|sah|sat|sc|scn|sco|sd|sdc|sdh|se|sei|ses|sg|sgs|sh|shi|shi-latn|shi-tfng|shn|si|simple|sje|sk|sl|sli|sm|sma|smj|sn|so|sq|sr|sr-ec|sr-el|srn|srq|ss|st|stq|su|sv|sw|szl|ta|tcy|te|tet|tg|tg-cyrl|tg-latn|th|ti|tk|tl|tly|tn|to|tokipona|tpi|tr|tru|ts|tt|tt-cyrl|tt-latn|tum|tw|ty|tyv|tzm|udm|ug|ug-arab|ug-latn|uk|ur|uz|uz-cyrl|uz-latn|ve|vec|vep|vi|vls|vmf|vo|vot|vro|wa|war|wo|wuu|xal|xh|xmf|yi|yo|yue|za|zea|zh|zh-classical|zh-cn|zh-hans|zh-hant|zh-hk|zh-min-nan|zh-mo|zh-my|zh-sg|zh-tw|zh-yue|zu):/] {
     14  throwWarning: tr("wikipedia tag has an unknown language prefix");
    1415  assertMatch: "node wikipedia=X-Y-Z:Foobar";
    1516  assertNoMatch: "node wikipedia=en:Foobar";
    1617}
    1718
    18 *[wikipedia =~ /https?:\/\//] {
    19   throwWarning: tr("deprecated format for wikipedia tag");
     19*[wikipedia =~ /^https?:\/\//], /* value starts with http:// or https:// */
     20*[wikipedia =~ /^[-a-z]{2,12}:https?:\/\//] { /* URL placed directly after a language-like prefix */
     21  throwWarning: tr("wikipedia tag format is deprecated");
    2022  suggestAlternative: tr("''wikipedia''=''language:page title''");
    2123  group: tr("deprecated tagging");
    2224  assertMatch: "node wikipedia=http://en.wikipedia.org/wiki/OpenStreetMap";
    2325  assertNoMatch: "node wikipedia=en:OpenStreetMap";
    2426}
     27
     28*[wikipedia =~ /^be-x-old:/] { /* anchored at the start, so "abe-x-old:" is not affected */
     29  throwWarning: tr("wikipedia ''be-x-old'' language is obsolete, use ''be-tarask'' instead");
     30  fixAdd: concat("wikipedia=be-tarask:", get(regexp_match("^be-x-old:(.+)$", tag("wikipedia")),1)); /* keeps the page title (group 1), replaces only the prefix */
     31  assertMatch: "node wikipedia=be-x-old:foo";
     32  assertNoMatch: "node wikipedia=abe-x-old:foo";
     33}
     34
     35*[wikipedia =~ /^cz:/] { /* anchored at the start, so "en:cz:" is not affected */
     36  throwWarning: tr("wikipedia ''cz'' language is invalid, use ''cs'' instead");
     37  fixAdd: concat("wikipedia=cs:", get(regexp_match("^cz:(.+)$", tag("wikipedia")),1)); /* keeps the page title (group 1), replaces only the prefix */
     38  assertMatch: "node wikipedia=cz:foo";
     39  assertNoMatch: "node wikipedia=en:cz:foo";
     40}
     41
     42*[wikipedia =~ /^[-a-z]{2,12}:.*%[0-9A-F][0-9A-F]/] {
     43  throwError: tr("wikipedia title should not have URL-encoded values like ''%27''");
     44  fixAdd: concat("wikipedia=", get(regexp_match("^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(replace(URL_decode(get(regexp_match("^([-a-z]+:)(.+)$", tag("wikipedia")),2)), "_", " ")));
     45  assertMatch: "node wikipedia=en:Foo%27s";
     46  assertNoMatch: "node wikipedia=en:Foo";
     47}
     48
     49*[wikipedia =~ /^[-a-z]{2,12}: /] { /* a space directly after the "language:" prefix */
     50  throwWarning: tr("wikipedia title should not start with a space after language code");
     51  fixAdd: concat("wikipedia=", get(regexp_match("^([-a-z]+:)(.*)$", tag("wikipedia")),1), trim(get(regexp_match("^([-a-z]+:)(.*)$", tag("wikipedia")),2))); /* prefix (group 1) + trimmed remainder (group 2) */
     52  assertMatch: "node wikipedia=en: foo";
     53  assertNoMatch: "node wikipedia=en:foo";
     54}
     55
     56*[wikipedia =~ /^[-a-z]{2,12}:wiki\//] {
     57  throwWarning: tr("wikipedia title should not have ''wiki/'' prefix");
     58  fixAdd: concat("wikipedia=", get(regexp_match("^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),1), trim(get(regexp_match("^([-a-z]+:)wiki/(.*)$", tag("wikipedia")),2)));
     59  assertMatch: "node wikipedia=en: foo";
     60  assertNoMatch: "node wikipedia=en:foo";
     61}
     62
     63/* All wikipedias except "jbo" automatically capitalize the first letter of the page title.
     64   To see the latest list, see <https://noc.wikimedia.org/conf/highlight.php?file=InitialiseSettings.php>
     65   and look for the 'wgCapitalLinks' setting. */
     66*[wikipedia =~ /^[-a-z]{2,12}:\p{Ll}/][wikipedia !~ /^jbo:/][wikipedia !~ /^[-a-z]{2,12}:https?:/] { /* \p{Ll}: a lowercase letter right after the prefix; jbo and URL values are excluded */
     67  throwWarning: tr("wikipedia page title should have first letter capitalized");
     68  fixAdd: concat("wikipedia=", get(regexp_match("^([-a-z]+:)(.)(.*)$", tag("wikipedia")),1), upper(get(regexp_match("^([-a-z]+:)(.)(.*)$", tag("wikipedia")),2)), get(regexp_match("^([-a-z]+:)(.)(.*)$", tag("wikipedia")),3)); /* prefix + upper(first character) + rest of the title */
     69  assertMatch: "node wikipedia=en:foo";
     70  assertNoMatch: "node wikipedia=en:Foo";
     71  assertMatch: "node wikipedia=ru:абв";
     72  assertNoMatch: "node wikipedia=ru:Абв";
     73}
     74
     75*[wikipedia =~ /^[-a-z]{2,12}:.*_/][wikipedia !~ /^[-a-z]{2,12}:https?:/] { /* an underscore anywhere after the prefix; URL values are excluded */
     76  throwWarning: tr("wikipedia page title should have spaces instead of underscores (''_''→'' '')");
     77  fixAdd: concat("wikipedia=", get(regexp_match("^([-a-z]+:)(.+)$", tag("wikipedia")),1), trim(replace(get(regexp_match("^([-a-z]+:)(.+)$", tag("wikipedia")),2), "_", " "))); /* prefix + title with underscores replaced by spaces, trimmed */
     78  assertMatch: "node wikipedia=en:foo_bar";
     79  assertNoMatch: "node wikipedia=en:foo bar";
     80}
     81
     82*[wikipedia ^= "da:da:"], /* explicit list of doubled "xx:yy:" prefixes, matched at the start of the value */
     83*[wikipedia ^= "da:dk:"],
     84*[wikipedia ^= "de:de:"],
     85*[wikipedia ^= "dk:dk:"],
     86*[wikipedia ^= "en:de:"],
     87*[wikipedia ^= "en:en:"],
     88*[wikipedia ^= "en:es:"],
     89*[wikipedia ^= "en:eu:"],
     90*[wikipedia ^= "en:fr:"],
     91*[wikipedia ^= "en:ja:"],
     92*[wikipedia ^= "en:pl:"],
     93*[wikipedia ^= "en:pt:"],
     94*[wikipedia ^= "en:zh:"],
     95*[wikipedia ^= "es:es:"],
     96*[wikipedia ^= "eu:eu:"],
     97*[wikipedia ^= "fr:fr:"],
     98*[wikipedia ^= "ja:ja:"],
     99*[wikipedia ^= "pl:en:"],
     100*[wikipedia ^= "pl:pl:"],
     101*[wikipedia ^= "pt:pt:"],
     102*[wikipedia ^= "ru:fr:"],
     103*[wikipedia ^= "ru:ru:"],
     104*[wikipedia ^= "zh:zh:"] {
     105  throwWarning: tr("wikipedia language seems to be duplicated, e.g. en:en:Foo");
     106  fixAdd: concat("wikipedia=", get(regexp_match("^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),2), trim(get(regexp_match("^([-a-z]+:)([-a-z]+:)(.*)$", tag("wikipedia")),3))); /* drops the first prefix: keeps the second prefix (group 2) + trimmed title (group 3) */
     107  assertMatch: "node wikipedia=en:en:Foo";
     108  assertMatch: "node wikipedia=en:fr:Foo";
     109  assertNoMatch: "node wikipedia=en:Bar";
     110}
  • src/org/openstreetmap/josm/gui/mappaint/mapcss/ExpressionFactory.java

     
    831831        }
    832832
    833833        /**
     834         * Converts string {@code s} to uppercase.
     835         * @param s The source string
     836         * @return The resulting string
     837         * @see String#toUpperCase()
     838         */
     839        public static String upper(String s) {
     840            return s == null ? null : s.toUpperCase();
     841        }
     842
     843        /**
     844         * Converts string {@code s} to lowercase.
     845         * @param s The source string
     846         * @return The resulting string
     847         * @see String#toLowerCase()
     848         */
     849        public static String lower(String s) {
     850            return s == null ? null : s.toLowerCase();
     851        }
     852
     853        /**
     854         * Trims leading and trailing whitespace from the string {@code s}, as done by {@link String#trim()}.
     855         * @param s The source string, may be {@code null}
     856         * @return The trimmed string, or {@code null} if {@code s} is {@code null}
     857         * @see String#trim()
     858         */
     859        public static String trim(String s) {
     860            return s == null ? null : s.trim();
     861        }
     862
     863        /**
     864         * Percent-decode a string. (See https://en.wikipedia.org/wiki/Percent-encoding)
     865         * This is especially useful for wikipedia titles. Decoding is delegated to {@code Utils.decodeUrl}.
     866         * @param s url-encoded string, may be {@code null}
     867         * @return the decoded string, the original {@code s} if decoding throws, or {@code null} if {@code s} is {@code null}
     868         */
     869        public static String URL_decode(String s) {
     870            if (s == null) return null;
     871            try {
     872                return Utils.decodeUrl(s);
     873            } catch (java.lang.Exception e) { // best effort: any failure falls back to the undecoded input
     874                return s;
     875            }
     876        }
     877
     878        /**
    834879         * Percent-encode a string. (See https://en.wikipedia.org/wiki/Percent-encoding)
    835880         * This is especially useful for data urls, e.g.
    836881         * <code>concat("data:image/svg+xml,", URL_encode("&lt;svg&gt;...&lt;/svg&gt;"));</code>