С одной стороны, формат OSM допускает наличие каких угодно, в том числе “авторских” тегов.
С другой — как только возникает потребность тем или иным способом использовать накопленные данные, появляется вопрос о конвертации в какой-либо формат и, следовательно, о сведении произвольных тегов к стандартным типам, применяемым в том или ином приложении.
Вместе с тем стандартизация позволяет находить ошибки, а их в данных OSM достаточно много.
Не так давно собрал статистику по используемым тегам, пока только по ключам. Оказалось, что по России (данные от 23 апреля) набирается почти 800 тегов, причем добрая половина из них встречается в ЕДИНСТВЕННОМ экземпляре, что для 3.5 Гбайт информации (всего обработано 594622 тегов) выглядит, мягко говоря, странно.
В основном, естественно, уникальные теги - следствие ошибок (как орфографических, так и путаницы ключа и значения). Но встречается немало и явно странных тегов, например “game:…”.
Статистику приведу в конце сообщения (если влезет), а пока выложу еще небольшой файлик, сделанный из странички справки:
http://wiki.openstreetmap.org/wiki/RU:Map_Features
Сам файл:
http://slil.ru/29127684
Его можно просматривать любым XML-редактором или вьюером.
Ну а теперь отчет о частоте различных тегов (номер по порядку / сколько раз встретился / сам тег):
1 5 "randomjunk_bot"
2 32739 "highway"
3 4405 "crossing"
4 5489 "barrier"
5 9046 "railway"
6 711 "crossing_ref"
7 549 "layer"
8 1955 "bicycle"
9 120447 "name"
10 1247 "traffic_sign"
11 2019 "foot"
12 556 "motorcar"
13 1401 "alt_name"
14 10033 "name:en"
15 1402 "wikipedia"
16 338 "wikipedia:en"
17 295 "name:fi"
18 183 "old_name"
19 13 "hgv"
20 361 "fixme"
21 213 "horse"
22 388 "motorcycle"
23 1223 "traffic_calming"
24 3548 "tourism"
25 1687 "historic"
26 1839 "note"
27 5635 "is_in"
28 56 "name:pl"
29 67680 "place"
30 713 "population"
31 101 "noexit"
32 535 "access"
33 89 "name:hr"
34 9357 "name:ru"
35 12 "name:uk"
36 639 "aeroway"
37 279 "closest_town"
38 911 "ele"
39 114 "iata"
40 203 "icao"
41 27 "name_1"
42 3824 "operator"
43 1947 "type"
44 58 "capital"
45 5617 "int_name"
46 64 "is_in:continent"
47 4898 "name:be"
48 16 "name:cs"
49 573 "name:de"
50 7 "name:el"
51 17 "name:es"
52 19 "name:eu"
53 6 "name:fa"
54 12 "name:he"
55 12 "name:hu"
56 6 "name:is"
57 17 "name:lv"
58 14 "name:nl"
59 80 "name:sk"
60 74 "name:sl"
61 3 "nat_name"
62 2 "old_name:be"
63 25542 "addr:country"
64 23734 "addr:district"
65 25985 "addr:postcode"
66 25065 "addr:region"
67 24692 "cladr:code"
68 24830 "cladr:name"
69 24815 "cladr:suffix"
70 15 "name:sv"
71 13 "is_in:city"
72 1182 "is_in:country"
73 33 "name:lt"
74 143 "website"
75 2 "alt_name:fi"
76 4 "name:se"
77 1797 "cladr:note"
78 1 "name:krl"
79 27 "comment"
80 258 "area"
81 34 "boundary"
82 1 "game:patrizer2:bier"
83 1 "game:patrizer2:eisenerz"
84 1 "game:patrizer2:felle"
85 1 "game:patrizer2:fleisch"
86 1 "game:patrizer2:holz"
87 1 "game:patrizer2:honig"
88 1 "game:patrizer2:leder"
89 1 "game:patrizer2:pech"
90 1 "game:patrizer2:ziegel"
91 34927 "amenity"
92 13 "name:et"
93 9 "name:la"
94 202 "is_in:country_code"
95 7 "name:pt"
96 3 "old_name:de"
97 43 "postal_code"
98 19 "alternate_name"
99 1387 "is_in:county"
100 1384 "is_in:municipality"
101 12 "old_name:en"
102 1 "old_name:pl"
103 185 "network"
104 560 "description"
105 86 "admin_level"
106 4 "address:a3"
107 4 "address:type"
108 2 "alt_name:vi"
109 13 "name:da"
110 10 "name:it"
111 6 "name:vi"
112 3 "official_name:ru"
113 1 "watch:Aleksandr_Dezhin"
114 26 "place_name"
115 2 "alt_name:sl"
116 1 "old_name:sk"
117 10 "name:chm"
118 13 "name:eo"
119 16 "name:fr"
120 12 "name:no"
121 4 "name:udm"
122 1 "wikipedia:de"
123 3 "watch:Aleksandr Dezhin"
124 1 "alt_name:sk"
125 6 "name:tat"
126 3 "name:ba"
127 2 "wikipedia:sl"
128 4 "name:mn"
129 1 "island"
130 1112 "denomination"
131 1489 "religion"
132 4 "name:yi"
133 8 "gns:ufi"
134 277 "url"
135 1 "freight"
136 4 "official_name:en"
137 1380 "is_in:province"
138 427 "source:population"
139 1096 "teryt:rm"
140 1098 "teryt:simc"
141 1096 "teryt:stan_na"
142 1103 "teryt:terc"
143 680 "teryt:updated_by"
144 3 "old_name:"
145 1659 "ref"
146 4006 "addr:city"
147 6 "is_capital"
148 3737 "addr:housenumber"
149 4131 "addr:street"
150 4 "mooring"
151 540 "leisure"
152 370 "sport"
153 1 "name:ab"
154 9 "name:ka"
155 23 "is_in:state"
156 8 "alt_name:en"
157 10503 "shop"
158 331 "fee"
159 513 "parking"
160 277 "dispensing"
161 1 "AND_nodes"
162 451 "landuse"
163 1308 "man_made"
164 935 "opening_hours"
165 4706 "natural"
166 4 "divider"
167 29 "junction"
168 13 "addr:streetnumber"
169 3 "name_old"
170 183 "station"
171 185 "transport"
172 143 "tunnel"
173 314 "waterway"
174 38 "abandoned"
175 1 "is_in:fi"
176 1356 "shelter"
177 9 "emergency"
178 16 "official_name"
179 2 "addr:suburb"
180 1397 "building"
181 4 "name:cv"
182 15 "name:ja"
183 5 "name:ko"
184 25 "name:zh"
185 4 "name:af"
186 6 "name:ar"
187 8 "name:ca"
188 6 "name:id"
189 4 "name:lb"
190 2 "name:ms"
191 3 "name:nn"
192 2 "name:oc"
193 7 "name:ro"
194 2 "name:scn"
195 4 "name:sr"
196 5 "name:tr"
197 3 "name:bg"
198 2 "name:pam"
199 12874 "power"
200 18 "power_source"
201 8 "name_2"
202 78 "loc_name"
203 236 "maxspeed"
204 2 "name:csb"
205 2 "name:cu"
206 1 "name:cz"
207 2 "name:io"
208 6 "name:kv"
209 2 "name:os"
210 5 "name:sah"
211 2 "name:tl"
212 2 "name:war"
213 55 "addr:housename"
214 461 "fuel:diesel"
215 396 "fuel:octane_91"
216 206 "fuel:octane_95"
217 63 "fuel:octane_98"
218 1 "name:mis"
219 13 "geomorphology"
220 4 "name:ce"
221 1 "name:jp"
222 8 "construction"
223 2 "is_in:krai"
224 772 "atm"
225 27 "source:name"
226 12 "wheelchair"
227 245 "fuel:lpg"
228 1 "name_old:be"
229 2 "name:ua"
230 1 "inat_name"
231 14 "city_served"
232 5 "gns:ADM1"
233 5 "gns:DSG"
234 5 "gns:UFI"
235 5 "gns:UNI"
236 9 "name:zh_pinyin"
237 19 "status"
238 29 "fuel:cng"
239 1 "oldname"
240 132 "service"
241 44 "fuel:octane_80"
242 4 "toll"
243 3 "capital_city"
244 3 "country_code_fips"
245 3 "country_code_iso3166_1_alpha_2"
246 2 "geonames:id"
247 1 "name:als"
248 1 "name:am"
249 1 "name:an"
250 1 "name:ang"
251 1 "name:arc"
252 1 "name:ast"
253 3 "name:az"
254 1 "name:bat-smg"
255 1 "name:bcl"
256 1 "name:bn"
257 1 "name:bpy"
258 3 "name:br"
259 1 "name:bs"
260 1 "name:bxr"
261 1 "name:ceb"
262 1 "name:chr"
263 1 "name:crh"
264 3 "name:cy"
265 1 "name:de_CH"
266 1 "name:diq"
267 1 "name:dsb"
268 1 "name:dv"
269 1 "name:dz"
270 14 "name:ee"
271 1 "name:fiu-vro"
272 1 "name:fo"
273 1 "name:frp"
274 3 "name:fy"
275 3 "name:ga"
276 3 "name:gd"
277 1 "name:gl"
278 1 "name:gn"
279 1 "name:gv"
280 1 "name:hak"
281 3 "name:haw"
282 1 "name:hi"
283 1 "name:hif"
284 1 "name:hsb"
285 1 "name:ht"
286 2 "name:hy"
287 1 "name:ia"
288 1 "name:ie"
289 1 "name:ilo"
290 1 "name:jbo"
291 1 "name:jv"
292 1 "name:kg"
293 4 "name:kk"
294 1 "name:kn"
295 1 "name:ku"
296 1 "name:kw"
297 1 "name:ky"
298 3 "name:li"
299 1 "name:lij"
300 1 "name:ln"
301 1 "name:mk"
302 1 "name:ml"
303 1 "name:mr"
304 1 "name:mt"
305 1 "name:na"
306 1 "name:nah"
307 1 "name:nds"
308 1 "name:nds-nl"
309 1 "name:ne"
310 1 "name:new"
311 1 "name:nov"
312 1 "name:pih"
313 1 "name:pms"
314 1 "name:pnb"
315 1 "name:ps"
316 1 "name:qu"
317 1 "name:rm"
318 1 "name:rmy"
319 1 "name:roa-rup"
320 1 "name:sa"
321 1 "name:sco"
322 1 "name:sh"
323 1 "name:simple"
324 1 "name:sq"
325 1 "name:sw"
326 1 "name:szl"
327 1 "name:ta"
328 1 "name:te"
329 1 "name:tg"
330 3 "name:th"
331 1 "name:tk"
332 1 "name:tpi"
333 3 "name:tt"
334 1 "name:ug"
335 1 "name:ur"
336 1 "name:uz"
337 1 "name:vec"
338 1 "name:vo"
339 1 "name:wo"
340 1 "name:wuu"
341 1 "name:xal"
342 1 "name:zh-min-nan"
343 1 "name:zh-yue"
344 3 "official_name:be"
345 2 "official_name:br"
346 3 "official_name:el"
347 3 "official_name:es"
348 3 "official_name:et"
349 3 "official_name:id"
350 3 "official_name:lt"
351 2 "official_name:lv"
352 3 "official_name:pl"
353 2 "official_name:pt"
354 2 "official_name:sk"
355 3 "official_name:sv"
356 3 "official_name:vi"
357 2 "watch:hakan"
358 37 "supervised"
359 1 "software"
360 7 "restriction"
361 17 "direction"
362 77 "moor"
363 6 "seamark"
364 26 "food"
365 2 "denomination:ru"
366 549 "cuisine"
367 4 "description:be"
368 148 "bridge"
369 87 "fuel:octane_92"
370 52 "military"
371 2 "OKATO"
372 144 "attribution"
373 144 "koatuu"
374 1 "denomination:fi"
375 2 "kadastr:ru"
376 3 "name:su"
377 503 "phone"
378 1 "source:highway"
379 4 "oneway"
380 140 "address:postindex"
381 5 "cargo"
382 33 "vehicle"
383 13 "route"
384 1 "name:sms"
385 1 "old_name:fi"
386 43 "aerialway"
387 1 "name_engl"
388 2 "point"
389 1 "place_numbers"
390 19 "wikipedia:ru"
391 4 "alternate_name:en"
392 13 "uic_ref"
393 111 "garmin:extnode"
394 101 "information"
395 14 "fuel:propane"
396 7 "floor"
397 1 "old_name:ru"
398 29 "last_eruption"
399 2 "ont_name"
400 5 "bus_routes"
401 106 "capacity"
402 1 "name_3"
403 2 "is_in:country_ru"
404 53 "internet_access"
405 68 "tower:type"
406 50 "FIXME"
407 4 "addr:state"
408 3 "place:en"
409 2 "is_in:town"
410 3 "date"
411 1 "abbr_name"
412 33 "garmin_type"
413 1 "peak"
414 4 "fuel:HGV_diesel"
415 1 "drivein"
416 4 "lock"
417 1 "covered"
418 1 "fuel:gasoline"
419 6 "waste"
420 1 "moped"
421 2 "religion:ru"
422 7 "charge"
423 71 "payment:telephone_cards"
424 17 "addr:full"
425 1 "name:in"
426 1 "ent_name"
427 197 "route_ref"
428 43 "key"
429 1 "private"
430 8 "building_entrance:auto_open"
431 16 "gns:dsg"
432 16 "gns:uni"
433 2 "name_alt"
434 1 "name_old:ru"
435 13 "border_type"
436 10 "fuel:biogas"
437 21 "disabled_spaces"
438 17 "address"
439 1 "warehouse2"
440 1 "boat"
441 6 "goods"
442 1 "motorboat"
443 10 "psv"
444 7 "noname"
445 11 "payment:coins"
446 7 "payment:notes"
447 1 "vending"
448 5 "alt_name:ru"
449 5 "description:ru"
450 77 "location"
451 1 "currency"
452 1 "postcode"
453 1 "description:pl"
454 10 "castle_type"
455 10 "ruins"
456 2 "loc_name:ru"
457 35 "traffic_sign:forward"
458 2 "bus"
459 9 "motor_vehicle"
460 18 "traffic_sign:backward"
461 60 "image"
462 17 "_name"
463 18 "building:levels"
464 13 "piste:type"
465 2 "addr:building"
466 2 "addr:floor"
467 20 "country"
468 150 "telephone"
469 25 "notes:official"
470 25 "source:fi"
471 25 "source:no"
472 6 "iso_3166_2"
473 4 "is_in:sea"
474 1 "unesco_world_heritage_site"
475 41 "stars"
476 1 "is:country_code"
477 2 "is_in:ocean"
478 1 "name:tyv"
479 6 "is_in:mountain_range"
480 1 "books"
481 1 "polulation"
482 1 "name:EN"
483 109 "label"
484 26 "fireplace"
485 2 "adm2"
486 1 "old"
487 1 "name:mo"
488 1 "source:old_name"
489 2 "name:mhr"
490 5 "lanes"
491 40 "mountain_pass"
492 3 "fuel:disel"
493 33 "brand"
494 5 "email"
495 6 "fax"
496 27 "public_transport"
497 26 "subway"
498 8 "wifi"
499 4 "okato:rule"
500 5 "depth"
501 31 "material"
502 16 "fuel:octane_76"
503 1 "value"
504 1 "artist_name"
505 12 "artwork_type"
506 2 "vechicle"
507 339 "bench"
508 7 "contact:email"
509 6 "fuel:octane"
510 11 "addr:phone"
511 5 "recycling:cans"
512 6 "recycling:glass"
513 5 "recycling:paper"
514 3 "recycling:scrap_metal"
515 1 "hsv"
516 1 "name:mrj"
517 5 "car"
518 7 "disused"
519 1 "name:trans"
520 1 "truck"
521 80 "wpt_description"
522 99 "wpt_symbol"
523 17 "kiosk"
524 1 "voltage"
525 14 "bunker_type"
526 1 "wikipedia:etn"
527 3 "payment:electronic_purses"
528 8 "wood"
529 2 "addr:interpolation"
530 21 "name_int"
531 2 "site"
532 1 "index"
533 1 "school"
534 2 "name:local"
535 4 "local_name"
536 1 "traffic"
537 1 "historical"
538 2 "traffic_signals"
539 15 "quality"
540 1 "official_name:fr"
541 1 "official_name:it"
542 1 "wikipedia:fr"
543 2 "name:by"
544 1 "official_name:af"
545 1 "official_name:ca"
546 1 "official_name:lb"
547 1 "official_name:sl"
548 51 "number"
549 7 "contact:phone"
550 168 "surface"
551 1 "name:old"
552 39 "height"
553 37 "name:botanical"
554 1 "tower"
555 7 "bus_stop:side"
556 1407 "species"
557 1 "wikipedia:lt"
558 5 "living_street"
559 2 "openfire"
560 2 "tents"
561 1 "type:fi"
562 1 "fuel:propan"
563 3 "hour_off"
564 3 "hour_on"
565 3 "address:country"
566 1 "x-point-id"
567 1 "open_hours"
568 3 "so"
569 1 "incline"
570 10 "transport stop"
571 3 "operator:ru"
572 1 "fuel:methanol"
573 3 "narrow"
574 3 "piste:difficulty"
575 1 "omkum:code"
576 1 "dogs"
577 28 "trolley_wire"
578 1 "wikipedia:pl"
579 8 "source:url"
580 1 "fenced"
581 1 "passengers"
582 1 "note:route"
583 11 "office"
584 2 "room"
585 1 "poi"
586 1 "description:en"
587 38 "tactile_paving"
588 3 "full_name"
589 1 "priority"
590 2 "operator:en"
591 3 "lat"
592 3 "lon"
593 98 "source_ref"
594 9 "capacity:disabled"
595 4 "capacity:parent"
596 5 "capacity:women"
597 40 "park_ride"
598 4 "recycling:clothes"
599 1 "name_en"
600 1 "fiel:lpg"
601 1 "drinking_water"
602 91 "heating"
603 92 "toilets"
604 11 "description:fi"
605 1 "garmin:extnodge"
606 1 "_сladr:code"
607 1 "recycling:glass_bottles"
608 297 "Время"
609 3 "passing_places"
610 1 "closed"
611 1 "reconstruction"
612 1 "game"
613 2 "level"
614 5 "traffic_signals:sound"
615 1 "сladr:note"
616 1 "building:use"
617 2 "maxheight"
618 3 "fuel"
619 3 "zoo"
620 21 "colour"
621 1 "name:int"
622 2 "route_ref:trolleybus"
623 162 "smoothness"
624 25 "lit"
625 1 "zip"
626 1 "seamark:light:1"
627 1 "seamark:light:2"
628 1 "seamark:light:3"
629 1 "seamark:light:4"
630 1 "seamark:light:character"
631 2 "drinkable"
632 1 "pump"
633 1 "bank"
634 3 "undefined"
635 2 "map_type"
636 1 "Участковый"
637 1 "gate:open"
638 1 "Гагарина"
639 1 "tel"
640 1 "man_mand"
641 2 "function"
642 1 "female"
643 1 "male"
644 1 "power_rating"
645 3 "resource"
646 3 "recycling:batteries"
647 595 "addr:settlement"
648 3 "adm_name"
649 1 "note:ru"
650 1 "
name"
651 1 "name:be-tarask"
652 2 "mtb:description"
653 1 "name_loc"
654 1 "phone2"
655 1 "phone3"
656 3 "e-mail"
657 32 "bus_ruz_net_id"
658 1 "alt_name:chm"
659 2 "marked_trail_yellow"
660 15 "aera"
661 3 "marked_trail_black"
662 1 "marked_trail_red"
663 1 "marked_trail_green"
664 1 "gate"
665 1 "communications_transponder:service"
666 1 "tower:construction"
667 3 "is_in:region"
668 17 "backrest"
669 134 "sourcedb:id"
670 9 "sourcedb:type"
671 6 "landsat:id"
672 1 "pedestrian"
673 6 "display"
674 6 "support"
675 6 "visibility"
676 9 "branch"
677 1 "maxspeed:practical"
678 2 "denominatons"
679 1 "sorting_name"
680 1 "diplomatic"
681 7 "target"
682 1 "is_in:okrug"
683 1 "subsea"
684 1 "notes"
685 1 "levels"
686 1 "node"
687 2 "маршруты"
688 3 "note2"
689 1 "Bogoyavleniye"
690 2 "fuel:e10"
691 3 "fuel:e85"
692 1 "Лесоарк"
693 1 "№"
694 2 "Автобусы"
695 2 "Маршрутки"
696 2 "Тралейбусы"
697 1 "name2"
698 2 "schelter"
699 2 "примечание"
700 8 "board_type"
701 3 "guide"
702 3 "website_description:faq"
703 3 "website:faq"
704 2 "Picture"
705 1 "denjmination"
706 1 "custom"
707 1 "Custom code"
708 2 "is_in:district"
709 1 "denomination_"
710 1 "name_"
711 1 "name:en_"
712 1 "religion_"
713 1 "bus_road"
714 2 "fuel:octane_93"
715 1 "cladr:namt"
716 1 "addr:"
717 1 "restruktion"
718 2 "turning_point"
719 1 "дом 59"
720 2 "mam_made"
721 1 "addr:сountry"
722 1 "addr:ru"
723 1 "shop:ru"
724 2 "whitewater"
725 1 "hiking"
726 1 "map_size"
727 2 "Школа ДОСААФ"
728 1 "power_sourse"
729 1 "отделение милиции Хабаровск-2"
730 8 "verified"
731 8 "www"
732 1 "harbour"
733 1 "harbour:category"
734 1 "harbour:entrance:LAT"
735 1 "harbour:entrance:LON"
736 1 "harbour:name"
737 1 "harbour:namenational"
738 1 "harbour:phone"
739 1 "harbour:size"
740 1 "harbour:web"
741 1 "crane:mobile"
742 2 "thermometer"
743 1 "addr:suffix"
744 1 "game:patrizier2:eisenerz"
745 1 "game:patrizier2:felle"
746 1 "game:patrizier2:fisch"
747 1 "game:patrizier2:getreide"
748 1 "game:patrizier2:hanf"
749 1 "game:patrizier2:holz"
750 1 "game:patrizier2:honig"
751 1 "game:patrizier2:wolle"
752 2 "Summer residences"
753 1 "reklama"
754 1 "reklama_type"
755 1 "
website"
756 1 "alt"
757 3 "backerst"
758 2 "water"
759 1 "wate"
760 1 "cafe"
761 1 "naturism"
762 1 "Калi ласка"
763 1 "internet_acess"
764 1 "sportbar"
765 1 "opening"
766 14 "muuseum_id"
767 1 "recreation:health"
768 2 "code"
769 2 "frequency"
770 7 "barier"
771 1 "name:short"
772 8 "speedlimit"
773 1 "EHAK:countycode"
774 1 "note:ee"
775 1 "free"
776 1 "gateway"