I want to list top contributors by edits (not by changelog) from certain region. Ideally I want an output of the form
user1 - X areas - Y ways - Z nodes
user 2 - X2 areas - Y2 ways - Z2 nodes
Is there any script to do that?
I want to list top contributors by edits (not by changelog) from certain region. Ideally I want an output of the form
user1 - X areas - Y ways - Z nodes
user 2 - X2 areas - Y2 ways - Z2 nodes
Is there any script to do that?
osmium extract (do not forget the -H command line switch)
python count.py myfile.osh.pbf (requires pyosmium):
import sys
import osmium
class CountMapperHandler(osmium.SimpleHandler):
    """Tally how many way versions each user name appears on.

    ``edit_count`` maps a user name to the number of ways delivered to the
    ``way`` callback with that user. Only the ``way`` callback is defined,
    so nodes and relations do not contribute to the tally.
    """

    def __init__(self):
        super().__init__()
        self.edit_count = {}

    def way(self, w):
        """Add one to the running total of the user recorded on ``w``."""
        tally = self.edit_count
        tally[w.user] = tally.get(w.user, 0) + 1
if __name__ == '__main__':
    # Feed the file named by the first command line argument through the
    # counting handler.
    mch = CountMapperHandler()
    mch.apply_file(sys.argv[1])

    # Ascending by edit count, so the busiest users are printed last.
    by_count = list(mch.edit_count.items())
    by_count.sort(key=lambda pair: pair[1])
    for user, edits in by_count:
        print(f"{user}: {edits}")
(Splitting this by node/way/relation edits is left as an exercise to the reader.)
(see also this previous related question)
Thanks a lot, @woodpeck . Can you give me some hints to extract my region from “islas-baleares-internal.osh.pbf” with osmium extract?
I ran osmium extract --bbox 2.604,39.598,2.606,39.600 islas-baleares-internal.osh.pbf -o sonpuig.pbf because an AI says the OpenStreetMap bbox is 2.604,39.598,2.606,39.600. But the output gives me nothing:
$ osmium extract --bbox 2.604,39.598,2.606,39.600 islas-baleares-internal.osh.pbf -o sonpuig.pbf
[======================================================================] 100%
Node ID twice in input. Maybe you are using a history or change file?
This command expects the input file to be ordered: First nodes in order of ID,
then ways in order of ID, then relations in order of ID.
python yourscript.py sonpuig.pbf
You did it almost right, but if you re-read my message above, there’s a mention of a command line flag that you should not forget.
This script could help users
#!/usr/bin/env python3
"""
edits_by_user_from_osmurl.py
Uso:
python edits_by_user_from_osmurl.py archivo_historial.pbf osm_url fecha_inicio fecha_fin [--json-output salida.json] [--console]
Ejemplo de osm_url:
https://www.openstreetmap.org/#map=15/39.60180/2.59501
Asume resolución 1920x1080 para calcular el bbox a partir de zoom/lat/lon.
Fechas en formato YYYY-MM-DD (incluye fecha_inicio y fecha_fin).
"""
import re
import osmium as o
import datetime
from collections import defaultdict
import json
import math
import pytz
import argparse
# Command line arguments.
def _parse_day(text):
    """Convert a YYYY-MM-DD argument into a naive ``datetime`` at midnight.

    Raises ``argparse.ArgumentTypeError`` for malformed input so that argparse
    reports a usage error instead of the script dying with a traceback.
    """
    try:
        return datetime.datetime.strptime(text, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"fecha inválida {text!r}; se espera YYYY-MM-DD"
        ) from None


parser = argparse.ArgumentParser(description="Resumen de ediciones por usuario desde archivo .pbf y URL OSM")
parser.add_argument("pbf_file")
parser.add_argument("osm_url")
parser.add_argument("date_from", type=_parse_day)
parser.add_argument("date_to", type=_parse_day)
parser.add_argument("--json-output", dest="json_output")
parser.add_argument("--console", action="store_true", dest="console")
args = parser.parse_args()

# An inverted range would otherwise match nothing and look like "no edits".
if args.date_to < args.date_from:
    parser.error("fecha_fin es anterior a fecha_inicio")

pbf_file = args.pbf_file
osm_url = args.osm_url
date_from = args.date_from
# The end date is inclusive: move the bound to the following midnight so the
# window can be tested as the half-open interval [date_from, date_to).
date_to = args.date_to + datetime.timedelta(days=1)
json_output = args.json_output
console = args.console
# Assumed screen size in pixels, used to derive the bbox from the map centre.
SCREEN_W = 1920
SCREEN_H = 1080

# Parse a URL fragment of the form #map=zoom/lat/lon.
# The coordinate pattern only accepts text that float() can parse (optional
# sign, digits, at most one decimal point); a loose character class would let
# values such as "1.2.3" through and crash later with a ValueError.
_COORD = r"-?(?:\d+\.?\d*|\.\d+)"
m = re.search(rf"#map=(\d+)/({_COORD})/({_COORD})", osm_url)
if not m:
    # SystemExit with a message writes it to stderr and exits with status 1.
    raise SystemExit("URL OSM inválida. Debe contener '#map=zoom/lat/lon'")
zoom = int(m.group(1))
center_lat = float(m.group(2))
center_lon = float(m.group(3))
# Convertir lat/lon/zoom+screen -> bbox (lon_min, lat_min, lon_max, lat_max)
def latlon_to_pixel(lat, lon, zoom, tile_size=256):
    """Project a latitude/longitude onto Web Mercator pixel coordinates.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.
        zoom: Zoom level; the world is ``tile_size * 2**zoom`` pixels wide.
        tile_size: Edge length of one tile in pixels (256 by default).

    Returns:
        ``(x, y)`` as floats, with x growing eastwards from longitude -180
        and y growing southwards.
    """
    world_px = tile_size * (2 ** zoom)
    sin_lat = math.sin(math.radians(lat))
    # Clamp so the logarithm below stays finite at the poles.
    sin_lat = min(max(sin_lat, -0.9999), 0.9999)
    x = world_px * (0.5 + lon / 360.0)
    y = world_px * (0.5 - math.log((1 + sin_lat) / (1 - sin_lat)) / (4 * math.pi))
    return x, y
def pixel_to_latlon(x, y, zoom, tile_size=256):
    """Convert Web Mercator pixel coordinates back to latitude/longitude.

    Args:
        x: Horizontal pixel position, 0 at longitude -180.
        y: Vertical pixel position, growing southwards.
        zoom: Zoom level; the world is ``tile_size * 2**zoom`` pixels wide.
        tile_size: Edge length of one tile in pixels (256 by default).

    Returns:
        ``(lat, lon)`` in degrees.
    """
    world_px = tile_size * (2 ** zoom)
    lon = x / world_px * 360.0 - 180.0
    # Mercator y rescaled to [-pi, pi], positive towards the north.
    merc_y = math.pi - 2.0 * math.pi * y / world_px
    lat = math.degrees(math.atan(math.sinh(merc_y)))
    return lat, lon
# Pixel position of the map centre at the requested zoom.
cx, cy = latlon_to_pixel(center_lat, center_lon, zoom)

# Half the assumed screen size reaches from the centre to each edge.
half_w, half_h = SCREEN_W / 2.0, SCREEN_H / 2.0
nw_x, nw_y = cx - half_w, cy - half_h
se_x, se_y = cx + half_w, cy + half_h

# Convert the north-west and south-east screen corners back to degrees.
nw_lat, nw_lon = pixel_to_latlon(nw_x, nw_y, zoom)
se_lat, se_lon = pixel_to_latlon(se_x, se_y, zoom)

# Normalise so that min_* <= max_* whichever corner ends up larger.
min_lon, max_lon = min(nw_lon, se_lon), max(nw_lon, se_lon)
min_lat, max_lat = min(se_lat, nw_lat), max(se_lat, nw_lat)
def in_bbox(lon, lat):
    """Return True when (lon, lat) lies inside the module-level bbox.

    The bounds ``min_lon``/``max_lon``/``min_lat``/``max_lat`` are inclusive.
    """
    lon_inside = min_lon <= lon <= max_lon
    return lon_inside and min_lat <= lat <= max_lat
# Accepted edits grouped by user name; HistoryHandler appends to it.
edits_by_user = defaultdict(list)


class HistoryHandler(o.SimpleHandler):
    """Record every object version edited inside the bbox and date window.

    Each accepted node, way or relation version is appended to the
    module-level ``edits_by_user`` under the name of the user who made it.
    """

    def __init__(self):
        super().__init__()
        # The window is fixed for the whole run, so convert it to UTC once
        # instead of once per object version read from the file.
        self._window_start = date_from.replace(tzinfo=pytz.UTC)
        self._window_end = date_to.replace(tzinfo=pytz.UTC)

    @staticmethod
    def _way_touches_bbox(way):
        """Return True when any node of the way with a valid location is in the bbox."""
        try:
            return any(
                n.location.valid() and in_bbox(n.location.lon, n.location.lat)
                for n in way.nodes
            )
        except Exception:
            # Deliberate best-effort kept from the original: a way whose
            # nodes cannot be inspected is treated as outside the area.
            return False

    @staticmethod
    def _relation_touches_bbox(rel):
        """Return True when the relation's ``bounds`` box overlaps the bbox.

        NOTE(review): pyosmium relations do not appear to expose a ``bounds``
        attribute. If that is right, this always returns False and relation
        edits are never recorded. Confirm against the pyosmium API reference.
        """
        bounds = getattr(rel, "bounds", None)
        if not bounds:
            return False
        try:
            return not (
                bounds.min_lon > max_lon
                or bounds.max_lon < min_lon
                or bounds.min_lat > max_lat
                or bounds.max_lat < min_lat
            )
        except (AttributeError, TypeError):
            # A bounds object without usable corners counts as "no overlap".
            return False

    def process_obj(self, obj, obj_type):
        """Store ``obj`` in ``edits_by_user`` if it passes the date and area filters.

        Args:
            obj: The object version handed over by the reader.
            obj_type: ``"node"``, ``"way"`` or ``"relation"``.
        """
        ts = getattr(obj, "timestamp", None)
        if ts is None:
            return
        if ts.tzinfo is None:
            # Naive timestamps are treated as UTC so they compare with the window.
            ts = ts.replace(tzinfo=pytz.UTC)
        if not (self._window_start <= ts < self._window_end):
            return

        # Coordinates are only recorded for nodes; they stay None otherwise.
        lat = lon = None
        if obj_type == "node":
            if not obj.location.valid():
                return
            lon = obj.location.lon
            lat = obj.location.lat
            in_area = in_bbox(lon, lat)
        elif obj_type == "way":
            in_area = self._way_touches_bbox(obj)
        elif obj_type == "relation":
            in_area = self._relation_touches_bbox(obj)
        else:
            in_area = False
        if not in_area:
            return

        user = obj.user or "Anonymous"
        edits_by_user[user].append({
            "type": obj_type,
            "id": obj.id,
            "version": obj.version,
            "changeset": getattr(obj, "changeset", None),
            "timestamp": ts.isoformat(),
            "lat": lat,
            "lon": lon,
            "tags": dict(obj.tags) if hasattr(obj, "tags") else {},
        })

    def node(self, n):
        """Reader callback for node versions."""
        self.process_obj(n, "node")

    def way(self, w):
        """Reader callback for way versions."""
        self.process_obj(w, "way")

    def relation(self, r):
        """Reader callback for relation versions."""
        self.process_obj(r, "relation")
# Stream the file through the handler; locations=True makes node coordinates
# available on ways.
# NOTE(review): with a history (.osh) file the location cache presumably keeps
# a single position per node id, so a way version may be tested against a
# position from a different point in time. Confirm against the pyosmium docs.
handler = HistoryHandler()
print(f"Procesando {pbf_file} — bbox: {min_lon:.7f},{min_lat:.7f} to {max_lon:.7f},{max_lat:.7f}")
handler.apply_file(pbf_file, locations=True)

# Per-user tallies by object type, built in a single pass over each user's edits.
_TYPE_TO_KEY = {"node": "nodes", "way": "ways", "relation": "relations"}
counters = {}
for user in sorted(edits_by_user):
    tally = {"nodes": 0, "ways": 0, "relations": 0}
    for edit in edits_by_user[user]:
        tally[_TYPE_TO_KEY[edit["type"]]] += 1
    tally["total"] = tally["nodes"] + tally["ways"] + tally["relations"]
    counters[user] = tally

# Global totals across all users.
total_all = sum(c["total"] for c in counters.values())
total_nodes = sum(c["nodes"] for c in counters.values())
total_ways = sum(c["ways"] for c in counters.values())
total_relations = sum(c["relations"] for c in counters.values())

# One record per user, including that user's share of all edits.
users_with_percent = [
    {
        "user": user,
        "edits_count": c["total"],
        "percent_total": round((c["total"] / total_all * 100.0) if total_all > 0 else 0.0, 2),
        "nodes": c["nodes"],
        "ways": c["ways"],
        "relations": c["relations"],
        "edits": edits_by_user[user],
    }
    for user, c in counters.items()
]

# Rank by the exact edit count, not the rounded percentage: after rounding,
# users with different counts can share a value (for example 0.0) and would
# otherwise be left in alphabetical order. Ties are broken by user name.
users_with_percent.sort(key=lambda rec: (-rec["edits_count"], rec["user"]))
out = users_with_percent

# Totals section that accompanies the per-user list in the output.
summary = {
    "total_users": len(counters),
    "total_edits": total_all,
    "total_nodes": total_nodes,
    "total_ways": total_ways,
    "total_relations": total_relations
}
# Output. --console and --json-output are independent: the original nesting
# ignored --json-output whenever --console was also given.
if console:
    # One line per user: total edits (share of all edits), then the
    # node / way / relation breakdown.
    for u in out:
        print(f"{u['user']}: Numero de ediciones {u['edits_count']} ({u['percent_total']:.2f}%), {u['nodes']} nodos {u['ways']} ways {u['relations']} relations")
    # Grand totals.
    print("-----")
    print(f"TOTAL: {summary['total_edits']} ediciones, {summary['total_nodes']} nodos, {summary['total_ways']} ways, {summary['total_relations']} relations (usuarios: {summary['total_users']})")

if json_output:
    # Write a single object holding the summary and the per-user list.
    to_write = {"summary": summary, "users": out}
    with open(json_output, "w", encoding="utf-8") as fh:
        json.dump(to_write, fh, ensure_ascii=False, indent=2)
    print("Salida JSON escrita en", json_output)
elif not console:
    # Neither flag given: dump the same JSON document to stdout.
    print(json.dumps({"summary": summary, "users": out}, ensure_ascii=False, indent=2))
It is invoked as:
python h.llista.edicions-f.py islas-baleares-internal.osh.pbf "https://www.openstreetmap.org/#map=14/39.59646/2.56333" 2008-01-01 2008-12-31 --console