Find all users who have ever edited a certain region

I want to list the top contributors by edits (not by changelog) from a certain region. Ideally, I want output of the form

user1 - X areas - Y ways - Z nodes
user 2 - X2 areas - Y2 ways - Z2 nodes

Is there any script to do that?

  1. download the full history file (.osh.pbf) that contains the region of interest; in this case, https://osm-internal.download.geofabrik.de/europe/spain/islas-baleares-internal.osh.pbf
  2. if required, make an extract for a smaller area from that using osmium extract (do not forget -H command line switch)
  3. run the following small python program as python count.py myfile.osh.pbf (requires pyosmium):
import sys
import osmium

class CountMapperHandler(osmium.SimpleHandler):
    """Tally, per user name, how many ways are passed to the ``way`` callback.

    ``edit_count`` maps a user name to the number of ways seen that carry
    that name.
    """

    def __init__(self):
        super().__init__()
        self.edit_count = {}

    def way(self, w):
        """Add one to the tally of the user recorded on way ``w``."""
        author = w.user
        self.edit_count[author] = self.edit_count.get(author, 0) + 1

if __name__ == '__main__':
    # Run the file named by the first command-line argument through the handler.
    counter = CountMapperHandler()
    counter.apply_file(sys.argv[1])

    # Ascending by count, so the most active users are printed last.
    ranking = list(counter.edit_count.items())
    ranking.sort(key=lambda pair: pair[1])

    for name, total in ranking:
        print(f"{name}: {total}")

(Splitting this by node/way/relation edits is left as an exercise to the reader :wink:)

2 Likes

(see also this previous related question)

1 Like

Thanks a lot, @woodpeck . Can you give me some hints to extract my region from “islas-baleares-internal.osh.pbf” with osmium extract?

I run osmium extract --bbox 2.604,39.598,2.606,39.600 islas-baleares-internal.osh.pbf -o sonpuig.pbf because an AI says the bbox in OpenStreetMap is 2.604,39.598,2.606,39.600. But the output gives me nothing:

$ osmium extract --bbox 2.604,39.598,2.606,39.600 islas-baleares-internal.osh.pbf  -o sonpuig.pbf
[======================================================================] 100% 
Node ID twice in input. Maybe you are using a history or change file?
This command expects the input file to be ordered: First nodes in order of ID,
then ways in order of ID, then relations in order of ID.
python yourscript.py sonpuig.pbf 

You did it almost right, but if you re-read my message above there’s a mention of a command line flag that you should not forget :wink:

1 Like

This script could help users

#!/usr/bin/env python3
"""
edits_by_user_from_osmurl.py

Uso:
  python edits_by_user_from_osmurl.py archivo_historial.pbf osm_url fecha_inicio fecha_fin [--json-output salida.json] [--console]

Ejemplo de osm_url:
  https://www.openstreetmap.org/#map=15/39.60180/2.59501

Asume resolución 1920x1080 para calcular el bbox a partir de zoom/lat/lon.
Fechas en formato YYYY-MM-DD (incluye fecha_inicio y fecha_fin).
"""
import re
import osmium as o
import datetime
from collections import defaultdict
import json
import math
import pytz
import argparse

# Command-line arguments: four positionals followed by two optional switches.
parser = argparse.ArgumentParser(description="Resumen de ediciones por usuario desde archivo .pbf y URL OSM")
for positional in ("pbf_file", "osm_url", "date_from", "date_to"):
    parser.add_argument(positional)
parser.add_argument("--json-output", dest="json_output")
parser.add_argument("--console", dest="console", action="store_true")
args = parser.parse_args()

pbf_file, osm_url = args.pbf_file, args.osm_url
json_output, console = args.json_output, args.console

# Dates are parsed as naive midnights; one day is added to the end date so
# that the whole final day falls inside the half-open [date_from, date_to) range.
date_from = datetime.datetime.strptime(args.date_from, "%Y-%m-%d")
date_to = datetime.datetime.strptime(args.date_to, "%Y-%m-%d") + datetime.timedelta(days=1)

# Assumed screen size, in pixels
SCREEN_W, SCREEN_H = 1920, 1080

# Pull zoom, latitude and longitude out of the "#map=Z/lat/lon" URL fragment
m = re.search(r"#map=(\d+)\/([\-0-9\.]+)\/([\-0-9\.]+)", osm_url)
if m is None:
    print("URL OSM inválida. Debe contener '#map=zoom/lat/lon'")
    raise SystemExit(1)

zoom_text, lat_text, lon_text = m.groups()
zoom = int(zoom_text)
center_lat = float(lat_text)
center_lon = float(lon_text)

# Convert lat/lon/zoom + screen size -> bbox (lon_min, lat_min, lon_max, lat_max)
def latlon_to_pixel(lat, lon, zoom):
    """Map a latitude/longitude in degrees to (x, y) pixel coordinates.

    The world is treated as a square of ``256 * 2**zoom`` pixels per side,
    with x growing eastwards and y growing southwards. The sine of the
    latitude is clamped to [-0.9999, 0.9999] so the logarithm stays finite
    near the poles.
    """
    world_size = 256 * (2**zoom)

    sin_lat = math.sin(math.radians(lat))
    if sin_lat < -0.9999:
        sin_lat = -0.9999
    elif sin_lat > 0.9999:
        sin_lat = 0.9999

    x = world_size * (0.5 + lon / 360.0)
    y = world_size * (0.5 - math.log((1 + sin_lat) / (1 - sin_lat)) / (4 * math.pi))
    return x, y

def pixel_to_latlon(x, y, zoom):
    """Map (x, y) pixel coordinates back to ``(lat, lon)`` in degrees.

    Uses the same ``256 * 2**zoom`` pixel world square as ``latlon_to_pixel``.
    Note the return order is latitude first, longitude second.
    """
    world_size = 256 * (2**zoom)
    lon = x / world_size * 360.0 - 180.0
    # Mercator y expressed in radians: pi at the top edge, -pi at the bottom.
    merc_y = math.pi - 2.0 * math.pi * y / world_size
    lat = math.degrees(math.atan(math.sinh(merc_y)))
    return lat, lon

# Centre of the view in pixels, then the two opposite corners of a
# SCREEN_W x SCREEN_H window around it converted back to degrees.
cx, cy = latlon_to_pixel(center_lat, center_lon, zoom)
half_w, half_h = SCREEN_W / 2.0, SCREEN_H / 2.0

nw_lat, nw_lon = pixel_to_latlon(cx - half_w, cy - half_h, zoom)
se_lat, se_lon = pixel_to_latlon(cx + half_w, cy + half_h, zoom)

# Bounding box limits used by the area filter.
min_lon, max_lon = min(nw_lon, se_lon), max(nw_lon, se_lon)
min_lat, max_lat = min(se_lat, nw_lat), max(se_lat, nw_lat)

def in_bbox(lon, lat):
    """Return True when (lon, lat) lies inside the module-level bbox, edges included."""
    return min_lon <= lon <= max_lon and min_lat <= lat <= max_lat

# user name -> list of edit records appended by HistoryHandler
edits_by_user = defaultdict(list)


class HistoryHandler(o.SimpleHandler):
    """Collect every object that passes the date-window and bbox filters.

    Matching objects are appended, as plain dicts, to the module-level
    ``edits_by_user`` mapping under the object's user name.
    """

    def process_obj(self, obj, obj_type):
        """Record ``obj`` if its timestamp and position pass both filters.

        ``obj_type`` is "node", "way" or "relation" and selects how the
        area test is carried out. Objects without a timestamp, outside
        [date_from, date_to) or outside the bbox are skipped silently.
        """
        stamp = getattr(obj, "timestamp", None)
        if stamp is None:
            return
        # A naive timestamp is treated as UTC so it can be compared below.
        if stamp.tzinfo is None:
            stamp = stamp.replace(tzinfo=pytz.UTC)
        window_start = date_from.replace(tzinfo=pytz.UTC)
        window_end = date_to.replace(tzinfo=pytz.UTC)
        if stamp < window_start or stamp >= window_end:
            return

        lat = lon = None
        in_area = False
        if obj_type == "node":
            location = obj.location
            if location.valid():
                lon, lat = location.lon, location.lat
                in_area = in_bbox(lon, lat)
        elif obj_type == "way":
            # A way counts as soon as one of its nodes with a valid location
            # is inside the bbox. Any error while scanning leaves it excluded.
            try:
                in_area = any(
                    ref.location.valid() and in_bbox(ref.location.lon, ref.location.lat)
                    for ref in obj.nodes
                )
            except Exception:
                pass
        elif obj_type == "relation":
            # NOTE(review): this relies on the relation exposing ``bounds``;
            # if that attribute does not exist, the except below swallows the
            # error and relations are never counted — confirm against pyosmium.
            try:
                bbox = obj.bounds
                if bbox and not (
                    bbox.min_lon > max_lon
                    or bbox.max_lon < min_lon
                    or bbox.min_lat > max_lat
                    or bbox.max_lat < min_lat
                ):
                    in_area = True
            except Exception:
                pass

        if not in_area:
            return

        user = obj.user or "Anonymous"
        # lat/lon stay None for ways and relations; only nodes fill them in.
        edits_by_user[user].append({
            "type": obj_type,
            "id": obj.id,
            "version": obj.version,
            "changeset": getattr(obj, "changeset", None),
            "timestamp": stamp.isoformat(),
            "lat": lat,
            "lon": lon,
            "tags": dict(obj.tags) if hasattr(obj, "tags") else {},
        })

    def node(self, n):
        """Handler callback for nodes."""
        self.process_obj(n, "node")

    def way(self, w):
        """Handler callback for ways."""
        self.process_obj(w, "way")

    def relation(self, r):
        """Handler callback for relations."""
        self.process_obj(r, "relation")

# Announce the input file and the computed bbox, then run the file through the handler.
# NOTE(review): with a history file the node-location index presumably keeps a
# single location per node id, so way geometry may not reflect the way
# version's own date — confirm against the pyosmium documentation.
banner = f"Procesando {pbf_file} — bbox: {min_lon:.7f},{min_lat:.7f} to {max_lon:.7f},{max_lat:.7f}"
print(banner)
handler = HistoryHandler()
handler.apply_file(pbf_file, locations=True)

# Per-user tallies. Users are visited alphabetically so that users with the
# same number of edits keep a deterministic (alphabetical) order after the
# stable sort below.
counters = {}
for user in sorted(edits_by_user):
    by_type = {"node": 0, "way": 0, "relation": 0}
    # Single pass over the user's edits instead of one pass per object type.
    for e in edits_by_user[user]:
        if e["type"] in by_type:
            by_type[e["type"]] += 1
    counters[user] = {
        "nodes": by_type["node"],
        "ways": by_type["way"],
        "relations": by_type["relation"],
        "total": sum(by_type.values()),
    }

# Global totals
total_all = sum(c["total"] for c in counters.values())
total_nodes = sum(c["nodes"] for c in counters.values())
total_ways = sum(c["ways"] for c in counters.values())
total_relations = sum(c["relations"] for c in counters.values())

# One record per user, including that user's share of all edits.
users_with_percent = [
    {
        "user": user,
        "edits_count": c["total"],
        "percent_total": round((c["total"] / total_all * 100.0) if total_all > 0 else 0.0, 2),
        "nodes": c["nodes"],
        "ways": c["ways"],
        "relations": c["relations"],
        "edits": edits_by_user[user],
    }
    for user, c in counters.items()
]

# Rank by the exact edit count, most active first. Sorting on the rounded
# percentage would tie users whose shares round to the same two decimals
# (e.g. many small contributors at 0.00%) and so mis-rank them.
users_with_percent.sort(key=lambda x: x["edits_count"], reverse=True)

# Ranked list used by the output stage
out = users_with_percent

# Totals section added to the JSON / console output
summary = {
    "total_users": len(counters),
    "total_edits": total_all,
    "total_nodes": total_nodes,
    "total_ways": total_ways,
    "total_relations": total_relations,
}

# Output stage. --console and --json-output are handled independently, so
# asking for both prints the readable summary AND writes the JSON file
# (previously --json-output was silently ignored when --console was given).
if console:
    # One line per user:
    # user: total edits (X.XX%), N nodes M ways P relations
    for u in out:
        print(f"{u['user']}: Numero de ediciones {u['edits_count']} ({u['percent_total']:.2f}%), {u['nodes']} nodos {u['ways']} ways {u['relations']} relations")
    # Grand totals
    print("-----")
    print(f"TOTAL: {summary['total_edits']} ediciones, {summary['total_nodes']} nodos, {summary['total_ways']} ways, {summary['total_relations']} relations (usuarios: {summary['total_users']})")

if json_output:
    # The file holds one object with the summary plus the ranked user list.
    to_write = {"summary": summary, "users": out}
    with open(json_output, "w", encoding="utf-8") as fh:
        json.dump(to_write, fh, ensure_ascii=False, indent=2)
    print("Salida JSON escrita en", json_output)
elif not console:
    # Neither option given: dump the same JSON document to stdout.
    print(json.dumps({"summary": summary, "users": out}, ensure_ascii=False, indent=2))

It is invoked as:

python h.llista.edicions-f.py islas-baleares-internal.osh.pbf "https://www.openstreetmap.org/#map=14/39.59646/2.56333" 2008-01-01 2008-12-31 --console