Convert CSV to GPX with Python

I wanted to optimize the route for my kids’ newspaper delivery, and I realized that not all addresses were available for use in GPX format. To convert addresses to coordinates for GPX usage, I needed a solution. In case someone else might need it in the future, here is my Python script for you.

CSV structure:
The CSV file is semicolon-separated and has the following six columns, in this order:

  1. Land (Country): Contains the country name (not read by the script).
  2. Landkreis (County): Contains the county or region information.
  3. Postleitzahl (Postal code): Contains the postal code.
  4. Ort (City): Contains the city name.
  5. Straße (Street): Contains the street name.
  6. Hausnummer (House number): Contains the house number.

Each row in the CSV file represents one address with these six pieces of information.

Land;Landkreis;Postleitzahl;Ort;Straße;Hausnummer
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;100
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;102
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;104
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;106

Install Python on Windows (e.g. with the installer from https://www.python.org/downloads/):

Python Script (create a file csv-to-gpx.py):

import pandas as pd
from lxml import etree
from tqdm import tqdm
from geopy.geocoders import Nominatim
import time
import requests
import urllib.parse

def geocode_address(str_hnr_plz, city, postalcode, landkreis):
    """Look up one address via the public Nominatim search endpoint.

    Args:
        str_hnr_plz: Street name and house number, e.g. "Kilianstraße 100".
            Despite the parameter name it does not contain the postal code.
        city: City name (CSV column "Ort").
        postalcode: Postal code (CSV column "Postleitzahl").
        landkreis: County (CSV column "Landkreis"); sent as the ``county``
            query parameter.

    Returns:
        A tuple ``(lat, lon, str_hnr_plz)`` with ``lat``/``lon`` taken
        unchanged from the first search result, or ``None`` if the service
        returned no result or every attempt failed.
    """
    max_attempts = 3

    # Build the URL before entering the try block so that the error handler
    # can always print it. str() keeps non-string cells (e.g. numeric postal
    # codes or NaN) from raising before the request is even sent.
    query = urllib.parse.urlencode(
        {
            "street": str(str_hnr_plz),
            "city": str(city),
            "county": str(landkreis),
            "postalcode": str(postalcode),
            "limit": 1,
            "format": "jsonv2",
        },
        quote_via=urllib.parse.quote,
    )
    url = f'https://nominatim.openstreetmap.org/search.php?{query}'

    # The public Nominatim server asks clients to identify themselves;
    # the library default User-Agent may be rejected.
    headers = {"User-Agent": "myGeocoder"}

    str_hnr_plz_url = f'{str_hnr_plz} {postalcode}'
    print(f"Aufgerufene PLZ: {postalcode}")
    print(f"Aufgerufene Straße: {str_hnr_plz}")
    print(f"Aufgerufene str_hnr_plz_url: {str_hnr_plz_url}")
    print(f'Geokodiere Adresse: {str_hnr_plz_url}, Deutschland')
    print(f"Aufgerufene URL: {url}")

    for attempt in range(max_attempts):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            data = response.json()
            if not data:
                # A successful but empty answer means "address unknown";
                # repeating the identical query would not change that.
                return None
            return (data[0]["lat"], data[0]["lon"], str_hnr_plz)
        except (requests.RequestException, ValueError, KeyError,
                IndexError, TypeError) as e:
            print(f"Fehler bei der Geokodierung: {str(e)}")
            print(f"Aufgerufene URL: {url}")
            # Back off before retrying; no point waiting after the last try.
            if attempt < max_attempts - 1:
                time.sleep(2)

    return None

def geocode_addresses(df, output_gpx_file, output_not_found_file, min_interval=1.0):
    """Geocode every row of ``df`` and write the results to disk.

    Args:
        df: DataFrame with the columns "Straße", "Hausnummer", "Ort",
            "Postleitzahl" and "Landkreis".
        output_gpx_file: Path of the GPX file that receives one waypoint per
            successfully geocoded address.
        output_not_found_file: Path of the CSV file (single column
            "Address") listing the addresses that could not be geocoded.
        min_interval: Minimum number of seconds between the start of two
            consecutive lookups. Defaults to 1.0 to stay within the public
            Nominatim server's limit of one request per second; pass 0 to
            disable throttling (e.g. for a self-hosted instance).

    Returns:
        None. The not-found addresses are also printed to stdout.
    """
    coordinates = []
    not_found_addresses = []
    last_request_started = None

    for _, row in tqdm(df.iterrows(), total=len(df), desc='Geocoding'):
        # Throttle: wait out the remainder of the interval since the
        # previous lookup started.
        if last_request_started is not None:
            remaining = min_interval - (time.monotonic() - last_request_started)
            if remaining > 0:
                time.sleep(remaining)
        last_request_started = time.monotonic()

        str_hnr_plz = f'{row["Straße"]} {row["Hausnummer"]}'  # Include house number
        location = geocode_address(str_hnr_plz, row["Ort"], row["Postleitzahl"], row["Landkreis"])
        if location:
            coordinates.append(tuple(location))
        else:
            not_found_addresses.append(str_hnr_plz)

    create_gpx_file(coordinates, output_gpx_file)

    pd.DataFrame(not_found_addresses, columns=['Address']).to_csv(output_not_found_file, index=False)
    print("Nicht gefundene Adressen:")
    for address in not_found_addresses:
        print(address)

def create_gpx_file(coordinates, output_file):
    """Write the geocoded addresses as waypoints to a GPX 1.1 file.

    Args:
        coordinates: Iterable of ``(lat, lon, address)`` tuples. ``lat`` and
            ``lon`` are converted with ``str()``; ``address`` becomes the
            waypoint name.
        output_file: Path of the GPX file to write (UTF-8, pretty-printed,
            with XML declaration).

    Returns:
        None.
    """
    # The GPX 1.1 schema requires both "version" and "creator" on the root
    # element; without "creator" the file does not validate.
    gpx = etree.Element(
        "gpx",
        version="1.1",
        creator="csv-to-gpx.py",
        xmlns="http://www.topografix.com/GPX/1/1",
    )

    for lat, lon, address in coordinates:
        wpt = etree.SubElement(gpx, "wpt", lat=str(lat), lon=str(lon))
        etree.SubElement(wpt, "name").text = address
        etree.SubElement(wpt, "desc").text = "Geocoded Address"

    tree = etree.ElementTree(gpx)
    tree.write(output_file, encoding="utf-8", xml_declaration=True, pretty_print=True)

if __name__ == '__main__':
    # EDIT THE INPUT CSV FILE
    csv_file = 'adressen-incl-street-and-number.csv'
    # Read postal codes and house numbers as text. With type inference a
    # postal code such as 01067 would be parsed as the integer 1067 and lose
    # its leading zero, and house numbers could turn into floats ("100.0")
    # as soon as one cell in the column is empty.
    df = pd.read_csv(
        csv_file,
        delimiter=';',
        dtype={'Postleitzahl': str, 'Hausnummer': str},
    )

    # Kept so that empty postal code cells (NaN) are also plain strings
    # by the time they reach the geocoder.
    df['Postleitzahl'] = df['Postleitzahl'].astype(str)

    # EDIT THE OUTPUT FILE
    output_gpx_file = 'output.gpx'
    output_not_found_file = 'not_found_addresses.csv'
    geocode_addresses(df, output_gpx_file, output_not_found_file)

Install Python Modules:

python -m pip install pandas lxml tqdm geopy requests

Start the Script:

python csv-to-gpx.py

After this, I upload the GPX file to https://gpx.studio/ to check the entries and add some further information.

Have fun with the script and feel free to optimize, improve or expand it!
Thank you, greetings Daniel

2 Likes