I wanted to optimize the route for my kids’ newspaper delivery, and I realized that not all addresses were available for use in GPX format. To convert addresses to coordinates for GPX usage, I needed a solution. In case someone else might need it in the future, here is my Python script for you.
CSV structure:
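The script expects a semicolon-separated CSV file with a header row. It reads the following columns:
- Straße (Street): Contains the street name.
- Hausnummer (House number): Contains the house number; the script appends it to the street name.
- Postleitzahl (Postal code): Contains the postal code.
- Ort (City): Contains the city name.
- Landkreis (County): Contains the county or region information.
The sample below also contains a Land (Country) column, which the script does not read.
Each row in the CSV file represents one address with these pieces of information, for example: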
Land;Landkreis;Postleitzahl;Ort;Straße;Hausnummer
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;100
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;102
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;104
Deutschland;Nürnberg;90425;Nürnberg;Kilianstraße;106
Install Python on Windows:
Python Script (create a file csv-to-gpx.py):
import pandas as pd
from lxml import etree
from tqdm import tqdm
from geopy.geocoders import Nominatim
import time
import requests
import urllib.parse
def geocode_address(str_hnr_plz, city, postalcode, landkreis):
    """Look up a single address through the Nominatim search endpoint.

    The request is attempted up to three times when it fails (network
    error, HTTP error status, or an unparsable/unexpected response).  An
    empty result list is a definitive "not found" answer and is not retried.

    Args:
        str_hnr_plz: Street name and house number, sent as ``street``.
        city: City name, sent as ``city``.
        postalcode: Postal code, sent as ``postalcode``.
        landkreis: County/district, sent as ``county``.

    Returns:
        A tuple ``(lat, lon, str_hnr_plz)`` with ``lat``/``lon`` taken
        unchanged from the first search hit, or ``None`` when the service
        returned no hit or every attempt failed.
    """
    # Coerce to text up front so that numeric CSV cells cannot break the URL
    # construction, and so that ``url`` always exists for the error output.
    strasse = str(str_hnr_plz)
    plz = str(postalcode)
    str_hnr_plz_url = f'{strasse} {plz}'
    url = (
        'https://nominatim.openstreetmap.org/search.php'
        f'?street={urllib.parse.quote(strasse)}'
        f'&city={urllib.parse.quote(str(city))}'
        f'&county={urllib.parse.quote(str(landkreis))}'
        f'&postalcode={urllib.parse.quote(plz)}'
        '&limit=1&format=jsonv2'
    )
    # The public Nominatim usage policy asks clients to identify themselves;
    # send the application name with the request itself.
    headers = {"User-Agent": "myGeocoder"}

    print(f"Aufgerufene PLZ: {plz}")
    print(f"Aufgerufene Straße: {strasse}")
    print(f"Aufgerufene str_hnr_plz_url: {str_hnr_plz_url}")
    print(f'Geokodiere Adresse: {str_hnr_plz_url}, Deutschland')
    print(f"Aufgerufene URL: {url}")

    for _ in range(3):
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            data = response.json()
            if data:
                result = (data[0]["lat"], data[0]["lon"], str_hnr_plz)
            else:
                result = None
        except (requests.RequestException, ValueError, LookupError, TypeError) as e:
            print(f"Fehler bei der Geokodierung: {e}")
            print(f"Aufgerufene URL: {url}")
            # Back off before the next attempt.
            time.sleep(2)
        else:
            # Pace consecutive lookups: the public Nominatim usage policy
            # limits clients to one request per second.
            time.sleep(1)
            return result
    return None
def geocode_addresses(df, output_gpx_file, output_not_found_file):
    """Geocode every row of ``df`` and write the results to disk.

    Each row's ``Straße`` and ``Hausnummer`` are joined into one street
    string and passed, together with ``Ort``, ``Postleitzahl`` and
    ``Landkreis``, to ``geocode_address``.  Hits are handed to
    ``create_gpx_file``; the street strings of all misses are written to
    ``output_not_found_file`` as a one-column CSV and printed.

    Args:
        df: DataFrame providing the columns named above.
        output_gpx_file: Path of the GPX file to create.
        output_not_found_file: Path of the CSV listing unresolved addresses.
    """
    found = []
    missing = []

    progress = tqdm(df.iterrows(), total=len(df), desc='Geocoding')
    for _, record in progress:
        # Street name plus house number form the street part of the query.
        street_and_number = f'{record["Straße"]} {record["Hausnummer"]}'
        result = geocode_address(
            street_and_number,
            record["Ort"],
            record["Postleitzahl"],
            record["Landkreis"],
        )
        if not result:
            missing.append(street_and_number)
            continue
        lat, lon, label = result
        found.append((lat, lon, label))

    create_gpx_file(found, output_gpx_file)

    not_found_frame = pd.DataFrame(missing, columns=['Address'])
    not_found_frame.to_csv(output_not_found_file, index=False)

    print("Nicht gefundene Adressen:")
    for entry in missing:
        print(entry)
def create_gpx_file(coordinates, output_file):
    """Write the given points as waypoints into a GPX 1.1 file.

    Args:
        coordinates: Iterable of ``(lat, lon, address)`` tuples.  ``lat`` and
            ``lon`` are converted with ``str()``; ``address`` becomes the
            waypoint's ``<name>``.
        output_file: Destination handed to ``ElementTree.write``.
    """
    gpx = etree.Element(
        "gpx",
        version="1.1",
        # The GPX 1.1 schema declares ``creator`` as a required attribute of
        # the root element; without it the file does not validate.
        creator="csv-to-gpx.py",
        xmlns="http://www.topografix.com/GPX/1/1",
    )
    for lat, lon, address in coordinates:
        wpt = etree.SubElement(gpx, "wpt", lat=str(lat), lon=str(lon))
        etree.SubElement(wpt, "name").text = address
        etree.SubElement(wpt, "desc").text = "Geocoded Address"
    tree = etree.ElementTree(gpx)
    tree.write(output_file, encoding="utf-8", xml_declaration=True, pretty_print=True)
if __name__ == '__main__':
    # EDIT THE INPUT CSV FILE
    csv_file = 'adressen-incl-street-and-number.csv'
    # Read every column as text.  With pandas' default type inference a
    # purely numeric postal-code column is parsed as integers, which drops
    # leading zeros (01067 -> 1067) before the astype(str) below can help,
    # and a house-number column with empty cells is parsed as floats
    # (100 -> 100.0).
    df = pd.read_csv(csv_file, delimiter=';', dtype=str)
    # Kept so that empty postal-code cells are still passed on as strings.
    df['Postleitzahl'] = df['Postleitzahl'].astype(str)
    # EDIT THE OUTPUT FILE
    output_gpx_file = 'output.gpx'
    output_not_found_file = 'not_found_addresses.csv'
    geocode_addresses(df, output_gpx_file, output_not_found_file)
Install Python Modules:
python -m pip install pandas lxml tqdm geopy requests
Start the Script:
python csv-to-gpx.py
After this, I upload the GPX file to https://gpx.studio/ to check the entries and add some other information.
Have fun with the script and feel free to optimize, improve or expand it!
Thank you, greetings Daniel