Source code for districtheatingsim.geocoding.geocoding

"""
Geocoding module for address to coordinate transformation.

Provides Nominatim-based geocoding with coordinate system transformation
from WGS84 to UTM Zone 33N (ETRS89).

:author: Dipl.-Ing. (FH) Jonas Pfeiffer
"""

import os
import csv
import tempfile
import shutil

from geopy.geocoders import Nominatim
from pyproj import Transformer

def get_coordinates(address, from_crs="epsg:4326", to_crs="epsg:25833"):
    """
    Look up an address with Nominatim and project the result into ``to_crs``.

    Geocoding failures are reported on stdout instead of being raised:
    both an unknown address and any exception during the lookup or the
    projection yield ``(None, None)``.

    :param address: Address to geocode
    :type address: str
    :param from_crs: Source CRS (default: WGS84)
    :type from_crs: str
    :param to_crs: Target CRS (default: ETRS89/UTM Zone 33N)
    :type to_crs: str
    :return: (UTM_X, UTM_Y) coordinates or (None, None) if failed
    :rtype: tuple of float
    """
    # Both helpers are created outside the try block, so an error while
    # constructing them (e.g. an invalid CRS string) reaches the caller.
    geolocator = Nominatim(user_agent="DistrictHeatingSim")
    transformer = Transformer.from_crs(from_crs, to_crs, always_xy=True)

    try:
        hit = geolocator.geocode(address)
        if not hit:
            print(f"Could not geocode the address {address}.")
            return (None, None)

        # The transformer is built with always_xy=True, hence longitude
        # is passed first and latitude second.
        utm_x, utm_y = transformer.transform(hit.longitude, hit.latitude)
        return (utm_x, utm_y)
    except Exception as exc:
        print(f"An error occurred: {exc}")
        return (None, None)
def _pad_row(row, width):
    """Extend ``row`` in place with empty strings until it has ``width`` fields."""
    missing = width - len(row)
    if missing > 0:
        row.extend([""] * missing)


def process_data(input_csv):
    """
    Add UTM coordinates to CSV file via geocoding.

    The file is rewritten in place: every row is geocoded into a temporary
    file, which replaces ``input_csv`` only after all rows were written.
    If the header already contains ``UTM_X`` and ``UTM_Y`` those columns are
    overwritten, otherwise both columns are appended.

    Special cases:

    * An empty file is left untouched.
    * Rows with fewer than four fields (including blank lines) cannot be
      geocoded and are copied through unchanged.
    * Rows shorter than the header are padded with empty fields so the
      coordinates always land in the ``UTM_X`` / ``UTM_Y`` columns.

    :param input_csv: Path to CSV file (delimiter ';', columns: country, state, city, address)
    :type input_csv: str
    """
    temp_fd, temp_path = tempfile.mkstemp()
    os.close(temp_fd)

    try:
        # The output is written as 'utf-8-sig', so the input must be read the
        # same way: 'utf-8-sig' strips a leading BOM if present and otherwise
        # behaves like 'utf-8'. Reading as plain 'utf-8' would keep the BOM
        # inside the first header cell and add another one on every run.
        # newline='' lets the csv module do its own line-ending handling.
        with open(input_csv, mode='r', newline='', encoding='utf-8-sig') as infile, \
                open(temp_path, mode='w', newline='', encoding='utf-8-sig') as outfile:
            reader = csv.reader(infile, delimiter=';')
            writer = csv.writer(outfile, delimiter=';')

            headers = next(reader, None)
            if headers is None:
                # No header row at all: nothing to geocode, keep the file as is.
                print(f"No data found in {input_csv}.")
                return

            # Check if UTM_X and UTM_Y columns are already in the headers
            has_utm_columns = "UTM_X" in headers and "UTM_Y" in headers
            if has_utm_columns:
                utm_x_index = headers.index("UTM_X")
                utm_y_index = headers.index("UTM_Y")
                writer.writerow(headers)
            else:
                writer.writerow(headers + ["UTM_X", "UTM_Y"])

            for row in reader:
                if len(row) < 4:
                    # Blank or incomplete line: no full address available.
                    writer.writerow(row)
                    continue

                country, state, city, address = row[:4]
                full_address = f"{address}, {city}, {state}, {country}"
                utm_x, utm_y = get_coordinates(full_address)

                if has_utm_columns:
                    # Make sure both target cells exist before assigning.
                    _pad_row(row, max(utm_x_index, utm_y_index) + 1)
                    row[utm_x_index] = utm_x
                    row[utm_y_index] = utm_y
                else:
                    # Align short rows with the header so the appended values
                    # end up under the new UTM_X / UTM_Y columns.
                    _pad_row(row, len(headers))
                    row.extend([utm_x, utm_y])

                writer.writerow(row)

        # Replace the original file only after both handles are closed.
        shutil.move(temp_path, input_csv)
        print("Processing completed.")
    finally:
        # After a successful move the temp file no longer exists; on any
        # failure (or the early return above) this removes the leftover.
        try:
            os.remove(temp_path)
        except OSError:
            pass
if __name__ == "__main__":
    # Path of the address CSV to geocode.
    # This is a dummy file name; replace it with the actual file path.
    input_csv = "data/data_geocoded.csv"

    # process_data reads from input_csv and writes the result back to it.
    # The call is left commented out; enable it once input_csv is a real file.
    # process_data(input_csv)