# Source code for upoints.geonames

#
# coding=utf-8
"""geonames - Imports geonames.org data files."""
# Copyright © 2007-2017  James Rowe <jnrowe@gmail.com>
#
# This file is part of upoints.
#
# upoints is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# upoints is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# upoints.  If not, see <http://www.gnu.org/licenses/>.

import datetime

try:
    from dateutil import tz
except ImportError:
    #: ``dateutil`` module reference if available
    tz = None

from . import (point, trigpoints, utils)


class Location(trigpoints.Trigpoint):
    """Class for representing a location from a geonames.org_ data file.

    All country codes are specified with their two letter ISO-3166 country
    code.

    .. versionadded:: 0.3.0

    :cvar __TIMEZONES: ``dateutil.gettz`` cache to speed up generation, only
        defined when ``dateutil`` could be imported

    .. _geonames.org: http://www.geonames.org/
    """

    __slots__ = ('geonameid', 'asciiname', 'alt_names', 'feature_class',
                 'feature_code', 'country', 'alt_country', 'admin1', 'admin2',
                 'admin3', 'admin4', 'population', 'altitude', 'gtopo30',
                 'tzname', 'modified_date')

    if tz:
        __TIMEZONES = {}

    def __init__(self, geonameid, name, asciiname, alt_names, latitude,
                 longitude, feature_class, feature_code, country, alt_country,
                 admin1, admin2, admin3, admin4, population, altitude, gtopo30,
                 tzname, modified_date, timezone=None):
        """Initialise a new ``Location`` object.

        Args:
            geonameid (int): ID of record in geonames database
            name (unicode): Name of geographical location
            asciiname (str): Name of geographical location in ASCII encoding
            alt_names (list of unicode): Alternate names for the location
            latitude (float): Location's latitude
            longitude (float): Location's longitude
            feature_class (str): Location's type
            feature_code (str): Location's code
            country (str): Location's country
            alt_country (str): Alternate country codes for location
            admin1 (str): FIPS code (subject to change to ISO code), ISO code
                for the US and CH
            admin2 (str): Code for the second administrative division, a county
                in the US
            admin3 (str): Code for third level administrative division
            admin4 (str): Code for fourth level administrative division
            population (int): Location's population, if applicable
            altitude (int): Location's elevation
            gtopo30 (int): Average elevation of 900 square metre region, if
                available
            tzname (str): The timezone identifier using POSIX timezone names
            modified_date (datetime.date): Location's last modification date
                in the geonames databases
            timezone (int): The non-DST timezone offset from UTC in minutes.
                When omitted it is looked up from ``tzname`` using
                ``dateutil`` if that module is available, and is ``None`` if
                the lookup is not possible.
        """
        super(Location, self).__init__(latitude, longitude, altitude, name)
        self.geonameid = geonameid
        # name, latitude, longitude and altitude were also handed to the
        # parent initialiser above; the explicit assignments are retained
        # because the parent implementation is not visible from this module.
        self.name = name
        self.asciiname = asciiname
        self.alt_names = alt_names
        self.latitude = latitude
        self.longitude = longitude
        self.feature_class = feature_class
        self.feature_code = feature_code
        self.country = country
        self.alt_country = alt_country
        self.admin1 = admin1
        self.admin2 = admin2
        self.admin3 = admin3
        self.admin4 = admin4
        self.population = population
        self.altitude = altitude
        self.gtopo30 = gtopo30
        self.tzname = tzname
        self.modified_date = modified_date
        # NOTE(review): ``timezone`` is not in this class's ``__slots__``;
        # presumably a parent class declares it -- confirm.
        if timezone is not None:
            self.timezone = timezone
        elif tz:
            self.timezone = self._standard_offset(tzname)
        else:
            self.timezone = None

    @staticmethod
    def _standard_offset(tzname):
        """Look up the non-DST UTC offset for a timezone identifier.

        Results, including failed lookups, are memoised in the class level
        cache.  Must only be called when ``dateutil`` is available.

        Args:
            tzname (str): Timezone identifier to resolve

        Returns:
            int: Offset from UTC in minutes, or ``None`` when ``tzname`` is
            empty or cannot be resolved
        """
        cache = Location.__TIMEZONES
        try:
            return cache[tzname]
        except KeyError:
            pass

        # An empty identifier must not be passed to ``gettz``: it would
        # resolve to the local zone of the machine running the import.
        zone = tz.gettz(tzname) if tzname else None
        offset = None
        if zone is not None:
            std_info = getattr(zone, '_ttinfo_std', None)
            if std_info is not None:
                offset = int(std_info.offset / 60)
            else:
                # Zone objects that are not backed by a tzfile do not carry
                # ``_ttinfo_std``; derive the standard offset from the public
                # ``tzinfo`` interface instead.
                reference = datetime.datetime.now()
                utc_delta = zone.utcoffset(reference)
                if utc_delta is not None:
                    dst_delta = zone.dst(reference) or datetime.timedelta(0)
                    std_delta = utc_delta - dst_delta
                    offset = int(std_delta.total_seconds() / 60)
        cache[tzname] = offset
        return offset

    def __str__(self):
        """Pretty printed location string.

        See also:
           trigpoints.point.Point

        Returns:
            str: Human readable string representation of ``Location`` object
        """
        return self.__format__()

    def __format__(self, format_spec='dd'):
        """Extended pretty printing for location strings.

        Args:
            format_spec (str): Coordinate formatting system to use

        Returns:
            str: Human readable string representation of ``Location`` object,
            made of its name, any alternate names and its coordinates

        Raises:
            ValueError: Unknown value for ``format_spec``
        """
        # Starting the lookup after ``Location.__base__`` deliberately skips
        # the direct parent's ``__format__`` and uses the next one in the MRO.
        text = super(Location.__base__, self).__format__(format_spec)

        if self.alt_names:
            return '%s (%s - %s)' % (self.name, ', '.join(self.alt_names),
                                     text)
        else:
            return '%s (%s)' % (self.name, text)
class Locations(point.Points):
    """Class for representing a group of :class:`Location` objects.

    .. versionadded:: 0.5.1
    """

    def __init__(self, data=None, tzfile=None):
        """Initialise a new ``Locations`` object.

        Args:
            data (iter): geonames.org locations data to read, if any
            tzfile (iter): geonames.org timezones data to read, if any.  It is
                loaded before ``data`` so the offsets are available while the
                locations are parsed.
        """
        super(Locations, self).__init__()
        if tzfile:
            self.import_timezones_file(tzfile)
        else:
            self.timezones = {}
        self._data = data
        self._tzfile = tzfile
        if data:
            self.import_locations(data)

    def import_locations(self, data):
        """Parse geonames.org country database exports.

        ``import_locations()`` appends one :class:`Location` object to this
        container for every record in the data exported by geonames.org_.  It
        expects data files in the following tab separated format::

            2633441	Afon Wyre	Afon Wyre	River Wayrai,River Wyrai,Wyre	52.3166667	-4.1666667	H	STM	GB	GB	00				0		-9999	Europe/London	1994-01-13
            2633442	Wyre	Wyre	Viera	59.1166667	-2.9666667	T	ISL	GB	GB	V9				0		1	Europe/London	2004-09-24
            2633443	Wraysbury	Wraysbury	Wyrardisbury	51.45	-0.55	P	PPL	GB		P9				0		28	Europe/London	2006-08-21

        Files containing the data in this format can be downloaded from the
        geonames.org_ site in their `database export page`_.

        Files downloaded from the geonames site when processed by
        ``import_locations()`` will fill the container with objects of the
        following style::

            [Location(2633441, "Afon Wyre", "Afon Wyre",
                      ['River Wayrai', 'River Wyrai', 'Wyre'],
                      52.3166667, -4.1666667, "H", "STM", "GB", ['GB'], "00",
                      None, None, None, 0, None, -9999, "Europe/London",
                      datetime.date(1994, 1, 13)),
             Location(2633442, "Wyre", "Wyre", ['Viera'], 59.1166667,
                      -2.9666667, "T", "ISL", "GB", ['GB'], "V9", None, None,
                      None, 0, None, 1, "Europe/London",
                      datetime.date(2004, 9, 24)),
             Location(2633443, "Wraysbury", "Wraysbury", ['Wyrardisbury'],
                      51.45, -0.55, "P", "PPL", "GB", None, "P9", None, None,
                      None, 0, None, 28, "Europe/London",
                      datetime.date(2006, 8, 21))]

        Empty fields are stored as ``None``.  When a timezones table has been
        loaded, the standard offset of each record's timezone identifier is
        passed to :class:`Location` as its ``timezone``; identifiers missing
        from the table are left for :class:`Location` to resolve.

        Args:
            data (iter): geonames.org locations data to read

        Raises:
            FileFormatError: Unknown file format

        .. _geonames.org: http://www.geonames.org/
        .. _database export page: http://download.geonames.org/export/dump/
        """
        self._data = data
        field_names = ('geonameid', 'name', 'asciiname', 'alt_names',
                       'latitude', 'longitude', 'feature_class',
                       'feature_code', 'country', 'alt_country', 'admin1',
                       'admin2', 'admin3', 'admin4', 'population', 'altitude',
                       'gtopo30', 'tzname', 'modified_date')

        def or_none(parser):
            """Wrap ``parser`` so that empty fields become ``None``."""
            return lambda s: parser(s) if s else None

        def date_parse(s):
            """Convert a ``YYYY-MM-DD`` string to a ``datetime.date``."""
            return datetime.date(*map(int, s.split('-')))

        str_or_none = or_none(str)
        float_or_none = or_none(float)
        int_or_none = or_none(int)
        # Splitting an empty string would give [''], which is truthy and
        # would be rendered as an empty alternate name.
        split_or_none = or_none(lambda s: s.split(','))

        # One parser per entry of field_names, in the same order.  The
        # timezone identifier is kept as text; its offset is resolved below.
        field_parsers = (int_or_none, str_or_none, str_or_none, split_or_none,
                         float_or_none, float_or_none, str_or_none,
                         str_or_none, str_or_none, split_or_none, str_or_none,
                         str_or_none, str_or_none, str_or_none, int_or_none,
                         int_or_none, int_or_none, str_or_none, date_parse)

        # The escape keeps the tab delimiter visible and safe from whitespace
        # normalisation.
        data = utils.prepare_csv_read(data, field_names, delimiter='\t')
        for row in data:
            try:
                for name, parser in zip(field_names, field_parsers):
                    row[name] = parser(row[name])
            except (AttributeError, TypeError, ValueError):
                # Unparsable values, and fields missing from short rows, both
                # mean the input is not in the expected format.
                raise utils.FileFormatError('geonames.org')

            offsets = self.timezones.get(row['tzname'])
            timezone = offsets[0] if offsets else None
            self.append(Location(timezone=timezone, **row))

    def import_timezones_file(self, data):
        """Parse geonames.org_ timezone exports.

        ``import_timezones_file()`` fills ``self.timezones`` with a dictionary
        whose keys are the timezone identifiers, and whose values consist of
        the UTC offset and the UTC offset during daylight savings time, both
        in minutes.  Any previously loaded table is replaced.

        It expects data files in the following tab separated format::

            Europe/Andorra	1.0	2.0
            Asia/Dubai	4.0	4.0
            Asia/Kabul	4.5	4.5

        Files containing the data in this format can be downloaded from the
        geonames site in their `database export page`_

        Files downloaded from the geonames site when processed by
        ``import_timezones_file()`` will produce a ``dict`` object of the
        following style::

            {"Europe/Andorra": [60, 120],
             "Asia/Dubai": [240, 240],
             "Asia/Kabul": [270, 270]}

        Args:
            data (iter): geonames.org timezones data to read

        Raises:
            FileFormatError: Unknown file format

        .. _geonames.org: http://www.geonames.org/
        .. _database export page: http://download.geonames.org/export/dump/
        """
        self._tzfile = data
        field_names = ('ident', 'gmt_offset', 'dst_offset')
        offset_fields = ('gmt_offset', 'dst_offset')

        data = utils.prepare_csv_read(data, field_names, delimiter='\t')

        self.timezones = {}
        for row in data:
            # Skip the column header line of the export.
            if row['ident'] == 'TimeZoneId':
                continue
            try:
                # Offsets are given in fractional hours; store whole minutes.
                delta = [int(float(row[field]) * 60)
                         for field in offset_fields]
            except (TypeError, ValueError):
                # TypeError covers offsets missing from a short row.
                raise utils.FileFormatError('geonames.org')
            self.timezones[row['ident']] = delta