To develop a Python toolkit capable of supporting
complex ETL workflows by creating a "common language" for heterogenous data sources.
Address Information System (AIS)
import cx_Oracle
# Connect to DB
db = cx_Oracle.connect('ais/password@ais_source')
cursor = db.cursor()
# Get parcel IDs and geometry
sql = '''
select parcel_id, sdo_util.to_wktgeometry(shape)
from parcels
where address = '1 ALLEN LN'
'''
cursor.execute(sql)
parcels = cursor.fetchall()
for parcel in parcel:
# Do something here
# Tediously form giant insert statement
insert_stmt = 'insert into parcels values({})'from db import connect_to_db
# Connect to DB
db = connect_to_db(db_type='postgres', dsn='ais/password@ais')
# Get parcel ID and geometry
parcels = db.read('parcels', ['parcel_id'], geom_field='shape', where="address = '1 ALLEN LN'"
for parcel in parcels:
parcel_id = parcel['parcel_id']
# Do something here...
# Write out
db.truncate('parcels')
db.bulk_insert('parcels', parcels, geom_field='shape', to_srid=4326, chunk_size=100000)
db.create_index('parcels', 'parcel_id')CONFIG = {
'db': {
'ais_work': {
'db_type': 'postgres',
'dsn': 'dbname=ais_newschema host=192.168.104.170 user=postgres \
password=pwd',
},
'ulrs': {
'db_type': 'oracle',
'dsn': 'gis_ulrs2/pwd@gis'
},
},
'addresses': {
'sources': [
{
'table': 'opa_property',
'db': 'ais_work',
'address_field': 'street_address',
'key_fields': [
{
'source_name': 'account_num',
'name': 'opa_account_num',
'type': 'text'
},
{
'source_name': 'pwd_parcel_id',
'name': 'pwd_parcel_id',
'type': 'integer'
}
]
},
{
'table': 'pwd_parcel',
'db': 'ais_work',
'address_field': 'street_address',
'key_fields': [
{
'source_name': 'parcel_id',
'name': 'pwd_parcel_id',
'type': 'integer'
}
]
},Connection spec lets you write a data adapter for anything that is Python-accessible.
import ingress
# Connect to DB
db = ingress.connect('postgres://user:password@localhost:5432/db')
# Read in 5 random parcels sorted by address
parcels = db.table('parcels').sort('address').random().limit(5)
# Convert parcel ID to an integer
parcels.transform('parcel_id', int)
# Make address uppercase
parcels.transform('address', 'uppercase')
# Apply a custom function
def standardize_address(addr):
# Do complicated things here...
return addr
parcels.transform('address', standardize_address)
# Write back out
db.table('parcels').reload(parcels)
# Get errors
for error in parcels.errors:
print(error)import ingress
# Connect to OPA Property file
props = ingress.connect('csv://path/to/prop2016.txt')
# Make sure numeric fields are numeric. Any non-passing rows
# are set aside and may be logged.
props.check('unit_num', 'isnumeric')\
.check('house_num', 'isnumeric')
# Make sure string fields are < 255 characters
props.check('owner', lambda x: len(x) < 255)
# Write out passing rows to GeoDB2
geodb2 = bodega.connect('oracle+sde://gis_opa:password@gis')
geodb2.table('real_estate').reload(props)import ingress
from shapely.geometry import Point
# Connect to DB
db = ingress.connect('postgres://user:password@localhost:5432/db')
# Read in service areas
service_areas = db.table('service_areas').all()
# Make spatial index
service_areas.create_spatial_index('geometry')
# Loop over addresses
for address in db.table('addresses').all():
xy = address['geometry']
service_area = service_areas.intersect(xy)
# Do something else...