Vector Data I/O in Python

There are many different file formats and data sources for geographic information. This tutorial shows some typical examples of how to read (and write) data from different sources.

  • To see all file formats supported by GDAL, execute the following:

# See all available drivers supported by GDAL
import fiona
# NOTE(review): `fiona._drivers` is a private module (leading underscore), so this
# import may not be available in every Fiona release -- confirm against the
# installed version.
from fiona._drivers import GDALEnv
env = GDALEnv()

# This will print all available drivers supported by GDAL.
# All of these can be used for reading data,
# and most of them also for writing data.

env.start().drivers().keys()
dict_keys(['PCIDSK', 'netCDF', 'PDS4', 'JP2OpenJPEG', 'PDF', 'MBTiles', 'EEDA', 'ESRI Shapefile', 'MapInfo File', 'UK .NTF', 'OGR_SDTS', 'S57', 'DGN', 'OGR_VRT', 'REC', 'Memory', 'BNA', 'CSV', 'NAS', 'GML', 'GPX', 'LIBKML', 'KML', 'GeoJSON', 'GeoJSONSeq', 'ESRIJSON', 'TopoJSON', 'Interlis 1', 'Interlis 2', 'OGR_GMT', 'GPKG', 'SQLite', 'OGR_DODS', 'WAsP', 'PostgreSQL', 'OpenFileGDB', 'XPlane', 'DXF', 'CAD', 'Geoconcept', 'GeoRSS', 'GPSTrackMaker', 'VFK', 'PGDUMP', 'OSM', 'GPSBabel', 'SUA', 'OpenAir', 'OGR_PDS', 'WFS', 'WFS3', 'HTF', 'AeronavFAA', 'EDIGEO', 'GFT', 'SVG', 'CouchDB', 'Cloudant', 'Idrisi', 'ARCGEN', 'SEGUKOOA', 'SEGY', 'XLS', 'ODS', 'XLSX', 'ElasticSearch', 'Carto', 'AmigoCloud', 'SXF', 'Selafin', 'JML', 'PLSCENES', 'CSW', 'VDV', 'GMLAS', 'MVT', 'TIGER', 'AVCBin', 'AVCE00', 'NGW', 'HTTP'])
# Available drivers in geopandas/fiona, mapped to the modes each one supports
# ('r' = read, 'a' = append, 'w' = write).
# The table is read directly from Fiona rather than through geopandas' internal
# `gpd.io.file.fiona` attribute, which is an implementation detail and is not
# guaranteed to be populated before the first file has been read.
import fiona
import geopandas as gpd
fiona.supported_drivers
{'AeronavFAA': 'r',
 'ARCGEN': 'r',
 'BNA': 'raw',
 'DXF': 'raw',
 'CSV': 'raw',
 'OpenFileGDB': 'r',
 'ESRIJSON': 'r',
 'ESRI Shapefile': 'raw',
 'GeoJSON': 'rw',
 'GeoJSONSeq': 'rw',
 'GPKG': 'rw',
 'GML': 'raw',
 'GPX': 'raw',
 'GPSTrackMaker': 'raw',
 'Idrisi': 'r',
 'MapInfo File': 'raw',
 'DGN': 'raw',
 'PCIDSK': 'r',
 'S57': 'r',
 'SEGY': 'r',
 'SUA': 'r',
 'TopoJSON': 'r'}

Read / write Shapefile

import geopandas as gpd

# Read file from Shapefile into a GeoDataFrame
fp = "L2_data/Finland.shp"
data = gpd.read_file(fp)

# Write to Shapefile (just make a copy).
# No `driver` argument is passed here, so geopandas' default output driver is used.
outfp = "L2_data/Finland_copy.shp"
data.to_file(outfp)

Read / write GeoJSON

import geopandas as gpd

# Read file from GeoJSON (the driver is named explicitly)
fp = "L2_data/Finland.geojson"
data = gpd.read_file(fp, driver="GeoJSON")

# Write to GeoJSON (just make a copy).
# The driver must be given here, otherwise the default output driver is used.
outfp = "L2_data/Finland_copy.geojson"
data.to_file(outfp, driver="GeoJSON")

Read / write KML

import fiona
import geopandas as gpd

# Enable KML driver for reading and writing (it is not part of the default
# driver table printed earlier in this tutorial).
# The driver is registered on Fiona's own table instead of going through
# geopandas' internal `gpd.io.file.fiona` attribute, which is an implementation
# detail and is not guaranteed to be populated at this point.
fiona.supported_drivers['KML'] = 'rw'

# Read file from KML
fp = "L2_data/Finland.kml"
data = gpd.read_file(fp)

# Write to KML (just make a copy)
outfp = "L2_data/Finland_copy.kml"
data.to_file(outfp, driver="KML")

Read / write Geopackage

import geopandas as gpd

# Read file from Geopackage.
# No `layer` argument is given -- presumably the file holds a single layer; verify for your data.
fp = "L2_data/Finland.gpkg"
data = gpd.read_file(fp)

# Write to Geopackage (just make a copy)
outfp = "L2_data/Finland_copy.gpkg"
data.to_file(outfp, driver="GPKG")

Read / write GeoDatabase

import geopandas as gpd

# Read a layer from a File Geodatabase.
# GDAL's built-in, read-only "OpenFileGDB" driver is used for reading: it is the
# geodatabase driver that appears in the driver listings printed earlier in this
# tutorial, whereas "FileGDB" does not.
fp = "L2_data/Finland.gdb"
data = gpd.read_file(fp, driver="OpenFileGDB", layer='country')

# Write to same FileGDB (just add a new layer).
# NOTE(review): writing relies on the "FileGDB" driver, which is missing from the
# driver listings shown earlier -- presumably it needs a GDAL build that includes
# Esri's File Geodatabase API; confirm it is available before running this step.
outfp = "L2_data/Finland.gdb"
data.to_file(outfp, driver="FileGDB", layer="country_copy")

Read / write MapInfo Tab

# Read file from MapInfo Tab
# (`gpd` must already be imported as geopandas; this section does not import it itself)
fp = "L2_data/Finland.tab"
data = gpd.read_file(fp, driver="MapInfo File")

# Write to MapInfo Tab (just make a copy)
outfp = "L2_data/Finland_copy.tab"
data.to_file(outfp, driver="MapInfo File")

Read PostGIS database using psycopg2

import geopandas as gpd
import psycopg2

# Create connection to database with psycopg2 module (update params according to your db).
# psycopg2.connect() returns one connection object, so it is bound to a single
# name; unpacking it into two names raises a TypeError.
conn = psycopg2.connect(dbname='my_postgis_database', user='my_usrname', password='my_pwd',
                        host='123.22.432.16', port=5432)

# Specify sql query
sql = "SELECT * FROM MY_TABLE;"

# Read data from PostGIS and always release the connection afterwards,
# even if the query fails.
try:
    data = gpd.read_postgis(sql=sql, con=conn)
finally:
    conn.close()

Read / write PostGIS database using SqlAlchemy + GeoAlchemy

from sqlalchemy.engine.url import URL
from sqlalchemy import create_engine
from sqlalchemy import MetaData
from sqlalchemy.orm import sessionmaker
from geoalchemy2 import WKTElement, Geometry

# Update with your db parameters
HOST = '123.234.345.16'
DB = 'my_database'
USER = 'my_user'
PORT = 5432
PWD = 'my_password'

# Database info.
# Newer SQLAlchemy releases expect URLs to be built with the URL.create()
# factory, while older releases only offer the plain constructor; use the
# factory when it exists and fall back to the constructor otherwise.
build_url = getattr(URL, 'create', URL)
db_url = build_url(drivername='postgresql+psycopg2', host=HOST, database=DB,
                   username=USER, port=PORT, password=PWD)

# Create engine
engine = create_engine(db_url)

# Init Metadata
meta = MetaData()

# Load table definitions from db
meta.reflect(engine)

# Create session
Session = sessionmaker(bind=engine)
session = Session()

# ========================
# Read data from PostGIS
# ========================

# Specify sql query
sql = "SELECT * FROM finland;"

# Pull the data, running the query through the SQLAlchemy engine
# (`gpd` must already be imported as geopandas; this section does not import it itself)
data = gpd.read_postgis(sql=sql, con=engine)

# Close session
session.close()

# =========================================
# Write data to PostGIS (make a copy table)
# =========================================

# Coordinate Reference System (srid)
crs = 4326

# Target table
target_table = 'finland_copy'

# Convert Shapely geometries to WKTElements into column 'geom' (default in PostGIS)
data['geom'] = data['geometry'].apply(lambda geom: WKTElement(geom.wkt, srid=crs))

# Drop Shapely geometries
data = data.drop('geometry', axis=1)

# Write to PostGIS (overwrite if table exists, be careful with this! )
# Possible behavior: 'replace', 'append', 'fail'
#
# The 'geom' column must be declared as a GeoAlchemy2 Geometry type; without the
# explicit dtype pandas has no SQL type for the WKTElement objects and the
# column cannot be written as a PostGIS geometry. The generic 'GEOMETRY' type
# accepts any geometry kind.
data.to_sql(target_table, engine, if_exists='replace', index=False,
            dtype={'geom': Geometry(geometry_type='GEOMETRY', srid=crs)})

Read / write Spatialite database

import geopandas as gpd
import sqlite3
import shapely.wkb as swkb
from sqlalchemy import create_engine, event

# DB path
dbfp = 'L2_data/Finland.sqlite'

# Name for the table
tbl_name = 'finland'

# SRID (crs of your data)
srid = 4326

# Parse geometry type of the input data
# (`data` is the GeoDataFrame loaded in one of the earlier sections)
gtype = data.geom_type.unique()
if len(gtype) != 1:
    # A Spatialite geometry column is declared with one geometry type
    raise ValueError("Mixed Geometries! Cannot insert into SQLite table.")
geom_type = gtype[0].upper()

# Initialize database engine, handing SQLAlchemy the sqlite3 module as DBAPI
engine = create_engine('sqlite:///{db}'.format(db=dbfp), module=sqlite3)


@event.listens_for(engine, 'connect')
def _load_spatialite(dbapi_connection, connection_record):
    """Load the Spatialite extension on every connection the engine opens."""
    dbapi_connection.enable_load_extension(True)
    dbapi_connection.load_extension("mod_spatialite")


# Initialize table without geometries
geo = data.drop(['geometry'], axis=1)

with sqlite3.connect(dbfp) as conn:
    geo.to_sql(tbl_name, conn, if_exists='replace', index=False)

# Enable spatialite extension and attach the geometries
with sqlite3.connect(dbfp) as conn:
    conn.enable_load_extension(True)
    conn.load_extension("mod_spatialite")
    conn.execute("SELECT InitSpatialMetaData(1);")
    # Add a two-dimensional geometry column with the given CRS and geometry type.
    # Table name and geometry type are bound as parameters so that they reach
    # SQLite as string values instead of bare identifiers.
    conn.execute(
        "SELECT AddGeometryColumn(?, 'wkb_geometry', ?, ?, 2);",
        (tbl_name, srid, geom_type),
    )
    # Fill the new column from the Shapely geometries (as well-known binary).
    # The attribute rows were inserted in DataFrame order into a freshly created
    # table, so SQLite's implicit rowid runs 1..n in that same order.
    conn.executemany(
        'UPDATE "{table}" SET wkb_geometry = GeomFromWKB(?, ?) '
        'WHERE rowid = ?;'.format(table=tbl_name),
        [(swkb.dumps(geom), srid, rowid)
         for rowid, geom in enumerate(data['geometry'], start=1)],
    )

Read Web Feature Service (WFS)

import geopandas as gpd
import requests
import geojson

# Specify the url for the backend. Here we are using data from Statistics Finland: https://www.stat.fi/org/avoindata/paikkatietoaineistot_en.html
url = 'http://geo.stat.fi/geoserver/vaestoruutu/wfs'

# Specify parameters (read data in json format).
# Available feature types in this particular data source: http://geo.stat.fi/geoserver/vaestoruutu/wfs?service=wfs&version=2.0.0&request=describeFeatureType
params = dict(service='WFS', version='2.0.0', request='GetFeature',
              typeName='vaestoruutu:vaki2017_5km', outputFormat='json')

# Fetch data from WFS using requests.
# The timeout keeps the call from hanging forever on an unresponsive server.
r = requests.get(url, params=params, timeout=60)

# Stop here on an HTTP error instead of trying to parse an error page as GeoJSON
r.raise_for_status()

# Create GeoDataFrame from geojson
# NOTE(review): no CRS is assigned to the result here -- set it explicitly if the
# coordinate system of the service is known.
data = gpd.GeoDataFrame.from_features(geojson.loads(r.content))