
import os
from collections import Counter
import geopandas as gpd
import math
import numpy as np
import pandas as pd
from IPython.core.display import display, HTML
import datetime
import fiona
from tqdm import tqdm
from shapely.geometry import shape, box, Polygon, MultiPolygon, LineString, Point
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.font_manager
import matplotlib.patches as patches
from mpl_toolkits.axes_grid1 import make_axes_locatable
import folium
import warnings
import contextily as cx
import mapclassify
from pathlib import Path
# set some master parameters to make the font look good
mpl.rcParams['mathtext.fontset'] = 'custom'
mpl.rcParams['mathtext.rm'] = 'Bitstream Vera Sans'
mpl.rcParams['mathtext.it'] = 'Bitstream Vera Sans:italic'
mpl.rcParams['mathtext.bf'] = 'Bitstream Vera Sans:bold'
mpl.rc('font', **{'family': 'serif', 'serif': ['Times New Roman']})
plt.rcParams.update({'font.size': 14})
matplotlib.rcParams['text.usetex'] = False
# ddlpy imports
from ddlpy.utils.constants import IEC, TMP
from ddlpy.utils.tools import load_stata, gen_groups
from ddlpy.geospatialtools.utils import import_vector_data, check_polygon_validity
from ddlpy.geospatialtools.geospatialtools import create_thiessens, constrain_thiessens, polygon_polygon_join
from geovoronoi import coords_to_points, points_to_coords, voronoi_regions_from_coords, calculate_polygon_areas
from geovoronoi.plotting import subplot_for_map, plot_voronoi_polys_with_points_in_area
# this expands the cells of the jupyter notebook- helpful if you have a wide screen
display(HTML("<style>.container { width:90% !important; }</style>"))
# pd.set_option('display.float_format', lambda x: '%.5f' % x)
#####################
# IMPORT SHAPEFILES #
#####################
# Import MLInfo state polygons
mlinfo_state = import_vector_data(IEC / "gis/pc11/mlinfo/pc11-state.shp")
# Import MLInfo district polygons
mlinfo_district = import_vector_data(IEC / "gis/pc11/mlinfo/pc11-district.shp")
# import bharatmaps village points
bmap_village_points = import_vector_data(IEC / "gis/bharatmaps/bmap_village_points.shp")
/dartfs-hpc/rc/home/y/f0051xy/ddl/tools/py/ddlpy/geospatialtools/utils.py:112: UserWarning: Shapefile has no CRS, setting to WGS84. If this is incorrect, results will be incorrect UserWarning
Reprojecting shapefile to WGS84
/dartfs-hpc/rc/home/y/f0051xy/ddl/tools/py/ddlpy/geospatialtools/utils.py:200: UserWarning: 1 polygons are invalid, applying .buffer(0) correction. UserWarning /dartfs-hpc/rc/home/y/f0051xy/ddl/tools/py/ddlpy/geospatialtools/utils.py:204: UserWarning: Corrected corrupt polygons indices: [54] UserWarning
Reprojecting shapefile to WGS84
################################################
# SPATIAL DISTRIBUTION OF VILLAGES IN NAGALAND #
################################################
# subset village points for Nagaland
nagaland_village_pts = bmap_village_points[bmap_village_points["pc11_s_id"] == "13"]
# extract latitude and longitude of village point geometry
nagaland_village_pts["lat"] = gpd.GeoSeries(nagaland_village_pts["geometry"]).y
nagaland_village_pts["lon"] = gpd.GeoSeries(nagaland_village_pts["geometry"]).x
# convert point coordinates to numpy arrary
coords = nagaland_village_pts[["lon","lat"]].values
# subset nagaland state polygon - this will be the bounding polygon for the thiessen polygons
nagaland = mlinfo_state[mlinfo_state["pc11_s_id"] == "13"]
nagaland = nagaland[nagaland["geometry"].is_valid == True]
# plot Nagaland state boundary and village points
ax = nagaland.boundary.plot(figsize = (12, 12), linewidth = 2, edgecolor = "black")
nagaland_village_pts.plot(ax = ax, markersize = 9).set_title("Nagaland Villages", fontsize = 24)
ax.grid(True)
/dartfs-hpc/rc/home/y/f0051xy/.conda/envs/py_spatial/lib/python3.7/site-packages/geopandas/geodataframe.py:1351: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value)
#################################################################################
# GENERATE THIESSEN POLYGONS FROM VILLAGE POINTS WITHIN NAGALAND STATE BOUNDARY #
#################################################################################
# use geovoronoi to generate thiessen polygons within Nagaland state boundary
region_polys, region_pts = voronoi_regions_from_coords(coords, nagaland["geometry"].iloc[0])
# define an empty container to store thiessen polygons
gpd_input_dict= {'x': [], 'y': [], 'geometry': []}
# convert output dictionary to dataframe consisting of centroid lat/lon and corresponding thiessen polygon
for pt, poly in region_pts.items():
gpd_input_dict['x'] += [coords[poly[0]][0]]
gpd_input_dict['y'] += [coords[poly[0]][1]]
gpd_input_dict['geometry'] += [region_polys[pt]]
# append output to geodataframe
vor = gpd.GeoDataFrame(gpd_input_dict)
# rename variables
vor = vor.rename(columns = {"x": "lon", "y": "lat"})
####################################################
# plot thiessen polygons grown from village points #
####################################################
# Make two subplots - first one shows the full state and the other one zooms into a particular region to display how the thiessen polygons look like
f, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize = (18, 18))
# add a rectangular box to highlight zoomed-in region
rect = patches.Rectangle((94.8, 25.6), 0.2, 0.2, linewidth=3, edgecolor='r', facecolor='none')
# plot thiessen polygons
thiessens = vor.plot(linewidth = 1, edgecolor = "black", color = "lightblue", alpha = 0.4, ax = ax1)
# plot village points
nagaland_village_pts.plot(ax = thiessens, markersize = 2)
ax1.add_patch(rect)
ax1.set_title("Thiessen Polygons: Nagaland", fontsize = 24)
thiessens = vor.plot(linewidth = 1, edgecolor = "black", color = "lightblue", alpha = 0.4, ax = ax2)
nagaland_village_pts.plot(ax = thiessens, markersize = 8)
thiessens.set_xlim(94.6, 94.8)
thiessens.set_ylim(25.8, 26)
plt.show()
##############################
# REFINING THIESSEN POLYGONS #
##############################
nagaland_district = mlinfo_district[mlinfo_district["pc11_s_id"] == "13"]
nagaland_district = nagaland_district[nagaland_district["geometry"].is_valid == True]
ax = nagaland_district.plot(figsize = (10, 10), linewidth = 1, edgecolor = "black", alpha = 0.4, color = "red")
nagaland_village_pts.plot(ax = ax, markersize = 2)
<AxesSubplot:>
# spatially join village points to district polygons
nagaland_village_pts_sjoin = nagaland_village_pts.sjoin(nagaland_district, how = "inner")
# check how many points have mismatched identifiers
print(len(nagaland_village_pts_sjoin) - len(nagaland_village_pts_sjoin[nagaland_village_pts_sjoin["pc11_d_id_left"] == nagaland_village_pts_sjoin["pc11_d_id_right"]]),
"village points have mismatched identifiers")
# rename features
nagaland_village_pts_sjoin = nagaland_village_pts_sjoin[nagaland_village_pts_sjoin["pc11_d_id_left"] == nagaland_village_pts_sjoin["pc11_d_id_right"]]
nagaland_village_pts_sjoin = nagaland_village_pts_sjoin.drop(columns = ["index_right", "pc11_s_id_right", "pc11_d_id_right", "poly_fix"])
nagaland_village_pts_sjoin = nagaland_village_pts_sjoin.rename(columns = {"pc11_s_id_left": "pc11_s_id", "pc11_d_id_left": "pc11_d_id"})
###########################################
# GROW THIESSEN POLYGONS WITHIN DISTRICTS #
###########################################
nagaland_thiessens = create_thiessens(nagaland_village_pts_sjoin, nagaland_district, "geometry", "pc11_d_id")
# merge thiessen polygons with points dataframe
nagaland_village_pts_sjoin = nagaland_village_pts_sjoin.merge(nagaland_thiessens, on = ["lat", "lon"]).rename(columns = {"geometry_x": "geometry", "geometry_y": "thiessen"})
0%| | 0/11 [00:00<?, ?it/s]
13 village points have mismatched identifiers Total: 1116 points. Growing thiessens within 11 polygons...
100%|██████████| 11/11 [00:10<00:00, 1.01it/s]
SUCCESS!: Generated 1116 thiessen polygons
y = nagaland_thiessens.plot(linewidth = 1, edgecolor = "black", color = "lightblue", alpha = 0.4, figsize = (10, 10))
z = nagaland_village_pts.plot(ax = y, markersize = 4)
nagaland_district.boundary.plot(color="black", linewidth=0.5, alpha = 0.3, ax = z).set_title("Nagaland: Thiessen polygons", fontsize = 24)
Text(0.5, 1.0, 'Nagaland: Thiessen polygons')
Let's take known village areas from the PC11 Village Directory
# import PC11 VD
pc11_vd = pd.read_stata("~/iec/pc11/pc11_vd_clean.dta")
# subset nagaland villages
pc11_vd_nagaland = pc11_vd.loc[pc11_vd["pc11_state_id"] == "13", ["pc11_state_id", "pc11_district_id", "pc11_subdistrict_id", "pc11_village_id", "pc11_vd_area"]]
# merge in known village areas
nagaland_village_pts_sjoin = nagaland_village_pts_sjoin.merge(pc11_vd_nagaland[pc11_vd_nagaland["pc11_vd_area"] > 0],
left_on = ["pc11_s_id", "pc11_d_id", "pc11_sd_id", "pc11_v_id"],
right_on = ["pc11_state_id", "pc11_district_id", "pc11_subdistrict_id", "pc11_village_id"], how = "left")
nagaland_village_pts_sjoin = nagaland_village_pts_sjoin.drop(columns = ["pc11_state_id", "pc11_district_id", "pc11_subdistrict_id", "pc11_village_id"])
# set CRS for thiessen polygons
nagaland_village_pts_sjoin["thiessen"] = gpd.GeoSeries(nagaland_village_pts_sjoin["thiessen"]).set_crs("epsg:4326")
# we only know non-zero areas for 83 villages
nagaland_village_pts_sjoin[nagaland_village_pts_sjoin["pc11_vd_area"] > 0].iloc[0:3]
| pc11_s_id | pc11_d_id | pc11_sd_id | pc11_v_id | pc11_name | type | geometry | lat | lon | thiessen | pc11_vd_area | area_thiessen | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 202 | 13 | 263 | 01767 | 267207 | izheto | Village | POINT (94.42150 26.20607) | 26.206066 | 94.421503 | MULTIPOLYGON (((1426.585 936.457, 1426.808 935... | 85.0 | 5.504975 |
| 203 | 13 | 263 | 01767 | 267208 | sastami | Village | POINT (94.41860 26.18872) | 26.188725 | 94.418603 | POLYGON ((1421.731 935.546, 1426.808 935.049, ... | 500.0 | 8.249970 |
| 204 | 13 | 263 | 01767 | 267209 | v k hq | Village | POINT (94.40461 26.17545) | 26.175454 | 94.404605 | POLYGON ((1422.442 934.121, 1423.691 931.623, ... | 350.0 | 5.015239 |
# Now constrain thiessen polygons based on known area
nagaland_constrained_thiessens = constrain_thiessens(nagaland_village_pts_sjoin, thiessen = "thiessen",
point = "geometry", area = "pc11_vd_area",
crs = "+proj=laea +lon_0=80.15625 +lat_0=18.2989446 +datum=WGS84 +units=km +no_defs")
0it [00:00, ?it/s]
CRS of Thiessen polygons and points conformal Calculated area of 1116 thiessen polygons.. Number of thiessen polygons with area greater than known area: 2 ( 0.17921146953405018 % ) Buffers created.. Calculated intersection area between thiessens and buffers..
0%| | 0/2 [00:00<?, ?it/s]
SUCCESS! 2 Thiessen polygons constrained
######################################
# PLOT CONSTRAINED THIESSEN POLYGONS #
######################################
a = gpd.GeoDataFrame(nagaland_constrained_thiessens.loc[nagaland_constrained_thiessens["pc11_vd_area"] < nagaland_constrained_thiessens["area_thiessen"], "intersection"]).set_geometry("intersection").set_crs("+proj=laea +lon_0=80.15625 +lat_0=18.2989446 +datum=WGS84 +units=km +no_defs").to_crs("epsg:4326").plot(figsize = (15, 15))
gpd.GeoDataFrame(nagaland_constrained_thiessens.loc[nagaland_constrained_thiessens["pc11_vd_area"] < nagaland_constrained_thiessens["area_thiessen"], "thiessen"]).set_geometry("thiessen").set_crs("+proj=laea +lon_0=80.15625 +lat_0=18.2989446 +datum=WGS84 +units=km +no_defs").to_crs("epsg:4326").plot(ax = a, color = "red", alpha = 0.5, linewidth = 1, edgecolor = "black")
0%| | 0/2 [00:03<?, ?it/s]
<AxesSubplot:>
######################################
# PLOT CONSTRAINED THIESSEN POLYGONS #
######################################
nagaland_constrained_thiessens["constrained_thiessens"] = np.where(nagaland_constrained_thiessens["intersection"].isnull(),
nagaland_constrained_thiessens["thiessen"],
nagaland_constrained_thiessens["intersection"])
nagaland_constrained_thiessens["constrained_thiessens"] = gpd.GeoSeries(nagaland_constrained_thiessens["constrained_thiessens"]).set_crs("+proj=laea +lon_0=80.15625 +lat_0=18.2989446 +datum=WGS84 +units=km +no_defs").to_crs("epsg:4326")
nagaland_constrained_thiessens["geometry"] = gpd.GeoSeries(nagaland_constrained_thiessens["geometry"]).set_crs("+proj=laea +lon_0=80.15625 +lat_0=18.2989446 +datum=WGS84 +units=km +no_defs").to_crs("epsg:4326")
nagaland_constrained_thiessens = nagaland_constrained_thiessens.drop(columns = ["lat", "lon", "buffer", "intersection", "intersection_area", "area_diff", "type", "thiessen"])
nagaland_constrained_thiessens.set_geometry("constrained_thiessens").plot(column = "area_thiessen", k = 1000,
scheme = "quantiles", cmap = "cividis",
figsize = (12, 12), linewidth = 0.1, edgecolor = "black")
<AxesSubplot:>
Let's estimate coverage areas for railway stations in India
# open point data for railway stations
railway_stations = gpd.read_file("https://gist.githubusercontent.com/sankalpsharmaa/0c0587f3ae31277411960f70128d682f/raw/075361e1625bac0e8138dfbc68880c639d871e7b/india_railway_stations.geojson")
# plot railway stations
ax = mlinfo_state.boundary.plot(figsize = (12, 12) ,linewidth = 0.7, edgecolor = "black")
railway_stations.plot(ax = ax, markersize = 1).set_title("Railway Stations", fontsize = 18)
ax.grid(True)
railway_stations = railway_stations[["code", "name", "geometry"]]
railway_stations_sjoin = gpd.GeoDataFrame(railway_stations).to_crs("epsg:4326").sjoin(mlinfo_district, how = "inner").drop(columns = ["poly_fix", "index_right"])
railway_stations_sjoin["lat"] = railway_stations["geometry"].y
railway_stations_sjoin["lon"] = railway_stations["geometry"].x
railway_thiessens = create_thiessens(railway_stations_sjoin, mlinfo_state, "geometry", "pc11_s_id")
railway_stations_sjoin = railway_stations_sjoin.merge(railway_thiessens, on = ["lat", "lon"], how = "inner")
railway_stations_sjoin = railway_stations_sjoin.rename(columns = {"geometry_x":"geometry", "geometry_y":"thiessen"})
railway_stations_sjoin["area"] = gpd.GeoSeries(railway_stations_sjoin["thiessen"]).set_crs("epsg:4326").to_crs("+proj=laea +lon_0=80.15625 +lat_0=18.2989446 +datum=WGS84 +units=km +no_defs").area
railway_stations_sjoin['area'].clip(railway_stations_sjoin['area'].quantile(0.0001), railway_stations_sjoin['area'].quantile(0.999), inplace=True)
# Make two subplots - first one shows the full state and the other one zooms into a particular region to display how the thiessen polygons look like
f, (ax1, ax2) = plt.subplots(1, 2, sharey=False, figsize = (23, 9))
# add a rectangular box to highlight zoomed-in region
rect = patches.Rectangle((80, 20), 5, 5, linewidth=3, edgecolor='r', facecolor='none')
# plot thiessen polygons
gpd.GeoDataFrame(railway_stations_sjoin).set_geometry("thiessen").plot(ax = ax1, column = "area", figsize = (15, 12), linewidth = 0.1,
edgecolor = "black", legend = True, cmap = "cividis", k = 100, alpha = 0.95)
railway_stations.plot(ax = ax1, markersize = 0.3).set_title("Railway Stations", fontsize = 18)
ax1.add_patch(rect)
ax1.set_title("Railway Stations")
cut_thiessens = gpd.GeoDataFrame(railway_stations_sjoin).set_geometry("thiessen").plot(ax = ax2, column = "area", figsize = (15, 12), linewidth = 0.1,
edgecolor = "black", legend = True, cmap = "cividis", k = 100, alpha = 0.95)
cut_points = railway_stations.plot(ax = ax2, markersize = 3).set_title("Railway Stations", fontsize = 18)
cut_thiessens.set_xlim(80, 85)
cut_thiessens.set_ylim(20, 25)
f.tight_layout()
plt.subplots_adjust(wspace=0, hspace=0)
0%| | 0/35 [00:00<?, ?it/s]
Total: 8695 points. Growing thiessens within 35 polygons...
100%|██████████| 35/35 [06:32<00:00, 11.22s/it]
/dartfs-hpc/rc/home/y/f0051xy/ddl/tools/py/ddlpy/geospatialtools/geospatialtools.py:221: UserWarning: If the number of thiessen polygons do not match the initial points then it is likely that you are either a) attempting to grow singular points within polygons or b) some geometries do not have at least three non-colinear points to get a valid Delaunay triangulation
warnings.warn("If the number of thiessen polygons do not match the initial points then it is likely that you are either a) attempting to grow singular points within polygons or b) some geometries do not have at least three non-colinear points to get a valid Delaunay triangulation")
SUCCESS!: Generated 8684 thiessen polygons
railway_stations_sjoin["area"] = gpd.GeoSeries(railway_stations_sjoin["thiessen"]).set_crs("epsg:4326").to_crs("+proj=laea +lon_0=80.15625 +lat_0=18.2989446 +datum=WGS84 +units=km +no_defs").area
coverage_area = railway_stations_sjoin.groupby(['pc11_s_id'])['area'].agg(['mean', 'median', 'min', 'max']).reset_index().sort_values(by = "mean")
pc11_states = pd.read_stata(IEC / "keys/pc11_state_key.dta")
pc11_states.merge(coverage_area, left_on = ["pc11_state_id"], right_on = ["pc11_s_id"], how = "inner").sort_values(by = "mean")
| pc11_state_id | pc11_state_name | pc01_state_id | pc01_state_name | pc11_s_id | mean | median | min | max | |
|---|---|---|---|---|---|---|---|---|---|
| 5 | 07 | nct of delhi | 07 | delhi | 07 | 28.164610 | 13.381245 | 2.238803 | 194.263532 |
| 12 | 19 | west bengal | 19 | west bengal | 19 | 127.556589 | 80.327568 | 0.662191 | 2383.083658 |
| 8 | 10 | bihar | 10 | bihar | 10 | 136.855278 | 97.481915 | 3.456450 | 1531.645393 |
| 21 | 30 | goa | 30 | goa | 30 | 165.809294 | 132.922467 | 7.906010 | 612.993673 |
| 2 | 03 | punjab | 03 | punjab | 03 | 171.982735 | 151.834112 | 2.867916 | 662.135380 |
| 7 | 09 | uttar pradesh | 09 | uttar pradesh | 09 | 199.667641 | 160.785797 | 1.817210 | 1627.034613 |
| 22 | 32 | kerala | 32 | kerala | 32 | 212.702678 | 111.959037 | 4.142354 | 3247.291664 |
| 4 | 06 | haryana | 06 | haryana | 06 | 213.377542 | 186.877678 | 23.723697 | 887.523433 |
| 23 | 33 | tamil nadu | 33 | tamil nadu | 33 | 242.166575 | 160.473425 | 0.511096 | 3150.846456 |
| 13 | 20 | jharkhand | 20 | jharkhand | 20 | 278.866617 | 188.801408 | 2.727585 | 2437.463226 |
| 11 | 18 | assam | 18 | assam | 18 | 291.460048 | 203.177021 | 10.115379 | 1601.705100 |
| 17 | 24 | gujarat | 24 | gujarat | 24 | 326.965165 | 210.588640 | 0.103531 | 17359.557889 |
| 19 | 28 | andhra pradesh | 28 | andhra pradesh | 28 | 387.997046 | 261.847243 | 1.234048 | 4646.527721 |
| 18 | 27 | maharashtra | 27 | maharashtra | 27 | 424.439809 | 298.035695 | 1.316953 | 4889.430497 |
| 20 | 29 | karnataka | 29 | karnataka | 29 | 462.246279 | 359.711503 | 4.522621 | 3489.029570 |
| 14 | 21 | odisha | 21 | orissa | 21 | 496.226111 | 309.375996 | 5.965009 | 4906.419169 |
| 16 | 23 | madhya pradesh | 23 | madhya pradesh | 23 | 510.482198 | 359.976283 | 3.272434 | 8308.703294 |
| 6 | 08 | rajasthan | 08 | rajasthan | 08 | 547.466220 | 397.720988 | 6.305622 | 17197.851545 |
| 10 | 16 | tripura | 16 | tripura | 16 | 648.152245 | 365.651497 | 82.765452 | 1879.256391 |
| 15 | 22 | chhattisgarh | 22 | chhattisgarh | 22 | 847.960956 | 436.600785 | 9.858267 | 12198.232280 |
| 1 | 02 | himachal pradesh | 02 | himachal pradesh | 02 | 973.770355 | 184.052109 | 19.246246 | 16104.286578 |
| 3 | 05 | uttarakhand | 05 | uttarakhand | 05 | 1205.084263 | 226.182005 | 10.977778 | 9631.339412 |
| 9 | 13 | nagaland | 13 | nagaland | 13 | 5524.957776 | 2659.368644 | 1084.919719 | 12830.584966 |
| 0 | 01 | jammu kashmir | 01 | jammu kashmir | 01 | 12231.531582 | 329.557440 | 129.110963 | 163304.462260 |