Infovis Portfolio 📓¶
This notebook provides the starting point for your infovis portfolio.
This is how it works:
- 📓 Extend this notebook over the course of the semester. We provide a structure in three parts.
- 🖼️ For each part, analyse relevant datasets and create a set of visualizations. We provide inspiration but feel free to use other data or your own data from another project.
- 💡 For each visualization, briefly describe your thinking behind the design choices you made and what insights other people could gain from looking at it. This is an important part!
- ✅ Submit it after each part as well as at the end. For the final submission, also submit a poster for our event (more info in e-Learning).
🎓 A note on this learning format:
This format is flexible. We see infovis as a "meta-skill" for scientific work: Ideally, you directly use some of the practical parts here for visualizations you're already interested in (e.g. for data analysis in another course or a thesis project).
So don't feel limited by the examples. You can always change things, as long as you don't change them in a way that considerably reduces the learning experience and effort!
🗪 When in doubt, talk to us, e.g. in the lecture and tutorial sessions. We're also happy to brainstorm ideas with you!
💯 A note on grading:
Your portfolio is yours: There is little that you can do "wrong", as long as you take this seriously and honestly invest time and effort over the semester.
🤩 To respect your investment in this flexible format, we use an optimistic starting point: That means, we will start grading by assuming that everyone deserves full points for their portfolio.
🤨 With that in mind, we will have to subtract points if the portfolio:
- lacks effort (extreme example: your portfolio just has basic bar charts for everything)
- lacks visible exploration (e.g. just a single visualization for the data)
- lacks an eye for details (e.g. you always keep default settings and don't try to optimize the details of a chart)
- lacks reflection (e.g. you don't describe your thinking behind your plots and/or don't explain what people can learn from your plots, see above)
- clearly lacks own creativity in the final part (e.g. you choose a very basic dataset or a dataset we have already analysed in another example)
🗪 Again, when in doubt - talk to us!
✍️ As a final note, while it is ok to share tips and short code snippets, the portfolio is intended as an individual achievement. Cases of plagiarism will lead to failing the course. Cheating should never have to feel like an appealing shortcut here anyway: If you really need an extension or extra help, just let us know.
Datasets¶
To provide inspiration, here are some links where you can find interesting datasets to analyze in your portfolio:
- https://github.com/rfordatascience/tidytuesday
- https://www.destatis.de/DE/Service/OpenData/_inhalt.html
- https://www.ons.gov.uk/census/planningforcensus2021/ukcensusdata
- https://ec.europa.eu/eurostat/
- https://www.kaggle.com/datasets
- https://data.fivethirtyeight.com/
- https://corgis-edu.github.io/corgis/csv/
- https://github.com/textmining-infopros/Curated-Datasets
- https://www.data-is-plural.com/ / https://docs.google.com/spreadsheets/d/1wZhPLMCHKJvwOkP4juclhjFgqIY8fQFMemwKL2c64vk
Feel free to use other data sources as well, including your own datasets!
Useful snippets¶
Here are a few code snippets that might come in handy. Feel free to extend this if you come across something that you think might be useful for others in the course! It is perfectly ok to share snippets like that, e.g. on e-Learning or our Discord server.
# Merge/join dataframes in pandas:
# pd.merge(left=dataset, right=dataset2, on='column name', how='left' etc.)
# Reorder categorical levels (e.g. sth. like low, medium, high) in seaborn plots:
# sns.barplot(data=..., x=..., y=..., order=['low', 'medium', 'high'])
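For example, here is a minimal, self-contained merge sketch (the dataframes and column names are made up purely for illustration):
import pandas as pd
left_df = pd.DataFrame({'country': ['A', 'B', 'C'], 'pollution': [10, 20, 30]})
right_df = pd.DataFrame({'country': ['A', 'B'], 'income': ['low', 'high']})
# how='left' keeps all rows of left_df; rows without a match get NaN in the joined columns
merged = pd.merge(left=left_df, right=right_df, on='country', how='left')
print(merged)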
Setup¶
We expect that you'll need at least the following packages. Feel free to add further packages you need throughout the semester.
Note: It's ok if you prefer to use another "base setup", e.g. another Python visualization package instead of seaborn. If so, we recommend making that decision early on and replacing/adding it directly here.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Suggestion:
# If you'd like your portfolio to use a consistent style (and if you want to use seaborn for this),
# it might be a good idea to define palettes and a context up front here:
my_cat_palette = sns.color_palette('Set2')
my_cont_palette = sns.color_palette('Blues')
# You can switch palettes with: sns.set_palette(palette_object)
# Note that some libraries might expect matplotlib colormap objects. Conversion is possible.
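# One possible conversion (just a sketch): wrap the discrete palette colors in a ListedColormap.
from matplotlib.colors import ListedColormap
my_cont_cmap = ListedColormap(my_cont_palette)
# seaborn can also hand back a continuous matplotlib colormap directly:
# my_cont_cmap = sns.color_palette('Blues', as_cmap=True)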
# Set context to notebook (sets the basic scaling of fonts etc.):
sns.set_context('notebook')
data_folder = './data/' # adjust to your filesystem / setup, if you like
Part 1: Foundations of infovis 🚀¶
Let's get started with exploring some datasets with basic chart types!
👉 This part has two TODOs (1.1, 1.2).
To get started, here are the references for the data we use for some examples:
"Global air quality data provided by the World Health Organization" | Source: WHO
"WHO regional groupings" by income | Source: WHO
Let's load the dataset and have a first look:
who_air_data = pd.read_csv(data_folder+'who_aap_2021_v9_11august2022.csv', sep=';', decimal=',')
who_air_data.head() # prints the first 5 rows, useful to check what the dataframe looks like
WHO Region | ISO3 | WHO Country Name | City or Locality | Measurement Year | PM2.5 (μg/m3) | PM10 (μg/m3) | NO2 (μg/m3) | PM25 temporal coverage (%) | PM10 temporal coverage (%) | NO2 temporal coverage (%) | Reference | Number and type of monitoring stations | Version of the database | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Eastern Mediterranean Region | AFG | Afghanistan | Kabul | 2019 | 119.77 | NaN | NaN | 18.0 | NaN | NaN | U.S. Department of State, United States Enviro... | NaN | 2022 |
1 | European Region | ALB | Albania | Durres | 2015 | NaN | 17.65 | 26.63 | NaN | NaN | 83.961187 | European Environment Agency (downloaded in 2021) | NaN | 2022 |
2 | European Region | ALB | Albania | Durres | 2016 | 14.32 | 24.56 | 24.78 | NaN | NaN | 87.932605 | European Environment Agency (downloaded in 2021) | NaN | 2022 |
3 | European Region | ALB | Albania | Elbasan | 2015 | NaN | NaN | 23.96 | NaN | NaN | 97.853881 | European Environment Agency (downloaded in 2021) | NaN | 2022 |
4 | European Region | ALB | Albania | Elbasan | 2016 | NaN | NaN | 26.26 | NaN | NaN | 96.049636 | European Environment Agency (downloaded in 2021) | NaN | 2022 |
# It's often useful to print the list of column names:
who_air_data.columns
Index(['WHO Region', 'ISO3', 'WHO Country Name', 'City or Locality', 'Measurement Year', 'PM2.5 (μg/m3)', 'PM10 (μg/m3)', 'NO2 (μg/m3)', 'PM25 temporal coverage (%)', 'PM10 temporal coverage (%)', 'NO2 temporal coverage (%)', 'Reference', 'Number and type of monitoring stations', 'Version of the database'], dtype='object')
who_region_income = pd.read_csv(data_folder+'who_country_income_ratings.csv', sep=';')
who_region_income.head()
# "head()" prints the first 5 rows - useful to check what a dataframe looks like
WHO Country Name | WHO Region | World Bank ranking of income 2019 | |
---|---|---|---|
0 | Afghanistan | Eastern Mediterranean | low |
1 | Albania | European | upper middle |
2 | Algeria | African | lower middle |
3 | Andorra | European | high |
4 | Angola | African | lower middle |
who_region_income.columns
Index(['WHO Country Name', 'WHO Region', 'World Bank ranking of income 2019'], dtype='object')
Here's a basic first plot - a bar chart:
sns.set_palette(my_cat_palette)
sns.set_style('whitegrid')
plt.figure(figsize=(12,5))
sns.barplot(data=who_air_data, x="WHO Region", y="NO2 (μg/m3)", errorbar="sd", hue="WHO Region")
plt.title('NO2 per WHO region')
sns.despine()
plt.tight_layout()
plt.xlabel('WHO Region')
plt.ylabel('NO2 (μg/m3)')
plt.show()
Here's another example that uses geopandas to easily integrate a map:
import geopandas as gpd
# With conda you can install it with: conda install -c conda-forge geopandas
# If you want to use pip instead check that dependencies can be installed as well,
# see: https://geopandas.org/en/stable/getting_started/install.html
This library also comes with basic geo-dataframes, such as the world map. Let's have a look:
countries = gpd.read_file(data_folder+"ne_110m_admin_0_countries.shp")
countries.head()
featurecla | scalerank | LABELRANK | SOVEREIGNT | SOV_A3 | ADM0_DIF | LEVEL | TYPE | TLC | ADMIN | ... | FCLASS_TR | FCLASS_ID | FCLASS_PL | FCLASS_GR | FCLASS_IT | FCLASS_NL | FCLASS_SE | FCLASS_BD | FCLASS_UA | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Admin-0 country | 1 | 6 | Fiji | FJI | 0 | 2 | Sovereign country | 1 | Fiji | ... | None | None | None | None | None | None | None | None | None | MULTIPOLYGON (((180 -16.06713, 180 -16.55522, ... |
1 | Admin-0 country | 1 | 3 | United Republic of Tanzania | TZA | 0 | 2 | Sovereign country | 1 | United Republic of Tanzania | ... | None | None | None | None | None | None | None | None | None | POLYGON ((33.90371 -0.95, 34.07262 -1.05982, 3... |
2 | Admin-0 country | 1 | 7 | Western Sahara | SAH | 0 | 2 | Indeterminate | 1 | Western Sahara | ... | Unrecognized | Unrecognized | Unrecognized | None | None | Unrecognized | None | None | None | POLYGON ((-8.66559 27.65643, -8.66512 27.58948... |
3 | Admin-0 country | 1 | 2 | Canada | CAN | 0 | 2 | Sovereign country | 1 | Canada | ... | None | None | None | None | None | None | None | None | None | MULTIPOLYGON (((-122.84 49, -122.97421 49.0025... |
4 | Admin-0 country | 1 | 2 | United States of America | US1 | 1 | 2 | Country | 1 | United States of America | ... | None | None | None | None | None | None | None | None | None | MULTIPOLYGON (((-122.84 49, -120 49, -117.0312... |
5 rows × 169 columns
Note that this data also includes information on population and GDP.
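To quickly see which of these columns the shapefile actually contains (a small sketch; the exact names, e.g. 'POP_EST' or 'GDP_MD', vary between Natural Earth versions):
print([col for col in countries.columns if 'POP' in col or 'GDP' in col])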
Here's an example of merging the geo data with the WHO data:
who_air_data_mean_per_country = who_air_data.groupby(['ISO3'])[['PM2.5 (μg/m3)', 'PM10 (μg/m3)', 'NO2 (μg/m3)']].mean()
who_air_data_geo = gpd.GeoDataFrame(pd.merge(left=who_air_data_mean_per_country, right=countries[['ISO_A3', 'geometry']], left_on='ISO3', right_on='ISO_A3', how='left'))
Now we can plot it!
This example also illustrates further useful aspects, such as adding and modifying a colorbar.
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Basic setup:
sns.set_style('whitegrid')
fig = plt.figure(figsize=(8,6))
ax = plt.subplot(111)
# Title, include the min and max year (since we averaged the measurements above):
plt.title('PM2.5 in the world, averaged measures from %d to %d'
% (who_air_data['Measurement Year'].min(), who_air_data['Measurement Year'].max()))
# Remove ticks, not needed for the world map here:
plt.xticks([])
plt.yticks([])
# Add a colorbar:
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
# Let's plot the world map in grey first, since not all countries have air measurements:
countries.plot(ax=ax, color='lightgrey')
# Now let's plot the air pollution data on top:
who_air_data_geo[['PM2.5 (μg/m3)', 'geometry']].plot(
ax=ax, cax=cax, legend=True, column='PM2.5 (μg/m3)', cmap='copper')
# Modify the colorbar a bit to have a useful label:
cax.set_ylabel('PM2.5 (μg/m3)\nmissing data in grey', rotation=90)
plt.tight_layout()
plt.show()
Now it's your turn!
👉 TODO 1.1: Explore the data further to find interesting aspects/patterns to visualize. Once you've made some interesting discoveries, try to optimize the visualizations. For example, experiment with different chart types and improve the details of the chart(s).
See the intro lecture and the lectures on visual language, chart-level design and component-level design for input and inspiration.
To get you started, here are some questions you could examine:
- What is the relationship between the wealth of a country/region and its air pollution?
- Which countries/regions have the lowest/highest pollution levels? For each pollution type?
- Which cities have the lowest/highest pollution levels? For each pollution type?
- How has pollution developed (in specific cities) over time?
- What is the relationship of population size / country size and pollution?
- etc.
Delhi Air Quality Comparison¶
New Delhi in India consistently ranks among the most polluted cities in the world. It also happens to be my hometown. Let's explore whether the PM2.5 and PM10 levels have changed over time in New Delhi with a basic plot.
# AIR QUALITY IN DELHI OVER TIME
delhi_pm25 = who_air_data[who_air_data['City or Locality'] == 'Delhi'][['Measurement Year', 'PM2.5 (μg/m3)']]
delhi_pm10 = who_air_data[who_air_data['City or Locality'] == 'Delhi'][['Measurement Year', 'PM10 (μg/m3)']]
india_pm25 = who_air_data[who_air_data['WHO Country Name']== 'India'][['Measurement Year', 'PM2.5 (μg/m3)']]
plt.grid(True, alpha=0.3)
sns.lineplot(data=delhi_pm25, x='Measurement Year', y= 'PM2.5 (μg/m3)', label='PM2.5')
sns.lineplot(data=delhi_pm10, x='Measurement Year', y='PM10 (μg/m3)', label='PM10')
plt.legend(loc='upper center', frameon=False, ncol=2)
plt.ylabel('Particulate Matter Concentration (μg/m³)')
Text(0, 0.5, 'Particulate Matter Concentration (μg/m³)')
Looking at the graph, we only seem to have data from 2016 till 2019 for Delhi in this dataset. Ideally I would've preferred a dataset with a longer timeframe. While it does seem that the PM10 levels have been going down, I am still a bit skeptical about drawing that conclusion given the limited data.
Perhaps we can use this dataset to compare air quality levels across other major cities of the world and Delhi.
international_cities = ['Delhi', 'Beijing', 'Los Angeles', 'London', 'Tokyo']
international_cities_pm25 = who_air_data[who_air_data['City or Locality'].isin(international_cities)][['City or Locality', 'Measurement Year', 'PM2.5 (μg/m3)']]
international_cities_pm10 = who_air_data[who_air_data['City or Locality'].isin(international_cities)][['City or Locality', 'Measurement Year', 'PM10 (μg/m3)']]
plt.figure(figsize=(12, 8))
# PM2.5 lines
for city in international_cities:
city_data_pm25 = international_cities_pm25[international_cities_pm25['City or Locality'] == city]
if not city_data_pm25.empty:
sns.lineplot(data=city_data_pm25, x='Measurement Year', y='PM2.5 (μg/m3)', label=f'{city} PM2.5', marker='o')
# PM10 lines
for city in international_cities:
city_data_pm10 = international_cities_pm10[international_cities_pm10['City or Locality'] == city]
if not city_data_pm10.empty:
sns.lineplot(data=city_data_pm10, x='Measurement Year', y='PM10 (μg/m3)', label=f'{city} PM10', marker='s')
plt.grid(True, alpha=0.3)
plt.legend(loc='upper left', frameon=False, ncol=2)
plt.ylabel('Particulate Matter Concentration (μg/m³)')
plt.show()
The above plot highlights just how much worse Delhi's air quality is compared to other major cities! However, it is not the best way to visualize this comparison. Firstly, there are too many individual lines and it is hard to see which color exactly maps to which city. Second, it is also hard to discern differences between cities that are lower on the y-axis. Lastly, the dataset doesn't seem to have measurements for all the cities that I wanted to visualize. Given that some cities have huge variations, perhaps a double violin chart (one for PM2.5 and one for PM10) would be a better option. I would also only use cities for which data from 2016 till 2018 is available.
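Before building the violin plots, a quick coverage check can show which candidate cities actually have PM2.5 values for all of 2016-2018 (a rough sketch; the candidate list here is just an assumption for illustration):
candidate_cities = ['Delhi', 'Beijing', 'Los Angeles', 'London', 'Tokyo', 'Kathmandu', 'Abu Dhabi']
coverage = (who_air_data[who_air_data['City or Locality'].isin(candidate_cities)
                         & who_air_data['Measurement Year'].isin([2016, 2017, 2018])]
            .dropna(subset=['PM2.5 (μg/m3)'])
            .groupby('City or Locality')['Measurement Year'].nunique())
print(coverage)  # a value of 3 means the city has PM2.5 data for all three years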
# AIR QUALITY IN DELHI COMPARED TO OTHER CITIES (2016-2018)
analysis_years = [2016, 2017, 2018]
pm25_cities = ['Delhi', 'Beijing', 'London', 'Kathmandu', 'Abu Dhabi']
pm10_cities = ['Delhi', 'London', 'Kathmandu', 'Abu Dhabi']
# Create a custom color mapping to ensure consistency across both plots
all_cities = list(set(pm25_cities + pm10_cities)) # Get unique cities
city_colors = dict(zip(all_cities, sns.color_palette('Set2', len(all_cities))))
# Filter data for the specified years (2016-2018)
filtered_data_2016_2018 = who_air_data[who_air_data['Measurement Year'].isin(analysis_years)]
# Create violin plots for both pollutants
fig, axes = plt.subplots(1, 2, figsize=(16, 8))
# PM2.5 violin plot
pm25_data = filtered_data_2016_2018[
filtered_data_2016_2018['City or Locality'].isin(pm25_cities)
][['City or Locality', 'PM2.5 (μg/m3)', 'Measurement Year']].dropna()
# Create color list for PM2.5 plot based on the order of cities in the data
pm25_colors = [city_colors[city] for city in pm25_cities]
sns.violinplot(data=pm25_data, x='City or Locality', y='PM2.5 (μg/m3)',
               hue='City or Locality', hue_order=pm25_cities, palette=pm25_colors,
               legend=False, ax=axes[0], inner='box', order=pm25_cities)
axes[0].set_title('PM2.5 Concentration Distribution (2016-2018)',
fontsize=14, fontweight='bold', pad=20)
axes[0].set_xlabel('City', fontsize=12, fontweight='bold')
axes[0].set_ylabel('PM2.5 Concentration (μg/m³)', fontsize=12, fontweight='bold')
axes[0].grid(True, alpha=0.3)
axes[0].tick_params(axis='x', rotation=45)
# Add WHO guideline for PM2.5
axes[0].axhline(y=15, color='red', linestyle='--', alpha=0.8, linewidth=2,
label='WHO Guideline (15 μg/m³)')
axes[0].legend(loc='upper right', frameon=True, fancybox=True)
# PM10 violin plot
pm10_data = filtered_data_2016_2018[
filtered_data_2016_2018['City or Locality'].isin(pm10_cities)
][['City or Locality', 'PM10 (μg/m3)', 'Measurement Year']].dropna()
# Create color list for PM10 plot based on the order of cities in the data
pm10_colors = [city_colors[city] for city in pm10_cities]
sns.violinplot(data=pm10_data, x='City or Locality', y='PM10 (μg/m3)',
               hue='City or Locality', hue_order=pm10_cities, palette=pm10_colors,
               legend=False, ax=axes[1], inner='box', order=pm10_cities)
axes[1].set_title('PM10 Concentration Distribution (2016-2018)',
fontsize=14, fontweight='bold', pad=20)
axes[1].set_xlabel('City', fontsize=12, fontweight='bold')
axes[1].set_ylabel('PM10 Concentration (μg/m³)', fontsize=12, fontweight='bold')
axes[1].grid(True, alpha=0.3)
axes[1].tick_params(axis='x', rotation=45)
# Add WHO guideline for PM10
axes[1].axhline(y=45, color='red', linestyle='--', alpha=0.8, linewidth=2,
label='WHO Guideline (45 μg/m³)')
axes[1].legend(loc='upper right', frameon=True, fancybox=True)
# Add overall title
fig.suptitle('Air Quality Comparison Amongst Major Cities (2016-2018)',
fontsize=16, fontweight='bold', y=0.99)
plt.tight_layout()
plt.subplots_adjust(top=0.88) # Make room for the main title
plt.show()
Through the violin plots we can see that Delhi has both higher PM10 and PM2.5 levels compared to other large cities. I also added a dotted red line to show the permissible pollutant levels according to the WHO. We can see that, apart from London, almost all measurements in the other cities lie above the permissible levels, with Delhi showing significantly higher pollutant concentrations than the rest.
WHO Guideline: https://www.iqair.com/th-en/newsroom/2021-who-air-quality-guidelines
Enough about air pollution? Here's the second task for Part 1:
👉 TODO 1.2: Find another dataset and explore it as well (e.g. see the list of data sources at the top of this notebook).
For inspiration, consider these prompts for what you could focus on here:
- Can you tell a "data story" with 3-4 visualizations, plus connecting text explanations?
- Can you combine two datasets to allow for interesting new perspectives?
- Can you create an educational infovis example yourself? E.g. by showing the same data in 3-4 different chart types and discussing their pros and cons for this specific dataset.
Protests, Riots and other Conflict Scenarios in India (2016 - 2022): A data story¶
Let's look at the occurrences of riots, protests and other conflict scenarios in India from 2016 till 2022. For this we will be using the following dataset (https://www.kaggle.com/datasets/shivkumarganesh/riots-in-india-19972022-acled-dataset-50k?resource=download). Here's what the raw data looks like:
riots_india = pd.read_csv(data_folder+'riots_india.csv', sep=',')
print(riots_india.head())
riots_india.columns
(Output: first 5 rows of riots_india — 5 rows × 31 columns, including data_id, event_date, year, event_type, sub_event_type, actor1, location, latitude, longitude, source, notes, fatalities and iso3.)
Index(['data_id', 'iso', 'event_id_cnty', 'event_id_no_cnty', 'event_date', 'year', 'time_precision', 'event_type', 'sub_event_type', 'actor1', 'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2', 'interaction', 'region', 'country', 'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude', 'geo_precision', 'source', 'source_scale', 'notes', 'fatalities', 'timestamp', 'iso3'], dtype='object')
What are the major types of conflict scenarios over this time period (2016 - 2022)?¶
plt.figure(figsize=(12, 8))
event_counts = riots_india['event_type'].value_counts()
sns.barplot(x=event_counts.values, y=event_counts.index,
            hue=event_counts.index, palette='viridis', legend=False, orient='h')
plt.title('Types of Violent Occurrences in India (2016-2022)',
fontsize=16, fontweight='bold', pad=20)
plt.xlabel('Number of Incidents', fontsize=12, fontweight='bold')
plt.ylabel('Event Type', fontsize=12, fontweight='bold')
# Add value labels on the bars
for i, v in enumerate(event_counts.values):
plt.text(v + 500, i, f'{v:,}', va='center', fontweight='bold')
plt.tight_layout()
plt.grid(axis='x', alpha=0.3)
plt.show()
It seems that protests are by far the most common form of conflict scenario in this time period, followed, worryingly, by riots as a close second.
How have the different conflict scenarios evolved over time?¶
# Count events per year and type (this grouped dataframe is needed for the pivot below):
riots_india_grouped = riots_india.groupby(['year', 'event_type']).size().reset_index(name='count')
area_data = riots_india_grouped.pivot(index='year', columns='event_type', values='count').fillna(0)
# plot.area creates its own figure, so a separate plt.figure() call is not needed here
ax = area_data.plot.area(alpha=0.6, figsize=(12, 6))
plt.title('Stacked Area Chart of Event Counts by Year and Type')
plt.xlabel('Year')
plt.ylabel('Count of Events')
# dotted vertical guidelines for the protest periods
plt.axvline(x=2019, color='lightcoral', linestyle='--', alpha=0.7, linewidth=1.5)
plt.axvline(x=2020, color='steelblue', linestyle='--', alpha=0.7, linewidth=1.5)
# text annotations
plt.text(2019.2, area_data.sum(axis=1).max() * 0.75, 'CAA Protests',
fontsize=10, fontweight='bold', color='darkred', ha='left',
bbox=dict(boxstyle='round,pad=0.3', facecolor='mistyrose', alpha=0.9, edgecolor='lightcoral'))
plt.text(2020.2, area_data.sum(axis=1).max() * 0.6, 'Farmers Protests',
fontsize=10, fontweight='bold', color='navy', ha='left',
bbox=dict(boxstyle='round,pad=0.3', facecolor='lightcyan', alpha=0.9, edgecolor='steelblue'))
plt.legend(title='Event Type', bbox_to_anchor=(0.5, -0.15), loc='upper center',
frameon=False, ncol=3)
plt.tight_layout()
plt.subplots_adjust(bottom=0.2)
plt.show()
Again, the bulk of the events are protests. 2019 seems to be the year in which both riots and protests peaked. This makes sense, since there were large-scale protests and sectarian violence around that time due to the CAA (Citizenship Amendment Act) protests. Protests and riots remain high until 2021 and decline sharply afterwards, which also makes sense, since another large-scale protest movement, the farmers' protests, was resolved after 2021. The data shows that 2019 till 2021 was a period of heightened conflict.
CAA Protests: https://en.wikipedia.org/wiki/Citizenship_Amendment_Act_protests Farmers Protests: https://en.wikipedia.org/wiki/2020%E2%80%932021_Indian_farmers%27_protest
What regions saw the most occurrences?¶
from mpl_toolkits.axes_grid1 import make_axes_locatable
# Calculate events by state
state_events = riots_india.groupby('admin1').size().reset_index(name='event_count')
# Load the Indian states shapefile
indian_states = gpd.read_file(data_folder + 'india_st.shp')
# Simple state name matching (normalize to uppercase)
state_mapping = {}
for riots_state in state_events['admin1'].unique():
for shapefile_state in indian_states['STATE'].unique():
if riots_state.upper() == shapefile_state.upper():
state_mapping[riots_state] = shapefile_state
break
# mapping and clean data
state_events['mapped_state'] = state_events['admin1'].map(state_mapping)
state_events_clean = state_events.dropna(subset=['mapped_state'])
final_events = state_events_clean.groupby('mapped_state')['event_count'].sum().reset_index()
# Merge with shapefile
merged_data = indian_states.merge(final_events, left_on='STATE', right_on='mapped_state', how='left')
merged_data['event_count'] = merged_data['event_count'].fillna(0)
# Create the map
fig, ax = plt.subplots(figsize=(14, 10))
# choropleth map
merged_data.plot(column='event_count', cmap='Reds', ax=ax, legend=False,
edgecolor='black', linewidth=0.5)
# colorbar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.05)
vmax = merged_data['event_count'].max()
sm = plt.cm.ScalarMappable(cmap='Reds', norm=plt.Normalize(vmin=0, vmax=vmax))
sm.set_array([])
cbar = plt.colorbar(sm, cax=cax)
cbar.set_label('Number of Events', rotation=90, fontweight='bold')
# labels for top 5 states
top_states = merged_data.nlargest(5, 'event_count')
for idx, row in top_states.iterrows():
if row['event_count'] > 0:
centroid = row.geometry.centroid
ax.annotate(f"{row['STATE']}\n({int(row['event_count'])})",
(centroid.x, centroid.y), fontsize=8, fontweight='bold', ha='center',
bbox=dict(boxstyle='round,pad=0.2', facecolor='white',
alpha=0.8, edgecolor='red'))
ax.set_title('Civil Unrest Events by State in India (2016-2022)', fontsize=14, fontweight='bold')
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
plt.show()
The top 5 states with the highest number of conflict occurrences were Jammu and Kashmir, Punjab, Uttar Pradesh, Assam and Tamil Nadu. Given that the CAA protests were mostly concentrated in Assam and Uttar Pradesh and the farmers' protests were mostly in Punjab, this makes sense. Further, given that Jammu and Kashmir is a contested territory, this also makes sense. However, Tamil Nadu is surprising to me. On further research I learned that between 2020 and 2021 there were also regional protests in Tamil Nadu - this was new and unexpected for me!
Part 2: Exploring multidimensional and hierarchical data with interaction 🔍¶
import plotly.express as px
import plotly.offline as plto
# also needs: conda install nbformat
# or pip equivalent, see here: https://stackoverflow.com/questions/66557543/valueerror-mime-type-rendering-requires-nbformat-4-2-0-but-it-is-not-installed
Important: Plotly plots are interactive and per default will not be exported if you export your notebook to HTML. However, if you set "plotly.offline.init_notebook_mode()" they can be exported and even stay interactable in the exported HTML file! So please run this setting (cell below) before you export your notebook for the submission on e-Learning, to make sure that all your work is included.
plto.init_notebook_mode()
Plotly (express) provides basic chart types for multidimensional data with interaction "out of the box". Here is an example from the documentation. Try out brushing with the mouse along the different axes!
df = px.data.iris()
fig = px.parallel_coordinates(df, color="species_id", labels={"species_id": "Species",
"sepal_width": "Sepal Width", "sepal_length": "Sepal Length",
"petal_width": "Petal Width", "petal_length": "Petal Length", },
color_continuous_scale=px.colors.diverging.Tealrose,
color_continuous_midpoint=2)
fig.show()
👉 TODO 2.1: Choose a multidimensional dataset and explore it by creating interactive visualizations with Plotly. In your exploration, make use of at least one chart type for multidimensional data. See the Plotly overview of chart types here.
At the end of your exploration, write a short summary that reflects on the interactions you used and how they impacted your exploration (in addition to the reflections per chart, as before). For example, you could mention if they helped you to identify a specific pattern or gain a specific insight (or not).
I chose a dataset about teenage phone addiction to explore. It can be found here: https://www.kaggle.com/datasets/khushikyad001/teen-phone-addiction-and-lifestyle-survey
The dataset looks like this:
teen_phone_data = pd.read_csv(data_folder + 'teen_phone_addiction_dataset.csv')
print(teen_phone_data.head())
print(teen_phone_data.columns)
(Output: first 5 rows of teen_phone_data — 5 rows × 25 columns.)
Index(['ID', 'Name', 'Age', 'Gender', 'Location', 'School_Grade', 'Daily_Usage_Hours', 'Sleep_Hours', 'Academic_Performance', 'Social_Interactions', 'Exercise_Hours', 'Anxiety_Level', 'Depression_Level', 'Self_Esteem', 'Parental_Control', 'Screen_Time_Before_Bed', 'Phone_Checks_Per_Day', 'Apps_Used_Daily', 'Time_on_Social_Media', 'Time_on_Gaming', 'Time_on_Education', 'Phone_Usage_Purpose', 'Family_Communication', 'Weekend_Usage_Hours', 'Addiction_Level'], dtype='object')
Let's create an interactive parallel_coordinates plot to explore the data
# Clean the data
teen_phone_clean = teen_phone_data.dropna()
# Create interactive parallel coordinates plot for teen phone addiction dataset
selected_dimensions = ['Age', 'Daily_Usage_Hours', 'Sleep_Hours', 'Academic_Performance', 'Addiction_Level']
# Filter the dataset to only include the specified columns
teen_phone_parallel = teen_phone_clean[selected_dimensions].copy()
# Create the parallel coordinates plot
fig_parallel_coords = px.parallel_coordinates(
teen_phone_parallel,
color='Addiction_Level',
labels={
'Age': 'Age (years)',
'Daily_Usage_Hours': 'Daily Usage (hours)',
'Sleep_Hours': 'Sleep Hours',
'Academic_Performance': 'Academic Performance',
'Addiction_Level': 'Addiction Level'
},
color_continuous_scale=px.colors.sequential.Plasma,
title="Teen Phone Addiction: Parallel Coordinates Analysis"
)
fig_parallel_coords.update_layout(
title_font_size=16,
title_x=0.5,
height=600,
font_size=12
)
fig_parallel_coords.show()
There are a lot of datapoints and it is hard to make out any patterns, so let's try sampling the data and plotting that.
# Sample the data to reduce clutter ( 500 random samples)
teen_phone_sample = teen_phone_clean.sample(n=500, random_state=42)
selected_dimensions = ['Age', 'Daily_Usage_Hours', 'Sleep_Hours', 'Academic_Performance', 'Addiction_Level']
fig = px.parallel_coordinates(
teen_phone_sample,
dimensions=selected_dimensions,
color='Addiction_Level',
color_continuous_scale='Viridis',
title="Teen Phone Addiction: Parallel Coordinates (Sampled Data)"
)
fig.update_layout(
title_font_size=14,
title_x=0.5,
height=500,
font_size=10
)
fig.show()
This is much better! It is easier to explore the data now and I can see some general patterns. It seems that higher usage correlates with higher addiction levels as well as lower sleep. I wonder if it would be better to see such relationships using a scatterplot matrix. Let's try that next.
teen_sample = teen_phone_clean.sample(n=500, random_state=42)
dimensions = ['Age', 'Daily_Usage_Hours', 'Sleep_Hours', 'Academic_Performance', 'Addiction_Level']
sample_data = teen_sample[dimensions]
# scatterplot matrix
fig = px.scatter_matrix(
sample_data,
dimensions=dimensions,
color=teen_sample['Addiction_Level'],
color_continuous_scale='Viridis',
title="Teen Phone Addiction: Scatterplot Matrix",
opacity=0.5,
labels={
'Age': 'Age',
'Daily_Usage_Hours': 'Usage (hrs)',
'Sleep_Hours': 'Sleep (hrs)',
'Academic_Performance': 'Academic',
'Addiction_Level': 'Addiction'
}
)
fig.update_layout(
title_font_size=14,
title_x=0.5,
height=700,
width=900,
font_size=8,
margin=dict(l=80, r=80, t=80, b=80)
)
# Update traces for better visibility
fig.update_traces(
marker=dict(size=2)
)
fig.update_xaxes(tickangle=0, tickfont_size=8)
fig.update_yaxes(tickfont_size=8)
fig.show()
There does seem to be a negative correlation between sleep and addiction level, and a positive correlation between usage hours and addiction level.
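To back this visual impression up with numbers, a small correlation table could be printed as well (a minimal sketch, reusing the sampled dataframe from above):
corr_table = teen_sample[['Daily_Usage_Hours', 'Sleep_Hours', 'Addiction_Level']].corr().round(2)
print(corr_table)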
👉 TODO 2.2: Choose a hierarchical dataset and explore it with Plotly. In your exploration, make use of at least one chart type for hierarchical data. Depending on your dataset, we recommend to use a treemap or a sunburst chart or a tree plot.
At the end of your exploration, write a short summary that reflects on the interactions you used and how they impacted your exploration (in addition to the reflections per chart, as before). For example, you could mention if they helped you to identify a specific pattern or gain a specific insight (or not).
For a hierarchical dataset I chose a dataset of world governments' expenditure from 2000 to 2021. The dataset can be found here: https://www.kaggle.com/datasets/adamgrey88/world-governments-expenditure-dataset-2000-2021
It looks like this:
world_expenditure = pd.read_csv(data_folder+'WorldExpenditures.csv')
print(world_expenditure.head())
print(world_expenditure.columns)
(Output: first 5 rows of world_expenditure — 5 rows × 6 columns.)
Index(['Unnamed: 0', 'Year', 'Country', 'Sector', 'Expenditure(million USD)', 'GDP(%)'], dtype='object')
Let's create a treemap to explore this dataset.
# Get the most recent year and filter data
recent_year = world_expenditure['Year'].max()
data = world_expenditure[
(world_expenditure['Year'] == recent_year) &
(world_expenditure['Sector'] != 'Total function') &
(world_expenditure['Expenditure(million USD)'] > 100)
]
#treemap
fig1 = px.treemap(
data,
path=['Country', 'Sector'],
values='Expenditure(million USD)',
color='GDP(%)',
color_continuous_scale='Viridis',
title=f'Government Expenditures by Country and Sector ({recent_year})',
height=700
)
fig1.show()
There are a lot of countries that are hard to discern in the bottom right corner. Since I am mostly interested in the countries with the largest government expenditure, it might be better to plot only the top 15 countries.
top_countries = data.groupby('Country')['Expenditure(million USD)'].sum().nlargest(15).index
top_data = data[data['Country'].isin(top_countries)]
fig2 = px.treemap(
top_data,
path=['Country', 'Sector'],
values='Expenditure(million USD)',
color='GDP(%)',
color_continuous_scale='RdYlBu',
title=f'Top 15 Countries: Government Expenditures ({recent_year})',
height=700
)
fig2.show()
This is much better! It is interesting to see that all of these top countries seem to spend a big share of their GDP on social protection and health. It is also interesting to see how much bigger the US government's expenditure is compared to everyone else's.
Part 3: Your own project 🏆¶
This third part is open and unrestricted for you to explore and visualize any dataset in any way. You can also use a different "tech stack" than this python notebook, if you like (e.g. if you want to try out D3.js). If you do, please make sure to submit all relevant files in the end, in addition to this notebook.
We encourage you to pick a meaningful topic - some inspiration:
- Environment, climate change
- Energy, cities
- Public health, food
- Misinformation, fake news
- A dataset that is meaningful for you: e.g. from another project that you are involved in, your thesis, a hobby/interest, a volunteering job, your sports club, etc.
For my project I wanted to explore electoral participation in Indian democracy as well as ideological changes. For this I chose the elections dataset from the Trivedi Centre for Political Data at Ashoka University. It can be found here: https://lokdhaba.ashoka.edu.in/browse-data?et=GE
The data is quite comprehensive and looks a bit like this:
elections_data = pd.read_csv(data_folder + 'All_States_GE.csv', low_memory=False)
print(elections_data.head())
print(elections_data.columns)
(Output: first 5 rows of elections_data — 5 rows × 45 columns.)
Index(['State_Name', 'Assembly_No', 'Constituency_No', 'Year', 'month', 'Poll_No', 'DelimID', 'Position', 'Candidate', 'Sex', 'Party', 'Votes', 'Candidate_Type', 'Valid_Votes', 'Electors', 'Constituency_Name', 'Constituency_Type', 'Sub_Region', 'N_Cand', 'Turnout_Percentage', 'Vote_Share_Percentage', 'Deposit_Lost', 'Margin', 'Margin_Percentage', 'ENOP', 'pid', 'Party_Type_TCPD', 'Party_ID', 'last_poll', 'Contested', 'Last_Party', 'Last_Party_ID', 'Last_Constituency_Name', 'Same_Constituency', 'Same_Party', 'No_Terms', 'Turncoat', 'Incumbent', 'Recontest', 'MyNeta_education', 'TCPD_Prof_Main', 'TCPD_Prof_Main_Desc', 'TCPD_Prof_Second', 'TCPD_Prof_Second_Desc', 'Election_Type'], dtype='object')
Since I am interested in the federal elections (Lok Sabha elections), I clean and filter the data accordingly:
# Clean and prepare the data
elections_data = elections_data.dropna(subset=['Votes', 'Turnout_Percentage'])
elections_data = elections_data[elections_data['Votes'] >= 0]
elections_data = elections_data[elections_data['Turnout_Percentage'] <= 100] # Remove invalid turnout percentages
# Filter for Lok Sabha elections (General Elections) in recent years for better analysis
lok_sabha_data = elections_data[elections_data['Election_Type'] == 'Lok Sabha Election (GE)']
Visualization 1: Voter Turnout Trends in Indian Elections (1962-2019)¶
Let's examine how voter turnout has evolved over six decades of Indian democracy. This analysis focuses on Lok Sabha (federal) elections to understand democratic participation patterns.
# Calculate average turnout by year for major election years
turnout_by_year = lok_sabha_data.groupby('Year')['Turnout_Percentage'].agg(['mean', 'count']).reset_index()
turnout_by_year = turnout_by_year[turnout_by_year['count'] > 100] # Filter for years with substantial data
major_election_years = [1962, 1967, 1971, 1977, 1980, 1984, 1989, 1991, 1996, 1998, 1999, 2004, 2009, 2014, 2019]
turnout_major_years = turnout_by_year[turnout_by_year['Year'].isin(major_election_years)]
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(16, 12), height_ratios=[1, 1])
sns.set_style('white')
# Calculate gender representation data first
gender_data = lok_sabha_data[lok_sabha_data['Sex'].isin(['M', 'F'])].copy()
# Remove duplicates to avoid double-counting
gender_data = gender_data.drop_duplicates(subset=['Year', 'State_Name', 'Constituency_Name', 'Candidate', 'Sex'])
gender_by_year = gender_data.groupby(['Year', 'Sex']).size().reset_index(name='Count')
gender_by_year_pivot = gender_by_year.pivot(index='Year', columns='Sex', values='Count').fillna(0)
gender_by_year_pivot['Total'] = gender_by_year_pivot['M'] + gender_by_year_pivot['F']
gender_by_year_pivot['Female_Percentage'] = (gender_by_year_pivot['F'] / gender_by_year_pivot['Total']) * 100
# election years with substantial data
election_years = gender_by_year_pivot[gender_by_year_pivot['Total'] > 1000].index.tolist()
gender_election_data = gender_by_year_pivot.loc[election_years]
# common years between both datasets
turnout_years = set(turnout_major_years['Year'].tolist())
gender_years = set(gender_election_data.index.tolist())
common_years = sorted(list(turnout_years & gender_years))
turnout_filtered = turnout_major_years[turnout_major_years['Year'].isin(common_years)]
gender_filtered = gender_election_data.loc[common_years]
# Calculate winning parties for each election year
winning_parties = {}
party_colors = {
'INC': '#19AADE', # Congress blue
'BJP': '#FF9933', # BJP saffron
'JNP': '#228B22', # Janata Party green
'JNP(JP)': '#228B22', # Janata Party variant
'Others': '#808080' # Gray for others
}
for year in common_years:
year_data = lok_sabha_data[lok_sabha_data['Year'] == year]
# Count seats won by each party
seats_won = year_data[year_data['Position'] == 1]['Party'].value_counts()
if not seats_won.empty:
winning_party = seats_won.index[0]
# Simplify party names
if winning_party in ['INC', 'INC(I)']:
winning_parties[year] = 'INC'
elif winning_party == 'BJP':
winning_parties[year] = 'BJP'
elif 'JNP' in winning_party or 'Janata' in winning_party:
winning_parties[year] = 'JNP'
else:
winning_parties[year] = 'Others'
# TOP SECTION:
ax1.grid(True, alpha=0.3, linestyle='-', linewidth=0.5, color='#E5E5E5')
ax1.set_axisbelow(True)
# categorical positions for consistent spacing
x_positions = range(len(common_years))
year_labels = [str(year) for year in common_years]
for i, year in enumerate(common_years):
turnout_val = turnout_filtered[turnout_filtered['Year'] == year]['mean'].iloc[0]
party = winning_parties.get(year, 'Others')
color = party_colors.get(party, '#808080')
ax1.bar(x_positions[i], turnout_val, alpha=0.3, color=color, width=0.8, zorder=1)
# Plot turnout line
ax1.plot(x_positions, turnout_filtered['mean'],
marker='o', linewidth=3, markersize=8, color='#2C3E50',
markerfacecolor='white', markeredgecolor='#2C3E50', markeredgewidth=2,
label='Voter Turnout (%)', zorder=3)
# trend line
z_turnout = np.polyfit(x_positions, turnout_filtered['mean'], 1)
p_turnout = np.poly1d(z_turnout)
ax1.plot(x_positions, p_turnout(x_positions),
"--", alpha=0.7, color='#34495E', linewidth=2, label='Turnout Trend', zorder=2)
# key annotations
max_turnout_idx = turnout_filtered['mean'].idxmax()
max_turnout_year = turnout_filtered.loc[max_turnout_idx, 'Year']
max_turnout_val = turnout_filtered.loc[max_turnout_idx, 'mean']
# Find the position index for the max turnout year
max_pos = common_years.index(max_turnout_year)
ax1.annotate(f'Peak Turnout\n{max_turnout_val:.1f}%',
xy=(max_pos, max_turnout_val),
xytext=(max_pos+0.5, max_turnout_val+3),
arrowprops=dict(arrowstyle='->', color='#2C3E50', lw=1.5),
fontsize=9, ha='center', color='#2C3E50', fontweight='bold')
ax1.set_title('Voter Turnout Trends Over Time', fontsize=16, fontweight='bold', pad=20, color='#2C3E50')
ax1.set_ylabel('Voter Turnout (%)', fontsize=13, color='#2C3E50', fontweight='bold')
ax1.set_ylim(45, 75)
ax1.set_xlim(-0.5, len(common_years)-0.5)
ax1.tick_params(axis='both', labelsize=11)
# party legend
party_legend_elements = [plt.Rectangle((0,0),1,1, color=party_colors[party], alpha=0.7, label=party)
for party in ['INC', 'BJP', 'JNP', 'Others'] if party in winning_parties.values()]
ax1.legend(handles=party_legend_elements + [plt.Line2D([0], [0], color='#2C3E50', lw=3, label='Turnout')],
loc='upper left', bbox_to_anchor=(0.02, 0.98), frameon=True, fancybox=True, shadow=True)
# BOTTOM SECTION:
ax2.grid(True, alpha=0.3, linestyle='-', linewidth=0.5, color='#E5E5E5')
ax2.set_axisbelow(True)
# Create stacked bar chart for male and female candidates
male_counts = gender_filtered['M']
female_counts = gender_filtered['F']
bars_male = ax2.bar(x_positions, male_counts,
alpha=0.8, color='#3498DB', width=0.8, label='Male Candidates')
bars_female = ax2.bar(x_positions, female_counts,
bottom=male_counts, alpha=0.8, color='#E74C3C', width=0.8, label='Female Candidates')
# Add percentage labels on top of bars
for i, year in enumerate(gender_filtered.index):
total = male_counts.iloc[i] + female_counts.iloc[i]
female_pct = (female_counts.iloc[i] / total) * 100
ax2.text(x_positions[i], total + 50, f'{female_pct:.1f}%',
ha='center', va='bottom', fontsize=12, fontweight='bold', color='#E74C3C')
# Add trendline for total candidate growth
total_counts = male_counts + female_counts
z_total = np.polyfit(x_positions, total_counts, 1)
p_total = np.poly1d(z_total)
ax2.plot(x_positions, p_total(x_positions),
"--", alpha=0.7, color='#34495E', linewidth=2, label='Total Candidates Trend', zorder=5)
ax2.set_title('Candidate Participation by Gender Over Time', fontsize=16, fontweight='bold', pad=20, color='#2C3E50')
ax2.set_xlabel('Election Year', fontsize=13, color='#2C3E50', fontweight='500')
ax2.set_ylabel('Number of Candidates', fontsize=13, color='#2C3E50', fontweight='bold')
ax2.set_xlim(-0.5, len(common_years)-0.5)
ax2.tick_params(axis='y', labelsize=11)
ax2.tick_params(axis='x', labelsize=11, rotation=45)
for ax in [ax1, ax2]:
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_linewidth(1.5)
ax.spines['bottom'].set_linewidth(1.5)
ax1.spines['left'].set_color('#2C3E50')
ax1.spines['bottom'].set_color('#BDC3C7')
ax2.spines['left'].set_color('#2C3E50')
ax2.spines['bottom'].set_color('#BDC3C7')
for ax in [ax1, ax2]:
ax.set_xticks(x_positions)
ax.set_xticklabels(year_labels, fontsize=11)
# Only rotate x-axis labels for bottom chart
ax1.tick_params(axis='x', rotation=0)
ax2.tick_params(axis='x', rotation=45)
# legend for gender section
ax2.legend(loc='upper left', bbox_to_anchor=(0.02, 0.98), frameon=True, fancybox=True, shadow=True)
# Calculate parity analysis for textbox
current_pct = (gender_filtered['F'].iloc[-1] / (gender_filtered['F'].iloc[-1] + gender_filtered['M'].iloc[-1])) * 100
gap_to_parity = 50 - current_pct
z_gender = np.polyfit(x_positions, [(gender_filtered['F'].iloc[i] / (gender_filtered['F'].iloc[i] + gender_filtered['M'].iloc[i])) * 100 for i in range(len(gender_filtered))], 1)
# Add textbox with parity analysis
if z_gender[0] > 0:
years_to_parity = gap_to_parity / z_gender[0]
textbox_content = f"Parity would be reached in\n~{years_to_parity:.0f} years ({2019 + years_to_parity:.0f})"
else:
textbox_content = "Parity is moving further away"
# Add textbox to bottom chart
ax2.text(0.98, 0.85, textbox_content, transform=ax2.transAxes, fontsize=11,
verticalalignment='top', horizontalalignment='right',
bbox=dict(boxstyle='round,pad=0.4', facecolor='white', alpha=0.95, edgecolor='#2C3E50', linewidth=1.5),
color='#2C3E50', fontweight='bold')
# overall title
fig.suptitle('Electoral Participation in Indian Democracy (1962-2019)',
fontsize=18, fontweight='bold', y=0.97, color='#2C3E50')
plt.tight_layout()
plt.subplots_adjust(top=0.90, bottom=0.12) # Make room for title and rotated labels
plt.show()
We look at two aspects of electoral participation. The top chart shows the voter turnout percentage. Over the past two election cycles, turnout has been notably high, which hints at greater electoral participation.
The bottom bar plot looks at the other end of the equation: the number of candidates standing for office. This also seems to be increasing. I also wanted to explore gender parity among electoral candidates. While the trend has been towards parity, progress has been really slow. To show how slow, I calculated a projection and annotated the bottom graph with a text box. At the current rate it would take about a century until there are equal numbers of male and female candidates! So we have a mixed picture: in pure numbers we have greater participation, but at least when it comes to gender, this participation might not be equally representative.
Visualization 2: Evolution of Political Party Dominance (2009-2019)¶
# Left: RJD, SP, AITC, DMK, AAP, INC (center-left to left)
# Center-Right to Right: BSP, JDU, TDP, YSRCP, AIADMK, BJP
ideological_order = ['RJD', 'SP', 'AITC', 'DMK', 'AAP', 'INC', 'BSP', 'JDU', 'TDP', 'YSRCP', 'AIADMK', 'BJP']
# Define union territories to exclude (keeping only Delhi and Chandigarh)
excluded_union_territories = [
'Andaman_&_Nicobar_Islands',
'Dadra_&_Nagar_Haveli',
'Daman_&_Diu',
'Jammu_&_Kashmir',
'Ladakh',
'Lakshadweep',
'Puducherry'
]
# Define north-to-south geographical order
north_to_south_order = [
'Jammu_&_Kashmir', 'Himachal_Pradesh', 'Punjab', 'Haryana', 'Delhi', 'Chandigarh',
'Uttarakhand', 'Uttar_Pradesh', 'Sikkim', 'Arunachal_Pradesh', 'Assam', 'Nagaland',
'Manipur', 'Mizoram', 'Tripura', 'Meghalaya', 'West_Bengal', 'Bihar', 'Jharkhand',
'Odisha', 'Chhattisgarh', 'Madhya_Pradesh', 'Rajasthan', 'Gujarat', 'Maharashtra',
'Goa', 'Andhra_Pradesh', 'Telangana', 'Karnataka', 'Tamil_Nadu', 'Kerala'
]
# Major political parties to focus on
major_parties = ['BJP', 'INC', 'AITC', 'DMK', 'AIADMK', 'YSRCP', 'TDP', 'JDU', 'RJD', 'SP', 'BSP', 'AAP']
# Filter for 2009 and 2019 elections
elections_2009_2019 = lok_sabha_data[lok_sabha_data['Year'].isin([2009, 2019])]
# Function to calculate party dominance (vote share percentage)
def calculate_party_dominance(data, year):
"""Calculate party dominance by vote share percentage for each state"""
year_data = data[data['Year'] == year]
# Group by state and party, sum the votes
state_party_votes = year_data.groupby(['State_Name', 'Party']).agg({
'Votes': 'sum',
'Turnout_Percentage': 'mean'
}).reset_index()
# Calculate total votes per state
state_total_votes = state_party_votes.groupby('State_Name')['Votes'].sum().reset_index()
state_total_votes.columns = ['State_Name', 'Total_Votes']
# Merge to get vote share percentage
state_party_votes = pd.merge(state_party_votes, state_total_votes, on='State_Name')
state_party_votes['Vote_Share_Percentage'] = (state_party_votes['Votes'] / state_party_votes['Total_Votes']) * 100
# Pivot to get parties as columns
dominance_matrix = state_party_votes.pivot(index='State_Name', columns='Party', values='Vote_Share_Percentage')
dominance_matrix = dominance_matrix.fillna(0)
return dominance_matrix
# Calculate dominance for both years
dominance_2009 = calculate_party_dominance(elections_2009_2019, 2009)
dominance_2019 = calculate_party_dominance(elections_2009_2019, 2019)
# Find common states between both years
common_states_b = list(set(dominance_2009.index) & set(dominance_2019.index))
# Filter both matrices for common states
dominance_2009_common = dominance_2009.loc[common_states_b]
dominance_2019_common = dominance_2019.loc[common_states_b]
# Filter the geographical order to only include states present in our data
available_states_b = [state for state in north_to_south_order if state in common_states_b]
# Filter remaining states to exclude unwanted union territories
remaining_states_b = [state for state in common_states_b
if state not in available_states_b
and state not in excluded_union_territories]
final_state_order_b = available_states_b + remaining_states_b
# Ensure we have the parties in both datasets
common_parties = list(set(dominance_2009_common.columns) & set(dominance_2019_common.columns))
major_parties_available = [party for party in ideological_order if party in common_parties]
# Filter for major parties and reorder
dominance_2009_filtered = dominance_2009_common.loc[final_state_order_b, major_parties_available]
dominance_2019_filtered = dominance_2019_common.loc[final_state_order_b, major_parties_available]
# Calculate change (2019 - 2009)
dominance_change = dominance_2019_filtered - dominance_2009_filtered
# Create clean state names by removing underscores
clean_state_names = [state.replace('_', ' ').replace('&', '&') for state in final_state_order_b]
# Update the index with clean state names
dominance_2009_filtered.index = clean_state_names
dominance_2019_filtered.index = clean_state_names
dominance_change.index = clean_state_names
# Create the visualization with reduced clutter
fig, axes = plt.subplots(1, 3, figsize=(24, 12))
# Color schemes
cmap_original = 'YlOrRd'
cmap_change = 'RdBu_r'
# Create masks for zero values only
mask_2009 = dominance_2009_filtered == 0
mask_2019 = dominance_2019_filtered == 0
mask_change = dominance_change == 0
# 2009 Results
sns.heatmap(dominance_2009_filtered,
annot=True,
fmt='.0f',
cmap=cmap_original,
vmin=0,
vmax=70,
mask=mask_2009,
cbar=False,
ax=axes[0],
annot_kws={'size': 7, 'fontweight': 'bold'})
# gray background for zero values
for i in range(len(clean_state_names)):
for j in range(len(major_parties_available)):
if mask_2009.iloc[i, j]:
axes[0].add_patch(plt.Rectangle((j, i), 1, 1, fill=True, color='lightgray', alpha=0.3))
axes[0].set_title('2009 Elections', fontsize=14, fontweight='bold')
axes[0].set_xlabel('')
axes[0].set_ylabel('States (North to South)', fontsize=12, fontweight='bold')
# Plot 2: 2019 Results
sns.heatmap(dominance_2019_filtered,
annot=True,
fmt='.0f',
cmap=cmap_original,
vmin=0,
vmax=70,
mask=mask_2019,
cbar_kws={'label': 'Vote Share (%)'},
ax=axes[1],
annot_kws={'size': 7, 'fontweight': 'bold'})
# gray background for zero values
for i in range(len(clean_state_names)):
for j in range(len(major_parties_available)):
if mask_2019.iloc[i, j]:
axes[1].add_patch(plt.Rectangle((j, i), 1, 1, fill=True, color='lightgray', alpha=0.3))
axes[1].set_title('2019 Elections', fontsize=14, fontweight='bold')
axes[1].set_xlabel('')
axes[1].set_ylabel('')
axes[1].set_yticklabels([])
# Change Analysis
sns.heatmap(dominance_change,
annot=True,
fmt='.0f',
cmap=cmap_change,
center=0,
vmin=-40,
vmax=40,
mask=mask_change,
cbar_kws={'label': 'Change (%)'},
ax=axes[2],
annot_kws={'size': 7, 'fontweight': 'bold'})
# zero values
for i in range(len(clean_state_names)):
for j in range(len(major_parties_available)):
if mask_change.iloc[i, j]:
axes[2].add_patch(plt.Rectangle((j, i), 1, 1, fill=True, color='lightgray', alpha=0.3))
axes[2].set_title('Change (2019 - 2009)', fontsize=14, fontweight='bold')
axes[2].set_xlabel('')
axes[2].set_ylabel('')
axes[2].set_yticklabels([])
# main title
fig.suptitle('India\'s Political Shift to the Right: Electoral Dominance Changes (2009-2019)',
fontsize=16, fontweight='bold', y=0.95)
fig.text(0.5, 0.02, 'Political Parties (Left to Right Ideological Spectrum)',
ha='center', fontsize=12, fontweight='bold')
fig.text(0.5, -0.02, 'Data Source: Trivedi Centre for Political Data, Ashoka University - Indian General Elections Dataset',
ha='center', fontsize=10, style='italic', color='gray')
plt.tight_layout()
plt.subplots_adjust(bottom=0.12, top=0.90) # Make room for the centered xlabel, title, and data source
plt.show()
This visualization shows how political party dominance has shifted across Indian states between the 2009 and 2019 Lok Sabha elections, capturing a full decade of political transformation. Using a three-panel approach, we can see:
- 2009 Results: The political landscape during the last major victory of the Congress (INC), an ideologically centrist party, under Manmohan Singh
- 2019 Results: The consolidation of dominance by the BJP, a right-wing party, under Modi's second term
- Change Analysis: Direct comparison showing gains/losses in vote share over the decade
We can also see that the change is less pronounced in southern states (lower on the y-axis) than in the north.
Poster 🖼️¶
For this third part, also create and submit a poster for our event (more info on e-Learning).
The poster should include:
- A title for your project
- A short abstract, that is, a text description of your project in ca. 150 words
- Information about the dataset(s) you analysed
- The main part: A set of compelling visualizations!
- A short list of insights/takeaways you've gained from your exploration of the data
💡 Overall, your poster should allow other people to learn something interesting about the dataset in 3-5 minutes.
🪧 Important note on the format: There is a poster template on e-Learning. Feel free to change it but please make sure that you export in pdf and A0 format (portrait or landscape), and that it's vectorized (i.e. text needs to be selectable in your pdf - don't export one big pixel image as pdf). These restrictions are necessary so that the university printing services can print your poster properly for our final event.