Space-Removal-Df

Sat 17 May 2025

import pandas as pd
# Location of the raw dataset, given relative to the current working directory.
FILEPATH = "../countries-of-the-world.csv"
# Load the whole CSV using pandas' default parsing options.
# NOTE(review): the preview below shows values such as "48,0", i.e. a comma is
# used as the decimal mark; with the defaults those columns are presumably read
# as strings -- consider passing decimal="," if numeric dtypes are needed.
df = pd.read_csv(FILEPATH)
# Preview the first two rows to sanity-check the load.
df.head(2)
Country Region Population Area (sq. mi.) Pop. Density (per sq. mi.) Coastline (coast/area ratio) Net migration Infant mortality (per 1000 births) GDP ($ per capita) Literacy (%) Phones (per 1000) Arable (%) Crops (%) Other (%) Climate Birthrate Deathrate Agriculture Industry Service
0 Afghanistan ASIA (EX. NEAR EAST) 31056997 647500 48,0 0,00 23,06 163,07 700.0 36,0 3,2 12,13 0,22 87,65 1 46,6 20,34 0,38 0,24 0,38
1 Albania EASTERN EUROPE 3581655 28748 124,6 1,26 -4,93 21,52 4500.0 86,5 71,2 21,09 4,42 74,49 3 15,11 5,22 0,232 0,188 0,579
# Dump the raw header, one label per line, to see which names need cleaning.
# len() is used for the emptiness check because an Index has no truth value;
# the guard keeps an empty header from printing a stray blank line.
if len(df.columns):
    print("\n".join(map(str, df.columns)))
Country
Region
Population
Area (sq. mi.)
Pop. Density (per sq. mi.)
Coastline (coast/area ratio)
Net migration
Infant mortality (per 1000 births)
GDP ($ per capita)
Literacy (%)
Phones (per 1000)
Arable (%)
Crops (%)
Other (%)
Climate
Birthrate
Deathrate
Agriculture
Industry
Service
# Build a mapping from every original column label to its cleaned name:
# lower-case the label, turn each space into an underscore and delete the
# punctuation characters . ( ) % / $ outright.
# Symbols are deleted rather than substituted, so a label like "Literacy (%)"
# keeps the underscore that came from its space and ends up as "literacy_".
cleanup_table = str.maketrans(" ", "_", ".()%/$")
new_cols = {
    label: label.lower().translate(cleanup_table)
    for label in df.columns
}
new_cols
{'Country': 'country',
 'Region': 'region',
 'Population': 'population',
 'Area (sq. mi.)': 'area_sq_mi',
 'Pop. Density (per sq. mi.)': 'pop_density_per_sq_mi',
 'Coastline (coast/area ratio)': 'coastline_coastarea_ratio',
 'Net migration': 'net_migration',
 'Infant mortality (per 1000 births)': 'infant_mortality_per_1000_births',
 'GDP ($ per capita)': 'gdp__per_capita',
 'Literacy (%)': 'literacy_',
 'Phones (per 1000)': 'phones_per_1000',
 'Arable (%)': 'arable_',
 'Crops (%)': 'crops_',
 'Other (%)': 'other_',
 'Climate': 'climate',
 'Birthrate': 'birthrate',
 'Deathrate': 'deathrate',
 'Agriculture': 'agriculture',
 'Industry': 'industry',
 'Service': 'service'}
# Apply the old-label -> cleaned-label mapping to the column axis, mutating
# the existing frame instead of creating a renamed copy.
df.rename(new_cols, axis="columns", inplace=True)
# Preview the first two rows to confirm the new header.
df.head(n=2)
country region population area_sq_mi pop_density_per_sq_mi coastline_coastarea_ratio net_migration infant_mortality_per_1000_births gdp__per_capita literacy_ phones_per_1000 arable_ crops_ other_ climate birthrate deathrate agriculture industry service
0 Afghanistan ASIA (EX. NEAR EAST) 31056997 647500 48,0 0,00 23,06 163,07 700.0 36,0 3,2 12,13 0,22 87,65 1 46,6 20,34 0,38 0,24 0,38
1 Albania EASTERN EUROPE 3581655 28748 124,6 1,26 -4,93 21,52 4500.0 86,5 71,2 21,09 4,42 74,49 3 15,11 5,22 0,232 0,188 0,579
# Persist the renamed frame next to the raw file. index=False keeps the
# automatically generated row index out of the CSV; otherwise it is written as
# an extra unnamed first column and the file no longer matches the input layout.
df.to_csv('../countries-of-the-world-cleaned.csv', index=False)



Score: 10

Category: pandas