Space-Removal-Df
Sat 17 May 2025
import pandas as pd
# Path to the raw dataset, relative to the notebook's working directory.
FILEPATH = "../countries-of-the-world.csv"
# Load the CSV with pandas' default parsing options.
# NOTE(review): the preview below shows values such as "48,0" and "0,00", so
# several numeric columns appear to use a comma decimal separator and are
# presumably loaded as strings -- consider decimal="," if numeric dtypes are
# needed; confirm against the data.
df = pd.read_csv(FILEPATH)
# Preview the first two rows to inspect the raw column names.
df.head(2)
| | Country | Region | Population | Area (sq. mi.) | Pop. Density (per sq. mi.) | Coastline (coast/area ratio) | Net migration | Infant mortality (per 1000 births) | GDP ($ per capita) | Literacy (%) | Phones (per 1000) | Arable (%) | Crops (%) | Other (%) | Climate | Birthrate | Deathrate | Agriculture | Industry | Service |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | ASIA (EX. NEAR EAST) | 31056997 | 647500 | 48,0 | 0,00 | 23,06 | 163,07 | 700.0 | 36,0 | 3,2 | 12,13 | 0,22 | 87,65 | 1 | 46,6 | 20,34 | 0,38 | 0,24 | 0,38 |
| 1 | Albania | EASTERN EUROPE | 3581655 | 28748 | 124,6 | 1,26 | -4,93 | 21,52 | 4500.0 | 86,5 | 71,2 | 21,09 | 4,42 | 74,49 | 3 | 15,11 | 5,22 | 0,232 | 0,188 | 0,579 |
# List every raw column name, one per line, before any cleaning.
for column_name in df.columns:
    print(column_name)
Country
Region
Population
Area (sq. mi.)
Pop. Density (per sq. mi.)
Coastline (coast/area ratio)
Net migration
Infant mortality (per 1000 births)
GDP ($ per capita)
Literacy (%)
Phones (per 1000)
Arable (%)
Crops (%)
Other (%)
Climate
Birthrate
Deathrate
Agriculture
Industry
Service
# Build a mapping from each original column name to a lowercase,
# underscore-separated version. Spaces become underscores; the punctuation
# that occurs in the headers (. ( ) % / $) is removed outright.
# NOTE(review): removing punctuation that follows a space leaves doubled or
# trailing underscores (e.g. "GDP ($ per capita)" -> "gdp__per_capita",
# "Literacy (%)" -> "literacy_"); that behavior is kept unchanged here.
_COLUMN_CHAR_MAP = str.maketrans(" ", "_", ".()%/$")
new_cols = {
    original: original.lower().translate(_COLUMN_CHAR_MAP)
    for original in df.columns
}
# Bare expression so the notebook displays the mapping.
new_cols
{'Country': 'country',
'Region': 'region',
'Population': 'population',
'Area (sq. mi.)': 'area_sq_mi',
'Pop. Density (per sq. mi.)': 'pop_density_per_sq_mi',
'Coastline (coast/area ratio)': 'coastline_coastarea_ratio',
'Net migration': 'net_migration',
'Infant mortality (per 1000 births)': 'infant_mortality_per_1000_births',
'GDP ($ per capita)': 'gdp__per_capita',
'Literacy (%)': 'literacy_',
'Phones (per 1000)': 'phones_per_1000',
'Arable (%)': 'arable_',
'Crops (%)': 'crops_',
'Other (%)': 'other_',
'Climate': 'climate',
'Birthrate': 'birthrate',
'Deathrate': 'deathrate',
'Agriculture': 'agriculture',
'Industry': 'industry',
'Service': 'service'}
# Apply the cleaned names to the frame in place, then preview the result.
df.rename(new_cols, axis="columns", inplace=True)
df.head(2)
| | country | region | population | area_sq_mi | pop_density_per_sq_mi | coastline_coastarea_ratio | net_migration | infant_mortality_per_1000_births | gdp__per_capita | literacy_ | phones_per_1000 | arable_ | crops_ | other_ | climate | birthrate | deathrate | agriculture | industry | service |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | ASIA (EX. NEAR EAST) | 31056997 | 647500 | 48,0 | 0,00 | 23,06 | 163,07 | 700.0 | 36,0 | 3,2 | 12,13 | 0,22 | 87,65 | 1 | 46,6 | 20,34 | 0,38 | 0,24 | 0,38 |
| 1 | Albania | EASTERN EUROPE | 3581655 | 28748 | 124,6 | 1,26 | -4,93 | 21,52 | 4500.0 | 86,5 | 71,2 | 21,09 | 4,42 | 74,49 | 3 | 15,11 | 5,22 | 0,232 | 0,188 | 0,579 |
# Save the frame with the cleaned headers. index=False keeps the default
# 0..n-1 row index out of the file, so re-reading the CSV does not produce an
# extra "Unnamed: 0" column.
df.to_csv('../countries-of-the-world-cleaned.csv', index=False)
Score: 10
Category: pandas