Split-And-Get-First

Sat 17 May 2025

# Separator between the tokens of a feature name.
UNDERSCORE = "_"

# Flat mapping: full feature name -> numeric value. The text before the last
# underscore is the feature prefix; the final token (e.g. ``sorensen``) is
# stripped off by the loop below.
feature_value_map_for_score = {
    "system_businessname_overlapLevenshtein": 0.5,
    "system_address_overlapLevenshtein": 1.0,
    "system_streetname_overlapLevenshtein": 1.0,
    "system_city_overlapLevenshtein": 1.0,
    "system_zipcode_overlapLevenshtein": 1.0,
    "system_businessname_sorensen": 0.56,
    "system_address_sorensen": 1.0,
    "system_streetname_sorensen": 1.0,
    "system_city_sorensen": 1.0,
    "system_region_sorensen": 1.0,
    "system_houseno_exactMatch": 1.0,
    "system_zipcode_ratcliff": 1.0,
    "system_suiteno_exactMatch": 1.0,
}

# Nested mapping: feature prefix -> {full feature name -> numeric value}.
# There is no "system_suiteno" entry here, so that prefix takes the
# "impute needed" branch of the loop below.
individual_algo_feature_score_map = {
    "system_businessname": {
        "system_businessname_overlapLevenshtein": 0.5,
        "system_businessname_sorensen": 0.56,
    },
    "system_address": {
        "system_address_overlapLevenshtein": 1.0,
        "system_address_sorensen": 1.0,
    },
    "system_streetname": {
        "system_streetname_overlapLevenshtein": 1.0,
        "system_streetname_sorensen": 1.0,
    },
    "system_city": {
        "system_city_overlapLevenshtein": 1.0,
        "system_city_sorensen": 1.0,
    },
    "system_zipcode": {
        "system_zipcode_overlapLevenshtein": 1.0,
        "system_zipcode_ratcliff": 1.0,
    },
    "system_region": {
        "system_region_sorensen": 1.0,
    },
    "system_houseno": {
        "system_houseno_exactMatch": 1.0,
    },
}

# Report, for every flat feature name, whether its prefix has an entry in the
# nested map. Only the keys are needed, so iterate the dict directly.
for feature_name in feature_value_map_for_score:
    # rpartition splits on the LAST underscore; element 0 is everything before
    # it (and "" when the name contains no underscore at all).
    feature_prefix = feature_name.rpartition(UNDERSCORE)[0]

    if feature_prefix in individual_algo_feature_score_map:
        print(f'{feature_prefix} avaible, no impute needed')
    else:
        print(f'{feature_prefix} not avaible, impute needed')
system_businessname avaible, no impute needed
system_address avaible, no impute needed
system_streetname avaible, no impute needed
system_city avaible, no impute needed
system_zipcode avaible, no impute needed
system_businessname avaible, no impute needed
system_address avaible, no impute needed
system_streetname avaible, no impute needed
system_city avaible, no impute needed
system_region avaible, no impute needed
system_houseno avaible, no impute needed
system_zipcode avaible, no impute needed
system_suiteno not avaible, impute needed



Score: 5

Category: basics