Goals
- show places of leisure in a map
- make observations about the places and their distribution
- explore the most widely available leisure type in India
```python
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import ast
import plotly.express as px
import geopandas as gpd

le = pd.read_csv("../input/buildings-amenities-all-over-india/leisure.csv")
le.head()
```

| Unnamed: 0 | name | leisure | longitude-lattitude | All_tags | |
|---|---|---|---|---|---|
| 0 | 249132377 | DLF Golf Links Golf Course | golf_course | (77.10471029999984, 28.45473270000001) | {'name': 'DLF Golf Links Golf Course', 'barrie... |
| 1 | 250737365 | NaN | park | (80.23786640000002, 13.04278489999996) | {'leisure': 'park'} |
| 2 | 250979543 | Yoga Centre | sports_centre | (75.8870475, 31.52995199999996) | {'name': 'Yoga Centre', 'leisure': 'sports_cen... |
| 3 | 280167017 | Black Thunder | water_park | (76.9132247999999, 11.32635400000001) | {'name': 'Black Thunder', 'leisure': 'water_pa... |
| 4 | 280701513 | Ootacamund Gymkhana Golf Course | golf_course | (76.67157809999996, 11.417312599999995) | {'name': 'Ootacamund Gymkhana Golf Course', 'l... |
```python
# remove all tags column and rename columns
le = le.drop("All_tags", axis=1)
le.columns = ["id", "name", "leisure", "lo-la"]
le.head()
```

| id | name | leisure | lo-la | |
|---|---|---|---|---|
| 0 | 249132377 | DLF Golf Links Golf Course | golf_course | (77.10471029999984, 28.45473270000001) |
| 1 | 250737365 | NaN | park | (80.23786640000002, 13.04278489999996) |
| 2 | 250979543 | Yoga Centre | sports_centre | (75.8870475, 31.52995199999996) |
| 3 | 280167017 | Black Thunder | water_park | (76.9132247999999, 11.32635400000001) |
| 4 | 280701513 | Ootacamund Gymkhana Golf Course | golf_course | (76.67157809999996, 11.417312599999995) |
```python
# set the ID as index
le.index = le["id"]
le = le.drop("id", axis = 1)
le.head()
```

| name | leisure | lo-la | |
|---|---|---|---|
| id | |||
| 249132377 | DLF Golf Links Golf Course | golf_course | (77.10471029999984, 28.45473270000001) |
| 250737365 | NaN | park | (80.23786640000002, 13.04278489999996) |
| 250979543 | Yoga Centre | sports_centre | (75.8870475, 31.52995199999996) |
| 280167017 | Black Thunder | water_park | (76.9132247999999, 11.32635400000001) |
| 280701513 | Ootacamund Gymkhana Golf Course | golf_course | (76.67157809999996, 11.417312599999995) |
```python
# check NA values
le.isna().sum()
```

```
name 27143
leisure 0
lo-la 37876
dtype: int64
```

The latitude and longitude are the most important columns, so we will drop all rows that do not have them.
```python
le = le[le['lo-la'].notna()]
le
```

| name | leisure | lo-la | |
|---|---|---|---|
| id | |||
| 249132377 | DLF Golf Links Golf Course | golf_course | (77.10471029999984, 28.45473270000001) |
| 250737365 | NaN | park | (80.23786640000002, 13.04278489999996) |
| 250979543 | Yoga Centre | sports_centre | (75.8870475, 31.52995199999996) |
| 280167017 | Black Thunder | water_park | (76.9132247999999, 11.32635400000001) |
| 280701513 | Ootacamund Gymkhana Golf Course | golf_course | (76.67157809999996, 11.417312599999995) |
| ... | ... | ... | ... |
| 8277782288 | NaN | playground | (76.29733219999959, 10.029497999999887) |
| 8280851413 | Gothuruth muzhiris park | park | (76.21773650000003, 10.190251200000016) |
| 8280851414 | Gothuruth Muzhiris park | park | (76.21771200000003, 10.190284000000016) |
| 8281209559 | Exalt Fitness Club Gym | fitness_centre | (72.56438300000039, 23.089663400000084) |
| 8281506191 | NaN | playground | (75.54409639999994, 11.927387099999967) |
5813 rows × 3 columns
fill the missing names with the word “missing”
```python
le['name'].fillna("missing", inplace=True)
le
```

| name | leisure | lo-la | |
|---|---|---|---|
| id | |||
| 249132377 | DLF Golf Links Golf Course | golf_course | (77.10471029999984, 28.45473270000001) |
| 250737365 | missing | park | (80.23786640000002, 13.04278489999996) |
| 250979543 | Yoga Centre | sports_centre | (75.8870475, 31.52995199999996) |
| 280167017 | Black Thunder | water_park | (76.9132247999999, 11.32635400000001) |
| 280701513 | Ootacamund Gymkhana Golf Course | golf_course | (76.67157809999996, 11.417312599999995) |
| ... | ... | ... | ... |
| 8277782288 | missing | playground | (76.29733219999959, 10.029497999999887) |
| 8280851413 | Gothuruth muzhiris park | park | (76.21773650000003, 10.190251200000016) |
| 8280851414 | Gothuruth Muzhiris park | park | (76.21771200000003, 10.190284000000016) |
| 8281209559 | Exalt Fitness Club Gym | fitness_centre | (72.56438300000039, 23.089663400000084) |
| 8281506191 | missing | playground | (75.54409639999994, 11.927387099999967) |
5813 rows × 3 columns
Most common leisure places, by type
```python
le["leisure"].value_counts()
```

```
park 1798
playground 773
fitness_centre 580
resort 544
pitch 497
sports_centre 495
fishing 238
garden 196
stadium 159
swimming_pool 155
dance 58
fitness_station 56
nature_reserve 52
water_park 24
marina 20
slipway 16
beach_resort 14
common 13
amusement_arcade 12
track 10
yes 10
outdoor_seating 9
golf_course 8
recreation_ground 6
club 6
bandstand 5
bowling_alley 5
hackerspace 5
bird_hide 4
adult_gaming_centre 4
sauna 4
picnic_table 3
swimming_area 3
firepit 3
horse_riding 3
cultural_centre 2
gym 2
hot_spring 2
indoor_play 2
wildlife_hide 2
spa 2
Park in residential area 1
aquarium 1
leisure 1
ground 1
Meeting_point 1
sports_hall 1
summer_camp 1
social_club 1
yoga 1
schoolyard 1
NITTE FOOTBALL STADIUM 1
quary 1
yoga_centre 1
Name: leisure, dtype: int64
```

Let's draw a graph for easier understanding.
# Bar chart of the 20 most frequent leisure types.
plt.rcParams["font.size"] = 10.0
plt.rcParams["figure.figsize"] = 20, 10

# Keep only the 20 most common categories, most frequent first
# (value_counts() sorts in descending order of frequency).
top_leisure_types = le["leisure"].value_counts().index[:20]

# Pass the column with the `x=` keyword: recent seaborn releases treat the
# first positional argument as `data`, not as the x vector.
ax = sns.countplot(x=le["leisure"], palette="Blues_r", order=top_leisure_types)
ax.set_title("Most Available Leisure Places in India")

# rotate the names so they fit
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
plt.tight_layout()
plt.show()
Parks are the most common type of leisure place in India.
```python
# split coordinates
cords = list(le["lo-la"])
long = []
lat = []
for cord in cords:
    set_r = ast.literal_eval(cord)
    long.append(set_r[0])
    lat.append(set_r[1])
le["long"] = long
le["lat"] = lat
le.head()
```

| name | leisure | lo-la | long | lat | |
|---|---|---|---|---|---|
| id | |||||
| 249132377 | DLF Golf Links Golf Course | golf_course | (77.10471029999984, 28.45473270000001) | 77.104710 | 28.454733 |
| 250737365 | missing | park | (80.23786640000002, 13.04278489999996) | 80.237866 | 13.042785 |
| 250979543 | Yoga Centre | sports_centre | (75.8870475, 31.52995199999996) | 75.887047 | 31.529952 |
| 280167017 | Black Thunder | water_park | (76.9132247999999, 11.32635400000001) | 76.913225 | 11.326354 |
| 280701513 | Ootacamund Gymkhana Golf Course | golf_course | (76.67157809999996, 11.417312599999995) | 76.671578 | 11.417313 |
```python
# drop the old coordinates column
le = le.drop("lo-la", axis=1)
le.head()
```

| name | leisure | long | lat | |
|---|---|---|---|---|
| id | ||||
| 249132377 | DLF Golf Links Golf Course | golf_course | 77.104710 | 28.454733 |
| 250737365 | missing | park | 80.237866 | 13.042785 |
| 250979543 | Yoga Centre | sports_centre | 75.887047 | 31.529952 |
| 280167017 | Black Thunder | water_park | 76.913225 | 11.326354 |
| 280701513 | Ootacamund Gymkhana Golf Course | golf_course | 76.671578 | 11.417313 |
# Quick look at the raw coordinates: longitude on the x-axis, latitude on
# the y-axis, one marker per leisure place.
longitudes, latitudes = le["long"], le["lat"]
plt.scatter(longitudes, latitudes)
plt.show()
Above is the initial shape of the locations on the map, based on their longitude and latitude. The outline already resembles India, meaning that leisure places are spread all around the country.
```python
# create and view geopandas dataframe
gdf = gpd.GeoDataFrame(
    le, geometry=gpd.points_from_xy(le.long, le.lat))
gdf
```

| name | leisure | long | lat | geometry | |
|---|---|---|---|---|---|
| id | |||||
| 249132377 | DLF Golf Links Golf Course | golf_course | 77.104710 | 28.454733 | POINT (77.10471 28.45473) |
| 250737365 | missing | park | 80.237866 | 13.042785 | POINT (80.23787 13.04278) |
| 250979543 | Yoga Centre | sports_centre | 75.887047 | 31.529952 | POINT (75.88705 31.52995) |
| 280167017 | Black Thunder | water_park | 76.913225 | 11.326354 | POINT (76.91322 11.32635) |
| 280701513 | Ootacamund Gymkhana Golf Course | golf_course | 76.671578 | 11.417313 | POINT (76.67158 11.41731) |
| ... | ... | ... | ... | ... | ... |
| 8277782288 | missing | playground | 76.297332 | 10.029498 | POINT (76.29733 10.02950) |
| 8280851413 | Gothuruth muzhiris park | park | 76.217737 | 10.190251 | POINT (76.21774 10.19025) |
| 8280851414 | Gothuruth Muzhiris park | park | 76.217712 | 10.190284 | POINT (76.21771 10.19028) |
| 8281209559 | Exalt Fitness Club Gym | fitness_centre | 72.564383 | 23.089663 | POINT (72.56438 23.08966) |
| 8281506191 | missing | playground | 75.544096 | 11.927387 | POINT (75.54410 11.92739) |
5813 rows × 5 columns
import os

# The Mapbox access token used to be hard-coded on this line. Credentials do
# not belong in source, so it is now read from the MAPBOX_ACCESS_TOKEN
# environment variable. The previously committed token should be treated as
# leaked and rotated.
# The token is optional here: px.scatter_geo draws on Plotly's built-in geo
# basemap, and a Mapbox token only matters for Mapbox-based traces.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if mapbox_token:
    px.set_mapbox_access_token(mapbox_token)

# generate and show points in map (it's interactive!)
fig = px.scatter_geo(
    gdf,
    lat=gdf.geometry.y,
    lon=gdf.geometry.x,
    hover_data=["name", "leisure"],
    locationmode="country names",
)
fig.update_geos(fitbounds="locations")  # zoom in to only India
fig.show()
# check the project source code for the interactive version of the map
Observations from the map
- most of the leisure places are located along the coast
- there are some obvious outlier locations that are probably fake
- the center of India has the fewest leisure places
- most of the resorts are located on the western coast
- parks are the only leisure type available all across the country