#import all necessary librarie for the data analysis
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sb
import numpy as np
import folium
#!pip install plotnine
from plotnine import *
#!pip install beautifulsoup4
from bs4 import BeautifulSoup
import re
import requests
from urllib.request import urlopen
#!conda install plotly
import plotly.express as px
import plotly.graph_objects as go
#!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # module to convert an address into latitude and longitude values


#to read dataframe as csv.
property_list = pd.read_csv('property_list.csv', )
property_list['date_sold'] = pd.to_datetime(property_list['date_sold'], dayfirst = True) #to set date to dd-mm-yyyy format
property_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1021 entries, 0 to 1020
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   price      1021 non-null   int64         
 1   address    1021 non-null   object        
 2   date_sold  1021 non-null   datetime64[ns]
 3   bedroom    1021 non-null   int64         
 4   bathroom   1021 non-null   int64         
 5   carport    1021 non-null   int64         
 6   dwelling   1021 non-null   object        
dtypes: datetime64[ns](1), int64(4), object(2)
memory usage: 56.0+ KB


property_list.describe()


#to find detail on the property that has the maximum number of bedrooms.
property_list.loc[property_list['bedroom'] == 8]


ax = pd.value_counts(property_list.dwelling).plot(kind='bar',figsize=(15,4),width = 0.8)

for p in ax.patches:
    width = p.get_width()
    height = p.get_height()
    x, y = p.get_xy() 
    ax.annotate('{:.0%}'.format(height/1021), (x + width/2, y + height*1.02), ha='center')


#to show the breakdown of types of the dwelling sold.
pd.value_counts(property_list.dwelling)

Apartment       761
House           172
Townhouse        45
Unit             40
Duplex            2
Off The Plan      1
Name: dwelling, dtype: int64


#using plotly.express
px.box(property_list, x='dwelling', y = 'price',
      title='The Sale of Properties in Australian Dollar (AUD)', 
             labels = {'dwelling':'Types of Dwelling', 'price': 'Price in AUD'})#to find out how the price fares against the types of dwelling.


#to show the property that sold at the maximum price.
property_list.loc[property_list['price'].idxmax()]

price                    7490000
address           25 Moir Street
date_sold    2020-08-10 00:00:00
bedroom                        3
bathroom                       1
carport                        1
dwelling                   House
Name: 1, dtype: object


px.box(property_list, x='bedroom', y = 'price',
      title='The Price vs Number of Bedrooms in AUD', 
             labels = {'bedroom':'Number of Bedrooms in a Property', 'price': 'Price in AUD'})#to find out how the price fares against number of bedrooms.


px.box(property_list, x='bathroom', y = 'price',
      title='The Price vs Number of Bathrooms in AUD', 
             labels = {'bathroom':'Number of Bathrooms in a Property', 'price': 'Price in AUD'})#to find out how the price fares against number of bathrooms.


#To breakdown the types of dwelling to find out its corresponding number of bedrooms, bathrooms and carport.
dimension_dwelling = pd.melt(property_list, id_vars = 'dwelling', value_vars = ['bedroom', 'bathroom', 'carport'], var_name= 'dimensions', value_name='number')


ax = sb.catplot(data = dimension_dwelling, x= 'dimensions', y= 'number', hue = 'dwelling', kind = 'box', height = 5, aspect = 3);
ax.set_titles('Type of Dwelling vs Dimensions');
ax.set_axis_labels('','Number of Rooms/Ports');
ax.set_xticklabels(['Bedroom', 'Bathroom', 'Carport']);


property_list[(property_list['carport'] == 4)]


#To breakdown the types of dwelling and prices.
price_dwelling = pd.melt(property_list, id_vars = ['dwelling', 'price'], value_vars = ['bedroom', 'bathroom', 'carport'], var_name= 'dimensions', value_name='number')


price_dwelling.describe()


sb.catplot(data=price_dwelling, x="number", y="price", hue="dwelling", row='dimensions', kind = 'box', height = 4, aspect = 3);


ax = sb.catplot(data = property_list, x= 'bedroom', y= 'price', hue = 'dwelling', row='bathroom', kind = 'box', height = 5, aspect = 3);


property_list['day'] = property_list['date_sold'].dt.day_name()
property_list['day_of_week'] = property_list['date_sold'].dt.dayofweek
property_list['weekend'] = property_list['day_of_week'].apply(lambda x: 1 if (x > 4) else 0)


property_list.head()


cats = [ 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
from pandas.api.types import CategoricalDtype
cat_type = CategoricalDtype(categories=cats, ordered=True)
property_list['day'] = property_list['day'].astype(cat_type) #to order the categorical values, 'day'


#to see on which day most properties were sold and which type.
px.histogram(property_list, x=property_list['day'].sort_values(), y='price', color = 'dwelling', nbins = 17, 
             title='The Top Days Most Sale of Properties Occured', 
             labels = {'x':"", 'dwelling': 'Types of Dwelling', 'price': 'Price in AUD'})


sb.heatmap(property_list.corr('pearson'), annot=True)
plt.show()


CLIENT_ID = 'S2RFPKBHSMYQBRNTNCIKSJJRRATXJCBRSNBVRP5QM2CNAQRR' # your Foursquare ID
CLIENT_SECRET = '0KF44UH0EIRPQCYHSZU5QJA40ZZTSBF43QLOLWRYEII5PS5N' # your Foursquare Secret
VERSION = '20201018'
LIMIT = 30


address = 'St. Georges Terrace, Perth, Western Australia 6000'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude


search_query = 'restaurant'
radius = 1000
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()


# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe_rest = pd.json_normalize(venues)
dataframe_rest.head(3)


# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe_rest.columns if col.startswith('location.')] + ['id']
dataframe_filtered_rest = dataframe_rest.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered_rest['categories'] = dataframe_filtered_rest.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered_rest.columns = [column.split('.')[-1] for column in dataframe_filtered_rest.columns]

dataframe_filtered_rest.head(3)


search_query = 'cafes'
radius = 1000
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()


# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe_cafe = pd.json_normalize(venues)
dataframe_cafe.head(3)


# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe_cafe.columns if col.startswith('location.')] + ['id']
dataframe_filtered_cafe = dataframe_cafe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered_cafe['categories'] = dataframe_filtered_cafe.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered_cafe.columns = [column.split('.')[-1] for column in dataframe_filtered_cafe.columns]


search_query = 'bakery'
radius = 1000
url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION, search_query, radius, LIMIT)
results = requests.get(url).json()


# assign relevant part of JSON to venues
venues = results['response']['venues']

# tranform venues into a dataframe
dataframe_bakery = pd.json_normalize(venues)
dataframe_bakery.head(3)


# keep only columns that include venue name, and anything that is associated with location
filtered_columns = ['name', 'categories'] + [col for col in dataframe_bakery.columns if col.startswith('location.')] + ['id']
dataframe_filtered_bakery = dataframe_bakery.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

# filter the category for each row
dataframe_filtered_bakery['categories'] = dataframe_filtered_bakery.apply(get_category_type, axis=1)

# clean column names by keeping only last term
dataframe_filtered_bakery.columns = [column.split('.')[-1] for column in dataframe_filtered_bakery.columns]


coordinates = pd.read_csv('coordinates.csv') #to read the csv file.
price = property_list.price.apply(str)


address = 'St. Georges Terrace, Perth, Western Australia 6000'

geolocator = Nominatim(user_agent="capstone")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude

perth_map = folium.Map(location=[latitude, longitude], zoom_start=15) # generate map centred around Perth CBD

# add a black circle marker to represent the region
folium.CircleMarker(
    [latitude, longitude],
    radius=10,
    color='black',
    popup='Perth CBD',
    fill = True,
    fill_color = 'black',
    fill_opacity = 0.6
).add_to(perth_map)

# add the restaurants as blue circle markers
for lat, lng, label in zip(dataframe_filtered_rest.lat, dataframe_filtered_rest.lng, dataframe_filtered_rest.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        color='blue',
        popup=label,
        fill = True,
        fill_color='blue',
        fill_opacity=0.6
).add_to(perth_map)

# add the cafes as yellow circle markers
for lat, lng, label in zip(dataframe_filtered_cafe.lat, dataframe_filtered_cafe.lng, dataframe_filtered_cafe.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        color='yellow',
        popup=label,
        fill = True,
        fill_color='yellow',
        fill_opacity=0.6
).add_to(perth_map)

# add the bakeries as pink circle markers
for lat, lng, label in zip(dataframe_filtered_bakery.lat, dataframe_filtered_bakery.lng, dataframe_filtered_bakery.categories):
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        color='green',
        popup=label,
        fill = True,
        fill_color='green',
        fill_opacity=0.6
).add_to(perth_map)

# add the location of properties as red circle markers
for lat, lng, label in zip(coordinates.lat, coordinates.lng, price):
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        color='red',
        popup=label,
        fill = True,
        fill_color='red',
        fill_opacity=0.6
).add_to(perth_map)
    
# display map
perth_map


#filter dataframe to find properties on Murray Street, St. Georges Terrace, Wellington Street, Hay Street, and William Street.
recommendation = property_list[property_list['address'].str.contains('Murray|Wellington|Hay|William|St Georges')]
recommendation.head(10)


recommendation.describe()


recommendation.dwelling.value_counts()

Apartment    289
Unit           5
House          5
Townhouse      1
Name: dwelling, dtype: int64


px.box(recommendation, x="bedroom", y="price", color="dwelling", facet_row='bathroom')


address = ['Murray Street', 'Wellington Street', 'Hay Street', 'Hay Street', 'William Street','St. Georges Terrace']
price_selling = [1040000, 940000, 800000, 800000, 489000, 550000]
dimensions = ['3 x 2', '3 x 2', '3 x 2', '3 x 3', '3 x 2', '2 x2']

property_for_sale = pd.DataFrame({'Address': address, 'Price for Sale (AUD)': price_selling, 'Bedroom x Bathroom': dimensions})
property_for_sale


!jupyter nbconvert "Property Market Analysis.ipynb" --to html --output-dir='C:\Users\Ajeet\OneDrive\Projects\Blogging\ajeethaa.github.io'

C:\Users\Ajeet\anaconda3\lib\site-packages\traitlets\traitlets.py:2195: FutureWarning: Supporting extra quotes around Unicode is deprecated in traitlets 5.0. Use 'C:\\Users\\Ajeet\\OneDrive\\Projects\\Blogging\\ajeethaa.github.io' instead of "'C:\\Users\\Ajeet\\OneDrive\\Projects\\Blogging\\ajeethaa.github.io'" – or use CUnicode.
  warn(
[NbConvertApp] Converting notebook Property Market Analysis.ipynb to html
[NbConvertApp] Writing 5359610 bytes to C:\Users\Ajeet\OneDrive\Projects\Blogging\ajeethaa.github.io\Property Market Analysis.html

	price	bedroom	bathroom	carport
count	1.021000e+03	1021.000000	1021.000000	1021.000000
mean	5.243352e+05	1.916748	1.442703	1.005877
std	3.637455e+05	0.875552	0.595653	0.718089
min	1.550000e+05	0.000000	0.000000	0.000000
25%	3.400000e+05	1.000000	1.000000	1.000000
50%	4.450000e+05	2.000000	1.000000	1.000000
75%	6.200000e+05	2.000000	2.000000	1.000000
max	7.490000e+06	8.000000	4.000000	4.000000

	price	address	date_sold	bedroom	bathroom	carport	dwelling
286	1230000	10 Cantle Street	2019-02-08	4	3	4	House
318	3000000	2801/237 Adelaide Terrace	2018-11-30	4	4	4	Apartment
529	1030000	5/168 Mounts Bay Road	2017-11-18	3	2	4	Apartment
633	1295000	8 Somerville Street	2017-06-01	3	1	4	House
729	739000	32 Lacey Street	2016-11-21	4	1	4	House

	price	number
count	3.063000e+03	3063.000000
mean	5.243352e+05	1.455109
std	3.636267e+05	0.826879
min	1.550000e+05	0.000000
25%	3.400000e+05	1.000000
50%	4.450000e+05	1.000000
75%	6.200000e+05	2.000000
max	7.490000e+06	8.000000

	id	name	categories	referralId	hasPerk	location.address	location.lat	location.lng	location.labeledLatLngs	location.distance	location.cc	location.city	location.state	location.country	location.formattedAddress	location.postalCode	location.crossStreet	location.neighborhood
0	55ed79a3498e6eaf2fbcebde	Canton Bay Chinese Restaurant Northbridge 喜粵海鮮酒家	[{'id': '52af3a7c3cf9994f4e043bed', 'name': 'C...	v-1599226161	False	20 Roe St	-31.949653	115.858080	[{'label': 'display', 'lat': -31.9496528483201...	596	AU	Northbridge	WA	Australia	[20 Roe St, Northbridge WA, Australia]	NaN	NaN	NaN
1	4b74d676f964a520adf42de3	Restaurant Jun 純	[{'id': '4bf58dd8d48988d111941735', 'name': 'J...	v-1599226161	False	568 Hay St	-31.954960	115.861385	[{'label': 'display', 'lat': -31.9549600047376...	364	AU	Perth	WA	Australia	[568 Hay St, Perth WA 6000, Australia]	6000	NaN	NaN
2	4c2c5bd38abca5938c940120	Miss Maud Smorgasbord Restaurant	[{'id': '4bf58dd8d48988d1c6941735', 'name': 'S...	v-1599226161	False	97 Murray St	-31.954286	115.862652	[{'label': 'display', 'lat': -31.9542861800683...	490	AU	Perth	WA	Australia	[97 Murray St (cnr Pier St), Perth WA 6000, Au...	6000	cnr Pier St	NaN

	name	categories	address	lat	lng	labeledLatLngs	distance	cc	city	state	country	formattedAddress	postalCode	crossStreet	neighborhood	id
0	Canton Bay Chinese Restaurant Northbridge 喜粵海鮮酒家	Cantonese Restaurant	20 Roe St	-31.949653	115.858080	[{'label': 'display', 'lat': -31.9496528483201...	596	AU	Northbridge	WA	Australia	[20 Roe St, Northbridge WA, Australia]	NaN	NaN	NaN	55ed79a3498e6eaf2fbcebde
1	Restaurant Jun 純	Japanese Restaurant	568 Hay St	-31.954960	115.861385	[{'label': 'display', 'lat': -31.9549600047376...	364	AU	Perth	WA	Australia	[568 Hay St, Perth WA 6000, Australia]	6000	NaN	NaN	4b74d676f964a520adf42de3
2	Miss Maud Smorgasbord Restaurant	Scandinavian Restaurant	97 Murray St	-31.954286	115.862652	[{'label': 'display', 'lat': -31.9542861800683...	490	AU	Perth	WA	Australia	[97 Murray St (cnr Pier St), Perth WA 6000, Au...	6000	cnr Pier St	NaN	4c2c5bd38abca5938c940120

"Property Market in Perth, Western Australia"¶

Table of Contents¶

Introduction¶

Data Procurement¶

Data Extraction and Cleaning¶

Exploratory Data Analysis¶

Data Visualisation¶

----A----¶

----B----¶

Results¶

To search for restaurants within Perth CBD¶

To search for cafes within CBD¶

To search for bakeries within CBD¶

The map of restaurants, cafes, and bakeries in the vicinity of the properties¶

Discussion / Recommendations¶

Conclusion¶

References¶

	price	address	date_sold	bedroom	bathroom	carport	dwelling	day	day_of_week
0	1020000	61 Lindsay Street	2020-08-18	3	1	0	House	Tuesday	1
1	7490000	25 Moir Street	2020-08-10	3	1	1	House	Monday	0
2	260000	8/133 Lincoln Street	2020-07-28	2	1	1	Apartment	Tuesday	1
3	320000	14/130 Terrace Road	2020-07-10	2	1	0	Apartment	Friday	4
4	449000	33/67 Brewer Street	2020-07-09	2	2	1	Apartment	Thursday	3

	id	name	categories	referralId	hasPerk	location.lat	location.lng	location.labeledLatLngs	location.distance	location.postalCode	location.cc	location.city	location.state	location.country	location.formattedAddress	location.address	location.crossStreet
0	578ec414498ebef9383e2e21	Cafe Studio	[{'id': '4bf58dd8d48988d1a1941735', 'name': 'C...	v-1599226162	False	-31.953068	115.861230	[{'label': 'display', 'lat': -31.953068, 'lng'...	410	6000	AU	Perth	WA	Australia	[Perth WA 6000, Australia]	NaN	NaN
1	5855eb88fad9dc0635d26be2	Cafe Studio Perth	[{'id': '52e81612bcbc57f1066b79f1', 'name': 'B...	v-1599226162	False	-31.952731	115.861296	[{'label': 'display', 'lat': -31.9527311933084...	436	6000	AU	Perth	WA	Australia	[138 Barrack Street (Wellington Street), Perth...	138 Barrack Street	Wellington Street
2	4b610e01f964a52005082ae3	Café St George	[]	v-1599226162	False	-31.953175	115.849256	[{'label': 'display', 'lat': -31.9531754780512...	806	6000	AU	Perth	WA	Australia	[251 St Georges Tce, Perth WA 6000, Australia]	251 St Georges Tce	NaN

	id	name	categories	referralId	hasPerk	location.address	location.lat	location.lng	location.labeledLatLngs	location.distance	location.postalCode	location.cc	location.city	location.state	location.country	location.formattedAddress	location.neighborhood	location.crossStreet
0	4b8de8ccf964a520041333e3	Hay Street Bakery	[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...	v-1599226162	False	Hay St	-31.953164	115.855744	[{'label': 'display', 'lat': -31.9531635969671...	263	6000	AU	Perth	WA	Australia	[Hay St, Perth WA 6000, Australia]	NaN	NaN
1	595ef60f35d3fc3e62118289	Mary Street Bakery (St Georges)	[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...	v-1599226162	False	NaN	-31.955444	115.858501	[{'label': 'display', 'lat': -31.9554443144621...	105	6000	AU	Perth	WA	Australia	[Perth WA 6000, Australia]	NaN	NaN
2	4baab486f964a52052813ae3	Golden Bakery	[{'id': '4bf58dd8d48988d16a941735', 'name': 'B...	v-1599226162	False	97 William St	-31.952498	115.857604	[{'label': 'display', 'lat': -31.9524982432644...	277	NaN	AU	Perth	WA	Australia	[97 William St, Perth WA, Australia]	NaN	NaN

	price	address	date_sold	bedroom	bathroom	carport	dwelling	day	day_of_week	weekend
7	310000	1D/811 Hay Street	2020-06-28	2	1	0	Apartment	Sunday	6	1
8	452800	5/478 William Street	2020-06-22	3	2	2	Apartment	Monday	0	0
9	249000	3/375 Hay Street	2020-06-20	1	1	0	Apartment	Saturday	5	1
12	450000	120/580 Hay Street	2020-06-11	1	1	1	Apartment	Thursday	3	0
15	315000	8/418-428 Murray Street	2020-05-21	2	1	1	Unit	Thursday	3	0
18	275000	4/101 Murray Street	2020-05-13	1	1	1	Apartment	Wednesday	2	0
19	620000	29/101 Murray Street	2020-05-10	3	2	2	Apartment	Sunday	6	1
22	320000	4/418-428 Murray Street	2020-04-28	2	1	1	Apartment	Tuesday	1	0
28	600000	60/580 Hay Street	2020-03-31	2	2	2	Apartment	Tuesday	1	0
30	290000	62/101 Murray St	2020-03-27	1	1	1	Apartment	Friday	4	0

	price	bedroom	bathroom	carport	day_of_week	weekend
count	3.000000e+02	300.000000	300.000000	300.000000	300.000000	300.000000
mean	4.308749e+05	1.576667	1.246667	0.786667	2.266667	0.086667
std	1.369588e+05	0.765556	0.577582	0.650049	1.684740	0.281816
min	1.840000e+05	0.000000	0.000000	0.000000	0.000000	0.000000
25%	3.315000e+05	1.000000	1.000000	0.000000	1.000000	0.000000
50%	4.059440e+05	2.000000	1.000000	1.000000	2.000000	0.000000
75%	5.052500e+05	2.000000	2.000000	1.000000	4.000000	0.000000
max	1.175000e+06	4.000000	2.000000	3.000000	6.000000	1.000000

	Address	Price for Sale (AUD)	Bedroom x Bathroom
0	Murray Street	1040000	3 x 2
1	Wellington Street	940000	3 x 2
2	Hay Street	800000	3 x 2
3	Hay Street	800000	3 x 3
4	William Street	489000	3 x 2
5	St. Georges Terrace	550000	2 x2