# SAVE ZOOPLA LISTINGS TO CSV

		from urllib.request import urlopen as uReq
		from bs4 import BeautifulSoup as soup
		import sys

		# Search that is scraped when the script is started without arguments.
		default_url = 'https://www.zoopla.co.uk/for-sale/houses/SE5/?beds_min=1&is_auction=false&is_retirement_home=false&is_shared_ownership=false&price_max=250000&price_min=90000&q=SE5&radius=10&results_sort=newest_listings&search_source=home&page_size=100'

		# The first command-line argument, when given, replaces the default search URL.
		cli_args = sys.argv[1:]
		my_url = cli_args[0] if cli_args else default_url

		# Download the results page and parse it into a BeautifulSoup tree.
		# The with-block closes the HTTP response even when read() raises,
		# which a trailing close() call cannot guarantee.
		with uReq(my_url) as uClient:
			page_html = uClient.read()
		page_soup = soup(page_html, "html.parser")

		# Collect every <div class="ListingsContainer"> element; each one is
		# handled as a listing by the loop further down.
		containers = page_soup.find_all("div", class_="ListingsContainer")

		# Read the total result count to see if there are more pages.
		# Whatever follows "of " in the count span's text is parsed as the total.
		# A missing span (find() returns None) or missing "of " makes this raise.
		list_details = page_soup.find("span", class_="listing-results-utils-count")
		count_text = list_details.text
		number_of_listings = int(count_text.partition("of ")[-1])

		# Prepare the output file.
		# The name is prefixed with today's date as YYMMDD, e.g. 200619 for 19/06/2020.
		from datetime import date
		today_string = date.today().strftime('%y%m%d')
		filename = today_string + "-zoopla-listings.csv"

		# Column titles, used for both the CSV header row and the console dump.
		infos = ["Link", "Title", "Price", "Modifier", "Agent", "Agent Tel", "Address", "Nearby", "All Attributes", "Bedrooms", "Bathrooms", "Reception Rooms", "Sq Ft"]
		headers = ", ".join(infos) + "\n"

		# Defined once here rather than being re-created for every listing.
		def TextOrNan(n):
			"""Return the text of tag n as a str, or 'nan' when n is None (tag not found)."""
			if n is not None:
				return str(n.text)
			return 'nan'

		# The with-block closes the file even if a listing fails to parse part-way
		# through, so the rows written so far are flushed to disk.
		# NOTE(review): no encoding is passed, so the platform default is used;
		# confirm that it can represent every character found in the listings.
		with open(filename, "w") as f:
			f.write(headers)

			# Loop through the containers to grab the info of interest.
			# Apart from the four room/size spans, every element looked up below is
			# required: if one is missing, find() returns None and the attribute
			# access that follows raises.
			for container in containers:
				listing_price = container.find("a", {"class": "listing-results-price"})
				link = "https://www.zoopla.co.uk" + listing_price["href"]

				# The modifier must be read first: extract() removes the <span> from
				# the price link, so the remaining text is only the price.
				modifier = listing_price.span.extract().text.strip()
				price = listing_price.text.strip()

				# Collapse all runs of whitespace in the title to single spaces.
				title = container.find("h2", {"class": "listing-results-attr"})
				title = " ".join(title.text.split())

				agent = container.find("div", {"class": "agent_logo"}).img["alt"]
				agent_phone = container.find("span", {"class": "agent_phone"}).a.span.text.strip()

				nearby_stations_schools = container.find("div", {"class": "nearby_stations_schools"}).text
				nearby_stations_schools = " ".join(nearby_stations_schools.split())

				prop_address = container.find("a", {"class": "listing-results-address"}).text

				# Property attributes (bedrooms, bathrooms, reception rooms, floor area).
				attributes = container.find("h3", {"class": "listing-results-attr"})

				# Each span is optional; absent ones are recorded as 'nan'.
				bedrooms, bathrooms, reception_rooms, num_sqft = (
					TextOrNan(attributes.find("span", {"class": css_class}))
					for css_class in ("num-beds", "num-baths", "num-reception", "num-sqft")
				)

				# Attribute summary: the title of every direct <span> child that has
				# one. Spans without a title are skipped explicitly instead of being
				# caught by a bare except.
				attribute_list = [
					attr["title"]
					for attr in attributes.select('h3 > span')
					if attr.has_attr("title")
				]
				attribute_string = " ".join(attribute_list)

				data = [link, title, price, modifier, agent, agent_phone, prop_address, nearby_stations_schools, attribute_string, bedrooms, bathrooms, reception_rooms, num_sqft]

				# Print to console, one "Column: value" line per field.
				for info, value in zip(infos, data):
					print(info + ": " + str(value))

				# Write to CSV. Commas inside text fields would otherwise start new
				# columns, so they are replaced: with "k" in the price and with "|"
				# elsewhere. The link and the four room/size values are written as-is.
				row = [
					link,
					title.replace(",", "|"),
					price.replace(",", "k"),
					modifier.replace(",", "|"),
					agent.replace(",", "|"),
					agent_phone.replace(",", "|"),
					prop_address.replace(",", "|"),
					nearby_stations_schools.replace(",", "|"),
					attribute_string.replace(",", "|"),
					bedrooms,
					bathrooms,
					reception_rooms,
					num_sqft,
				]
				f.write(",".join(row) + "\n")

		# Report the total advertised by the page next to the number of listings
		# actually saved, so a shortfall (more result pages) is easy to spot.
		for label, count in (
			("Number of Listings: ", number_of_listings),
			("Number Saved: ", len(containers)),
		):
			print(label + str(count))
		print("If these numbers differ, you may need to go to the next page and collect more.")