From ac79e2fd23f4f903f0748f223b2e0ee7d9484763 Mon Sep 17 00:00:00 2001 From: Craig Oates Date: Tue, 19 Mar 2024 01:00:47 +0000 Subject: [PATCH] create prime-location-manchester.org file. --- prime-location-manchester.org | 262 ++++++++++++++++++++++++++++++++++ 1 file changed, 262 insertions(+) create mode 100644 prime-location-manchester.org diff --git a/prime-location-manchester.org b/prime-location-manchester.org new file mode 100644 index 0000000..b101214 --- /dev/null +++ b/prime-location-manchester.org @@ -0,0 +1,262 @@ +#+options: ':nil *:t -:t ::t <:t H:3 \n:nil ^:t arch:headline author:t +#+options: broken-links:nil c:nil creator:nil d:(not "LOGBOOK") date:t e:t +#+options: email:nil expand-links:t f:t inline:t num:t p:nil pri:nil prop:nil +#+options: stat:t tags:t tasks:t tex:t timestamp:t title:t toc:t todo:t |:t +#+title: Prime Location Manchester +#+date: \today +#+author: Craig Oates +#+email: craig@craigoates.net +#+language: en +#+select_tags: export +#+exclude_tags: noexport +#+creator: Emacs 29.1.90 (Org mode 9.7-pre) +#+cite_export: + +* Setup Common Lisp Environment + +I’ve copied the following code block over from other files. Run it if this is +your first file open in the session. + +#+begin_src lisp :session :results silent + (ql:quickload :com.inuoe.jzon) ; JSON parser. + (ql:quickload :dexador) ; HTTP requests. + (ql:quickload :plump) ; HTML/XML parser. + (ql:quickload :lquery) ; HTML/DOM manipulation. + (ql:quickload :lparallel) ; Parallel programming. + (ql:quickload :cl-ppcre) ; RegEx. library. + (ql:quickload :plot/vega) ; Vega plotting library. + (ql:quickload :lisp-stat) ; Stat's library. + (ql:quickload :data-frame) ; Data frame library eqv. to Python's Numpy. + (ql:quickload :str) ; String library, expands on 'string' library. +#+end_src + +* Gather Prime Location Data + +Having had a quick look at the website, it says there are 208 listings, spread +across 11 pages. 
+The search used the following filters: + +- Date: 2024-03-18 +- Location: Manchester City Centre, Greater Manchester +- Rent Listing Only +- Only monthly rents listed +- Price Range: £400–£1,250 pcm +- Include Shared Accommodation +- Radius: ’This area only’ + +Because of the number of listings, I'm going to need to use ~curl~ and grab each +page separately. + +*There are no options to filter out ’student-only’ and ’bills inc.’ so this is +going to be less accurate compared to other sites.* + +#+begin_src shell :results silent + cd raw-data/external/ || exit 1 + DIRECTORY="$(date '+%Y-%m-%d')-prime-location-manc" + mkdir -p "$DIRECTORY" + for PAGE in {1..11} # Site reported 11 pages; assumes 'pn' is 1-based. + do + curl -o "$DIRECTORY/prime-location-$PAGE.html" \ + "https://www.primelocation.com/to-rent/property/manchester-city-centre/?price_max=1250&identifier=manchester-city-centre&price_min=400&q=Manchester%20City%20Centre%2C%20Greater%20Manchester&results_sort=lowest_price&search_source=to-rent&radius=0&price_frequency=per_month&view_type=grid&pn=$PAGE" + sleep 5 + done + # Change back to project's root directory, don’t want to call code whilst still + # in this directory. It'll probably cause errors. + cd ../../ +#+end_src + +* Clean Up and Parse Data + +As usual, separate each listing into its own file. 
+ +#+begin_src shell :results silent + mkdir raw-data/external/2024-03-18-prime-location-manc-listings/ +#+end_src + +[[file:./raw-data/external/2024-03-18-prime-location-manc-listings/]] + +#+begin_src lisp :results silent + (let ((counter 0)) + (loop for file-path + in (directory #P"raw-data/external/2024-03-18-prime-location-manc/*.html") + do (with-open-file (in-stream file-path) + (let* ((doc (plump:parse in-stream)) + (listings (lquery:$ doc ".srp.grid-cell.grid-cell--left.grid-cell--big" (serialize)))) + (loop for item across listings + do (let ((out-path + (merge-pathnames "raw-data/external/2024-03-18-prime-location-manc-listings/" + (format nil "listing-~a.html" (write-to-string counter))))) + (with-open-file (out-stream + out-path + :direction :output + :if-exists :supersede) + (format out-stream "~a" item)) + (incf counter))))))) +#+end_src + +* Create CSV of Listings + +Build the CSV file from the listings files, as is the established way. + +#+begin_src lisp :results silent + (with-open-file (out-stream + #P"working-data/2024-03-18-prime-location-manc.csv" + :direction :output + :if-exists :supersede) + (let ((row-id 0)) + (format out-stream "ROW-ID,LOCATION,RENT,URL,DESCRIPTION~%") + (loop for filepath + in (directory #P"raw-data/external/2024-03-18-prime-location-manc-listings/*.html") + do (with-open-file (in-stream filepath) + (let* ((doc (plump:parse in-stream)) + (raw-price (lquery:$ doc ".price" (text))) + (cleaned-price + (first + (cl-ppcre:all-matches-as-strings + "[0-9,]+" (aref raw-price 0)))) + (link (lquery:$ doc "a" (attr :href))) + (address (lquery:$ doc "p a" (text))) + (description (lquery:$ doc "p" (text))) + (clean-desc + (string-trim " " + (cl-ppcre:scan-to-strings + ".*\\.\\.\\." ; Each literal dot needs "\\." in a Lisp string; a lone "\." reads as a bare "." (any character).
+ (aref description 4))))) + (format out-stream "~d,~a,~d,~a,~a~%" + row-id + (str:replace-all "," " " (aref address 0)) + (str:replace-all "," "" cleaned-price) + (format nil "https://www.primelocation.com~a" (aref link 0)) + (str:replace-all "," " " clean-desc)))) + (incf row-id)))) +#+end_src + +*There are some results which state ’only for students’*. So, I will need to +filter them out. + +* Explore CSV Data for Prime Location Manchester (2024-03-18) + +#+begin_src lisp :session + (lisp-stat:defdf *pl-manc* + (lisp-stat:read-csv #P"working-data/2024-03-18-prime-location-manc.csv")) +#+end_src + +#+RESULTS: +: # + +*The original look at the data on the website states it was showing 208 + listings. This count includes ads (i.e. promoted listings).* These listings + have not been included in the processing of the data. Thus, ~*pl-manc*~ states + it has ~114 observations~ and not 208. + + This list is going to be reduced further, as the ’only for students’ listings need + to be filtered out. + + #+begin_src lisp :session + (lisp-stat:defdf *pl-manc-filt* + (lisp-stat:filter-rows *pl-manc* + '(not (str:contains? + "only for student" + description + :ignore-case t)))) + #+end_src + + #+RESULTS: + : # + +So, only 109 listings after filtering, from 114. A reduction of 5 listings. The +list is still too big to print out in this file, though. + +#+begin_src lisp :session :results file + (vega:defplot pl-mon-manc + `(:title "Rent Rates for Manchester on Prime Location (18/03/2024)" + :width 1800 + :height 600 + :data ,*pl-manc-filt* + :layer #((:mark (:type :bar) + :encoding (:x (:field :row-id :title "Assigned Id." 
+ :type :nominal); :axis ("labelAngle" 0)) + :y (:field :rent :title "Monthly Rent (£)" :type :quantitative) + :tooltip (:field :rent))) + (:mark (:type :rule :color "darkorange" :size 3) ; Second layer: horizontal rule at the average rent. + :encoding (:y (:field :rent :type :quantitative :aggregate :average) + :tooltip (:field :rent :type :quantitative :aggregate :average)))))) + (vega:write-html pl-mon-manc "renders/2024-03-18-prime-location-rent-manc.html") +#+end_src + +#+RESULTS: +[[file:renders/2024-03-18-prime-location-rent-manc.html]] + +#+begin_src shell :results silent + mv ~/Downloads/visualization.png ./renders/2024-03-18-prime-location-rent-manc.png +#+end_src + +[[file:./renders/2024-03-18-prime-location-rent-manc.png]] + +#+begin_src lisp :session :results output code raw + (format t "- Mean Rent: £ ~a~%" (float (lisp-stat:mean *pl-manc-filt*:rent))) + (format t "- Min. Rent: £ ~d~%" (reduce #'min *pl-manc-filt*:rent)) + (format t "- Max. Rent: £ ~d" (reduce #'max *pl-manc-filt*:rent)) +#+end_src + +#+RESULTS: +- Mean Rent: £ 1040.8807 +- Min. Rent: £ 550 +- Max. Rent: £ 1250 + +#+begin_src calc :results output + 1040 * 12 +#+end_src + +#+RESULTS: +: 12480 + +* Summary of Prime Location + +Based on the average rent for Prime Location, I would need to make around +£13,000/yr to cover my living costs – not including the usual food, travel +etc. expenses. + +While I managed to apply filters to weed out the student-only listings, *I wasn't +able to filter out the listings which don't include bills in the rent advertised.* Because +of this, these figures are less accurate/consistent compared to some of the other +sites I've looked at up to now. With that said, there is a good number of +listings here, compared to some of the other sites. So, hopefully, that can help +offset some of the ’bills not included’ stuff. + +Now, the usual ’survival mode’ stuff (add £5,000 onto the annual rent to pay). 
+ +#+begin_src calc :results output + 12480 + 5000 +#+end_src + +#+RESULTS: +: 17480 + +Using £17,480 as the starting point (see [[file:./uk-wage-tax.org][UK Wage and Tax Rates]]), + +#+begin_src lisp :results output raw + (let* ((earning-target 17480) + (p-allow 12570) + (taxable-income (- earning-target p-allow)) + (tax-to-pay (* taxable-income 0.2)) + (total (- earning-target tax-to-pay))) + (format t "- Annual Target Salary: £~a~%" earning-target) + (format t "- Part of Salary which is Taxable: £~a~%" taxable-income) + (format t "- Tax to Pay: £~a~%" tax-to-pay) + (format t "- Salary After Tax: £~a~%" total)) +#+end_src + +#+RESULTS: +- Annual Target Salary: £17480 +- Part of Salary which is Taxable: £4910 +- Tax to Pay: £982.0 +- Salary After Tax: £16498.0 + +| Time Span | Value After Tax (£) | Mean Rent (£) | +|-----------------------+---------------------+---------------| +| Annually | 16498.0 | 1040.88 | +| Monthly (Before Rent) | 1374.8333 | | +| Monthly (After Rent) | 333.9533 | | +| Weekly (After Rent) | 83.488325 | | +| Daily (After Rent) | 11.926904 | | +#+TBLFM: @3$2=@-1/12::@4$2=@-1-@-2$+1::@5$2=@-1/4::@6$2=@-1/7 + +Daily spend, then, is £11.93/day.