diff --git a/ideal-flatmate-manchester.org b/ideal-flatmate-manchester.org
new file mode 100644
index 0000000..043ed9d
--- /dev/null
+++ b/ideal-flatmate-manchester.org
@@ -0,0 +1,131 @@
+#+options: ':nil *:t -:t ::t <:t H:3 \n:nil ^:t arch:headline author:t
+#+options: broken-links:nil c:nil creator:nil d:(not "LOGBOOK") date:t e:t
+#+options: email:nil expand-links:t f:t inline:t num:t p:nil pri:nil prop:nil
+#+options: stat:t tags:t tasks:t tex:t timestamp:t title:t toc:t todo:t |:t
+#+title: Ideal Flatmate Manchester
+#+date: \today
+#+author: Craig Oates
+#+email: craig@craigoates.net
+#+language: en
+#+select_tags: export
+#+exclude_tags: noexport
+#+creator: Emacs 29.1.90 (Org mode 9.7-pre)
+#+cite_export:
+
+* Gather Ideal Flatmate Data (Manually)
+
+- [[https://www.idealflatmate.co.uk/][Ideal Flatmate]]
+
+I had a quick look on the website and did a search with the following
+filters:
+
+- Date: 2024-02-24 Sat
+- Location: Manchester (City)
+- Price Range: £0-1200
+- Distance: +20 km
+
+There are only two pages of results – sixteen listings in total. So, I’ve just
+saved the HTML manually, from within the browser. Because they are HTML files
+and come with JavaScript, CSS, images etc., I stored them in
+=raw-data/external/2024-02-24_ideal-flatmate-manc=. These files will not be
+committed to the repository because I don’t want to clog it up with excess files
+and images. I just want the rent rates and location data.
+
+* Setup Common Lisp Environment
+
+You will not need to execute this code block if you've already set up SLIME in
+another ORG file. This is just in case this is the only file you're working on
+today, or it's your first file of the day.
+
+*Run ~M-x slime~ before running the following code.* And, make note of the
+~:session~ attribute. It allows the code in this code block to be used
+in other code blocks which also use the ~:session~ attribute.
+
+#+begin_src lisp :session :results silent
+  ;; Load every library the notebook's Lisp blocks rely on (via Quicklisp).
+  (ql:quickload :com.inuoe.jzon) ; JSON parser.
+  (ql:quickload :dexador)        ; HTTP requests.
+  (ql:quickload :plump)          ; HTML/XML parser.
+  (ql:quickload :lquery)         ; HTML/DOM manipulation.
+  (ql:quickload :lparallel)      ; Parallel programming.
+  (ql:quickload :cl-ppcre)       ; RegEx. library.
+  (ql:quickload :plot/vega)      ; Vega plotting library.
+  (ql:quickload :lisp-stat)      ; Statistics library.
+  (ql:quickload :data-frame)     ; Data frame library (cf. Python's pandas).
+  (ql:quickload :str)            ; String library, expands on 'string' library.
+#+end_src
+
+* Clean Up and Parse Data
+
+I'm taking a leaf out of the [[file:./spare-room-manchester.org][Spare Room (Manc)]] book and separating the
+individual listings into their own files. I've already got code I can quickly
+adapt to do this and it gives me more confidence that I'm not attaching values
+to the wrong listings.
+
+#+begin_src shell :results silent
+  # -p keeps this block re-runnable: no error if the directory already exists.
+  mkdir -p raw-data/external/2024-02-24_ideal-flatmate-manc-listings/
+#+end_src
+
+#+begin_src lisp :results silent
+  ;; Split each saved results page into one HTML file per listing card
+  ;; (every ".card-infos-flex-row" element).  Files are named
+  ;; listing-<n>.html, where <n> is a counter that runs across all pages.
+  (let ((out-dir #P"raw-data/external/2024-02-24_ideal-flatmate-manc-listings/")
+        (counter 0))
+    (dolist (file-path (directory #P"raw-data/external/2024-02-24_ideal-flatmate-manc/*.html"))
+      (with-open-file (in-stream file-path)
+        (let* ((doc (plump:parse in-stream))
+               ;; SERIALIZE yields one HTML string per matched element.
+               (listings (lquery:$ doc ".card-infos-flex-row" (serialize))))
+          (loop for item across listings
+                ;; File name first, directory as the default: the documented
+                ;; argument order for MERGE-PATHNAMES.
+                do (with-open-file (out-stream
+                                    (merge-pathnames (format nil "listing-~a.html" counter)
+                                                     out-dir)
+                                    :direction :output
+                                    :if-exists :supersede)
+                     (format out-stream "~a" item))
+                   (incf counter))))))
+#+end_src
+
+** TODO Create CSV of Listings
+
+Need to come back to this and finish it off. Left it in a mess because it's the
+end of the day (at time of commit) and need to get some sleep.
+
+#+begin_src lisp :results output raw
+  ;; Parked draft of the eventual CSV export (unfinished, see the TODO heading):
+  ;; (with-open-file (out-stream
+  ;;                  #P"working-data/2024-02-24-ideal-flatmate-manc.csv"
+  ;;                  :direction :output
+  ;;                  :if-exists :supersede)
+  ;;   (format out-stream "ROW-ID, OTHER STUFF")
+
+  ;; Print one Org table row per listing file: a running ROW-ID, the text of
+  ;; the first ".card-infos-left" element and the listing's link.
+  (flet ((first-match (matches)
+           ;; lquery returns a vector; when nothing matched it is empty, so
+           ;; emit a blank cell instead of letting AREF abort the whole table.
+           (if (plusp (length matches)) (aref matches 0) ""))
+         (property-link-p (href)
+           ;; An anchor without an href yields NIL, hence the STRINGP guard.
+           (and (stringp href) (search "property-id" href))))
+    (format t "|ROW-ID|LISTING-INFO|URL|~%")
+    (format t "|-|-|-|~%")
+    (loop for file-path
+            in (directory #P"raw-data/external/2024-02-24_ideal-flatmate-manc-listings/*.html")
+          for row-id from 0
+          do (with-open-file (in-stream file-path)
+               (let* ((doc (plump:parse in-stream))
+                      (listing (lquery:$ doc ".card-infos-left" (text)))
+                      (urls (lquery:$ doc "a" (attr "href")))
+                      ;; The first <a> is not always the listing itself: some
+                      ;; cards lead with an auth/signup link (rows 8 and 9 of
+                      ;; the stored results).  Prefer a property link and fall
+                      ;; back to the first link when there is none.
+                      (url (or (find-if #'property-link-p urls)
+                               (first-match urls))))
+                 (format t "|~a|~a|~a|~%" row-id (first-match listing) url)))))
+#+end_src
+
+#+RESULTS:
+| ROW-ID | LISTING-INFO | URL |
+|--------+---------------------------------------------------------------------+-------------------------------------------------------------------------|
+| 0 | £690/month per roomChapel Street, Salford M3 5DZ, UK | https://www.idealflatmate.co.uk/spare-room/manchester/property-id113377 |
+| 1 | £740/month per roomChapel Street, Salford M3 5DZ, UK | https://www.idealflatmate.co.uk/spare-room/manchester/property-id113378 |
+| 2 | £841 - £842/month per roomMiddlewood Street, Salford, M5 4YW, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id120130 |
+| 3 | £746 - £750/month per roomSalford, M5 4ZF, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id122936 |
+| 4 | £200/month 100, 100 Lloyd Mansions, Salford M6 6HA, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id122970 |
+| 5 | £488/month per roomJoshua Grange, Pluto Cl, Salford M6 6HF, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id122929 |
+| 6 | £580/month per roomJoshua Grange, Pluto Cl, Salford M6 6HF, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id123025 |
+| 7 | £480/month per roomGreater Manchester, Manchester, M31 4HZ, 296, UK | 
https://www.idealflatmate.co.uk/spare-room/manchester/property-id122962 | +| 8 | £580/month per roomJoshua Grange, Pluto Cl, Salford M6 6HF, UK | https://www.idealflatmate.co.uk/auth/signup?f=b&uid=242365&pid=123025 | +| 9 | £480/month per roomGreater Manchester, Manchester, M31 4HZ, 296, UK | https://www.idealflatmate.co.uk/auth/signup?f=b&uid=210168&pid=122962 | +| 10 | £850/month per room7 Symphony Park, Manchester M1 7GB, UK | https://www.idealflatmate.co.uk/spare-room/manchester/property-id121033 | +| 11 | £850/month per room7 Symphony Park, Manchester M1 7GB, UK | https://www.idealflatmate.co.uk/spare-room/manchester/property-id121032 | +| 12 | £956 - £957/month per room7 Symphony Park, Manchester M1 7GB, UK | https://www.idealflatmate.co.uk/spare-room/manchester/property-id121034 | +| 13 | £980/month per room7 Symphony Park, Manchester M1 7GB, UK | https://www.idealflatmate.co.uk/spare-room/manchester/property-id121030 | +| 14 | £678 - £679/month per roomSalford M5 4YW, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id120131 | +| 15 | £708 - £709/month per roomSalford M5 4YW, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id120127 | +| 16 | £725/month per roomSalford M5 4YW, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id120128 | +| 17 | £775/month per roomMiddlewood Street, Salford, M5 4YW, UK | https://www.idealflatmate.co.uk/spare-room/salford/property-id120129 |