From eb3fe9004c1366c6fb747cae98517a08e80e7ee4 Mon Sep 17 00:00:00 2001 From: Craig Oates Date: Sun, 25 Feb 2024 22:09:03 +0000 Subject: [PATCH] add link to 2024-02-24-ideal-flatmate-manc.csv file. --- ideal-flatmate-manchester.org | 49 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/ideal-flatmate-manchester.org b/ideal-flatmate-manchester.org index 5922714..39a67c1 100644 --- a/ideal-flatmate-manchester.org +++ b/ideal-flatmate-manchester.org @@ -89,30 +89,35 @@ wrong listings. Need to come back to this and finish it off. Left it in a mess because it's the end of the day (at time of commit) and need to get some sleep. -#+begin_src lisp :results silent - (with-open-file (out-stream - #P"working-data/2024-02-24-ideal-flatmate-manc.csv" - :direction :output - :if-exists :supersede) - (let ((row-id 0)) - (format out-stream "ROW-ID,RAW-LISTING-INFO,RENT,LOCATION,URL~%") - (loop for file-path - in (directory #P"raw-data/external/2024-02-24_ideal-flatmate-manc-listings/*.html") - do (with-open-file (in-stream file-path) - (let* ((doc (plump:parse in-stream)) - (listing (lquery:$ doc ".card-infos-left" (text))) - (cleaned-price (first (cl-ppcre:all-matches-as-strings "\\d+" (first (str:split "room" (aref listing 0)))))) - (cleaned-location (str:replace-all "," "" (second (str:split "room" (aref listing 0))))) - (url (lquery:$ doc "a" (attr "href")))) - (format out-stream "~d,~s,~d,~a,~a~%" - row-id - (aref listing 0) - cleaned-price - cleaned-location - (aref url 0)))) - (incf row-id)))) +#+begin_src lisp :results output raw + (let ((filepath #P"working-data/2024-02-24-ideal-flatmate-manc.csv")) + (with-open-file (out-stream + filepath + :direction :output + :if-exists :supersede) + (let ((row-id 0)) + (format out-stream "ROW-ID,RAW-LISTING-INFO,RENT,LOCATION,URL~%") + (loop for file-path + in (directory #P"raw-data/external/2024-02-24_ideal-flatmate-manc-listings/*.html") + do (with-open-file (in-stream file-path) + (let* ((doc (plump:parse in-stream)) + (listing (lquery:$ doc ".card-infos-left" (text))) + (cleaned-price (first (cl-ppcre:all-matches-as-strings "\\d+" (first (str:split "room" (aref listing 0)))))) + (cleaned-location (str:replace-all "," "" (second (str:split "room" (aref listing 0))))) + (url (lquery:$ doc "a" (attr "href")))) + (format out-stream "~d,~s,~d,~a,~a~%" + row-id + (aref listing 0) + cleaned-price + cleaned-location + (aref url 0)))) + (incf row-id)))) + (format t "[[file:./working-data/2024-02-24-ideal-flatmate-manc.csv]]")) #+end_src +#+RESULTS: +[[file:./working-data/2024-02-24-ideal-flatmate-manc.csv]] + The amount of listings on Ideal Flatmate is tiny compared to the others. I can basically show all them here.