|
|
|
@ -89,30 +89,35 @@ wrong listings.
|
|
|
|
|
I need to come back to this and finish it off. I left it in a mess because it's the |
|
|
|
|
end of the day (at time of commit) and I need to get some sleep. |
|
|
|
|
|
|
|
|
|
#+begin_src lisp :results silent |
|
|
|
|
;; Export one CSV row per scraped listing page.
;;
;; Reads every *.html file in the raw-data listing directory, takes the text
;; of the first ".card-infos-left" element and the first anchor href, and
;; writes ROW-ID, the raw text, the rent, the location and the URL to the
;; working-data CSV (the file is superseded on every run).
(with-open-file (out-stream
                 #P"working-data/2024-02-24-ideal-flatmate-manc.csv"
                 :direction :output
                 :if-exists :supersede)
  (let ((row-id 0))
    (format out-stream "ROW-ID,RAW-LISTING-INFO,RENT,LOCATION,URL~%")
    (dolist (file-path
             (directory #P"raw-data/external/2024-02-24_ideal-flatmate-manc-listings/*.html"))
      (with-open-file (in-stream file-path)
        (let* ((doc (plump:parse in-stream))
               ;; Both queries are indexed with AREF below, so each is
               ;; treated as a (possibly empty) vector of results.
               (listing (lquery:$ doc ".card-infos-left" (text)))
               (url (lquery:$ doc "a" (attr "href"))))
          (if (zerop (length listing))
              ;; A page without the card element used to abort the whole
              ;; export with an out-of-bounds AREF; skip it instead.
              (warn "Skipping ~a: no .card-infos-left element found." file-path)
              (let* ((raw-info (aref listing 0))
                     ;; Split once: the rent is read from the text before
                     ;; the first "room", the location from the text after.
                     (parts (str:split "room" raw-info))
                     (cleaned-price
                       (first (cl-ppcre:all-matches-as-strings
                               "\\d+" (or (first parts) ""))))
                     ;; Commas are stripped because the field is written
                     ;; unquoted. NOTE(review): other CSV-hostile characters
                     ;; (e.g. newlines) are not handled -- confirm the data.
                     (cleaned-location
                       (str:replace-all "," "" (or (second parts) "")))
                     (first-url (when (plusp (length url))
                                  (aref url 0))))
                (format out-stream "~d,~s,~a,~a,~a~%"
                        row-id
                        raw-info
                        ;; Missing values become empty fields, not "NIL".
                        (or cleaned-price "")
                        cleaned-location
                        (or first-url ""))
                ;; Only rows actually written consume an id.
                (incf row-id))))))))
|
|
|
|
#+begin_src lisp :results output raw |
|
|
|
|
;; Export one CSV row per scraped listing page, then print an org link to
;; the generated file so it shows up under #+RESULTS.
;;
;; Reads every *.html file in the raw-data listing directory, takes the text
;; of the first ".card-infos-left" element and the first anchor href, and
;; writes ROW-ID, the raw text, the rent, the location and the URL to
;; FILEPATH (the file is superseded on every run).
(let ((filepath "working-data/2024-02-24-ideal-flatmate-manc.csv")
      (row-id 0))
  (with-open-file (out-stream
                   filepath
                   :direction :output
                   :if-exists :supersede)
    (format out-stream "ROW-ID,RAW-LISTING-INFO,RENT,LOCATION,URL~%")
    (dolist (file-path
             (directory #P"raw-data/external/2024-02-24_ideal-flatmate-manc-listings/*.html"))
      (with-open-file (in-stream file-path)
        (let* ((doc (plump:parse in-stream))
               ;; Both queries are indexed with AREF below, so each is
               ;; treated as a (possibly empty) vector of results.
               (listing (lquery:$ doc ".card-infos-left" (text)))
               (url (lquery:$ doc "a" (attr "href"))))
          (if (zerop (length listing))
              ;; A page without the card element used to abort the whole
              ;; export with an out-of-bounds AREF; skip it instead.
              (warn "Skipping ~a: no .card-infos-left element found." file-path)
              (let* ((raw-info (aref listing 0))
                     ;; Split once: the rent is read from the text before
                     ;; the first "room", the location from the text after.
                     (parts (str:split "room" raw-info))
                     (cleaned-price
                       (first (cl-ppcre:all-matches-as-strings
                               "\\d+" (or (first parts) ""))))
                     ;; Commas are stripped because the field is written
                     ;; unquoted. NOTE(review): other CSV-hostile characters
                     ;; (e.g. newlines) are not handled -- confirm the data.
                     (cleaned-location
                       (str:replace-all "," "" (or (second parts) "")))
                     (first-url (when (plusp (length url))
                                  (aref url 0))))
                (format out-stream "~d,~s,~a,~a,~a~%"
                        row-id
                        raw-info
                        ;; Missing values become empty fields, not "NIL".
                        (or cleaned-price "")
                        cleaned-location
                        (or first-url ""))
                ;; Only rows actually written consume an id.
                (incf row-id)))))))
  ;; Build the link from FILEPATH so it cannot drift from the file written.
  (format t "[[file:./~a]]" filepath))
|
|
|
|
#+end_src |
|
|
|
|
|
|
|
|
|
#+RESULTS: |
|
|
|
|
[[file:./working-data/2024-02-24-ideal-flatmate-manc.csv]] |
|
|
|
|
|
|
|
|
|
The number of listings on Ideal Flatmate is tiny compared to the others. I can |
|
|
|
|
basically show all of them here. |
|
|
|
|
|
|
|
|
|