create day-to-day-comparisons.py script.

This script takes the 'same' day from each month (e.g. the 16th of June and the 16th of July) and overlays their charts on top of each other. If data is not available for both days (e.g. there is data for the 15th of June but not for the 15th of July), no chart is produced. Because the timestamps are inconsistent between the two days, each reading is given a 'Reading Id.' instead, so the time element of each chart is lost: THEY ARE NOT A ONE-TO-ONE MAPPING OF EACH DAY. The charts produced by this script depict the amount of change as a set of sequential events and that's it. They show the general pattern of the day. For example, a chart will show an increase in light from Point A to Point B for Light Meter 1. It will not specify how long it took for the change to occur.
1 year ago · bea85271d8
1 changed files with 62 additions and 0 deletions
--- a/day-to-day-comparisons.py
+++ b/day-to-day-comparisons.py
@ -0,0 +1,62 @@
+from bokeh.plotting import figure, output_file, save, show
+from bokeh.models import Legend, ColumnDataSource
+# import datetime
+from datetime import datetime
+import re
+import bokeh.palettes
+import pandas as pd
+import numpy as np
+import random
+
+def random_colour():
+    """Return a random colour as an ``[r, g, b]`` list.
+
+    Each channel is drawn independently with ``random.randint(0, 255)``
+    (both ends inclusive), in red, green, blue order.
+    """
+    return [random.randint(0, 255) for _ in range(3)]
+
+# Days of the month that are skipped because the data set is incomplete
+# (days 1-13 plus a handful of individual later days).
+INCOMPLETE_DAYS = set(range(1, 14)) | {15, 20, 23, 25, 27}
+MONTHS = range(6, 8)  # months 6 and 7 of 2021
+METERS = range(1, 3)  # light-meter-1 and light-meter-2
+
+# One chart per day-of-month.  The day loop is the outer loop so that a single
+# figure collects the lines of every month and every meter for that day;
+# building the figure per month would make the later month overwrite the
+# earlier one, because both are written to the same output file.
+for day in range(1, 31):
+    d = f"{day:02d}"  # zero-padded, as used in the CSV and HTML file names
+
+    if day in INCOMPLETE_DAYS:
+        print(f"Skipping: Day {d} -- incomplete data...")
+        continue
+
+    p = figure(x_axis_label="Reading Id. (not same as database)",
+               y_axis_label="Light Reading",
+               sizing_mode="stretch_both")
+    p.axis.major_label_text_font_size = "12px"
+    p.axis.major_label_standoff = 10
+
+    for month in MONTHS:
+        for meter in METERS:
+            print(f"PROCESSING: 2021-{month}-{day} for Meter {meter}...")
+            file_path = f"data/light-meter-{meter}/2021-0{month}-{d}.csv"
+            single_file = pd.read_csv(file_path, sep=",", header=None,
+                                      index_col=False, dtype='unicode')
+
+            # The file is read without a header, so the first row is dropped
+            # here; column 1 of the remaining rows is plotted as the reading.
+            readings = single_file.values[1:]
+            y_vals = np.asarray([row[1] for row in readings]).astype(int)
+            # Readings are numbered 0..n-1 instead of using their timestamps.
+            x_vals = np.arange(len(y_vals))
+
+            # The month is part of the label so that the lines of different
+            # months stay distinguishable in the legend.
+            p.line(x=x_vals,
+                   y=y_vals,
+                   legend_label=f"2021-0{month} Meter: {meter}",
+                   line_color=random_colour())
+
+    # Write the chart once, after every month/meter line has been added.
+    p.title = f"Side-by-Side Comparison Day: {day}"
+    output_file(f"output/side-by-side-day-{d}.html",
+                title=f"Side-by-Side Comparison For Day: {d} (From Each Month)")
+    save(p)