diff --git a/scripts/log.py b/scripts/log.py new file mode 100644 index 0000000..aa36ed0 --- /dev/null +++ b/scripts/log.py @@ -0,0 +1,208 @@ +import re +import random +import argparse +from pathlib import Path +from datetime import datetime +from collections import defaultdict +import numpy as np +import matplotlib.pyplot as plt + +# keep max samples per day (adjust if needed) +MAX_SAMPLES = 200000 + +# Log format looks like: +# [pid: ...] ... [Tue Feb 10 14:47:06 2026] GET /... => generated ... in 2740 msecs +# Use a greedy prefix so we capture the *last* [...] date block before GET. +pattern = re.compile( + r".*\[(?P(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+[A-Za-z]{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2}\s+\d{4})\]\s+GET\s+(?P/[^\s]*)\s+=> generated .* in (?P