Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 208 additions & 0 deletions scripts/log.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import re
import random
import argparse
from pathlib import Path
from datetime import datetime
from collections import defaultdict
import numpy as np
import matplotlib.pyplot as plt

# Upper bound on the number of response times retained per day
# (raise or lower it if memory or accuracy requires).
MAX_SAMPLES = 200_000

# Expected request-log line shape:
#   [pid: ...] <ip> ... [Tue Feb 10 14:47:06 2026] GET /... => generated ... in 2740 msecs
# The leading ".*" is greedy on purpose: the match settles on the *last*
# bracketed timestamp that is directly followed by GET.
pattern = re.compile(
    r".*\["
    r"(?P<date>(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+[A-Za-z]{3}\s+\d{1,2}"
    r"\s+\d{2}:\d{2}:\d{2}\s+\d{4})"
    r"\]\s+GET\s+(?P<path>/[^\s]*)"
    r"\s+=> generated .* in (?P<time>\d+) msecs"
)

# Per-day reservoir state, keyed by day string:
#   samples -> retained response times, counts -> number of values offered.
samples = defaultdict(list)
counts = defaultdict(int)

def reservoir_add(day, value):
    """Offer ``value`` to the bounded sample set kept for ``day``.

    Every call increments ``counts[day]``. While fewer than ``MAX_SAMPLES``
    values are stored for the day, the value is simply appended. After that,
    a uniformly random index in ``[0, counts[day])`` is drawn and the value
    overwrites the stored sample at that index only when the index is below
    ``MAX_SAMPLES``.
    """
    counts[day] += 1
    seen = counts[day]
    bucket = samples[day]

    if len(bucket) < MAX_SAMPLES:
        bucket.append(value)
        return

    # The chance of replacing a stored sample shrinks as ``seen`` grows.
    slot = random.randrange(seen)
    if slot < MAX_SAMPLES:
        bucket[slot] = value


def _default_log_file() -> Path | None:
downloads = Path.home() / "Downloads"
if not downloads.exists():
return None
# Prefer the exact historical naming scheme, but fall back to any similar file.
matches = list(downloads.glob("orumsV3_spoken.log-*"))
if not matches:
matches = list(downloads.glob("*orumsV3_spoken*"))
matches = sorted(matches, key=lambda p: p.stat().st_mtime, reverse=True)
return matches[0] if matches else None


def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Build the command-line parser and parse ``argv``.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets argparse
            read the process command line, which is how the script calls it;
            passing a list allows the parser to be driven programmatically.

    Returns:
        Namespace with ``log_file``, ``max_samples``, ``days``,
        ``homepage_match`` and ``plot`` attributes.
    """
    parser = argparse.ArgumentParser(
        description="Compute homepage response-time percentiles from a log file."
    )
    parser.add_argument(
        "--log-file",
        type=str,
        default=None,
        help="Path to the log file. If omitted, uses the newest ~/Downloads/orumsV3_spoken.log-* file.",
    )
    parser.add_argument(
        "--max-samples",
        type=int,
        default=MAX_SAMPLES,
        help="Max reservoir samples per day (default: %(default)s).",
    )
    parser.add_argument(
        "--days",
        type=str,
        default="10,11,12,13,14,15",
        help="Comma-separated day-of-month numbers to include for Feb (default: %(default)s).",
    )
    parser.add_argument(
        "--homepage-match",
        choices=["strict", "allow-query"],
        default="allow-query",
        help="Homepage matching: strict='/' only; allow-query='/' and '/?…' (default: %(default)s).",
    )
    parser.add_argument(
        "--plot",
        choices=["compare", "hist", "both", "none"],
        default="compare",
        help="Plot type (default: %(default)s).",
    )
    return parser.parse_args(argv)


# Parse the command line once and derive the run configuration from it.
args = _parse_args()
MAX_SAMPLES = args.max_samples
# Blank entries (e.g. from a trailing comma) are dropped before conversion.
days = set(
    map(int, filter(None, (piece.strip() for piece in args.days.split(","))))
)

def _is_homepage(path: str) -> bool:
    """Tell whether ``path`` counts as the homepage under ``--homepage-match``.

    ``"/"`` always qualifies. In any mode other than ``strict``, paths that
    begin with ``"/?"`` (the root with a query string) qualify as well.
    """
    if path == "/":
        return True
    if args.homepage_match == "strict":
        return False
    # allow-query
    return path.startswith("/?")

# Resolve the input file. An explicit --log-file always wins; otherwise fall
# back to auto-detection. The two failure cases get different messages so the
# user is not told that auto-detection ran when a path was actually supplied.
if args.log_file:
    log_file = Path(args.log_file).expanduser()
    if not log_file.is_file():
        raise SystemExit(f"Log file not found: {log_file}")
else:
    log_file = _default_log_file()
    if log_file is None or not log_file.is_file():
        downloads = Path.home() / "Downloads"
        default_hint = downloads / "orumsV3_spoken.log-*"
        raise SystemExit(
            "Log file not found.\n"
            f"Tried auto-detecting the newest file matching `{default_hint}`.\n"
            "Pass it explicitly, e.g.:\n"
            " python log.py --log-file ~/Downloads/orumsV3_spoken.log-20260215\n"
        )


# Stream the log once, feeding homepage response times into the per-day
# reservoirs. The encoding is pinned so decoding does not depend on the
# machine's locale; undecodable bytes are dropped via errors="ignore".
with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
    for line in f:
        m = pattern.search(line)
        if m is None:
            continue

        # homepage only
        if not _is_homepage(m.group("path").strip()):
            continue

        try:
            dt = datetime.strptime(m.group("date"), "%a %b %d %H:%M:%S %Y")
        except ValueError:
            # The regex only checks the shape of the timestamp; an unknown
            # month abbreviation or an impossible day/time still gets here.
            # Skip that line instead of aborting the whole run.
            continue

        # only the requested day-of-month numbers in February
        if dt.month == 2 and dt.day in days:
            reservoir_add(dt.strftime("%Y-%m-%d"), int(m.group("time")))


# compute percentiles & print a comparison table
print("\n📊 Homepage response-time percentiles (Feb comparison)\n")
print(f"Log file: {log_file}")
print(f"Homepage match: {args.homepage_match}")
print("")

# Sample keys carry the year found in the log, so build the report keys from
# the years actually seen. With no data at all, fall back to the previously
# hard-coded 2026 so the table still lists the requested days.
years_seen = sorted({day_key.split("-", 1)[0] for day_key in counts}) or ["2026"]
ordered_days = [
    f"{year}-02-{d:02d}" for year in years_seen for d in sorted(days)
]

rows = []  # (day, samples_used, total_seen, p50, p80, p90); percentiles None if empty
results = {}  # day -> np.ndarray (for optional plotting)

for day in ordered_days:
    day_samples = samples.get(day, [])
    if not day_samples:
        rows.append((day, 0, counts.get(day, 0), None, None, None))
        continue

    arr = np.array(day_samples)
    results[day] = arr

    # One call computes all three percentiles over the same data.
    p50, p80, p90 = (float(p) for p in np.percentile(arr, [50, 80, 90]))
    rows.append((day, len(arr), counts.get(day, len(arr)), p50, p80, p90))

header = ("Date", "samples_used", "total_seen", "p50_ms", "p80_ms", "p90_ms")
print(f"{header[0]:<12} {header[1]:>12} {header[2]:>10} {header[3]:>10} {header[4]:>10} {header[5]:>10}")
print("-" * 70)
for (day, n, total, p50, p80, p90) in rows:
    if p50 is None:
        # Days without samples show dashes instead of numbers.
        print(f"{day:<12} {n:>12} {total:>10} {'-':>10} {'-':>10} {'-':>10}")
    else:
        print(f"{day:<12} {n:>12} {total:>10} {p50:>10.1f} {p80:>10.1f} {p90:>10.1f}")


def _plot_histograms() -> None:
    """Create one 40-bin histogram figure per day found in ``results``.

    Figures are only created here; displaying them is left to the caller.
    """
    for day_label, day_values in results.items():
        _fig, ax = plt.subplots()
        ax.hist(day_values, bins=40)
        ax.set_title(f"Homepage Response Time Distribution ({day_label})")
        ax.set_xlabel("Response Time (ms)")
        ax.set_ylabel("Requests")
        ax.grid(True)


def _plot_comparison() -> None:
    """Draw P50/P80/P90 as three lines across the days that have samples.

    Days whose sample count in ``rows`` is zero are left out. When no day has
    data, no figure is created.
    """
    plotted = [
        (day, p50, p80, p90)
        for (day, n, _total, p50, p80, p90) in rows
        if n != 0
    ]
    if not plotted:
        return

    # Transpose the per-day tuples into one list per plotted column.
    xs, p50s, p80s, p90s = (list(column) for column in zip(*plotted))

    plt.figure()
    for series, label in ((p50s, "P50"), (p80s, "P80"), (p90s, "P90")):
        plt.plot(xs, series, marker="o", label=label)
    plt.title("Homepage Response Time Percentiles (Feb 10–15)")
    plt.xlabel("Date")
    plt.ylabel("Response time (ms)")
    plt.grid(True)
    plt.legend()
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()


# Build whichever figures --plot asked for, then show them all at once.
if args.plot != "none":
    if args.plot in ("hist", "both"):
        _plot_histograms()
    if args.plot in ("compare", "both"):
        _plot_comparison()
    plt.show()
26 changes: 13 additions & 13 deletions static/website/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,12 @@ <h3 align="center">Answers</h3>
</td>

<td>
{{ question.answer_set.count }}
{{ question.answer_count }}
</td>

<td>
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.user }}">
{{ question.user|truncatechars:10 }}
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.cached_user }}">
{{ question.cached_user|truncatechars:10 }}
</span>
</td>
</tr>
Expand Down Expand Up @@ -250,8 +250,8 @@ <h3 align="center">Answers</h3>
</span>
</td>
<td>
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.user }}">
{{ question.user|truncatechars:10 }}
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.cached_user }}">
{{ question.cached_user|truncatechars:10 }}
</span>
</td>

Expand All @@ -263,8 +263,8 @@ <h3 align="center">Answers</h3>
</span>
</td>
<td>
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.last_post_user }}">
{{ question.last_post_user|truncatechars:10 }}
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.cached_last_post_user }}">
{{ question.cached_last_post_user|truncatechars:10 }}
</span>
</td>

Expand All @@ -273,7 +273,7 @@ <h3 align="center">Answers</h3>
</td>

<td>
{{ question.answer_set.count }}
{{ question.answer_count }}
</td>
</tr>
{% endfor %}
Expand Down Expand Up @@ -350,8 +350,8 @@ <h3 align="center">Answers</h3>
</span>
</td>
<td>
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.user }}">
{{ question.user|truncatechars:10 }}
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.cached_user }}">
{{ question.cached_user|truncatechars:10 }}
</span>
</td>

Expand All @@ -363,8 +363,8 @@ <h3 align="center">Answers</h3>
</span>
</td>
<td>
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.last_post_user }}">
{{ question.last_post_user|truncatechars:10 }}
<span class="title" data-toggle="tooltip" data-placement="top" title="{{ question.cached_last_post_user }}">
{{ question.cached_last_post_user|truncatechars:10 }}
</span>
</td>

Expand All @@ -373,7 +373,7 @@ <h3 align="center">Answers</h3>
</td>

<td>
{{ question.answer_set.count }}
{{ question.answer_count }}
</td>
</tr>
{% endfor %}
Expand Down
4 changes: 2 additions & 2 deletions website/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def user(self):
return user.username

def last_post_user(self):
    """Return the username for the user id stored in ``self.last_post_by``.

    The lookup uses ``filter(...).first()`` so a missing user row yields the
    placeholder "Unknown User" instead of raising.
    """
    user = User.objects.filter(id=self.last_post_by).first()
    return user.username if user else "Unknown User"

class Meta:
get_latest_by = "date_created"
Expand Down
44 changes: 32 additions & 12 deletions website/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,35 @@


def home(request):
questions = Question.objects.filter(status=1).order_by('date_created').reverse()[:100]
active_questions = Question.objects.filter(status=1, last_active__isnull=False).order_by('last_active').reverse()[:100]

# Base query with answer count annotation
base_queryset = Question.objects.annotate(answer_count=Count('answer'))

questions = base_queryset.filter(status=1).order_by('-date_created')[:100]
active_questions = base_queryset.filter(status=1, last_active__isnull=False).order_by('-last_active')[:100]

# Retrieve latest questions per category for the slider
subquery = Question.objects.filter(category=OuterRef('category'), status=1).values('category').annotate(max_date=Max('date_created')).values('max_date')
slider_questions = Question.objects.filter(
slider_questions = base_queryset.filter(
date_created=Subquery(subquery), status=1
).order_by('category')

# spam questions
spam_questions = base_queryset.filter(status=2).order_by('-last_active')[:100]

# Bulk fetch users for all displayed questions to avoid N+1
all_questions = list(questions) + list(active_questions) + list(slider_questions) + list(spam_questions)
uids = set()
for q in all_questions:
uids.add(q.uid)
if q.last_post_by:
uids.add(q.last_post_by)

users = {u.id: u.username for u in User.objects.filter(id__in=uids)}

# Attach usernames to question objects so templates don't trigger queries
for q in all_questions:
q.cached_user = users.get(q.uid, "Unknown User")
q.cached_last_post_user = users.get(q.last_post_by, "Unknown User") if q.last_post_by else "Unknown User"

# Mapping of foss name as in spk db & its corresponding category name in forums db
category_fosses = {val.replace(" ", "-") : val for val in categories}
Expand All @@ -60,18 +81,17 @@ def home(request):
if foss not in category_question_map:
category_question_map[foss] = None

# spam questions
spam_questions = Question.objects.filter(status=2).order_by('last_active').reverse()[:100]
# Sort category_question_map by category name
category_question_map = dict(sorted(category_question_map.items(), key= lambda item: item[0].lower()))
show_spam_list = is_administrator(request.user) or is_forumsadmin(request.user)

context = {
'questions': questions,
'active_questions':active_questions,
'spam_questions': spam_questions,
'category_question_map': category_question_map,
'show_spam_list': show_spam_list
}
'questions': questions,
'active_questions': active_questions,
'spam_questions': spam_questions,
'category_question_map': category_question_map,
'show_spam_list': show_spam_list
}
return render(request, "website/templates/index.html", context)


Expand Down