-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_commit_report.py
More file actions
364 lines (314 loc) · 10.7 KB
/
get_commit_report.py
File metadata and controls
364 lines (314 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
#!/usr/bin/env python3
"""
Fetch commit report from Bitbucket (per user, for a date range) using the REST API.
Usage:
cp .env.example .env # then edit .env with your credentials
python get_commit_report.py --workspace MYWORKSPACE --from 2025-01-01 --to 2025-01-31
python get_commit_report.py --workspace MYWORKSPACE --repo my-repo --from 2025-01-01 --to 2025-01-31
If --repo is omitted, all repositories in the workspace are included.
Optional: --branch main (uses POST /commits with include to limit to that branch)
"""
import argparse
import os
import sys
from dotenv import load_dotenv
# Load .env from script directory so it works from any cwd
load_dotenv(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".env"))
from collections import defaultdict
from datetime import datetime, timezone
try:
from tqdm import tqdm
except ImportError:
tqdm = None
# Prefer requests if available
try:
import requests
except ImportError:
requests = None
def get_auth():
    """Return the ``(user, token)`` pair used to authenticate to Bitbucket.

    The user is read from ``BITBUCKET_USER``. The token is read from
    ``BITBUCKET_APP_PASSWORD`` or, when that is unset or empty, from
    ``BITBUCKET_API_TOKEN``. Surrounding whitespace is stripped from both.

    If either value is missing, or one still looks like a placeholder,
    a hint is printed to stderr and the process exits with status 1.
    """
    env = os.environ
    user = (env.get("BITBUCKET_USER") or "").strip()
    raw_token = (
        env.get("BITBUCKET_APP_PASSWORD") or env.get("BITBUCKET_API_TOKEN") or ""
    )
    token = raw_token.strip()

    # Work out which complaint (if any) applies, then report it in one place.
    problem = None
    if not (user and token):
        problem = "Set BITBUCKET_USER and BITBUCKET_APP_PASSWORD (or BITBUCKET_API_TOKEN) in .env."
    elif "example.com" in user or "your-" in token:
        problem = "Replace placeholder values in .env with your real Bitbucket email and API token."

    if problem is not None:
        print(problem, file=sys.stderr)
        sys.exit(1)
    return (user, token)
def _check_401(response, url: str):
"""On 401, print helpful message and exit."""
if response.status_code != 401:
return
print("Bitbucket returned 401 Unauthorized. Check:", file=sys.stderr)
print(
" • BITBUCKET_USER must be your Bitbucket account email (not username).",
file=sys.stderr,
)
print(
" • Use an API token: Bitbucket → Personal settings → API tokens → Create token (Repository read).",
file=sys.stderr,
)
print(
" • You must have access to the workspace (e.g. be a member of MYWORKSPACE).",
file=sys.stderr,
)
sys.exit(1)
# Timeout, in seconds, passed as `timeout=` to HTTP requests made to Bitbucket.
REQUEST_TIMEOUT = 30
def fetch_commits_get(
    workspace: str, repo: str, auth, pagelen: int = 100, page: int = 1
):
    """Fetch one page of a repository's commits with a GET request.

    Uses ``requests`` when it was importable, otherwise falls back to
    ``urllib``. Both paths apply ``REQUEST_TIMEOUT`` so a stalled
    connection cannot hang the script indefinitely.

    Args:
        workspace: Bitbucket workspace slug.
        repo: Repository slug.
        auth: ``(user, token)`` pair sent as HTTP basic auth.
        pagelen: Value for the ``pagelen`` query parameter.
        page: Value for the ``page`` query parameter.

    Returns:
        The decoded JSON response body.

    Raises:
        SystemExit: on a 401 response when ``requests`` is in use
            (via ``_check_401``).
        Exception: whatever ``raise_for_status`` (requests path) or
            ``urlopen`` (urllib path) raises for other failures.
    """
    url = (
        f"https://api.bitbucket.org/2.0/repositories/{workspace}/{repo}/commits"
        f"?pagelen={pagelen}&page={page}"
    )
    if requests:
        r = requests.get(url, auth=auth, timeout=REQUEST_TIMEOUT)
        _check_401(r, url)
        r.raise_for_status()
        return r.json()

    # Fallback: urllib (standard library only).
    import base64
    import json
    import urllib.request

    req = urllib.request.Request(url)
    req.add_header(
        "Authorization",
        "Basic " + base64.b64encode(f"{auth[0]}:{auth[1]}".encode()).decode(),
    )
    # The timeout matches the requests path; without it urlopen blocks forever
    # on an unresponsive server.
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
        return json.load(resp)
def fetch_commits_post(
    workspace: str, repo: str, auth, branch: str, pagelen: int = 100, page: int = 1
):
    """Fetch one page of commits limited to ``branch`` with a POST request.

    The branch is sent in the JSON body as ``include: refs/heads/<branch>``.
    Uses ``requests`` when it was importable, otherwise falls back to
    ``urllib``. Both paths apply ``REQUEST_TIMEOUT`` so a stalled
    connection cannot hang the script indefinitely.

    Args:
        workspace: Bitbucket workspace slug.
        repo: Repository slug.
        auth: ``(user, token)`` pair sent as HTTP basic auth.
        branch: Branch name (without the ``refs/heads/`` prefix).
        pagelen: Value for the ``pagelen`` query parameter.
        page: Value for the ``page`` query parameter.

    Returns:
        The decoded JSON response body.

    Raises:
        SystemExit: on a 401 response when ``requests`` is in use
            (via ``_check_401``).
        Exception: whatever ``raise_for_status`` (requests path) or
            ``urlopen`` (urllib path) raises for other failures.
    """
    url = f"https://api.bitbucket.org/2.0/repositories/{workspace}/{repo}/commits?pagelen={pagelen}&page={page}"
    body = {"include": f"refs/heads/{branch}"}
    if requests:
        r = requests.post(url, auth=auth, json=body, timeout=REQUEST_TIMEOUT)
        _check_401(r, url)
        r.raise_for_status()
        return r.json()

    # Fallback: urllib (standard library only).
    import base64
    import json
    import urllib.request

    req = urllib.request.Request(url, data=json.dumps(body).encode(), method="POST")
    req.add_header(
        "Authorization",
        "Basic " + base64.b64encode(f"{auth[0]}:{auth[1]}".encode()).decode(),
    )
    req.add_header("Content-Type", "application/json")
    # The timeout matches the requests path; without it urlopen blocks forever
    # on an unresponsive server.
    with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT) as resp:
        return json.load(resp)
def fetch_repositories(workspace: str, auth, pagelen: int = 100, page: int = 1):
    """Return one page of the workspace's repository listing as decoded JSON.

    ``auth`` is a ``(user, token)`` pair sent as HTTP basic auth. The
    ``requests`` library is used when available; otherwise the call is
    made with ``urllib``. Both paths use ``REQUEST_TIMEOUT``.
    """
    endpoint = f"https://api.bitbucket.org/2.0/repositories/{workspace}?pagelen={pagelen}&page={page}"

    if not requests:
        # Standard-library fallback when requests could not be imported.
        import base64
        import json
        import urllib.request

        credentials = "{}:{}".format(auth[0], auth[1]).encode()
        basic = "Basic " + base64.b64encode(credentials).decode()
        request = urllib.request.Request(endpoint)
        request.add_header("Authorization", basic)
        with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as reply:
            return json.load(reply)

    response = requests.get(endpoint, auth=auth, timeout=REQUEST_TIMEOUT)
    _check_401(response, endpoint)
    response.raise_for_status()
    return response.json()
def commit_author_key(c):
    """Build a ``(kind, identifier, display)`` grouping key for a commit's author.

    Preference order: the linked account's ``uuid``, then its
    ``nickname``, then the ``raw`` author string of the commit
    (``"unknown"`` when that is missing too).
    """
    author = c.get("author") or {}
    account = author.get("user") or {}
    uuid = account.get("uuid")
    nickname = account.get("nickname")
    display_name = account.get("display_name")

    if uuid:
        return ("uuid", uuid, display_name or nickname or "?")
    if nickname:
        return ("nickname", nickname, display_name or nickname)

    # No linked account: fall back to the raw author string.
    raw = author.get("raw") or "unknown"
    return ("raw", raw, raw)
def collect_commits_for_repo(
    workspace: str,
    repo: str,
    auth,
    start,
    end,
    branch,
    pagelen: int,
    repo_display_name: str = None,
    repo_index: int = None,
    repo_total: int = None,
):
    """Count commits per author for one repository within ``[start, end]``.

    Pages through the commit listing (POST when ``branch`` is given, GET
    otherwise). Commits without a ``date`` are skipped, commits later than
    ``end`` are ignored, and paging stops at the first commit earlier than
    ``start``.

    NOTE(review): stopping at the first too-old commit assumes the API
    returns commits newest first — TODO confirm.

    Args:
        workspace: Bitbucket workspace slug.
        repo: Repository slug.
        auth: ``(user, token)`` pair passed to the fetch helpers.
        start: Inclusive lower bound, compared against parsed commit dates.
        end: Inclusive upper bound, compared against parsed commit dates.
        branch: Branch name to restrict to, or a falsy value for no restriction.
        pagelen: Page size passed to the fetch helpers.
        repo_display_name: Label for the progress bar; defaults to ``repo``.
        repo_index: Position of this repo, shown in the progress label.
        repo_total: Number of repos; when truthy the label includes
            ``repo_index``/``repo_total``.

    Returns:
        ``(by_user, total_in_range)``: a ``defaultdict`` mapping author keys
        (from ``commit_author_key``) to counts, and the total in range.
    """
    counts = defaultdict(int)
    matched = 0
    label = repo_display_name or repo

    def fetch_page(number):
        # `branch` never changes during the call, so one closure serves both modes.
        if branch:
            return fetch_commits_post(workspace, repo, auth, branch, pagelen, number)
        return fetch_commits_get(workspace, repo, auth, pagelen, number)

    progress = None
    page_no = 1
    try:
        while True:
            payload = fetch_page(page_no)
            commits = payload.get("values") or []

            # The bar is created lazily so the first response can supply a total.
            if progress is None and tqdm is not None:
                progress = tqdm(
                    total=payload.get("size"),
                    desc=f"Repo {repo_index}/{repo_total}: {label}" if repo_total else label,
                    unit=" commits",
                    position=1,
                    leave=False,
                    file=sys.stderr,
                )
            if progress is not None:
                progress.update(len(commits))

            if not commits:
                break

            reached_older = False
            for commit in commits:
                stamp = commit.get("date")
                if not stamp:
                    continue
                when = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
                if when < start:
                    # Older than the window: stop scanning this repo.
                    reached_older = True
                    break
                if when <= end:
                    matched += 1
                    counts[commit_author_key(commit)] += 1

            if reached_older or not payload.get("next"):
                break
            page_no += 1
    finally:
        if progress is not None:
            progress.close()
    return counts, matched
def get_workspace_repos(workspace: str, auth, pagelen: int = 100):
    """Yield a ``(slug, name)`` pair for every repository in the workspace.

    Pages through ``fetch_repositories`` until a response has no ``next``
    link. ``name`` falls back to the slug when missing or empty.
    """
    page_no = 1
    has_more = True
    while has_more:
        listing = fetch_repositories(workspace, auth, pagelen, page_no)
        for entry in listing.get("values") or []:
            slug = entry["slug"]
            yield (slug, entry.get("name") or slug)
        has_more = bool(listing.get("next"))
        page_no += 1
def main():
    """Command-line entry point: print commit counts per user for a date range.

    Parses ``--workspace``, ``--repo``, ``--from``, ``--to``, ``--branch``
    and ``--pagelen``, collects commits for the selected repository (or every
    repository in the workspace), merges counts by display name and prints a
    table to stdout, highest count first.

    Exits with status 1 when a date is not ``YYYY-MM-DD``, when ``--from`` is
    later than ``--to``, or when the workspace has no repositories.
    """
    ap = argparse.ArgumentParser(
        description="Bitbucket commit report per user for a date range"
    )
    ap.add_argument("--workspace", required=True, help="Bitbucket workspace slug")
    ap.add_argument(
        "--repo",
        default=None,
        help="Repository slug (omit to include all repos in workspace)",
    )
    ap.add_argument(
        "--from", dest="date_from", required=True, help="Start date YYYY-MM-DD"
    )
    ap.add_argument("--to", dest="date_to", required=True, help="End date YYYY-MM-DD")
    ap.add_argument(
        "--branch", default=None, help="Optional: limit to branch (e.g. main)"
    )
    ap.add_argument("--pagelen", type=int, default=100, help="Page size (default 100)")
    args = ap.parse_args()

    try:
        start = datetime.strptime(args.date_from, "%Y-%m-%d").replace(
            tzinfo=timezone.utc
        )
        # The range is inclusive of the whole --to day, so extend the bound to
        # the last representable microsecond rather than 23:59:59.000000.
        end = datetime.strptime(args.date_to, "%Y-%m-%d").replace(
            hour=23, minute=59, second=59, microsecond=999999, tzinfo=timezone.utc
        )
    except ValueError:
        print("Invalid date; use YYYY-MM-DD.", file=sys.stderr)
        sys.exit(1)

    # Reject an inverted range before any credential or network access;
    # otherwise the run would silently report zero commits.
    if start > end:
        print("--from must not be later than --to.", file=sys.stderr)
        sys.exit(1)

    auth = get_auth()
    by_user = defaultdict(int)
    total_in_range = 0

    if args.repo:
        repos = [(args.repo, args.repo)]
    else:
        repos = list(get_workspace_repos(args.workspace, auth, args.pagelen))
    if not repos:
        print("No repositories found in workspace.", file=sys.stderr)
        sys.exit(1)

    num_repos = len(repos)
    # Wrap the repo list in an outer progress bar when tqdm is available.
    repo_iterator = (
        tqdm(
            repos,
            desc="Repos",
            total=num_repos,
            unit=" repo",
            position=0,
            leave=True,
            file=sys.stderr,
        )
        if tqdm
        else repos
    )
    for idx, (slug, name) in enumerate(repo_iterator, start=1):
        repo_by_user, repo_total = collect_commits_for_repo(
            args.workspace,
            slug,
            auth,
            start,
            end,
            args.branch,
            args.pagelen,
            repo_display_name=name,
            repo_index=idx,
            repo_total=num_repos,
        )
        for key, count in repo_by_user.items():
            by_user[key] += count
        total_in_range += repo_total

    # Merge counts by display name so the same person doesn't appear twice
    by_display = defaultdict(int)
    for (_, _, display), count in by_user.items():
        by_display[display.strip() or "unknown"] += count

    scope = f"repo {args.repo}" if args.repo else f"workspace {args.workspace}"
    print(
        f"\nCommits in {args.date_from} .. {args.date_to} ({scope}, branch: {args.branch or 'default'}): {total_in_range}\n"
    )
    print("User (display) Count")
    print("-" * 45)
    for display, count in sorted(by_display.items(), key=lambda x: -x[1]):
        print(f" {(display[:32]):32} {count}")


if __name__ == "__main__":
    main()