Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Views: 39558
1
#!/usr/bin/env python3
2
# -*- coding: utf8 -*-
3
###############################################################################
4
#
5
# CoCalc: Collaborative Calculation in the Cloud
6
#
7
# Copyright (C) 2015, CoCalc Authors
8
#
9
# This program is free software: you can redistribute it and/or modify
10
# it under the terms of the GNU General Public License as published by
11
# the Free Software Foundation, either version 3 of the License, or
12
# (at your option) any later version.
13
#
14
# This program is distributed in the hope that it will be useful,
15
# but WITHOUT ANY WARRANTY; without even the implied warranty of
16
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
# GNU General Public License for more details.
18
#
19
# You should have received a copy of the GNU General Public License
20
# along with this program. If not, see <http://www.gnu.org/licenses/>.
21
#
22
###############################################################################
23
24
# Authors:
25
# Harald Schilly <[email protected]>
26
27
'''
28
This command-line utility gathers statistics and general information about a user's
29
project from "inside" the project.
30
31
Datapoints include:
32
* processes
33
* process classifications
34
* metrics for time/cpu/memory/...
35
* etc.
36
37
The output is either JSON-encoded data (consumed by the SMC UI) or plain text.
38
39
Notes:
40
* Assumption, that this is run with cgroups accounting,
41
where the group is the username.
42
* All memory-related values are in mebibytes (MiB, see IEC 80000-13:2008).
43
* All time related units are seconds or UTC.
44
* Some values have human readable string counterparts, they are `*_h`.
45
'''
46
import os
47
from os.path import join
48
from json import load
49
from pytz import utc
50
from datetime import datetime
51
import psutil as ps
52
from dateutil.parser import parse as date_parser
53
from collections import OrderedDict, Counter, defaultdict
54
55
# Conversion factor between adjacent binary units
# (byte -> KiB -> MiB -> GiB); see IEC 80000-13:2008
KBMB = 1024.

# cgroup stats accounts cpu usage in "USER_HZ" units - usually 1/100th second.
USER_HZ = float(os.sysconf(os.sysconf_names['SC_CLK_TCK']))

# The project id is read from $SMC/info.json. This is best effort: when the
# environment variable is unset or the file is missing/unreadable/malformed,
# PROJECT_ID stays None and the report simply omits the project id.
PROJECT_ID = None
try:
    # the context manager closes the file handle even if parsing fails
    with open(join(os.environ["SMC"], "info.json"), 'r') as _info_file:
        SMC_INFO = load(_info_file)
    PROJECT_ID = SMC_INFO.get("project_id")
except Exception:
    # deliberately broad, but no longer swallows KeyboardInterrupt/SystemExit
    PROJECT_ID = None
66
67
68
def secs2hms(secs, as_string=True):
    '''
    Convert seconds into hours, minutes, seconds or a human readable string.

    With `as_string=False` the tuple `(hours, minutes, seconds)` is returned,
    where hours and minutes are integers and seconds is the remainder.

    With `as_string=True` one of three formats is produced, depending on the
    magnitude: `"<h>h<mm>m"` (minutes rounded by the seconds),
    `"<m>m<ss>s"` or `"<s.ss>s"`.
    '''
    h = int(secs // 60**2)
    m = int((secs // 60) % 60)
    s = secs % 60
    if not as_string:
        return h, m, s

    if h == 0 and m > 0 and s >= 59.5:
        # the minute format prints whole seconds; without this carry,
        # e.g. 119.7 seconds would be rendered as "1m60s"
        m, s = m + 1, 0
        if m == 60:
            h, m = 1, 0

    if h > 0:
        # round the minutes by the seconds
        if s > 30:
            m += 1
            if m == 60:
                # carry into the hours *and* reset the minutes,
                # otherwise the result would read "<h>h60m"
                h, m = h + 1, 0
        return "{h:d}h{m:02d}m".format(h=h, m=m)
    if m > 0:
        return "{m:d}m{s:02.0f}s".format(m=m, s=s)
    return "{s:5.2f}s".format(s=s)
90
91
92
def mb2human(mb):
    '''
    Render an amount given in mebibytes as a human readable string.
    The value is scaled to kibibytes and formatted by `kb2human`.
    '''
    kibibytes = mb * KBMB
    return kb2human(kibibytes)
94
95
96
def byte2human(b):
    '''
    Render an amount given in bytes as a human readable string.
    The value is scaled to kibibytes and formatted by `kb2human`.
    '''
    kibibytes = b / KBMB
    return kb2human(kibibytes)
98
99
100
def kb2human(kb):
    '''
    Convert a kibibyte value to larger units – similar to how '-h' switches do it.

    Returns a string with two decimals and the unit suffix `KiB`, `MiB` or
    `GiB`. A value that is exactly at a unit boundary is shown in the larger
    unit, e.g. 1024 KiB is rendered as "1.00MiB".
    '''
    if kb >= KBMB**2:
        return "{:.2f}GiB".format(kb / KBMB**2)
    if kb >= KBMB:
        return "{:.2f}MiB".format(kb / KBMB)
    return "{:.2f}KiB".format(kb)
109
110
111
def run(*cmd):
    '''
    Run a command (program name followed by its arguments, no shell involved)
    and return everything it wrote to stdout, as bytes.

    Raises an `Exception` containing the exit code and the captured stderr
    output if the command exits with a non-zero status.
    '''
    from subprocess import Popen, PIPE
    # stderr must be piped as well, otherwise there is nothing to report
    # when the command fails
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)
    # communicate() drains both pipes and waits for the process to exit
    output, err = p.communicate()
    if p.returncode != 0:
        raise Exception("{!r} failed with exit code {}: {}".format(
            cmd, p.returncode, err.decode("utf8", "replace").strip()))
    return output
120
121
122
def read(fn):
    '''
    Return the entire content of the text file `fn` as one string.
    '''
    with open(fn) as handle:
        return handle.read()
126
127
# Known process categories, used to classify tasks. The order matters:
# `classify_proc` refers to entries by index and the text report sorts
# categories by their position in this list. The last entry is the fallback.
CATEGORY = [
    "SMC Project",     # 0
    "SageMath",        # 1
    "Terminal",        # 2
    "Jupyter",         # 3
    "SMC Management",  # 4
    "Other",           # 5 (fallback)
]
135
136
137
def classify_proc(proc):
    '''
    Map a process to one of the entries in CATEGORY.

    `proc` must offer `name()` (the process name) and `cmdline()` (the list
    of command line arguments). Processes that match none of the known
    patterns, or have fewer than three command line entries, are labeled
    "Other/<process name>".
    '''
    name = proc.name()
    argv = proc.cmdline()
    fallback = "{}/{}".format(CATEGORY[-1], name)

    # all patterns below need at least three command line entries
    if len(argv) < 3:
        return fallback

    last = argv[-1]
    if name == "node":
        if "smc-project/local_hub.coffee" in last:
            return CATEGORY[0]
    elif name == "nodejs":
        if "smc-project/console_server_child.coffee" in last:
            return CATEGORY[2]
        if "forever/bin/monitor" in argv[1]:
            return CATEGORY[4]
    elif name in ("python", "python3"):
        if "smc_sagews.sage_server_command_line" in last:
            return CATEGORY[1]
        if argv[1].endswith('ipython') and argv[2] == 'notebook':
            return CATEGORY[3]

    return fallback
162
163
164
class SmcTop(object):
    """
    Usage: class-instantiation → call `<obj>.json()` for a serialization of it.
    Expected runtime is ~4 seconds, because it has to sample the CPU usage.

    Parameters:
    * userid: name of the user whose processes are inspected;
      defaults to the user running this process.
    * sample_interval: seconds to wait between the two CPU usage samples.
    * tree: if true, a parent → children process tree is computed, too.
    * summarize: if true, `text()` prints one line per category instead of
      one line per process.
    """

    def __init__(self,
                 userid=None,
                 sample_interval=3.0,
                 tree=False,
                 summarize=False):
        self.summarize = summarize
        if userid is None:
            # only look up the current user when no explicit one is given
            from os import getuid
            from pwd import getpwuid
            userid = getpwuid(getuid()).pw_name
        # always set, also when the caller passed a userid explicitly
        self.userid = userid

        # used for measuring % cpu usage, in seconds
        self.sample_interval = sample_interval
        self._calc_tree = tree

        # state variables, filled by totals() and capture()
        self.now = None
        self._proc_stats = None
        self._procs = None
        self._tree = None
        self._totals = None

    def totals(self):
        """
        This uses kernel accounting (or cgroup accounting) directly.
        Calculating sums of the reported values might differ.

        Returns (and stores) a dict with the keys "mem" and "cpu". Each of
        them is either a dict of values (plus `*_h` human readable strings)
        or `{"error": <message>}` if the information is not available.
        """

        def add_human_readable(d, converter=mb2human):
            # list, because dict changes size
            for k, v in list(d.items()):
                d["%s_h" % k] = converter(v)

        def memory(impl="cgroup"):
            '''
            On SMC, all projects are in a cgroup. That's the preferred
            implementation used.

            TODO: there is no fallback if there is no cgroup for a user
            and `smem` is not used either.
            '''
            if impl == "smem":
                # smem is in kilobytes
                try:
                    # User Count Swap USS PSS RSS
                    smem = run("/usr/bin/smem", "-uH").split()[2:]
                    smem = [int(_) / KBMB for _ in smem]
                    smem = dict(zip(["swap", "uss", "pss", "rss"], smem))
                    add_human_readable(smem)
                    return smem
                except Exception as e:
                    return {"error": str(e)}

            elif impl == "cgroup":
                # cgroups is in bytes
                memstat = "/sys/fs/cgroup/memory/%s/memory.stat" % self.userid
                try:
                    # memory usage and limits
                    mem = dict(_.split() for _ in read(memstat).splitlines())
                    conv = lambda x: float(x) / KBMB**2

                    rss = conv(mem['total_rss'])
                    mem_max = conv(mem['hierarchical_memory_limit'])
                    # the swap entries are treated as optional: a missing
                    # one counts as "no swap" instead of raising KeyError
                    swap = conv(mem.get("total_swap", 0))
                    swap_max = conv(mem.get(
                        'hierarchical_memsw_limit',
                        mem['hierarchical_memory_limit'])) - mem_max
                    total = rss + swap
                    total_max = mem_max + swap_max

                    vals = {
                        "rss": rss,
                        "swap": swap,
                        "total": total,
                        "mem_max": mem_max,
                        "swap_max": swap_max,
                        "total_max": total_max,
                    }
                    add_human_readable(vals)
                    # no human readable version
                    vals["percent"] = (100. * total / total_max
                                       if total_max > 0 else 0.)
                    return vals
                except IOError as e:
                    return {"error": str(e)}
                except (KeyError, ValueError) as e:
                    return {"error": "unexpected content in %s: %r"
                                     % (memstat, e)}

        def cpu():
            cpuacct = ("/sys/fs/cgroup/cpu,cpuacct/%s/cpuacct.stat"
                       % self.userid)
            try:
                # cpu usage, reported by the kernel in USER_HZ ticks
                stat = dict(_.split() for _ in read(cpuacct).splitlines())
                s = float(stat["system"]) / USER_HZ
                u = float(stat["user"]) / USER_HZ
                vals = {
                    'system': s,
                    'user': u,
                    'total': s + u,
                }
                add_human_readable(vals, secs2hms)
                return vals
            except IOError as e:
                return {"error": str(e)}
            except (KeyError, ValueError) as e:
                return {"error": "unexpected content in %s: %r"
                                 % (cpuacct, e)}

        self._totals = {
            "mem": memory(),
            "cpu": cpu()
        }
        return self._totals

    def user_processes(self):
        '''
        Returns an iterator over all processes of the given user.
        Processes that vanish or cannot be inspected are skipped.
        '''
        for p in ps.process_iter():
            try:
                if p.username() != self.userid:
                    continue
            except (ps.NoSuchProcess, ps.AccessDenied):
                continue
            yield p

    @staticmethod
    def _check(fn):
        '''
        Call `fn()` and return its result, or None if access is denied.
        '''
        try:
            return fn()
        except ps.AccessDenied:
            return None

    def _process_record(self, p, now, mem_max):
        '''
        Build the report entry (a dict) for the single process `p`.

        `now` is the timezone aware capture timestamp and `mem_max` the
        memory limit in MiB (0 if unknown) used for the memory percentage.
        '''
        io = self._check(p.io_counters)
        # memory_info_ex() is deprecated, memory_info() replaces it
        mem = p.memory_info()

        # cpu time consumed by the process
        cpu_times = p.cpu_times()
        time_used = cpu_times.user + cpu_times.system

        # wall clock time since the process started; create_time() is an
        # epoch timestamp, hence it has to be converted as UTC (not as
        # local time) before it is compared with `now`
        start = datetime.fromtimestamp(p.create_time(), utc)
        # processes started after `now` was taken would yield a negative age
        time_alive = max((now - start).total_seconds(), 0.)

        # memory in pct of cgroup limit, excluding swap.
        # i.e. a value near or above 100% indicates excessive usage
        rss_mb = mem.rss / KBMB**2
        mem_pct = 100. * rss_mb / mem_max if mem_max > 0 else 0.

        return {
            "pid": p.pid,
            # funny thing: name, path and cmdline can be unequal
            "name": p.name(),
            # The process executable as an absolute path.
            "path": self._check(p.exe),
            "category": classify_proc(p),
            "command_line": p.cmdline(),
            "open_files": self._check(p.num_fds),
            "read": io.read_bytes if io else 0,
            "write": io.write_bytes if io else 0,
            # sampled exactly once: a second call right afterwards would
            # only measure the tiny interval since this call
            "cpu_percent": p.cpu_percent(),
            "time": {
                "started": datetime.isoformat(start),
                "absolute": time_alive,
                "absolute_h": secs2hms(time_alive),
                "used": time_used,
                "used_h": secs2hms(time_used),
                "percent": (100. * time_used / time_alive
                            if time_alive > 0 else 0.),
            },
            "memory": {
                "real": rss_mb,
                "virtual": mem.vms / KBMB**2,
                "shared": getattr(mem, "shared", 0) / KBMB**2,
                # mem_pct already is a percentage
                "percent": mem_pct,
            }
        }

    def capture(self):
        """
        The current state of all processes of a given user.
        By default, the current user is taken and analyzed.

        Returns the tuple `(processes, tree, category summaries)`;
        `tree` is None unless the instance was created with `tree=True`.
        """
        if self._totals is None:
            self.totals()
        if self._totals is None:
            return {"error": "no totals available"}

        from time import sleep

        self.now = now = datetime.now(utc)

        # used to build the process tree: parent pid -> list of child pids
        par_ch = defaultdict(list)
        procs = []
        # sum up process categories
        proc_stats = defaultdict(lambda: defaultdict(lambda: 0.0))
        # reset all instance counters to 0
        for proc_class in CATEGORY:
            proc_stats[proc_class]["instances"] = 0

        # cpu_percent needs to be called twice for meaningful values
        for p in self.user_processes():
            try:
                p.cpu_percent()
            except (ps.NoSuchProcess, ps.AccessDenied):
                pass
        sleep(self.sample_interval)

        mem_totals = self._totals["mem"]
        mem_max = 0. if "error" in mem_totals else mem_totals["mem_max"]

        for p in self.user_processes():
            try:
                record = self._process_record(p, now, mem_max)
                children = ([ch.pid for ch in p.children()]
                            if self._calc_tree else [])
            except (ps.NoSuchProcess, ps.AccessDenied):
                # the process exited (or became inaccessible) meanwhile
                continue

            stats = proc_stats[record["category"]]
            stats["instances"] += 1
            stats["cpu"] += record["cpu_percent"]
            stats["mem"] += record["memory"]["percent"]
            stats["time"] += record["time"]["used"]

            if children:
                par_ch[p.pid].extend(children)
            procs.append(record)

        if self._calc_tree:
            tree = defaultdict(dict)
            for par, chlds in par_ch.items():
                for ch in chlds:
                    tree[par][ch] = tree[ch]

            # roots are all nodes which are not a child of another node
            roots = set(tree.keys())
            for subtree in list(tree.values()):
                for child in subtree:
                    # discard: must not fail if a pid shows up twice
                    roots.discard(child)
            self._tree = [{r: tree[r]} for r in roots]

        self._procs = procs
        for c in proc_stats:  # type for instance counter is 'int'
            proc_stats[c]["instances"] = int(proc_stats[c]["instances"])
        self._proc_stats = proc_stats
        return self._procs, self._tree, self._proc_stats

    def data(self):
        '''
        Capture a fresh snapshot and stitch together the gathered data
        into a single dict.
        '''
        self.capture()

        data = {
            "timestamp": datetime.isoformat(self.now),
            "username": self.userid,
            "totals": self._totals,
            "processes": self._procs,
            "summaries": self._proc_stats,
        }

        if self._calc_tree:
            data["tree"] = self._tree

        # add project_id if available
        if PROJECT_ID is not None:
            data["project_id"] = PROJECT_ID

        return data

    def json(self, indent=None):
        '''
        Generates a JSON datastructure of the gathered information.
        An `indent` of 0 (or None) produces the compact single-line form.
        '''
        import json
        data = self.data()
        if indent == 0:
            indent = None
        return json.dumps(data, indent=indent)

    def text(self, sortby=None, width=130):
        '''
        Generates a plain text report of the gathered information.

        `sortby` is one of "mem", "cpu", "auto" (maximum of cpu and memory
        percentage) or "time"; any other value sorts by pid.
        `width` is the width of the header and separator lines.
        '''
        from io import StringIO
        from itertools import groupby

        ret = StringIO()
        data = self.data()
        I = " "

        def print0(*args, **kwds):
            sep = kwds.get('sep', ' ')
            nl = kwds.get('nl', True)
            ret.write(sep.join(args))
            if nl:
                ret.write('\n')

        if sortby == "mem":
            sortkey = lambda x: - x["memory"]["percent"]
        elif sortby == "cpu":
            sortkey = lambda x: - x["cpu_percent"]
        elif sortby == "auto":
            sortkey = lambda x: - max(x["cpu_percent"],
                                      x["memory"]["percent"])
        elif sortby == "time":
            sortkey = lambda x: - x["time"]["used"]
        else:
            # default is by pid
            sortkey = lambda x: x["pid"]

        ts = date_parser(data["timestamp"]).strftime("%Y-%m-%d %H:%M:%S")
        print0(" CoCalc Process Accounting -- {} UTC "
               .format(ts).center(width, "="))
        print0()
        if self.summarize:
            print0("{} {:>6s} {:>14s} {:>7s} {:>7s} {:>13s}"
                   .format(I, "", "#", "CPU%", "MEM%", "TIME+"))
        else:
            print0("{} {:>6s} {:<12s} {:>7s} {:>7s} {:>13s} {:s}"
                   .format(I, "PID", "Name", "CPU%", "MEM%", "TIME+", "COMMAND"))
        print0(width * "-")

        cat_fn = lambda x: x["category"]

        def cat_fn_sorted(x):
            # sort categories by CATEGORY list
            cat = cat_fn(x)
            return CATEGORY.index(cat.split("/", 1)[0]), cat

        procs_by_cat = sorted(data["processes"], key=cat_fn_sorted)
        for cat, procs in groupby(procs_by_cat, cat_fn):
            print0("{:20s} ".format(cat), nl=not self.summarize)
            for p in sorted(procs, key=sortkey):
                if not self.summarize:
                    line = '{} {pid:>6d} {name:<12s} {cpu_percent:>6.1f}% {memory[percent]:>6.1f}% {time[used_h]:>13s}'
                    print0(line.format(I, **p), nl=False)

                    cltxt = ' '.join(p["command_line"])
                    # corner case: no command_line entries
                    if len(cltxt) == 0:
                        print0("")
                    # the command line is printed in chunks of 80 chars
                    for l, idx in enumerate(range(0, len(cltxt), 80)):
                        indent = 3 if l == 0 else (width - 74)
                        print0("{}{}".format(" " * indent, cltxt[idx:idx + 80]))

            if self.summarize:
                sums = data["summaries"][cat]
                # format a copy, the captured numbers stay untouched
                print0("{instances:>3.0f} {cpu:>6.1f}% {mem:>6.1f}% {time:>13s}"
                       .format(**dict(sums, time=secs2hms(sums["time"]))))

        totals = data["totals"]
        print0()
        print0(" Total Resource Usage ".center(width, "="))
        print0("Processes: {}".format(len(data["processes"])))
        try:
            print0("CPU time used: {cpu[total_h]:s} "
                   "(sys:{cpu[system_h]} + user:{cpu[user_h]})".format(**totals))
            print0("MEM consumption: {mem[total_h]:s} of "
                   "{mem[total_max_h]:s} ({mem[percent]:.1f}%)".format(**totals))
        except (KeyError, ValueError):
            # the totals only contain an "error" entry
            print0("CPU/MEM: <no cgroup information>")
        return ret.getvalue()
518
519
520
def parse_arguments(argv=None):
    '''
    Parse the command line options of this utility.

    `argv` is the list of arguments to parse; when it is None (the default),
    the arguments of the running program (`sys.argv[1:]`) are used.

    Returns the argparse namespace with the attributes `tree`, `format`,
    `indent`, `userid`, `sample_interval`, `summarize` and `sortby`.
    '''
    from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    paa = parser.add_argument

    paa("--tree",
        action="store_true",
        help="also generate a process tree")

    paa("--format",
        help="the output format",
        default="json",
        metavar="FMT",
        choices=["json", "text"])

    # help texts are joined by implicit string concatenation; a backslash
    # continuation inside the literal would pull the source indentation
    # (or no separator at all) into the text
    paa("--indent",
        help="Number of spaces for indentation, "
             "e.g. used for JSON serialization",
        default=1,
        type=int,
        metavar="SPACES")

    paa("--user",
        metavar="USER",
        dest="userid",
        help="accounting for the given user, defaults to current user")

    paa("--interval",
        default=3.0,
        metavar="SECS",
        dest="sample_interval",
        type=float,
        help="sampling interval in seconds")

    paa("--summarize",
        default=False,
        action="store_true",
        help="If set to true, the process stats "
             "will be shown per category in 'text' format.")

    paa("--sort",
        metavar="COL",
        dest="sortby",
        help="sort text output by this column",
        default="auto",
        choices=sorted(["mem", "cpu", "time", "pid", "auto"]))

    return parser.parse_args(argv)
568
569
570
def main():
    '''
    Entry point: parse the command line, create the `SmcTop` instance and
    return its report as a string, in the requested output format.
    '''
    options = vars(parse_arguments())
    # these three options select the presentation, everything that is left
    # goes to the SmcTop constructor
    fmt = options.pop("format")
    sortby = options.pop("sortby")
    indent = options.pop("indent")
    top = SmcTop(**options)
    if fmt == "json":
        return top.json(indent=indent)
    if fmt == "text":
        return top.text(sortby=sortby)
580
581
if __name__ == "__main__":
    # script mode: print the report, terminated by a newline
    out = main()
    from sys import stdout
    for chunk in (out, "\n"):
        stdout.write(chunk)
    stdout.flush()
587
588