import csv
import gzip
from datetime import datetime
from dateutil.parser import parse as dt_parse
# Tally activity from a gzip-compressed, semicolon-delimited access log:
# the distinct projects seen on each day of the month and the distinct
# accounts seen on each weekday, then print both summaries.
#
# NOTE(review): the script appears to target Python 2 (gzip.open() is read in
# its default binary mode and fed straight to csv.reader). The single-argument
# print(...) calls below produce the same output as Python 2 print statements.
fn = 'smc_file_access_2016-02.csv.gz'

proj_per_day = {}         # timestamp.day -> set of project ids
account_per_weekday = {}  # timestamp.weekday() -> set of account ids

# The context manager closes the gzip handle even if a row fails to parse.
with gzip.open(fn) as gz_file:
    file_handler = csv.reader(gz_file, delimiter=";")

    # Skip the first row once instead of testing for it on every iteration;
    # the default argument keeps an empty file from raising StopIteration.
    next(file_handler, None)

    # Count from 1 so the progress numbers still treat the skipped row as 0.
    for line_no, row in enumerate(file_handler, 1):
        if line_no % 100000 == 0:
            print("at line %d" % line_no)

        # Only the first three columns feed the tallies: a timestamp, an
        # integer account id and an integer project id.
        timestamp = dt_parse(row[0])
        account = int(row[1])
        project = int(row[2])

        proj_per_day.setdefault(timestamp.day, set()).add(project)
        account_per_weekday.setdefault(timestamp.weekday(), set()).add(account)

print("")
for day, proj in sorted(proj_per_day.items()):
    print("Day %3d: %5d projects active" % (day, len(proj)))

print("")
for weekday, accounts in sorted(account_per_weekday.items()):
    print("Weekday %2d: %5d accounts active" % (weekday, len(accounts)))

print("")