CoCalc Public Files — wikirecents.ipynb — Open with one click!
Author: Tim Clemans
Views : 77
License: Apache License 2.0
Compute Environment: Ubuntu 20.04 (Experimental)
In [1]:
import os
import re
import subprocess
import threading
import time
from os import popen

import ipywidgets as widgets
import requests
from IPython.display import display, HTML

# Connection settings for the Materialize instance.
# NOTE(review): the host was blank in the original cell (possibly scrubbed).
# An empty value is presumed to make psql/psycopg2 use their default local
# connection -- set MZ_HOST explicitly if the server lives elsewhere.
MZ_HOST = ""
MZ_PORT = "6875"
MZ_DATABASE = "materialize"

# Seconds to wait between refreshes of the top-10 display.
REFRESH_SECONDS = 2

# DDL executed once, in order, before polling starts.
_SETUP_STATEMENTS = (
    # Tail the 'wikirecent' file and capture the payload of each "data:" line.
    """
    CREATE SOURCE wikirecent
    FROM FILE 'wikirecent' WITH (tail = true)
    FORMAT REGEX '^data: (?P<data>.*)';""",
    # Project the JSON payload into typed columns.
    """
    CREATE MATERIALIZED VIEW recentchanges AS
    SELECT
        val->>'$schema' AS r_schema,
        (val->'bot')::bool AS bot,
        val->>'comment' AS comment,
        (val->'id')::float::int AS id,
        (val->'length'->'new')::float::int AS length_new,
        (val->'length'->'old')::float::int AS length_old,
        val->'meta'->>'uri' AS meta_uri,
        val->'meta'->>'id' as meta_id,
        (val->'minor')::bool AS minor,
        (val->'namespace')::float AS namespace,
        val->>'parsedcomment' AS parsedcomment,
        (val->'revision'->'new')::float::int AS revision_new,
        (val->'revision'->'old')::float::int AS revision_old,
        val->>'server_name' AS server_name,
        (val->'server_script_path')::text AS server_script_path,
        val->>'server_url' AS server_url,
        (val->'timestamp')::float AS r_ts,
        val->>'title' AS title,
        val->>'type' AS type,
        val->>'user' AS user,
        val->>'wiki' AS wiki
    FROM (SELECT data::jsonb AS val FROM wikirecent);""",
    # Edit count per user.
    """CREATE MATERIALIZED VIEW useredits AS
    SELECT user, count(*) FROM recentchanges GROUP BY user;""",
    # Ten most active users.
    """
    CREATE MATERIALIZED VIEW top10 AS
    SELECT * FROM useredits ORDER BY count DESC LIMIT 10;
    """,
)


def mz(code):
    """Run one SQL statement through the ``psql`` CLI and return its output.

    The statement is handed to ``psql`` as a single argv entry without going
    through a shell, so ``$schema``, quotes and regex metacharacters in the
    SQL reach the server unchanged.

    Args:
        code: SQL text to execute with ``psql -c``.

    Returns:
        A list of output lines (stdout and stderr combined). If ``psql``
        cannot be started, the list holds a single line describing the error.
    """
    command = ["psql"]
    if MZ_HOST:
        # Only pass -h with a real value; a bare "-h" would swallow "-p".
        command += ["-h", MZ_HOST]
    command += ["-p", MZ_PORT, "-c", code, MZ_DATABASE]
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # surface psql errors in the widget too
            universal_newlines=True,
            check=False,
        )
    except OSError as err:
        # Best effort: report the failure as output instead of killing the thread.
        return ["psql could not be started: %s" % err]
    return completed.stdout.splitlines()


def _format_rows(rows):
    """Render ``(user, count)`` rows as one ``user: count`` line per row."""
    return "\n".join("%s: %s" % tuple(row) for row in rows)


def thread_func(out):
    """Create the Materialize source/views, then keep *out* showing the top 10.

    Runs forever: every ``REFRESH_SECONDS`` seconds it queries the ``top10``
    view and replaces the widget's contents with the formatted rows.

    Args:
        out: an ``ipywidgets.Output`` widget that receives the text.
    """
    # Third-party driver; imported here (as in the original cell) so the
    # import cost and any ImportError stay inside the worker thread.
    import psycopg2

    for statement in _SETUP_STATEMENTS:
        out.append_stdout("\n".join(mz(statement)) + "\n")

    # One connection for the lifetime of the thread instead of a new, never
    # closed connection on every refresh.
    con = psycopg2.connect(database=MZ_DATABASE, host=MZ_HOST, port=MZ_PORT)
    # Autocommit so no transaction is left open between polls.
    con.autocommit = True
    try:
        with con.cursor() as cur:
            while True:
                cur.execute("SELECT * FROM top10 ORDER BY count DESC;")
                rows = _format_rows(cur.fetchall())
                out.clear_output()
                out.append_stdout(rows)
                time.sleep(REFRESH_SECONDS)
    finally:
        con.close()


# Start the poller in the background and show the widget it writes to.
out = widgets.Output()
thread = threading.Thread(
    target=thread_func,
    args=(out,))
thread.start()
display(out)

Analyzing Wikipedia's most recent edits

In [ ]: