\documentclass{beamer}
\usepackage{graphicx}
\usepackage{url}
\mode<presentation> {
\usetheme{Madrid}
}
\usepackage{graphicx}
\usepackage{booktabs}
\title[SageMathCloud]{Using RethinkDB in Production for SageMathCloud}
\author{William Stein}
\institute[SMC]
{
University of Washington \\
\medskip
SageMath, Inc. \\
\medskip
\url{https://cloud.sagemath.com/}
}
\date{\today}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}
\frametitle{What is Sage?}
\begin{block}{SageMath}
\begin{itemize}
\item SageMath: big open source math software I started in 2004
\end{itemize}
\end{block}
\begin{block}{SageMathCloud (SMC)}
\begin{itemize}
\item {\bf Launched:} 2013
\item {\bf Real-time editing like Google Docs:} \LaTeX{}, IPython/Jupyter notebooks, Sage, Terminals, Teaching, etc.
\item {\bf Tech Stack:} RethinkDB, Linux, React.js, Node.js, SageMath/Python, CodeMirror, CoffeeScript
\item {\bf Users:} 4000+ daily active; nearly 1000 simultaneous
\item {\bf Production:} anger when it doesn't work -- ``my homework is gone!''
\item {\bf Open source:} 100\% open source, GPL 3, etc.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\frametitle{Hi From Sage Days 70}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{sagedays70}
\end{frame}
\begin{frame}
\frametitle{RethinkDB and SMC}
{\bf Switched from Cassandra} to RethinkDB this summer.
\begin{block}{SMC Uses RethinkDB Heavily...}
\begin{itemize}
\item {\bf Setup:}
\begin{itemize}
\item 6 Google Compute Engine nodes (quad-core n1-standard-4)
\item About 23 tables storing about 5 million documents
\item Replication factor 3, sharding of 3
\item Storage in persistent (network-mounted) SSD
\end{itemize}
\item 5K--10K simultaneous changefeeds.
\end{itemize}
\end{block}
\begin{block}{Operations}
\begin{itemize}
\item {\bf Backups:} periodic dump of most tables to JSON on a compressed filesystem, snapshot via bup (=git+more), rsync to Google Cloud Storage and encrypted off-site USB drives.
\end{itemize}
\end{block}
RethinkDB team {\bf amazing} at addressing all issues I encountered.
\end{frame}
\begin{frame}
\frametitle{SMC Demo}
\begin{block}{Show how SMC uses RethinkDB}
\begin{enumerate}
\item Change name and see change in another browser.
\item Show changing project title and that appearing in another browser.
\item Draw a 3D plot in a Sage worksheet.
\item Open a Jupyter notebook -- demo sync and history.
\item No REST/API calls; instead, the client sets entries in a table, the back-end sees the change and updates the table, and all parts of all front-ends see that simultaneously (do a demo of project restart).
\end{enumerate}
\end{block}
\end{frame}
\begin{frame}
\frametitle{SMC Demo: Change username}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-name}
\end{frame}
\begin{frame}
\frametitle{SMC Demo: Change Project Title}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-title}
\end{frame}
\begin{frame}
\frametitle{SMC Demo: 3D Plot}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-plot}
\end{frame}
\begin{frame}
\frametitle{SMC Demo: Jupyter Notebook}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-jupyter}
\end{frame}
\begin{frame}
\frametitle{How SageMathCloud uses Changefeeds}
\begin{block}{Motivation}
\begin{itemize}
\item Make front-end development easier
\item Simplify code connecting the front-end to back-end (one declaration instead of messages flying all over)
\end{itemize}
\end{block}
\begin{block}{Inspiration}
\begin{itemize}
\item Facebook's GraphQL -- but simpler
\end{itemize}
\end{block}
\begin{block}{Goal}
\begin{itemize}
\item Have declarative client-side queries and database schema
\item Instant notifications about changes.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}[fragile]
\frametitle{GraphQL-like API on RethinkDB}
\vfill
\begin{center}
\Large
Building a GraphQL-like API on RethinkDB and Node.js
\end{center}
\vfill
\begin{block}{(do not look at this)}
\tiny
Browser (or iOS/Android at some point) client query:
\begin{itemize}
\item JSON object that describes what result should look like;
\verb|null| values get filled in.
\verb|{table:{foo:bar, stuff:null}}| gets one record in table where \verb|foo="bar"| and \verb|{table:[{foo:bar, stuff:null}]}| gets them all.
\item If \verb|changes=true|, then any time a RethinkDB table changes, the client gets updates, and any time the client makes changes, they get pushed via the back-end to RethinkDB.
\item Tables can be ``virtual'', and not correspond to actual RethinkDB tables, e.g., with different permissions, or involving multiple tables (so joins, technically; they also have a killfeed).
\end{itemize}
\begin{itemize}
\item Show {\tt schema.coffee}.
\item Text editing: describe algorithm based on the above, which isn't deployed yet.
\end{itemize}
\end{block}
\end{frame}
\begin{frame}
\frametitle{Instrumentation data in production}
\begin{block}{Example: Server Overload}
About 3 weeks of data for November 2015 across 6 nodes.
At one point (with 6 n1-highcpu-2's), we hit a threshold (with around 850 simultaneous users) and the backend collapsed.
\end{block}
\begin{block}{Solution}
A new node had to be added (Tue 27th).
\end{block}
\end{frame}
\begin{frame}
\frametitle{Memory usage across database nodes}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{rethinkdb-memory.png}
\end{frame}
\begin{frame}
\frametitle{CPU Load (1 min) across database nodes}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{rethinkdb-load-1min.png}
\end{frame}
\begin{frame}
\frametitle{TCP connections across database nodes}
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{rethinkdb-tcp-connections.png}
\end{frame}
\begin{frame}
\frametitle{Thanks!}
\begin{block}{Sign up today!}
\medskip
{\LARGE \url{https://cloud.sagemath.com/}}\\
\medskip
\end{block}
\end{frame}
\end{document}