Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Project: SD70
Views: 2692
1
\documentclass{beamer}
2
\usepackage{graphicx}
3
\usepackage{url}
4
5
\mode<presentation> {
6
\usetheme{Madrid}
7
}
8
9
\usepackage{graphicx}
10
\usepackage{booktabs}
11
12
\title[SageMathCloud]{Using RethinkDB in Production for SageMathCloud}
13
\author{William Stein}
14
\institute[SMC]
15
{
16
University of Washington \\
17
\medskip
18
SageMath, Inc. \\
19
\medskip
20
\url{https://cloud.sagemath.com/}
21
}
22
\date{\today}
23
24
\begin{document}
25
26
\begin{frame}
27
\titlepage
28
\end{frame}
29
30
\begin{frame}
31
\frametitle{What is Sage?}
32
33
\begin{block}{SageMath}
34
\begin{itemize}
35
36
\item SageMath: big open source math software I started in 2004
37
\end{itemize}
38
\end{block}
39
40
\begin{block}{SageMathCloud (SMC)}
41
\begin{itemize}
42
\item {\bf Launched:} 2013
43
\item {\bf Real-time editing like Google Docs:} \LaTeX{}, IPython/Jupyter notebooks, Sage, Terminals, Teaching, etc.
44
\item {\bf Tech Stack:} RethinkDB, Linux, React.js, Node.js, SageMath/Python, CodeMirror, CoffeeScript
45
\item {\bf Users:} 4000+ daily active; nearly 1000 simultaneous
46
\item {\bf Production:} anger when it doesn't work -- ``my homework is gone!''
47
\item {\bf Open source:} 100\% open source, GPL 3, etc.
48
\end{itemize}
49
\end{block}
50
51
\end{frame}
52
53
\begin{frame}
54
\frametitle{Hi From Sage Days 70}
55
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{sagedays70}
56
\end{frame}
57
58
59
60
\begin{frame}
61
\frametitle{RethinkDB and SMC}
62
63
{\bf Switched from Cassandra} to RethinkDB this summer.
64
65
\begin{block}{SMC Uses RethinkDB Heavily...}
66
\begin{itemize}
67
\item {\bf Setup:}
68
\begin{itemize}
69
\item 6 Google Compute Engine nodes (quad-core n1-standard-4)
70
\item About 23 tables storing about 5 million documents
71
\item Replication factor 3, sharding of 3
72
\item Storage in persistent (network-mounted) SSD
73
\end{itemize}
74
\item 5K-10K simultaneous changefeeds.
75
\end{itemize}
76
\end{block}
77
78
\begin{block}{Operations}
79
\begin{itemize}
80
\item {\bf Backups:} periodic dump of most tables to JSON on a compressed filesystem, snapshot via bup (=git+more), rsync to Google Cloud Storage and encrypted off-site USB drives.
81
\end{itemize}
82
\end{block}
83
84
RethinkDB team {\bf amazing} at addressing all issues I encountered.
85
86
\end{frame}
87
88
89
\begin{frame}
90
\frametitle{SMC Demo}
91
92
\begin{block}{Show how SMC uses RethinkDB}
93
\begin{enumerate}
94
\item Change name and see change in another browser.
95
\item Show changing project title and that appearing in another browser.
96
\item Draw a 3D plot in a Sage worksheet
97
\item Open a Jupyter notebook -- demo sync and history
98
\item No REST/API calls; instead, set entries in a table, back-end sees it, makes table change, all parts of all front-ends simultaneously see that (do a demo of project restart).
99
\end{enumerate}
100
\end{block}
101
102
\end{frame}
103
104
\begin{frame}
105
\frametitle{SMC Demo: Change username}
106
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-name}
107
\end{frame}
108
109
\begin{frame}
110
\frametitle{SMC Demo: Change Project Title}
111
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-title}
112
\end{frame}
113
114
\begin{frame}
115
\frametitle{SMC Demo: 3D Plot}
116
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-plot}
117
\end{frame}
118
119
\begin{frame}
120
\frametitle{SMC Demo: Jupyter Notebook}
121
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{smc-jupyter}
122
\end{frame}
123
124
125
\begin{frame}
126
\frametitle{How SageMathCloud uses Changefeeds}
127
128
\begin{block}{Motivation}
129
\begin{itemize}
130
\item Make front-end development easier
131
\item Simplify code connecting the front-end to back-end (one declaration instead of messages flying all over)
132
\end{itemize}
133
\end{block}
134
135
\begin{block}{Inspiration}
136
\begin{itemize}
137
\item Facebook's GraphQL -- but simpler
138
\end{itemize}
139
\end{block}
140
141
\begin{block}{Goal}
142
\begin{itemize}
143
\item Have declarative client-side queries and database schema
144
\item Instant notifications about changes.
145
\end{itemize}
146
\end{block}
147
\end{frame}
148
149
\begin{frame}[fragile]
150
\frametitle{GraphQL-like API on RethinkDB}
151
\vfill
152
\begin{center}
153
\Large
154
Building a GraphQL-like API on RethinkDB and Node.js
155
\end{center}
156
\vfill
157
158
\begin{block}{(do not look at this)}
159
\tiny
160
Browser (or iOS/Android at some point) client query:
161
\begin{itemize}
162
\item JSON object that describes what result should look like;
163
null's get filled in.
164
\verb|{table:{foo:bar, stuff:null}}| gets one record in table where \verb|foo="bar"| and \verb|{table:[{foo:bar, stuff:null}]}| gets them all.
165
\item If \verb|changes=true|, then any time RethinkDB table changes, client gets updates, and anytime client makes changes, they get pushed to back-end to RethinkDB.
166
\item Tables can be ``virtual'', and not correspond to actual RethinkDB tables, e.g., different permissions, or involving multiple tables (so joins, technically; they also have a killfeed).
167
\end{itemize}
168
169
\begin{itemize}
170
\item Show {\tt schema.coffee}.
171
172
\item Text editing: describe algorithm based on the above, which isn't deployed yet.
173
\end{itemize}
174
\end{block}
175
\end{frame}
176
177
%\begin{frame}
178
%\frametitle{Running RethinkDB in production}
179
%\begin{block}{Setup}
180
%\begin{itemize}
181
%\item Until mid-Oct we used 6 dual-core n1-highcpu-2, but had to switch to 6 quad-core n1-standard-4.
182
%
183
%\item We use 6 GCE nodes, with about 20+ tables, about 5 Million documents, replication factor 3, sharding of 3, persistent (network-mounted) SSD.
184
%\end{itemize}
185
%\end{block}
186
%\end{frame}
187
%
188
%\begin{frame}
189
%\frametitle{Running RethinkDB in production /2}
190
%\begin{block}{Experiences}
191
%\begin{itemize}
192
%\item Often have around 5000 changefeeds.
193
%
194
%\item Had some trouble with automatic failover (I test in production to be sure it is working!).
195
%
196
%\item The RethinkDB team was \textbf{amazing} in fixing absolutely all bugs I found.
197
%
198
%\item Backups by dumping most tables frequently and using bup
199
%(=git+more, \url{bup.github.io}) to backup,
200
%then rsync to cloud storage and offsite encrypted USB drive.
201
%
202
%\end{itemize}
203
%\end{block}
204
%\end{frame}
205
206
\begin{frame}
207
\frametitle{Instrumentation data in production}
208
\begin{block}{Example: Server Overload}
209
About 3 weeks of data for November 2015 across 6 nodes.
210
At one point (with 6 n1-highcpu-2 nodes), we hit a threshold (with around 850 simultaneous users) and the backend collapsed.
211
\end{block}
212
\begin{block}{Solution}
213
A new node had to be added (Tue 27th).
214
\end{block}
215
\end{frame}
216
217
\begin{frame}
218
\frametitle{Memory usage across database nodes}
219
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{rethinkdb-memory.png}
220
\end{frame}
221
222
\begin{frame}
223
\frametitle{CPU Load (1 min) across database nodes}
224
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{rethinkdb-load-1min.png}
225
\end{frame}
226
227
\begin{frame}
228
\frametitle{TCP connections across database nodes}
229
\includegraphics[keepaspectratio=true,width=.9\paperwidth]{rethinkdb-tcp-connections.png}
230
\end{frame}
231
232
\begin{frame}
233
\frametitle{Thanks!}
234
\begin{block}{Sign up today!}
235
\medskip
236
{\LARGE \url{https://cloud.sagemath.com/}}\\
237
\medskip
238
\end{block}
239
\end{frame}
240
241
\end{document}
242