Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupport News AboutSign UpSign In
| Download
Views: 39538
1
##############################################################################
2
#
3
# CoCalc: Collaborative Calculation in the Cloud
4
#
5
# Copyright (C) 2016, Sagemath Inc.
6
#
7
# This program is free software: you can redistribute it and/or modify
8
# it under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, either version 3 of the License, or
10
# (at your option) any later version.
11
#
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
# GNU General Public License for more details.
16
#
17
# You should have received a copy of the GNU General Public License
18
# along with this program. If not, see <http://www.gnu.org/licenses/>.
19
#
20
###############################################################################
21
22
###
23
The Hub's HTTP Server
24
###
25
26
fs = require('fs')
27
path_module = require('path')
28
Cookies = require('cookies')
29
util = require('util')
30
ms = require('ms')
31
32
async = require('async')
33
cookieParser = require('cookie-parser')
34
body_parser = require('body-parser')
35
express = require('express')
36
formidable = require('formidable')
37
http_proxy = require('http-proxy')
38
http = require('http')
39
winston = require('winston')
40
41
misc = require('smc-util/misc')
42
{defaults, required} = misc
43
44
misc_node = require('smc-util-node/misc_node')
45
hub_register = require('./hub_register')
46
auth = require('./auth')
47
access = require('./access')
48
hub_proxy = require('./proxy')
49
hub_projects = require('./projects')
50
MetricsRecorder = require('./metrics-recorder')
51
52
{http_message_api_v1} = require('./api/handler')
53
54
# Rendering stripe invoice server side to PDF in memory
55
{stripe_render_invoice} = require('./stripe/invoice')
56
57
# Root directory of the SMC install, read from the environment at module load.
{SMC_ROOT} = process.env
# Directory that the '/static', '/policies' and '/doc' routes below serve files from.
STATIC_PATH = path_module.join(SMC_ROOT, 'static')
59
60
# Build the hub's express application and wrap it in a node HTTP server.
#
# opts:
#   base_url       : required -- URL prefix under which the router is mounted; a falsy
#                    value (e.g. '') mounts the router at the root.
#   dev            : if true, also serve the dev-only proxy routes (port forwarding and
#                    raw file server) directly from this process.
#   database       : required -- database object; this function calls concurrent(),
#                    get_blob(), get_server_setting(), get_site_settings() and get_stats()
#                    on it and reads its _concurrent_warn attribute.
#   compute_server : required -- handed to the API handler and used by the dev proxy.
#
# Returns {http_server, express_router}.  listen() is NOT called here.
exports.init_express_http_server = (opts) ->
    opts = defaults opts,
        base_url       : required
        dev            : false  # if true, serve additional dev stuff, e.g., a proxyserver.
        database       : required
        compute_server : required
    winston.debug("initializing express http server")
    winston.debug("MATHJAX_URL = ", misc_node.MATHJAX_URL)

    # Create an express application
    router = express.Router()
    app    = express()
    app.use(cookieParser())

    router.use(body_parser.json())
    router.use(body_parser.urlencoded({ extended: true }))

    # initialize metrics
    response_time_quantile = MetricsRecorder.new_quantile('http_quantile', 'http server',
        percentiles : [0, 0.5, 0.75, 0.9, 0.99, 1]
        labels      : ['path', 'method', 'code']
    )
    response_time_histogram = MetricsRecorder.new_histogram('http_histogram', 'http server',
        buckets : [0.0001, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.1, 0.5, 1, 5, 10]
        labels  : ['path', 'method', 'code']
    )

    # Time every request handled by the router: start both timers when the request
    # arrives and stop them when the response is ended.
    router.use (req, res, next) ->
        res_finished_q = response_time_quantile.startTimer()
        res_finished_h = response_time_histogram.startTimer()
        original_end = res.end
        res.end = ->
            # Keep whatever the real end() returns, so wrapping it is transparent to callers.
            result = original_end.apply(res, arguments)
            # Label by the first path component only, to keep the number of label values small.
            dir_path = path_module.dirname(req.path).split('/')[1] # for two levels: split('/')[1..2].join('/')
            res_finished_q({path:dir_path, method:req.method, code:res.statusCode})
            res_finished_h({path:dir_path, method:req.method, code:res.statusCode})
            return result
        next()

    app.enable('trust proxy') # see http://stackoverflow.com/questions/10849687/express-js-how-to-get-remote-client-address

    # The webpack content. all files except for unhashed .html should be cached long-term ...
    # (used as the setHeaders callback of express.static for /static)
    cacheLongTerm = (res, path) ->
        if not opts.dev  # ... unless in dev mode
            timeout = ms('100 days')  # milliseconds; more than a year would be invalid
            # max-age takes an unquoted integer number of *seconds*, whereas ms() and
            # Date arithmetic work in milliseconds.
            max_age_s = Math.round(timeout / 1000)
            res.setHeader('Cache-Control', "public, max-age=#{max_age_s}")
            res.setHeader('Expires', new Date(Date.now() + timeout).toUTCString())

    # The /static content
    router.use '/static',
        express.static(STATIC_PATH, setHeaders: cacheLongTerm)

    router.use '/policies',
        express.static(path_module.join(STATIC_PATH, 'policies'), {maxAge: 0})
    router.use '/doc',
        express.static(path_module.join(STATIC_PATH, 'doc'), {maxAge: 0})

    router.get '/', (req, res) ->
        # for convenience, a simple heuristic checks for the presence of the remember_me cookie
        # that's not a security issue b/c the hub will do the heavy lifting
        # TODO code in comments is a heuristic looking for the remember_me cookie, while when deployed the haproxy only
        # looks for the has_remember_me value (set by the client in accounts).
        # This could be done in different ways, it's not clear what works best.
        #remember_me = req.cookies[opts.base_url + 'remember_me']
        has_remember_me = req.cookies[opts.base_url + 'has_remember_me']
        if has_remember_me == 'true' # and remember_me?.split('$').length == 4 and not req.query.signed_out?
            res.redirect(opts.base_url + '/app')
        else
            #res.cookie(opts.base_url + 'has_remember_me', 'false', { maxAge: 60*60*1000, httpOnly: false })
            res.sendFile(path_module.join(STATIC_PATH, 'index.html'), {maxAge: 0})

    router.get '/app', (req, res) ->
        #res.cookie(opts.base_url + 'has_remember_me', 'true', { maxAge: 60*60*1000, httpOnly: false })
        res.sendFile(path_module.join(STATIC_PATH, 'app.html'), {maxAge: 0})

    # The base_url javascript, which sets the base_url for the client.
    router.get '/base_url.js', (req, res) ->
        res.send("window.app_base_url='#{opts.base_url}';")

    # used by HAPROXY for testing that this hub is OK to receive traffic
    router.get '/alive', (req, res) ->
        if not hub_register.database_is_working()
            # this will stop haproxy from routing traffic to us
            # until db connection starts working again.
            winston.debug("alive: answering *NO*")
            res.status(404).end()
        else
            res.send('alive')

    router.get '/metrics', (req, res) ->
        res.header("Content-Type", "text/plain")
        res.header('Cache-Control', 'private, no-cache, no-store, must-revalidate')
        metricsRecorder = MetricsRecorder.get()
        if metricsRecorder?
            # res.send(JSON.stringify(opts.metricsRecorder.get(), null, 2))
            res.send(metricsRecorder.metrics())
        else
            res.send(JSON.stringify(error:'Metrics recorder not initialized.'))

    # /concurrent-warn -- used by kubernetes to decide whether or not to kill the container; if
    # below the warn thresh, returns number of concurrent connection; if hits warn, then
    # returns 404 error, meaning hub may be unhealthy.  Kubernetes will try a few times before
    # killing the container.  Will also return 404 if there is no working database connection.
    router.get '/concurrent-warn', (req, res) ->
        c = opts.database.concurrent()
        if not hub_register.database_is_working() or c >= opts.database._concurrent_warn
            winston.debug("/concurrent: not healthy, since concurrent >= #{opts.database._concurrent_warn}")
            res.status(404).end()
        else
            res.send("#{c}")

    # Return number of concurrent connections (could be useful)
    router.get '/concurrent', (req, res) ->
        res.send("#{opts.database.concurrent()}")

    # HTTP API
    # The API key is taken from the Authorization header: either "Bearer <key>" or
    # "Basic <base64 of 'key:...'>"; the event name is the last component of the path.
    router.post '/api/v1/*', (req, res) ->
        h = req.header('Authorization')
        if not h?
            res.status(400).send(error:'You must provide authentication via an API key.')
            return
        # NOTE(review): misc.split presumably splits on whitespace -- confirm in smc-util/misc.
        [type, user] = misc.split(h)
        switch type
            when "Bearer"
                api_key = user
            when "Basic"
                if not user
                    # A bare "Basic" header has nothing to decode; Buffer.from(undefined)
                    # would throw inside the handler, so reject it explicitly instead.
                    res.status(400).send(error:"Missing credentials for authorization type 'Basic'")
                    return
                api_key = Buffer.from(user, 'base64').toString().split(':')[0]
            else
                res.status(400).send(error:"Unknown authorization type '#{type}'")
                return

        http_message_api_v1
            event          : req.path.slice(req.path.lastIndexOf('/') + 1)
            body           : req.body
            api_key        : api_key
            logger         : winston
            database       : opts.database
            compute_server : opts.compute_server
            ip_address     : req.ip
            cb             : (err, resp) ->
                if err
                    res.status(400).send(error:err)  # Bad Request
                else
                    res.send(resp)

    # stripe invoices:  /invoice/[invoice_id].pdf
    stripe_connections = require('./stripe/connect').get_stripe()
    if stripe_connections?
        router.get '/invoice/*', (req, res) ->
            winston.debug("/invoice/* (hub --> client): #{misc.to_json(req.query)}, #{req.path}")
            # Reduce the path to its last component, drop everything up to the last '-',
            # and use what precedes the last '.' as the invoice id.
            path = req.path.slice(req.path.lastIndexOf('/') + 1)
            i = path.lastIndexOf('-')
            if i != -1
                path = path.slice(i+1)
            i = path.lastIndexOf('.')
            if i == -1
                res.status(404).send("invoice must end in .pdf")
                return
            invoice_id = path.slice(0,i)
            winston.debug("id='#{invoice_id}'")

            stripe_render_invoice(stripe_connections, invoice_id, true, res)
    else
        router.get '/invoice/*', (req, res) ->
            res.status(404).send("stripe not configured")

    # return uuid-indexed blobs (mainly used for graphics)
    router.get '/blobs/*', (req, res) ->
        #winston.debug("blob (hub --> client): #{misc.to_json(req.query)}, #{req.path}")
        if not misc.is_valid_uuid_string(req.query.uuid)
            # The uuid is arbitrary client input here; send it back as plain text so a
            # browser never interprets it as HTML (res.send of a string defaults to text/html).
            res.status(404).type('text/plain').send("invalid uuid=#{req.query.uuid}")
            return
        if not hub_register.database_is_working()
            res.status(404).send("can't get blob -- not connected to database")
            return
        opts.database.get_blob
            uuid : req.query.uuid
            cb   : (err, data) ->
                if err
                    res.status(500).send("internal error: #{err}")
                else if not data?
                    res.status(404).send("blob #{req.query.uuid} not found")
                else
                    filename = req.path.slice(req.path.lastIndexOf('/') + 1)
                    if req.query.download?
                        # tell browser to download the link as a file instead
                        # of displaying it in browser
                        res.attachment(filename)
                    else
                        res.type(filename)
                    res.send(data)

    # TODO: is this cookie trick dangerous in some surprising way?
    # NOTE(review): any cookie name/value given in the query string is set verbatim.
    router.get '/cookies', (req, res) ->
        if req.query.set
            # TODO: implement expires as part of query?  not needed for now.
            expires = new Date(new Date().getTime() + 1000*24*3600*30*36) # 3 years -- this is fine now since we support "sign out everywhere"
            cookies = new Cookies(req, res)
            cookies.set(req.query.set, req.query.value, {expires:expires})
        res.end()

    # Used to determine whether or not a token is needed for
    # the user to create an account.
    router.get '/registration', (req, res) ->
        if not hub_register.database_is_working()
            res.json({error:"not connected to database"})
            return
        opts.database.get_server_setting
            name : 'account_creation_token'
            cb   : (err, token) ->
                # Only reveal *whether* a token is required, never the token itself.
                if err or not token
                    res.json({})
                else
                    res.json({token:true})

    router.get '/customize', (req, res) ->
        if not hub_register.database_is_working()
            res.json({error:"not connected to database"})
            return
        opts.database.get_site_settings
            cb : (err, settings) ->
                if err or not settings
                    res.json({})
                else
                    res.json(settings)

    # Save other paths in # part of URL then redirect to the single page app.
    router.get ['/projects*', '/help*', '/settings*'], (req, res) ->
        url = require('url')
        q = url.parse(req.url, true).search # gives exactly "?key=value,key=..."
        res.redirect(opts.base_url + "/app#" + req.path.slice(1) + q)

    # Return global status information about smc
    router.get '/stats', (req, res) ->
        if not hub_register.database_is_working()
            res.json({error:"not connected to database"})
            return
        opts.database.get_stats
            update : false   # never update in hub b/c too slow. instead, run $ hub --update_stats via a cronjob every minute
            cb     : (err, stats) ->
                res.header('Cache-Control', 'private, no-cache, no-store, must-revalidate')
                if err
                    res.status(500).send("internal error: #{err}")
                else
                    res.header("Content-Type", "application/json")
                    res.send(JSON.stringify(stats, null, 1))

    ###
    # Stripe webhooks -- not done
    router.post '/stripe', (req, res) ->
        form = new formidable.IncomingForm()
        form.parse req, (err, fields, files) ->
            # record and act on the webhook here -- see https://stripe.com/docs/webhooks
            # winston.debug("STRIPE: webhook -- #{err}, #{misc.to_json(fields)}")
            res.send('')
    ###

    # Mount the router, under base_url when one is configured.
    if opts.base_url
        app.use(opts.base_url, router)
    else
        app.use(router)

    if opts.dev
        # Proxy server urls -- on SMC in production, HAproxy sends these requests directly to the proxy server
        # serving (from this process) on another port.  However, for development, we handle everything
        # directly in the hub server (there is no separate proxy server), so have to handle these routes
        # directly here.

        # Implementation below is insecure -- it doesn't even check if user is allowed access to the project.
        # This is fine in dev mode, since all as the same user anyways.
        proxy_cache = {}

        # The port forwarding proxy server probably does not work, and definitely won't upgrade to websockets.
        # Jupyter Classical won't work: (1) the client connects to the wrong URL (no base_url),
        # (2) no websocket upgrade, (3) jupyter listens on eth0 instead of localhost.
        # Jupyter2 works fine though.
        dev_proxy_port = (req, res) ->
            req_url = req.url.slice(opts.base_url.length)
            {key, port_number, project_id} = hub_proxy.target_parse_req('', req_url)
            proxy = proxy_cache[key]
            if proxy?
                proxy.web(req, res)
                return
            winston.debug("proxy port: req_url='#{req_url}', port='#{port_number}'")
            # The pseudo-port 'jupyter' is resolved via hub_proxy; anything else is used as-is.
            get_port = (cb) ->
                if port_number == 'jupyter'
                    hub_proxy.jupyter_server_port
                        project_id     : project_id
                        compute_server : opts.compute_server
                        database       : opts.database
                        cb             : cb
                else
                    cb(undefined, port_number)
            get_port (err, port) ->
                winston.debug("get_port: port='#{port}'")
                if err
                    res.status(500).send("internal error: #{err}")
                else
                    target = "http://localhost:#{port}"
                    proxy = http_proxy.createProxyServer(ws:false, target:target, timeout:0)
                    proxy_cache[key] = proxy
                    proxy.on("error", -> delete proxy_cache[key])  # when connection dies, clear from cache
                    # also delete after a few seconds  - caching is only to optimize many requests near each other
                    setTimeout((-> delete proxy_cache[key]), 10000)
                    proxy.web(req, res)

        port_regexp = '^' + opts.base_url + '\/[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\/port\/*'

        app.get( port_regexp, dev_proxy_port)
        app.post(port_regexp, dev_proxy_port)

        # Also, ensure the raw server works
        dev_proxy_raw = (req, res) ->
            req_url = req.url.slice(opts.base_url.length)
            {key, project_id} = hub_proxy.target_parse_req('', req_url)
            winston.debug("dev_proxy_raw", project_id)
            proxy = proxy_cache[key]
            if proxy?
                proxy.web(req, res)
                return
            opts.compute_server.project
                project_id : project_id
                cb         : (err, project) ->
                    if err
                        res.status(500).send("internal error: #{err}")
                    else
                        project.status
                            cb : (err, status) ->
                                if err
                                    res.status(500).send("internal error: #{err}")
                                else if not status['raw.port']
                                    res.status(500).send("no raw server listening")
                                else
                                    port   = status['raw.port']
                                    target = "http://localhost:#{port}"
                                    proxy  = http_proxy.createProxyServer(ws:false, target:target, timeout:0)
                                    proxy_cache[key] = proxy
                                    # when connection dies, clear from cache
                                    proxy.on("error", -> delete proxy_cache[key])
                                    proxy.web(req, res)
                                    # also delete eventually (1 hour)
                                    setTimeout((-> delete proxy_cache[key]), 1000*60*60)

        raw_regexp = '^' + opts.base_url + '\/[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}\/raw*'
        app.get( raw_regexp, dev_proxy_raw)
        app.post(raw_regexp, dev_proxy_raw)

    # NOTE(review): the original nesting of this handler (inside or outside the dev-only
    # section) could not be determined; it only logs, so the placement has no functional effect.
    app.on 'upgrade', (req, socket, head) ->
        winston.debug("\n\n*** http_server websocket(#{req.url}) ***\n\n")
        req_url = req.url.slice(opts.base_url.length)
        # TODO: THIS IS NOT DONE and does not work.  I still don't know how to
        # proxy wss:// from the *main* site to here in the first place; i.e.,
        # this upgrade is never hit, since the main site (that is
        # proxying to this server) is already trying to do something.
        # I don't know if this sort of multi-level proxying is even possible.

    # Get the http server and return it.
    http_server = http.createServer(app)
    return {http_server:http_server, express_router:router}
419
420
421