CoCalc -- syncstring_schema.coffee

Path: cocalc/src / smc-util / syncstring_schema.coffee
Views: ³⁹⁵³⁸
###############################################################################
#
#    CoCalc: Collaborative Calculation in the Cloud
#
#    Copyright (C) 2016, Sagemath Inc.
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################

###
Schema for synchronized editing of strings.
###
25

26
misc = require('./misc')
27

28
schema = require('./schema').SCHEMA
29

30
# Table with one row per synchronized document ("syncstring").  The primary
# key string_id is always computed as db.sha1(project_id, path) in the query
# rules below, so callers identify a syncstring by (project_id, path).
schema.syncstrings =
    primary_key : 'string_id'

    fields :
        string_id :
            type    : 'sha1'
            pg_type : 'CHAR(40)'
            desc    : 'id of this synchronized string -- sha1 hash of (project_id and path)'
        project_id :
            type : 'uuid'
            desc : 'id of project that this synchronized string belongs to'
        last_active :
            type : 'timestamp'
            desc : 'when a user most-recently "cared" about this syncstring (syncstring will be automatically opened in running project if last_active is sufficiently recent)'
        last_file_change :
            type : 'timestamp'
            desc : 'when file on disk last changed not due to save (used by Jupyter sync)'
        path :
            type : 'string'
            desc : 'optional path of file being edited'
        deleted :
            type : 'boolean'
            desc : 'if true, the file was deleted; client **must** create file on disk before editing again.'
        init :
            type : 'map'
            desc : '{time:timestamp, error:?} - info about what happened when project tried to initialize this string'
            date : ['time']
        save :
            type : 'map'
            desc : "{state:['requested', 'done'], hash:misc.hash_string(what was last saved), error:['' or 'error message']}"
        read_only :
            type : 'boolean'
            desc : 'true or false, depending on whether this syncstring is readonly or can be edited'
        users :
            type    : 'array'
            pg_type : 'UUID[]'
            desc    : "array of account_id's of those who have edited this string. Index of account_id in this array is used to represent patch authors."
        last_snapshot :
            type : 'timestamp'
            desc : 'timestamp of a recent snapshot; if not given, assume no snapshots.  This is used to restrict the range of patches that have to be downloaded in order to start editing the file.'
        snapshot_interval :
            type : 'integer'
            desc : 'If m=snapshot_interval is given and there are a total of n patches, then we (some user) should make snapshots at patches m, 2*m, ..., k, where k<=n-m.'
        archived :
            type : 'uuid'
            desc : "if set, then syncstring patches array have been archived in the blob with given uuid."
        doctype :
            type : 'string'
            desc : "(optional) JSON string describing meaning of the patches (i.e., of this document) -- e.g., {type:'db', opts:{primary_keys:['id'], string_cols:['name']}}"

    pg_indexes : ['last_active']

    user_query :
        get :
            fields :
                # string_id is never taken from the client; it is derived
                # from the (required) project_id and path.
                string_id         : (obj, db) -> db.sha1(obj.project_id, obj.path)
                users             : null
                last_snapshot     : null
                # NOTE(review): the only non-null entry here; presumably the
                # value handed back when the row has none -- confirm against
                # the query implementation.
                snapshot_interval : 300
                project_id        : null
                path              : null
                deleted           : null
                save              : null
                last_active       : null
                init              : null
                read_only         : null
                last_file_change  : null
                doctype           : null
            required_fields :
                path       : true
                project_id : true
            # Runs db._syncstrings_check first; if that reports an error the
            # error is passed to cb, otherwise db.unarchive_patches is called
            # for this string_id and is handed cb to complete the query.
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstrings_check obj, account_id, project_id, (err) ->
                    if err
                        cb(err)
                    else
                        db.unarchive_patches(string_id: obj.string_id, cb: cb)

        set :
            # That string_id must be sha1(project_id, path) means a user can
            # only ever address one entry of THIS table per query; use
            # recent_syncstrings_in_project to get many.
            fields :
                string_id         : (obj, db) -> db.sha1(obj.project_id, obj.path)
                users             : true
                last_snapshot     : true
                snapshot_interval : true
                project_id        : true
                path              : true
                deleted           : true
                save              : true
                last_active       : true
                init              : true
                read_only         : true
                last_file_change  : true
                doctype           : true
            required_fields :
                path       : true
                project_id : true
            # Delegates the permission/validity check to the database object.
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstrings_check(obj, account_id, project_id, cb)
            # Forwards the old and new row values to the database object.
            on_change : (db, old_val, new_val, account_id, cb) ->
                db._user_set_query_syncstring_change_after(old_val, new_val, account_id, cb)

# Projects get their own deep copy of the user query rules (not a shared
# reference), so the two rule sets can be changed independently.
schema.syncstrings.project_query = misc.deep_copy(schema.syncstrings.user_query)
133

134
# Virtual table layered over 'syncstrings' (same primary key and fields) whose
# only purpose is to delete a single syncstring row.
schema.syncstrings_delete =
    primary_key : schema.syncstrings.primary_key
    virtual     : 'syncstrings'
    fields      : schema.syncstrings.fields
    user_query :
        # A set query is used since exactly one record is selected by its
        # primary key.
        set :
            admin   : true              # only admins can do queries on this virtual table
            delete  : true              # allow deletes
            options : [{delete:true}]   # always delete when doing set on this table, even if not explicitly requested
            fields :
                # Derived from project_id and path, exactly as in syncstrings.
                string_id  : (obj, db) -> db.sha1(obj.project_id, obj.path)
                project_id : true
                path       : true
            required_fields :
                project_id : true
                path       : true
            # Same check as the syncstrings table itself.
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstrings_check(obj, account_id, project_id, cb)
152

153
# Virtual table layered over 'syncstrings' for fetching many syncstrings of
# one project at once: those whose last_active is no older than max_age_m
# minutes (see pg_where).
schema.recent_syncstrings_in_project =
    primary_key : schema.syncstrings.primary_key
    virtual     : 'syncstrings'
    fields :
        string_id   : true
        project_id  : true
        path        : true
        last_active : true
        deleted     : true
        doctype     : true
    user_query :
        get :
            # Restrict to the given project and to rows that were active
            # within the last obj.max_age_m minutes.
            pg_where : (obj, db) ->
                [
                    "project_id = $::UUID"        : obj.project_id,
                    "last_active >= $::TIMESTAMP" : misc.minutes_ago(obj.max_age_m)
                ]
            # Needed because last_active won't be selected automatically,
            # yet the where clause above depends on it.
            pg_changefeed : ->
                select :
                    project_id  : 'UUID'
                    last_active : 'TIMESTAMP'
            fields :
                project_id  : null
                # NOTE(review): the *string* 'null', unlike the other entries.
                # max_age_m is not one of this table's fields and is only
                # read by pg_where; presumably the string marks it as a
                # query-only parameter -- confirm before changing it.
                max_age_m   : 'null'
                string_id   : null
                last_active : null
                path        : null
                deleted     : null
                doctype     : null
            required_fields :
                project_id : true
                max_age_m  : true
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstrings_check(obj, account_id, project_id, cb)

# Projects use the very same rules object as users (shared reference).
schema.recent_syncstrings_in_project.project_query = schema.recent_syncstrings_in_project.user_query
190

191
# Table of patches; each row is one patch of one syncstring, identified by
# the pair (string_id, time).
schema.patches =
    primary_key   : ['string_id', 'time']   # compound primary key
    unique_writes : true                    # there is no reason for a user to write exactly the same record twice
    pg_indexes    : ['time']
    fields :
        string_id :
            pg_type : 'CHAR(40)'
            desc    : 'id of the syncstring that this patch belongs to.'
        time :
            type : 'timestamp'
            desc : 'the timestamp of the patch'
        user_id :
            type : 'integer'
            desc : "a nonnegative integer; this is the index into the syncstrings.users array of account_id's"
        patch :
            type    : 'string'
            pg_type : 'TEXT'  # that's what it is in the database now...
            desc    : 'JSON string that parses to a patch, which transforms the previous version of the syncstring to this version'
        snapshot :
            type : 'string'
            desc : 'Optional -- gives the state of the string at this point in time; this should only be set some time after the patch at this point in time was made. Knowing this snap and all future patches determines all the future versions of the syncstring.'
        sent :
            type : 'timestamp'
            desc : 'Optional approximate time at which patch was **actually** sent to the server, which is approximately when it was really made available to other users.  In case of offline editing, patches from days ago might get inserted into the stream, and this makes it possible for the client to know and behave accordingly.  If this is not set then patch was sent about the same time it was created.'
        prev :
            type : 'timestamp'
            desc : "Optional field to indicate patch dependence; if given, don't apply this patch until the patch with timestamp prev has been applied."
        format :
            type : 'integer'
            desc : "The format of the patch; NULL = compressed dmp patch (for strings); 1 = db-doc patches on objects;"
    user_query :
        get :
            throttle_changes : 1000
            fields :
                string_id : null
                time      : null
                patch     : null
                user_id   : null
                snapshot  : null
                sent      : null
                prev      : null
                format    : null
            check_hook : (db, obj, account_id, project_id, cb) ->
                # Verifies that the user has read access to these patches.
                db._user_get_query_patches_check(obj, account_id, project_id, cb)
        set :
            fields :
                string_id : true
                time      : true
                patch     : true
                user_id   : true
                snapshot  : true
                sent      : true
                prev      : true
                format    : true
            required_fields :
                string_id : true
                time      : true
                user_id   : true
            check_hook : (db, obj, account_id, project_id, cb) ->
                # Verifies that the user has write access to these patches.
                db._user_set_query_patches_check(obj, account_id, project_id, cb)
            # Guards an existing patch against being rewritten: when a row
            # already exists (old_val is defined), neither its author nor its
            # patch string may change.  cb receives an error message on
            # violation and is called with no arguments otherwise.
            before_change : (database, old_val, new_val, account_id, cb) ->
                # Brand-new row: nothing to protect.
                return cb() if not old_val?

                # TODO/CRITICAL: not allowing this seems to cause a lot of problems
                #if old_val.sent and new_val.sent and new_val.sent - 0 != old_val.sent - 0   # CRITICAL: comparing dates here!
                #    cb("you may not change the sent time once it is set")
                #    return

                author_changed = old_val.user_id? and new_val.user_id? and old_val.user_id != new_val.user_id
                if author_changed
                    cb("you may not change the author of a patch from #{old_val.user_id} to #{new_val.user_id}")
                    return

                # Direct comparison is fine here since both values are *strings*.
                patch_changed = old_val.patch? and new_val.patch? and old_val.patch != new_val.patch
                if patch_changed
                    cb("you may not change a patch")
                    return

                cb()

# Projects use the very same rules object as users (shared reference).
schema.patches.project_query = schema.patches.user_query
268

269
###
TODO: re-implement
# Table to be used for deleting the patches associated to a syncstring.
# Currently only allowed by admin.
schema.patches_delete  =
    primary_key : schema.patches.primary_key
    virtual     : 'patches'
    fields      : schema.patches.fields
    user_query:
        get :  # use get query since selecting a range of records for deletion
            pg_where : (obj, db) ->
                where = ["string_id = $::CHAR(40)" : obj.id[0]]
                if obj.id[1]?
                    where.push("time >= $::TIMESTAMP" : obj.id[1])
                return where
            admin  : true
            delete : true
            fields :
                id   : 'null'
                dummy : null
            check_hook : (db, obj, account_id, project_id, cb) ->
                # this verifies that user has read access to these patches -- redundant with admin requirement above.
                db._user_get_query_patches_check(obj, account_id, project_id, cb)
###
293

294
# Table of cursor positions: one row per (syncstring, user index) pair.
schema.cursors =
    primary_key : ['string_id', 'user_id']  # this is a compound primary key as an array -- [string_id, user_id]
    durability  : 'soft'                    # loss of data for the cursors table just doesn't matter
    fields :
        string_id :
            pg_type : 'CHAR(40)'
            desc    : 'id of the syncstring that these cursors belong to.'
        user_id :
            type     : 'integer'
            desc     : "id index of the user into the syncstrings users array"
            pg_check : "CHECK (user_id >= 0)"
        locs :
            type     : 'array'
            pg_type  : 'JSONB[]'
            desc     : "[{x:?,y:?}, ...]    <-- locations of user_id's cursor(s)"
            pg_check : "NOT NULL"
        time :
            type : 'timestamp'
            desc : 'time when these cursor positions were sent out'
    # NOTE(review): unlike the pg_indexes used by the other tables in this
    # file, this is a string expression on row('id'); it looks like a leftover
    # from a different database backend -- confirm whether anything reads it.
    indexes :
        string_id : ["that.r.row('id')(0)"]
    user_query :
        get :
            throttle_changes : 1000
            fields :
                string_id : null
                user_id   : null
                locs      : null
                time      : null
            required_fields :
                string_id : true
            check_hook : (db, obj, account_id, project_id, cb) ->
                # Verifies that the user has read access to these cursors.
                db._user_get_query_cursors_check(obj, account_id, project_id, cb)
        set :
            fields :
                # NOTE(review): string_id and user_id are null here, whereas
                # the set rules of the other tables in this file use true --
                # kept as-is; confirm that this is intended.
                string_id : null
                user_id   : null
                locs      : true
                time      : true
            required_fields :
                string_id : true
                user_id   : true
                locs      : true
                time      : true
            check_hook : (db, obj, account_id, project_id, cb) ->
                # Verifies that the user has write access to these cursors.
                db._user_set_query_cursors_check(obj, account_id, project_id, cb)
342

343
# Table of evaluation requests; each row is identified by the triple
# (string_id, time, user_id).
schema.eval_inputs =
    primary_key   : ['string_id', 'time', 'user_id']
    durability    : 'soft'   # loss of eval requests not serious
    unique_writes : true
    pg_indexes    : ['time']
    fields :
        string_id :
            pg_type : 'CHAR(40)'
            desc    : 'id of the syncstring that this input belongs to.'
        time :
            type : 'timestamp'
            desc : 'the timestamp of the input'
        user_id :
            type     : 'integer'
            desc     : "id index of the user into the syncstrings users array"
            pg_check : "CHECK (user_id >= 0)"
        input :
            type : 'map'
            desc : "For example it could be {program:'sage' or 'sh', input:{code:'...', data:'...', preparse:?, event:'execute_code', output_uuid:?, id:....}}"
    user_query :
        get :
            fields :
                string_id : null
                time      : null
                user_id   : null
                input     : null
            # Access is decided by the database object from the string_id.
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstring_access_check(obj.string_id, account_id, project_id, cb)
        set :
            fields :
                string_id : true
                time      : true
                user_id   : true
                input     : true
            required_fields :
                string_id : true
                time      : true
                user_id   : true
                input     : true
            # Same access check as for get.
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstring_access_check(obj.string_id, account_id, project_id, cb)

# Projects use the very same rules object as users (shared reference).
schema.eval_inputs.project_query = schema.eval_inputs.user_query
386

387
# Table of evaluation outputs; each row is identified by the triple
# (string_id, time, number).
schema.eval_outputs =
    primary_key : ['string_id', 'time', 'number']
    durability  : 'soft'   # loss of eval output not serious (in long term only used for analytics)
    pg_indexes  : ['time']
    fields :
        string_id :
            pg_type : 'CHAR(40)'
            desc    : 'id of the syncstring that this output belongs to.'
        time :
            type : 'timestamp'
            desc : 'the timestamp of the output'
        number :
            type     : 'integer'
            desc     : "output_number starting at 0"
            pg_check : "CHECK (number >= 0)"
        # NOTE(review): the only field in this file without a desc; the shape
        # of the map is not documented here.
        output :
            type : 'map'
    user_query :
        get :
            fields :
                string_id : null
                time      : null
                number    : null
                output    : null
            # Access is decided by the database object from the string_id.
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstring_access_check(obj.string_id, account_id, project_id, cb)
        set :
            fields :
                string_id : true
                time      : true
                number    : true
                output    : true
            required_fields :
                string_id : true
                time      : true
                number    : true
                output    : true
            # Same access check as for get.
            check_hook : (db, obj, account_id, project_id, cb) ->
                db._syncstring_access_check(obj.string_id, account_id, project_id, cb)

# Projects use the very same rules object as users (shared reference).
schema.eval_outputs.project_query = schema.eval_outputs.user_query
428

429

430

431

432