Contact
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
| Download

📚 The CoCalc Library - books, templates and other resources

Views: 95192
License: OTHER
1
#!/usr/bin/env python3
2
# Copyright: Harald Schilly <[email protected]>
3
# License: Apache 2.0
4
from pprint import pprint
5
import yaml
6
import json
7
import os
8
import itertools as it
9
# Anchor the script at the directory that contains this file and make it the
# working directory, so that all "src" paths can be turned into absolute paths.
ROOT = os.path.split(os.path.abspath(__file__))[0]
os.chdir(ROOT)
12
13
14
# TODO this is silly code, please fix it ...
15
16
def update_meta(meta, new_meta):
    '''
    Merge the 'tags', 'licenses' and 'categories' sections of ``new_meta``
    into ``meta`` in place.

    A simple dict update would overwrite/remove entries, therefore each
    section is merged on its own. Keys of ``new_meta`` other than these
    three sections are ignored. A section that ``meta`` does not have yet
    is created on demand.

    Raises AssertionError if ``new_meta`` declares a category that already
    exists in ``meta``; in that case the categories are left untouched.
    '''
    for section in ('tags', 'licenses'):
        if section in new_meta:
            meta.setdefault(section, {}).update(new_meta[section])

    if 'categories' in new_meta:
        known = meta.setdefault('categories', {})
        # if you introduce a new category, it must be unique
        clashes = [nm for nm in new_meta['categories'] if nm in known]
        if clashes:
            # raised explicitly so that the check also runs under "python -O"
            raise AssertionError(
                'categories defined more than once: {}'.format(clashes))
        known.update(new_meta['categories'])
28
29
30
# TODO this is just for a unique id for each document. maybe make it stable?
ID = it.count(0)
all_ids = set()


def init_doc(docs, prefix):
    '''
    Prepare freshly loaded document entries, modifying them in place.

    For every dict in ``docs``:

    * 'src' and 'thumbnail' (when present) are joined onto ``prefix``;
    * an explicit 'id' is registered in the module-level ``all_ids`` set;
    * a document without 'id' gets a generated 'doc-<n>' id, taken from the
      module-level counter ``ID``.

    Raises AssertionError if an explicit id was already registered.
    '''
    for doc in docs:
        # A document without 'src' is left alone here; the consistency
        # checks of this script report it with a readable message.
        if 'src' in doc:
            doc['src'] = os.path.join(prefix, doc['src'])
        if 'thumbnail' in doc:
            doc['thumbnail'] = os.path.join(prefix, doc['thumbnail'])

        if 'id' in doc:
            if doc['id'] in all_ids:
                # raised explicitly so the check also runs under "python -O"
                raise AssertionError(
                    'document id {!r} is used more than once'.format(doc['id']))
        else:
            newid = 'doc-{}'.format(next(ID))
            # an explicit id may already occupy this number: take the next one
            while newid in all_ids:
                newid = 'doc-{}'.format(next(ID))
            doc['id'] = newid
        all_ids.add(doc['id'])
46
47
def resolve_references(meta, docs, prefix=''):
    '''
    Recursively pull in the YAML files listed under ``meta['references']``.

    ``prefix`` is the directory of the file that ``meta`` was loaded from;
    each reference is opened relative to it. For every referenced file, its
    first YAML document is treated as metadata and merged into ``meta`` via
    ``update_meta``, while all further YAML documents are initialized with
    ``init_doc`` and appended to ``docs``. Afterwards the 'references' key
    is removed from ``meta``.

    Both arguments are modified in place and also returned as the tuple
    ``(meta, docs)``.
    '''
    # append new documents and merge meta
    if 'references' in meta:
        for ref in meta['references']:
            # Use a per-reference name: reassigning ``prefix`` here would
            # leak the directory of one reference into its siblings.
            ref_prefix = os.path.join(prefix, os.path.dirname(ref))
            print("resolve_references prefix={}".format(ref_prefix))
            # safe_load_all needs no explicit Loader (mandatory for
            # yaml.load_all in PyYAML >= 6) and only builds plain data.
            with open(os.path.join(prefix, ref)) as stream:
                new_meta, *new_docs = yaml.safe_load_all(stream)
            init_doc(new_docs, ref_prefix)
            resolve_references(new_meta, new_docs, prefix=ref_prefix)
            update_meta(meta, new_meta)
            docs.extend(new_docs)
        del meta['references']
    return meta, docs
61
62
def _require(condition, message):
    '''Raise AssertionError(message) unless ``condition`` is truthy.'''
    if not condition:
        # raised explicitly so that the checks also run under "python -O"
        raise AssertionError(message)


def consistency_checks(meta, docs):
    '''
    Validate the merged metadata and all document entries.

    For each document this checks that it only uses known keys, that it has
    'title', 'category' and 'src', that 'src' ends with a slash and exists
    below ROOT, that its category and tags are declared in ``meta`` and that
    a given thumbnail file exists. Finally every category and tag in
    ``meta`` must have a 'name'.

    Prints one progress line per document. Raises AssertionError with a
    descriptive message on the first violation.
    '''
    print('done. running consistency checks ...')
    cats = meta['categories']
    tags = meta['tags']
    allowed_keys = {'id', 'src', 'title', 'description', 'website', 'author',
                    'license', 'category', 'tags', 'thumbnail', 'subdir'}
    for doc in docs:
        # the title is checked first, because the progress line needs it
        _require('title' in doc, "doc {} misses a title".format(doc.get('id')))
        print('checking {0[id]}: {0[title]}'.format(doc))
        title = doc['title']
        _require(all(k in allowed_keys for k in doc),
                 "keys: {}".format(list(doc.keys())))
        _require('category' in doc, "doc {} misses category".format(title))
        _require('src' in doc, "doc {} misses src".format(title))
        _require(doc['src'].endswith('/'),
                 'doc "{}" src must end with a slash to signal it is a directory. single files will be supported later ...'.format(title))
        _require(os.path.exists(os.path.join(ROOT, doc['src'])),
                 'doc "{}" src path does not exist!'.format(title))
        _require(doc['category'] in cats,
                 'Category {} of document {} not in meta.categories'.format(doc['category'], doc['id']))
        if 'tags' in doc:
            for t in doc['tags']:
                _require(t in tags,
                         'Tag {} of document {} not in meta.tags'.format(t, doc['id']))
        if 'thumbnail' in doc:
            _require(os.path.exists(doc['thumbnail']),
                     'Thumbnail {0[thumbnail]} for {0[id]} does not exist'.format(doc))
    for key, cat in cats.items():
        _require('name' in cat, 'category {} misses a name'.format(key))
    for key, tag in tags.items():
        _require('name' in tag, 'tag {} misses a name'.format(key))
85
86
def debug(meta, docs):
    '''
    Pretty-print the metadata and then each document to stdout, under the
    headings "META:" and "DOCS:".
    '''
    sections = (("META:", [meta]), ("DOCS:", docs))
    for heading, entries in sections:
        print(heading)
        for entry in entries:
            pprint(entry)
92
93
def export_json(meta, docs):
    '''
    Write ``meta`` and ``docs`` to the file 'index.json' in the current
    working directory, as one JSON object with the keys 'metadata' and
    'documents', indented by one space.
    '''
    payload = {'metadata': meta, 'documents': docs}
    with open('index.json', 'w') as target:
        json.dump(payload, target, indent=1)
96
97
def main(index_fn):
    '''
    Build 'index.json' from the YAML index file ``index_fn``.

    The first YAML document of the file is the metadata, all following ones
    are document entries. Documents are initialized relative to ROOT,
    references are resolved, the result is validated by the consistency
    checks and finally exported as JSON.
    '''
    # safe_load_all needs no explicit Loader (mandatory for yaml.load_all in
    # PyYAML >= 6); the "with" block closes the file once it is fully parsed.
    with open(index_fn) as stream:
        meta, *docs = yaml.safe_load_all(stream)
    init_doc(docs, ROOT)
    resolve_references(meta, docs, prefix=ROOT)
    #debug(meta, docs)
    consistency_checks(meta, docs)
    export_json(meta, docs)
    print('all done.')
105
106
# Script entry point: build the index from 'index.yaml' when run directly.
if __name__ == '__main__':
    main(index_fn='index.yaml')
108