Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

Python Data Science Handbook

Views: 90273
"""
This script copies all notebooks from the book into the website directory, and
creates pages which wrap them and link together.
"""
5
import os
6
import nbformat
7
import shutil
8
9
# Template for the generated page files: a metadata header (title, url,
# save_as, Template), a blank line that ends the header, then a
# ``{% notebook ... %}`` tag.  The doubled braces are literal braces once the
# template has been passed through ``str.format``.
PAGEFILE = """title: {title}
url:
save_as: {htmlfile}
Template: {template}

{{% notebook notebooks/{notebook_file} cells[{cells}] %}}
"""
16
17
# Markdown that replaces the third cell of Index.ipynb on the website.
# Paragraphs are separated by blank lines so they render as separate blocks.
INTRO_TEXT = """This website contains the full text of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook) in the form of Jupyter notebooks.

The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT).

If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!
"""
23
24
25
def abspath_from_here(*args):
    """Return an absolute path built relative to this file's directory.

    ``args`` are path components joined onto the directory containing this
    file; the result is normalized by ``os.path.abspath`` (so ``..``
    components are collapsed).
    """
    here = os.path.dirname(__file__)
    path = os.path.join(here, *args)
    return os.path.abspath(path)
29
30
# Directory the notebooks are read from: ``notebooks`` one level above this file.
NB_SOURCE_DIR = abspath_from_here('..', 'notebooks')

# Output directories beneath ``content`` next to this file: one for the
# rewritten notebooks, one for the page files that wrap them.
NB_DEST_DIR = abspath_from_here('content', 'notebooks')
PAGE_DEST_DIR = abspath_from_here('content', 'pages')
33
34
35
def copy_notebooks():
    """Copy the book's notebooks into the website and write a page for each.

    Every ``.ipynb`` file in ``NB_SOURCE_DIR`` is read, its markdown cells are
    rewritten so that references to other notebooks point at the generated
    ``.html`` pages and figure links point at the site's figure URLs, and the
    result is written to ``NB_DEST_DIR``.  A page file rendered from
    ``PAGEFILE`` is written to ``PAGE_DEST_DIR`` for each notebook.  The
    ``figures`` directory is deleted and re-copied on every run.

    Raises:
        ValueError: if the third cell of a notebook other than
            ``Index.ipynb`` is not a single-line ``#`` heading.
        IndexError: if a notebook has fewer than three cells.
    """
    nblist = sorted(nb for nb in os.listdir(NB_SOURCE_DIR)
                    if nb.endswith('.ipynb'))
    name_map = {nb: nb.rsplit('.', 1)[0].lower() + '.html'
                for nb in nblist}

    figsource = abspath_from_here('..', 'notebooks', 'figures')
    figdest = abspath_from_here('content', 'figures')

    # copytree refuses to write into an existing directory, so start clean.
    if os.path.exists(figdest):
        shutil.rmtree(figdest)
    shutil.copytree(figsource, figdest)

    # Markdown links and site URLs always use forward slashes, so build them
    # with '/' rather than os.path.join (which would yield backslashes on
    # Windows and never match the notebook text).
    figure_map = {'figures/' + fig: '/PythonDataScienceHandbook/figures/' + fig
                  for fig in os.listdir(figdest)}

    # Make sure the output directories exist before writing into them.
    os.makedirs(NB_DEST_DIR, exist_ok=True)
    os.makedirs(PAGE_DEST_DIR, exist_ok=True)

    for nb in nblist:
        base = os.path.splitext(nb)[0]
        print('-', nb)

        content = nbformat.read(os.path.join(NB_SOURCE_DIR, nb),
                                as_version=4)

        if nb == 'Index.ipynb':
            cells = '1:'
            template = 'page'
            title = 'Python Data Science Handbook'
            content.cells[2].source = INTRO_TEXT
        else:
            cells = '2:'
            template = 'booksection'
            title = content.cells[2].source
            if not title.startswith('#') or len(title.splitlines()) > 1:
                raise ValueError('title not found in third cell')
            title = title.lstrip('#').strip()

            # put nav below title
            content.cells[0], content.cells[1], content.cells[2] = (
                content.cells[2], content.cells[0], content.cells[1])

        # Replace internal URLs and figure links in notebook
        for cell in content.cells:
            if cell.cell_type != 'markdown':
                continue
            source = cell.source
            for nbname, htmlname in name_map.items():
                source = source.replace(nbname, htmlname)
            for figname, newfigname in figure_map.items():
                source = source.replace(figname, newfigname)
            cell.source = source

        nbformat.write(content, os.path.join(NB_DEST_DIR, nb))

        pagefile = os.path.join(PAGE_DEST_DIR, base + '.md')
        htmlfile = base.lower() + '.html'
        # Write UTF-8 explicitly: titles may contain non-ASCII characters and
        # the platform default encoding is not guaranteed to handle them.
        with open(pagefile, 'w', encoding='utf-8') as f:
            f.write(PAGEFILE.format(title=title,
                                    htmlfile=htmlfile,
                                    notebook_file=nb,
                                    template=template,
                                    cells=cells))
95
96
if __name__ == '__main__':
    # Run the copy only when executed as a script, not when imported.
    copy_notebooks()
98
99
100
101