# CoCalc -- copy_notebooks.py
#
# Python Data Science Handbook
# Project: Python Data Science Handbook
# Path: PythonDataScienceHandbook/website/copy_notebooks.py
# Views: 90273
"""
This script copies all notebooks from the book into the website directory, and
creates pages that wrap them and link them together.
"""
5
import os
6
import nbformat
7
import shutil
8

9
# Template for the page file written alongside each copied notebook.
# The doubled braces ``{{%`` / ``%}}`` are str.format escapes that produce a
# literal ``{% notebook ... %}`` tag in the output; the single-brace fields
# are filled in by copy_notebooks().
PAGEFILE = """title: {title}
url:
save_as: {htmlfile}
Template: {template}

{{% notebook notebooks/{notebook_file} cells[{cells}] %}}
"""
16

17
# Markdown text that copy_notebooks() writes into the third cell of
# Index.ipynb before the notebook is copied to the website.
INTRO_TEXT = """This website contains the full text of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook) in the form of Jupyter notebooks.

The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT).

If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!
"""
23

24

25
def abspath_from_here(*args):
    """Return an absolute path built relative to this script's directory.

    Args:
        *args: Path components that are joined, in order, onto the
            directory containing this file.

    Returns:
        The joined path passed through ``os.path.abspath``, so it is
        absolute and ``..`` components are collapsed.
    """
    here = os.path.dirname(__file__)
    path = os.path.join(here, *args)
    return os.path.abspath(path)
29

30
# Where the source notebooks are read from: the ``notebooks`` directory one
# level above this script's directory.
NB_SOURCE_DIR = abspath_from_here('..', 'notebooks')
# Where the processed notebooks and the generated page files are written,
# both under ``content`` next to this script.
NB_DEST_DIR = abspath_from_here('content', 'notebooks')
PAGE_DEST_DIR = abspath_from_here('content', 'pages')
33

34

35
def copy_notebooks():
    """Copy the book's notebooks into the website tree and write wrapper pages.

    For every ``*.ipynb`` file in ``NB_SOURCE_DIR`` (in sorted order) this:

    1. reads the notebook with ``nbformat`` (as version 4);
    2. chooses the page title, template and cell slice to render;
    3. rewrites references to other notebooks and to figures inside
       markdown cells;
    4. writes the modified notebook to ``NB_DEST_DIR``; and
    5. writes ``<name>.md``, built from ``PAGEFILE``, to ``PAGE_DEST_DIR``.

    The ``content/figures`` directory is deleted and re-copied from the
    source figures on every run.

    Raises:
        ValueError: If a notebook has fewer than three cells, or if the
            third cell of a non-index notebook is not a single-line
            ``#`` heading.
    """
    nblist = sorted(nb for nb in os.listdir(NB_SOURCE_DIR)
                    if nb.endswith('.ipynb'))
    # Notebook file name -> lower-cased HTML page name it will be served as.
    name_map = {nb: nb.rsplit('.', 1)[0].lower() + '.html'
                for nb in nblist}

    figsource = abspath_from_here('..', 'notebooks', 'figures')
    figdest = abspath_from_here('content', 'figures')

    # Start from a clean figures directory so stale files never linger.
    if os.path.exists(figdest):
        shutil.rmtree(figdest)
    shutil.copytree(figsource, figdest)

    # Links inside markdown always use forward slashes, so build both sides
    # of the mapping with '/' rather than os.path.join, which would produce
    # backslashes on Windows and never match.
    figure_map = {'figures/' + fig: '/PythonDataScienceHandbook/figures/' + fig
                  for fig in os.listdir(figdest)}

    # Same replacement order as before: notebook names first, then figures.
    replacements = list(name_map.items()) + list(figure_map.items())

    # Make sure the output directories exist before anything is written.
    os.makedirs(NB_DEST_DIR, exist_ok=True)
    os.makedirs(PAGE_DEST_DIR, exist_ok=True)

    for nb in nblist:
        base = os.path.splitext(nb)[0]
        print('-', nb)

        content = nbformat.read(os.path.join(NB_SOURCE_DIR, nb),
                                as_version=4)

        # Both branches below index the third cell; fail with a clear
        # message instead of a bare IndexError.
        if len(content.cells) < 3:
            raise ValueError('notebook has fewer than three cells: ' + nb)

        if nb == 'Index.ipynb':
            # The index page uses the generic page template, a fixed title,
            # and has its third cell replaced by the website intro text.
            cells = '1:'
            template = 'page'
            title = 'Python Data Science Handbook'
            content.cells[2].source = INTRO_TEXT
        else:
            cells = '2:'
            template = 'booksection'
            # The third cell must be exactly one markdown heading line; its
            # text (without the leading '#') becomes the page title.
            title = content.cells[2].source
            if not title.startswith('#') or len(title.splitlines()) > 1:
                raise ValueError('title not found in third cell')
            title = title.lstrip('#').strip()

            # put nav below title: rotate the first three cells so the
            # title cell comes first, followed by the former first and
            # second cells in their original order.
            first, second, title_cell = content.cells[:3]
            content.cells[:3] = [title_cell, first, second]

        # Replace internal URLs and figure links in notebook
        for cell in content.cells:
            if cell.cell_type != 'markdown':
                continue
            for old, new in replacements:
                cell.source = cell.source.replace(old, new)

        nbformat.write(content, os.path.join(NB_DEST_DIR, nb))

        pagefile = os.path.join(PAGE_DEST_DIR, base + '.md')
        htmlfile = base.lower() + '.html'
        # Titles may contain non-ASCII characters; do not depend on the
        # platform's default encoding.
        with open(pagefile, 'w', encoding='utf-8') as f:
            f.write(PAGEFILE.format(title=title,
                                    htmlfile=htmlfile,
                                    notebook_file=nb,
                                    template=template,
                                    cells=cells))
95

96
# Run the copy only when this file is executed as a script, not on import.
if __name__ == '__main__':
    copy_notebooks()
98

99
    
100

101
# Product
#
# Resources
#
# Company