CoCalc -- rh.tex

William Stein -- Talk for Mathematics is a long conversation: a celebration of Barry Mazur
Project: 🤔 William Stein personal general -- TODO, Planning, Inventory, Notes
Path: travel/2018/2018-06-mazur-boston/talk / book / rh / rh.tex
Views: ²³⁴²
1
\documentclass[openany]{book}
2

3

4
%\documentclass[11pt,draft]{article}   % uncomment this and comment out the above line for *fast* typesetting (no images)
5

6
\usepackage{fix-cm}
7

8
\usepackage{soul}
9

10
\usepackage{float}
11
\usepackage{tikz}
12

13
\usepackage{wrapfig}
14

15

16
\usepackage{color}
17
\definecolor{dblackcolor}{rgb}{0.0,0.0,0.0}
18
\definecolor{dbluecolor}{rgb}{.01,.02,0.7}
19
\definecolor{dredcolor}{rgb}{0.8,0,0}
20
\definecolor{dgraycolor}{rgb}{0.30,0.3,0.30}
21
\usepackage{listings}
22
\lstdefinelanguage{Sage}[]{Python}
23
{morekeywords={True,False,sage,singular},
24
sensitive=true}
25
\lstset{frame=none,
26
          showtabs=False,
27
          showspaces=False,
28
          showstringspaces=False,
29
          commentstyle={\ttfamily\color{dredcolor}},
30
          keywordstyle={\ttfamily\color{dbluecolor}\bfseries},
31
          stringstyle ={\ttfamily\color{dgraycolor}\bfseries},
32
          language = Sage,
33
          basicstyle={\scriptsize \ttfamily},
34
          aboveskip=.3em,
35
          belowskip=.1em
36
          }
37

38

39
\usepackage{fancybox}
40
\usepackage{graphicx}
41
\usepackage{amsmath}
42
\usepackage{amsfonts}
43
\usepackage{amssymb}
44
\usepackage{amsthm}
45

46
\usepackage{url}
47

48
\usepackage{makeidx}\makeindex
49

50
\DeclareMathOperator{\Gap}{Gap}
51
\DeclareMathOperator{\Li}{Li}
52
\DeclareMathOperator{\li}{li}
53

54
\DeclareMathOperator{\Sing}{Sing}
55
\DeclareMathOperator{\Triv}{Triv}
56
\DeclareMathOperator{\Osc}{Osc}
57
\DeclareMathOperator{\Elem}{Elem}
58
\DeclareMathOperator{\Double}{Double}
59
\DeclareMathOperator{\Smooth}{Smooth}
60
\DeclareMathOperator{\Ces}{Ces}
61

62

63
\DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png}
64

65
\newcommand{\mycaption}[1]{\begin{quote}{\bf Figure: } \large #1\end{quote}}
66

67
\newcommand{\ill}[3]{%
68
   \begin{figure}[H]%
69
   \vspace{-2ex}
70
   \centering%
71
   \includegraphics[width=#2\textwidth]{illustrations/#1}%
72
   \caption{#3}%
73
   \vspace{-2ex}
74
    \end{figure}}
75

76
\newcommand{\illtwo}[4]{%
77
   \begin{figure}[H]\centering%
78
   \includegraphics[width=#3\textwidth]{illustrations/#1}$\qquad$\includegraphics[width=#3\textwidth]{illustrations/#2}%
79
   \caption{#4}%
80
    \end{figure}}
81

82
\newcommand{\illthree}[5]{%
83
   \begin{figure}[H]%
84
\centering%
85
   \includegraphics[width=#4\textwidth]{illustrations/#1}$\qquad$\includegraphics[width=#4\textwidth]{illustrations/#2}$\qquad$\includegraphics[width=#4\textwidth]{illustrations/#3}%
86
   \caption{#5}%
87
    \end{figure}}
88

89
\newcommand{\illtwoh}[4]{%
90
   \begin{figure}[H]\centering%
91
   \includegraphics[height=#3\textheight]{illustrations/#1}$\qquad$\includegraphics[height=#3\textheight]{illustrations/#2}%
92
   \caption{#4}%
93
    \end{figure}}
94

95
\newcommand{\illthreeh}[5]{%
96
   \begin{figure}[H]%
97
\centering%
98
   \includegraphics[height=#4\textheight]{illustrations/#1}$\qquad$\includegraphics[height=#4\textheight]{illustrations/#2}$\qquad$\includegraphics[height=#4\textheight]{illustrations/#3}%
99
   \caption{#5}%
100
    \end{figure}}
101

102

103
%%%% Theoremstyles
104
\theoremstyle{plain}
105
\newtheorem{theorem}{Theorem}[chapter]
106
\newtheorem{proposition}[theorem]{Proposition}
107
\newtheorem{corollary}[theorem]{Corollary}
108
\newtheorem{claim}[theorem]{Claim}
109
\newtheorem{lemma}[theorem]{Lemma}
110
\newtheorem{hypothesis}[theorem]{Hypothesis}
111
\newtheorem{conjecture}[theorem]{Conjecture}
112

113
\theoremstyle{definition}
114
\newtheorem{definition}[theorem]{Definition}
115
\newtheorem{question}[theorem]{Question}
116
\newtheorem{problem}[theorem]{Problem}
117
\newtheorem{alg}[theorem]{Algorithm}
118
\newtheorem{openproblem}[theorem]{Open Problem}
119

120
%\theoremstyle{remark}
121
\newtheorem{goal}[theorem]{Goal}
122
\newtheorem{remark}[theorem]{Remark}
123
\newtheorem{remarks}[theorem]{Remarks}
124
\newtheorem{example}[theorem]{Example}
125
\newtheorem{exercise}[theorem]{Exercise}
126

127

128

129
%\hoffset=-0.05\textwidth
130
%\textwidth = 1.1\textwidth
131
%\voffset=-0.05\textheight
132
%\textheight = 1.1\textheight
133
%
134
% Set equal margins on book style
135
\setlength{\oddsidemargin}{53pt}
136
\setlength{\evensidemargin}{53pt}
137
\setlength{\marginparwidth}{57pt}
138
\setlength{\footskip}{30pt}
139

140
% Dutch style of paragraph formatting, i.e. no indents.
141
\setlength{\parskip}{1.3ex plus 0.2ex minus 0.2ex}
142
\setlength{\parindent}{0pt}
143

144
\setcounter{tocdepth}{0}
145

146
%\textheight = 9 in
147
%\oddsidemargin = 0.0 in
148
%\evensidemargin = 0.0 in
149
%\topmargin = 0.0 in
150
%\headheight = 0.0 in
151
%\headsep = 0.0 in
152
%\parskip = 0.2in
153
%\parindent = 0.0in
154

155

156
\def\GL{\mathrm{GL}}
157
\def\PGL{\mathrm{PGL}}
158
\def\PSL{\mathrm{PSL}}
159
\def\GSP{\mathrm{GSP}}
160
\def\Z{\mathrm{Z}}
161
\def\Q{\mathrm{Q}}
162
\def\Gal{\mathrm{Gal}}
163
\def\Hom{\mathrm{Hom}}
164
\def\Ind{\mathrm{Ind}}
165
\def\End{\mathrm{End}}
166
\def\Aut{\mathrm{Aut}}
167
\def\loc{\mathrm{loc}}
168
\def\glob{\mathrm{glob}}
169
\def\Kbar{{\bar K}}
170
\def\D{{\mathcal D}}
171
\def\L{{\mathcal L}}
172
\def\R{{\mathcal R}}
173
\def\G{{\mathcal G}}
174
\def\W{{\mathcal W}}
175
\def\H{{\mathcal H}}
176
\def\OH{{\mathcal OH}}
177

178

179

180
\newcommand{\RH}{Riemann Hypothesis\index{Riemann Hypothesis}}
181

182
\title{Prime Numbers and the Riemann Hypothesis}
183
\date{}
184
\author{\Large Barry Mazur \and \Large William Stein \and \vspace{20ex}\\
185
%\begin{center}
186
%A formula that counts the prime numbers ...
187
%\includegraphics[width=.65\textwidth]{illustrations/Rk_50_2_100}
188
%... built out of their spectrum:
189
% cover = use fractal trace with params 0.3 -0.1 0 0
190
\includegraphics[width=1.1\textwidth]{illustrations/cover}
191
%$$
192
%f(t) = -\sum_{\text{prime powers }p^n}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))
193
%$$
194
%\end{center}
195
}
196

197
\usepackage{notes2bib}
198
\bibliographystyle{amsplain}
199

200
\usepackage{hyperref}
201

202

203
\newcommand{\todo}[1]{\par\vspace{1em}{\small---------\\{{\bf To be done:} #1}\\-----------}\par\vspace{1em}}
204

205

206
% More space between footnotes -- see
207
% http://www.latex-community.org/forum/viewtopic.php?f=44&t=22437
208
\setlength{\footnotesep}{1.2\baselineskip}
209

210

211
\usepackage{pdfpages}
212

213
\begin{document}
214

215
%\maketitle
216
\includepdf{cover}
217

218
% Remove parskip for toc
219
\setlength{\parskip}{0ex plus 0.5ex minus 0.2ex}
220

221
\tableofcontents
222

223

224
% Dutch style of paragraph formatting, i.e. no indents.
225
\setlength{\parskip}{1.3ex plus 0.2ex minus 0.2ex}
226

227
   \chapter*{\label{preface}Preface}
228
\addcontentsline{toc}{chapter}{Preface}
229
The \RH{} is one of the great unsolved problems of
230
mathematics, and the reward of \$1{,}000{,}000 of {\em Clay Mathematics
231
  Institute} prize money awaits the person who solves it. But---with
232
or without money---its resolution is crucial for our understanding of
233
the nature of numbers.
234

235

236
There are several full-length books recently published, written
237
for a general audience, that have the \RH{} as their main
238
topic.  A reader of these books will get a fairly rich picture of the
239
personalities engaged in the pursuit, and of related mathematical and
240
historical issues.\footnote{See, e.g., {\em The Music of the Primes} by Marcus du Sautoy (2003)
241
and {\em Prime Obsession: Bernhard Riemann and
242
     the Greatest Unsolved Problem in Mathematics} by John Derbyshire (2003).}
243

244
This is {\em not} the mission of the book that you now hold in your
245
hands. We aim---instead---to explain, in as direct a manner as
246
possible and with the least mathematical background required, what
247
this problem is all about and why it is so important. For even before
248
anyone proves this {\em hypothesis} to be true (or false!), just
249
getting familiar with it, and with some of the ideas behind it, is
250
exciting.  Moreover, this hypothesis is of crucial importance in a
251
wide range of mathematical fields; for example, it is a
252
confidence-booster for computational mathematics: even if the \RH{}
253
is never proved, assuming its truth (and that of closely
254
related hypotheses) gives us an excellent sense of
255
how long certain computer programs will take to run, which, in some
256
cases, gives us the assurance we need to initiate a computation that
257
might take weeks or even months to complete.
258

259

260
% I (William Stein) took this very picture in the Princeton common room in 2001!
261
\ill{sarnak}{0.4}{Peter Sarnak\index{Sarnak, Peter}}
262

263

264

265
Here is how the
266
Princeton mathematician Peter Sarnak describes the broad impact the
267
\RH{} has had\footnote{See page 222 of
268
{\em The Riemann hypothesis: the greatest unsolved problem in mathematics} by Karl Sabbagh (2002).}:
269
\begin{quote}
270
``The Riemann hypothesis is the central problem and it implies many,
271
many things. One thing that makes it rather unusual in mathematics
272
today is that there must be over five hundred papers---somebody should
273
go and count---which start `Assume the Riemann hypothesis\footnote{Technically,
274
a generalized version of the Riemann hypothesis (see
275
Chapter~\ref{ch:companions} below).},' and
276
the conclusion is fantastic. And those [conclusions] would then become
277
theorems ... With this one solution you would have proven five hundred
278
theorems or more at once.''
279
\end{quote}
280
% This is from page 106 of the Borwein book, which is just citing Sabbagh.
281

282

283

284
So, what {\it is} the \RH{}?  Below is a {\it first
285
  description} of what it is about. The task of our book is to develop
286
the following boxed paragraph into a fuller explanation and to
287
convince you of the importance and beauty of the mathematics it
288
represents.  We will be offering, throughout our book, a number of
289
different---but equivalent---ways of precisely formulating this
290
hypothesis (we display these in boxes).  When we say that two
291
mathematical statements are ``equivalent'' we mean that, given the
292
present state of mathematical knowledge, we can prove that if either
293
one of those statements is true, then the other is true. The endnotes
294
will guide the reader to the relevant mathematical literature.
295

296
      \begin{center}
297
       \shadowbox{
298
       \begin{minipage}{.98\textwidth}
299
       \hspace{.015\textwidth}
300
       \begin{minipage}{0.95\textwidth}
301
\mbox{}       \vspace{0.2ex}
302
       \begin{center}{\bf\large What {\em sort} of Hypothesis is the \RH{}?}\end{center}
303
       \medskip
304

305
Consider the seemingly innocuous series of questions:
306

307
\begin{quote}
308
\begin{itemize}
309
\item How many prime numbers  (2, 3, 5, 7, 11, 13, 17, $\ldots$) are there less than 100?
310
\item How many less than 10{,}000?
311
\item How many less than 1{,}000{,}000?
312
\end{itemize}
313

314
More generally, how many primes are there less than any given number $X$?
315
\end{quote}
316

317
Riemann\index{Riemann, Bernhard} proposed, a century and half ago, a strikingly
318
simple-to-describe ``very good approximation'' to the number of
319
primes less than a given number $X$. We now
320
see that if we could prove this {\em Hypothesis of Riemann} we would have
321
the key to a wealth of powerful mathematics. Mathematicians are eager
322
to find that key.
323

324

325
\vspace{1ex}
326
\end{minipage}\end{minipage}}
327
      \end{center}
328

329
% I got this from http://www.math.harvard.edu/history/bott/
330
% Barry says there  is a good picture in the book
331
% ''the Index Theorem'' that Yau published.
332
\ill{raoulbott}{0.35}{Raoul Bott (1923--2005).  Photograph by George M. Bergman, Courtesy of the Department of Mathematics, Harvard University.\label{fig:bott}}
333

334

335
The mathematician Raoul Bott\index{Bott, Raoul}---in giving advice to
336
a student---once said that whenever one
337
reads a mathematics book or article, or goes to a math lecture, one
338
should aim to come home with something very specific (it can be small,
339
but should be {\em specific}) that has application to a wider class of
340
mathematical problems than was the focus of the text or lecture.  If we
341
were to suggest some possible {\em specific} items to come home with,
342
after reading our book, three key phrases---{\bf prime numbers}\index{prime number}, {\bf
343
  square-root accurate}\index{square-root accurate}, and {\bf spectrum}\index{spectrum}---would head the
344
list. As for words of encouragement to think hard about the first of
345
these, i.e., prime numbers, we can do no better than to quote a
346
paragraph of Don Zagier's\index{Zagier, Don} classic 12-page exposition, {\em The First
347
  50 Million Prime Numbers}:
348

349
% I took this myself when we were hiking in Oberwolfach once...
350
\ill{zagier}{.35}{Don Zagier}
351

352

353
\begin{quote}
354
  ``There are two facts about the distribution of prime numbers of
355
  which I hope to convince you so overwhelmingly that they will be
356
  permanently engraved in your hearts. The first is that, [they are]
357
  the most arbitrary and ornery objects studied by mathematicians:
358
  they grow like weeds among the natural numbers, seeming to obey no
359
  other law than that of chance, and nobody can predict where the next
360
  one will sprout. The second fact is even more astonishing, for it
361
  states just the opposite: that the prime numbers exhibit stunning
362
  regularity, that there are laws governing their behavior, and that
363
  they obey these laws with almost military precision.''
364
\end{quote}
365

366

367
 Mathematics is flourishing.  Each year sees new exciting initiatives that extend and sharpen the applications of our subject,   new directions for  deep  exploration---and finer understanding---of  classical as well as very contemporary mathematical domains. We are aided in such explorations by the development of more and more powerful tools. We  see resolutions of centrally important questions.  And through all of this, we are treated to surprises and dramatic changes of viewpoint; in short: marvels.
368

369
 And what an array of wonderful techniques  allow mathematicians to do their work:  framing {\it definitions;} producing {\it constructions;} formulating {\it analogies relating disparate concepts, and disparate  mathematical fields}; posing {\it  conjectures,} that cleanly shape a possible way forward; and, the keystone: providing unassailable {\it proofs} of what is asserted,  the idea of doing such a thing being itself one of the great glories of mathematics.
370

371
    Number theory has its share of this bounty. Along with all these modes of theoretical work,  number theory also offers the pure joy of numerical experimentation, which---when it is going well---allows you to witness the intricacy of numbers and  profound inter-relations that cry out for explanation. It is striking how little you actually have to know in order to appreciate the  revelations offered by numerical exploration.
372

373
    Our book is meant to be an introduction to these pleasures. We take an experimental view of the fundamental ideas of the subject  buttressed by numerical computations, often displayed as graphs. As a result, our book is profusely illustrated, containing 131 figures,
374
diagrams, and pictures that accompany the text.\footnote{We created the
375
  figures using the free SageMath
376
  software (see \url{http://www.sagemath.org}).
377
Complete source code is available, which
378
can be used to recreate every diagram in this book
379
(see \url{http://wstein.org/rh}).
380
More adventurous readers can try
381
to experiment with the parameters for
382
the ranges of data illustrated, so as to get an even more
383
vivid sense of how the numbers ``behave.''
384
We hope that readers become inspired to carry out numerical experimentation,
385
which is becoming easier  as mathematical software
386
advances.}
387

388
  There are  few
389
mathematical equations in Part~\ref{part1}.  This first portion of our book is intended for readers who are generally interested in, or curious about, mathematical ideas, but who may not have studied any advanced topics. Part~\ref{part1} is devoted to conveying the essence of the Riemann Hypothesis and explaining why it is so intensely pursued. It requires a minimum of mathematical knowledge, and does not,  for example, use calculus,\index{calculus} although it would be helpful to know---or to learn on the run---the meaning of  the concept of {\it function}\index{function}. Given its mission, Part~\ref{part1}  is meant to be complete, in that it has a beginning, middle, and end.  We hope that our readers who only read Part~\ref{part1} will have enjoyed the excitement  of this important piece of mathematics.
390

391

392

393
    Part~\ref{part2} is for readers who have taken at least one class in calculus,\index{calculus} possibly a long time ago.  It is meant as a general preparation for the type of Fourier analysis\index{Fourier analysis} that will occur in the later parts. The notion of spectrum\index{spectrum} is key.
394

395
    Part~\ref{part3} is for readers who wish to see, more vividly, the link between the placement of prime numbers\index{prime number} and (what we call there) the {\it Riemann spectrum}.\index{Riemann spectrum}
396

397
    Part~\ref{part4} requires some familiarity with complex analytic functions, and returns to Riemann's original viewpoint.  In particular it  relates the ``Riemann spectrum'' that we discuss in Part~\ref{part3} to the {\it nontrivial zeroes\index{nontrivial zeroes} of the Riemann zeta function}.\index{zeta function} We also provide a brief sketch of the more standard route taken by published expositions of the Riemann Hypothesis.
398

399
      The end-notes are meant to link the text to references, but also to provide  more technical commentary with an increasing dependence on mathematical background in the later chapters.  References to the end notes will be in brackets.
400

401
      We wrote our book over the past decade, but devoted only one week to it each year (a week in August).    At the end of our work-week for the book, each year, we put our draft (mistakes and all) on line to get response from readers.{\footnote{See  {\url{http://library.fora.tv/2014/04/25/Riemann_Hypothesis_The_Million_Dollar_Challenge}} which is a lecture---and Q \& A---about the composition of this book.}} We therefore accumulated much important feedback, corrections, and requests from readers.\footnote{Including
402
Dan Asimov, Bret Benesh, Keren Binyaminov, Harald B\"{o}geholz, Louis-Philippe Chiasson, Keith Conrad, Karl-Dieter Crisman, Nicola Dunn, Thomas Egense, Bill Gosper,  Andrew Granville, Shaun Griffith, Michael J. Gruber, Robert Harron,  William R. Hearst III, David Jao, Fredrik Johansson, Jim Markovitch, David Mumford, James Propp, Andrew Solomon, Dennis Stein, and Chris Swenson.}
403
We thank them infinitely.
404

405

406
\part{The \RH{}\label{part1}}
407

408
\numberwithin{equation}{chapter}
409
\numberwithin{figure}{chapter}
410
\numberwithin{table}{chapter}
411

412
\chapter[Thoughts about numbers]{Thoughts about numbers: ancient, medieval, and modern}
413

414
If we are to believe the ancient Greek philosopher Aristotle\index{Aristotle}, the early
415
Pythagoreans thought that the principles governing Number are ``the
416
principles of all things,'' the concept of Number being more basic than
417
{\em earth, air, fire, or water}, which were according to ancient tradition
418
the four building blocks of matter. To think about Number is to get
419
close to the architecture of ``what is.''
420

421
So, how far along are we in our thoughts about numbers?
422

423

424

425
% This is from http://en.wikipedia.org/wiki/File:Frans_Hals_-_Portret_van_Ren%C3%A9_Descartes.jpg
426
% There is a higher resolution scan available there
427

428
\ill{descartes}{.35}{Ren\'e Descartes (1596--1650)}
429

430
The French philosopher and mathematician Ren\'e Descartes,\index{Descartes, Ren\'{e}} almost four
431
centuries ago, expressed the hope that there soon would be ``almost
432
nothing more to discover in geometry.'' Contemporary physicists dream
433
of a final theory.\footnote{See Weinberg's book {\em Dreams of a
434
    Final Theory: The Search for the Fundamental Laws of Nature}, by
435
  Steven Weinberg (New York: Pantheon Books, 1992)}  But despite its
436
venerability and its great power and beauty, the pure mathematics of
437
numbers may still be in the infancy of its development, with depths to
438
be explored as endless as the human soul, and {\it never} a final theory.
439

440
\ill{quixote}{.5}{Jean de Bosschere, ``Don Quixote and his Dulcinea del Toboso,'' from The History of Don Quixote De La Mancha, by Miguel De Cervantes. Trans. Thomas Shelton. George H. Doran Company, New York (1923).}
441

442
Numbers are obstreperous things. Don Quixote\index{Don Quixote} encountered this when he
443
requested that the ``bachelor'' compose a poem to his lady Dulcinea del
444
Toboso, the first letters of each line spelling out her name. The
445
``bachelor'' found\footnote{See Chapter IV of the Second Part of the {\em Ingenious Gentleman Don Quixote of La Mancha}.}
446

447
\begin{quote}
448
  ``a great difficulty in their composition because the number of
449
  letters in her name was $17$, and if he made four Castilian stanzas
450
  of four octosyllabic lines each, there would be one letter too many,
451
  and if he made the stanzas of five octosyllabic lines each, the ones
452
  called {\em d{\'e}cimas} or {\em redondillas,} there would be three
453
  letters too few...''
454
\end{quote}
455

456
``It must fit in, however you do it,'' pleaded Quixote\index{Don Quixote}, not willing to
457
grant the imperviousness of the number $17$ to division.
458

459
% http://www.jus.uio.no/sisu/don_quixote.miguel_de_cervantes/60.html#2068
460

461

462

463

464
{\em Seventeen} is indeed a prime number:\index{prime number} there is no way of factoring\index{factor}
465
it as the product of smaller numbers, and this accounts---people tell
466
us---for its occurrence in some phenomena of nature, as when
467
the seventeen-year cicadas all emerged to celebrate a ``reunion'' of some
468
sort in our fields and valleys.
469

470
\ill{cicada}{.35}{Cicadas emerge every 17 years}
471
% from http://biology.clc.uc.edu/steincarter/cicadas.htm
472

473

474

475

476
Prime numbers\index{prime number}, despite their {\em primary} position in our modern
477
understanding of numbers, were not specifically doted over in the
478
ancient literature before Euclid,\index{Euclid} at least not in the literature that
479
has been preserved. Primes are mentioned as a class of numbers in the
480
writings of Philolaus (a predecessor of Plato\index{Plato}); they are not mentioned
481
specifically in the Platonic dialogues, which is surprising
482
given the intense interest Plato had in mathematical developments; and
483
they make an occasional appearance in the writings of Aristotle, which
484
is not surprising, given Aristotle's emphasis on the distinction
485
between the {\em composite} and the {\em incomposite}. ``The
486
incomposite is prior to the composite,'' writes Aristotle\index{Aristotle} in Book 13 of
487
the Metaphysics.
488

489

490
Prime numbers\index{prime number} do occur, in earnest, in  Euclid's\index{Euclid}  {\it Elements}!
491

492

493
% from http://en.wikipedia.org/wiki/File:Euklid-von-Alexandria_1.jpg
494
\ill{euclid}{.35}{Euclid\index{Euclid}}
495

496

497
There is an extraordinary wealth of established truths about whole
498
numbers; these truths provoke sheer awe for the beautiful complexity
499
of prime numbers\index{prime number}. But each of the important new discoveries we make
500
gives rise to a further richness of questions, educated guesses,
501
heuristics, expectations, and unsolved problems.
502

503

504

505

506

507

508

509

510
\chapter{What are prime numbers?}\label{ch:what_are_primes}\index{prime number}
511

512
\noindent {\em Primes as atoms. }\index{atom} To begin from the beginning, think
513
of the operation of multiplication as a bond that ties numbers
514
together: the equation $2\times 3= 6$ invites us to imagine the number
515
$6$ as (a molecule, if you wish) built out of its smaller constituents
516
$2$ and $3$.  Reversing the procedure, if we start with a whole
517
number, say $6$ again, we may try to factor\index{factor} it (that is, express it as
518
a product of smaller whole numbers) and, of course, we would
519
eventually, if not immediately, come up with $6 = 2\times 3$ and
520
discover that $2$ and $3$ factor no further; the numbers $2$ and $3$,
521
then, are the indecomposable entities (atoms, if you wish) that
522
comprise our number.
523

524
\ill{factor_tree_6}{.3}{The number $6 = 2\times 3$}
525

526

527
By definition, a {\bf prime number}\index{prime number}
528
(colloquially, {\em a prime}) is a whole number, bigger than $1$, that
529
cannot be factored into a product of two smaller whole numbers. So,
530
$2$ and $3$ are the first two prime numbers. The next number along the
531
line, $4$, is not prime, for $4= 2\times 2$; the number after that,
532
$5$, is. Primes are, multiplicatively speaking, the building blocks
533
from which all numbers can be made. A fundamental theorem of
534
arithmetic tells us that any number (bigger than $1$) can be factored
535
as a product of primes, and the factorization is {\em unique} except
536
for rearranging the order of the primes.
537

538

539

540
For example, if you try to factor\index{factor} $12$ as a product of
541
two smaller numbers---ignoring the order of the factors---there are two
542
ways to begin to do this:
543
$$
544
  12 = 2 \times 6 \qquad\text{ and }\qquad   12 = 3 \times 4
545
$$
546
But neither of these ways is a full factorization of $12$, for both
547
$6$ and $4$ are not prime, so can be themselves factored, and in each
548
case after changing the ordering of the factors we arrive at:
549
$$
550
   12= 2 \times 2 \times 3.
551
$$
552

553
If you try to factor\index{factor} the number $300$, there are many
554
ways to begin:
555
$$
556
  300= 30\times 10\qquad\text{or}\qquad 300 = 6 \times 50
557
$$
558
and there are various other starting possibilities. But if you
559
continue the factorization (``climbing down'' any one of the possible
560
``factoring trees'') to the bottom, where every factor is a prime
561
number as in Figure~\ref{fig:factor300}, you always end up with the
562
same collection of prime numbers\index{prime number}\footnote{See Section~1.1 of
563
Stein's {\em Elementary Number Theory: Primes, Congruences, and Secrets} (2008) at \url{http://wstein.org/ent/}
564
for a proof of the ``fundamental theorem of arithmetic,''
565
which asserts that every positive
566
whole number factors uniquely as a product of primes.}:
567
                 $$300 = 2^2\times 3\times 5^2.$$
568

569
\illtwo{factor_tree_300_a}{factor_tree_300_b}{.47}
570
{Factor trees that illustrate the factorization of 300 as a product of primes.\label{fig:factor300}}
571

572
\ill{factor_tree_big}{1}{Factorization tree for the product of the primes up to $29$.\label{factor.tree.big}}%NOTE: in our version of the book there is an annoying
573
%battle between the footnote and this figure.  Maybe won't matter in
574
% CUP version.
575

576

577
The \RH{} probes the question: how intimately can we know
578
prime numbers\index{prime number}, those {\em atoms}\index{atom} of multiplication?  Prime numbers are
579
an important part of our daily lives.  For example, often when we visit a
580
website and purchase something online, prime numbers having hundreds of
581
decimal digits are used to keep our bank transactions private.  This
582
ubiquitous use to which giant primes are put depends upon a very
583
simple principle: it is much easier to multiply numbers together than
584
to factor\index{factor} them. If you had to factor, say, the number $391$ you might
585
scratch your head for a few minutes before discovering that $391$ is
586
$17\times 23$. But if you had to multiply $17$ by $23$ you would do it
587
straightaway.  Offer two primes, say, $P$ and $Q$ each with a few hundred
588
digits, to your computing machine and ask it to multiply them
589
together: you will get their product $N = P\times Q$ with its hundreds of digits
590
in about a microsecond. But present that number $N$ to any
591
current desktop computer, and ask it to factor $N$, and the computer
592
will (almost certainly) fail to do the task. See
593
\bibnote{{\bf How not to factor the numerator of a Bernoulli number:}
594

595
As mentioned in Chapter~\ref{ch:envision}, the coefficient $B_k$ of the linear
596
term of the polynomial
597
$$
598
S_k(n)= 1^k+2^k+3^k+\dots +(n-1)^k
599
$$
600
is (up to sign) the $k$-th {\bf Bernoulli number}.  These numbers are
601
rational numbers and, putting them in lowest terms, their numerators
602
play a role in certain problems, and their denominators in
603
others. (This is an amazing story, which we can't go into here!)
604

605
One of us (Barry Mazur) in the recent article {\em How can we
606
  construct abelian Galois extensions of basic number fields?} (see
607
\url{http://www.ams.org/journals/bull/2011-48-02/S0273-0979-2011-01326-X/})
608
found himself dealing (for various reasons) with the fraction
609
$-B_{200}/400$, where $B_{200}$ is the two-hundredth Bernoulli number.
610
The numerator of this fraction is quite large: it is---hold your
611
breath---\ $$389\cdot 691\cdot5370056528687 \ \ \ \ \text{times this
612
204-digit number:}
613
$$\vskip 10pt
614
$N:=3452690329392158031464109281736967404068448156842396721012$\newline
615
$\mbox{}\,\,\,\qquad 9920642145194459192569415445652760676623601087497272415557$\newline
616
$\mbox{}\,\,\,\qquad 0842527652727868776362959519620872735612200601036506871681$\newline
617
$\mbox{}\,\,\,\qquad 124610986596878180738901486527$
618
\vskip 10pt  and he {\it incorrectly asserted} that it was prime.  Happily, Bartosz Naskr\c{e}cki spotted this error: our $204$-digit $N$ is {\it not} prime.
619
% Note, according to http://perso.telecom-paristech.fr/~cappe/local/latex/accents.pdf
620
% the accent I get with \c{e} is ``NOTE: This is technically incorrect.  The accent should be reversed in orientation,
621
% but there is currently no such accent in plain TEX or LATEX.''
622

623
How did he know this?  By using the most basic test in the repertoire
624
of tests that we have available to check to see whether a number is
625
prime: we'll call it the ``{\bf Fermat $2$-test.}'' We'll first give a
626
general explanation of this type of test before we show how $N$ {\it
627
  fails the Fermat $2$-test.}
628

629

630
The starting idea behind this test is the famous result known as {\em
631
  Fermat's Little Theorem} where the ``little'' is meant to
632
alliteratively distinguish it from you-know-what.
633
\begin{theorem}[Fermat's Little Theorem]
634
If $p$ is a prime number, and $a$ is any number relatively prime to $p$ then $a^{p-1} - 1$ is divisible by $p$.
635
\end{theorem}
636
A good exercise is to try to prove this, and a one-word hint that might lead you to one of the many proofs of it is
637
{\em induction}.\footnote{Here's the proof:
638
$$(N+1)^p \equiv N^p +1 \equiv (N+1)\ \mod p,$$
639
where the first equality is the binomial theorem and the second
640
equality is induction.}
641

642
 Now we are going to use this as a criterion, by---in effect---restating it in what logicians would call  its {\it contrapositive}:
643
 \begin{theorem}[The Fermat $a$-test]
644
If $M$ is a positive integer, and $a$ is any number relatively prime to $M$ such that  $a^{M-1} - 1$ is {\it not} divisible by $M$, then $M$ is {\it not} a prime.
645
\end{theorem}
646

647
% sage: N =345269032939215803146410928173696740406844815684239672101299206421451944591925694154456527606766236010874972724155570842527652727868776362959519620872735612200601036506871681124610986596878180738901486527; z = Mod(2, N)^(N-1) - 1; z
648
Well, Naskr\c{e}cki computed $2^{N-1}-1$  (for the $204$-digit $N$ above)   and saw that
649
it is {\it not} divisible\footnote{The number $2^{N-1}-1$ has a residue after division by $N$ of\\ $3334581100595953025153969739282790317394606677381970645616725285996925$\newline$
650
 6610000568292727335792620957159782739813115005451450864072425835484898$\newline$
651
 565112763692970799269335402819507605691622173717318335512037457$.} by $N$.
652
Ergo, our $N$ fails the  Fermat $2$-test so is {\it not} prime.
653

654
But then, given that it is so ``easy'' to see that $N$ is not prime, a
655
natural question to ask is: what, in fact, is its prime factorization?
656
This---it turns out---isn't so easy to determine;
657
Naskr\c{e}cki devoted $24$ hours of computer time setting standard  factorization
658
algorithms on the task, and that was not sufficient time to resolve
659
the issue.  The factorization of the numerators of general Bernoulli
660
numbers is the subject of a very interesting website run by Samuel
661
Wagstaff (\url{http://homes.cerias.purdue.edu/~ssw/bernoulli}).
662
Linked to this web page one finds
663
(\url{http://homes.cerias.purdue.edu/~ssw/bernoulli/composite}) which gives
664
a list of composite numbers whose factorizations have resisted all
665
attempts to date.  The two-hundredth Bernoulli number is 12th on the list.
666

667
 The page
668
\url{http://en.wikipedia.org/wiki/Integer_factorization_records} lists
669
record challenge factorization, and one challenge that was completed
670
in 2009 involves a difficult-to-factor number with 232 digits; its
671
factorization was completed by a large team of researchers and took around
672
2000 years of CPU time.  This convinced us that
673
with sufficient motivation  it would be possible to factor $N$, and so we
674
asked some leaders in the field to try.   They succeeded!
675
\begin{quote}\sf
676
Factorisation of B200
677

678
by Bill Hart on 4 Aug 05, 2012 at 07:24pm
679

680
We are happy to announce the factorization of the numerator of the 200th
681
Bernoulli number:
682

683
\begin{align*}
684
N &= 389 \cdot 691 \cdot 5370056528687 \cdot c_{204}\\
685
c_{204} &= p_{90} \cdot p_{115}\\
686
p_{90}  &= 149474329044343594528784250333645983079497454292\\
687
        &= 838248852612270757617561057674257880592603\\
688
p_{115} &= 230988849487852221315416645031371036732923661613\\
689
        &= 619208811597595398791184043153272314198502348476\\
690
        &= 2629703896050377709
691
\end{align*}
692

693
The factorization of the 204-digit composite was made possible with the help
694
of many people:
695
\begin{itemize}
696
\item William Stein and Barry Mazur challenged us to factor this number.
697
\item Sam Wagstaff maintains a table of factorizations of numerators of Bernoulli
698
  numbers at \url{http://homes.cerias.purdue.edu/~ssw/bernoulli/bnum}. According
699
  to this table, the 200th Bernoulli number is the 2nd smallest index with
700
  unfactored numerator (the first being the 188th Bernoulli number).
701
\item Cyril Bouvier tried to factor the c204 by ECM up to 60-digit level, using
702
  the TALC cluster at Inria Nancy - Grand Est.
703
\item {\sf yoyo@home} tried to factor the c204 by ECM up to 65-digit level, using the
704
  help of many volunteers of the distributed computing platform
705
  \url{http://www.rechenkraft.net/yoyo/}.
706
 After ECM was unsuccessful, we decided to factor the c204 by GNFS.
707
\item Many people at the Mersenne forum helped for the polynomial selection. The best
708
  polynomial was found by Shi Bai, using his implementation of Kleinjung's
709
  algorithm in CADO-NFS:
710
  \url{http://www.mersenneforum.org/showthread.php?p=298264#post298264}.
711
 Sieving was performed by many volunteers using {\sf NFS@home}, thanks to Greg
712
  Childers. See \url{http://escatter11.fullerton.edu/nfs} for more details
713
  about {\sf NFS@home}.
714
  This factorization showed that such a distributed effort might be feasible for a
715
  new record GNFS factorization, in particular for the polynomial selection.
716
  This was the largest GNFS factorization performed by {\sf NFS@home} to date,
717
  the second largest being $2^{1040}+1$ at 183.7 digits.
718
\item Two independent runs of the filtering and linear algebra were done: one by Greg
719
  Childers with msieve (\url{http://www.boo.net/~jasonp/qs.html}) using a 48-core
720
  cluster made available by Bill Hart, and one by Emmanuel Thom\'{e} and Paul
721
  Zimmermann with CADO-NFS (\url{http://cado-nfs.gforge.inria.fr/}), using the Grid
722
  5000 platform.
723
\item The first linear algebra run to complete was the one with CADO-NFS, thus we
724
  decided to stop the other run.
725
\end{itemize}
726

727
Bill Hart
728
\end{quote}
729

730
We verify the factorization above in SageMath as follows: \\
731

732
{\sf
733
sage: p90 = 1494743290443435945287842503336459830794974542928382\\
734
48852612270757617561057674257880592603\\
735
sage: p115 = 230988849487852221315416645031371036732923661613619\\
736
2088115975953987911840431532723141985023484762629703896050377709\\
737
sage: c204 = p90 * p115\\
738
sage: 389 * 691 * 5370056528687 * c204 == -numerator(bernoulli(200))\\
739
True\\
740
sage: is\_prime(p90), is\_prime(p115), is\_prime(c204)\\
741
(True, True, False)
742
}
743
} and \bibnote{\label{bibnote:factor}
744
Given an integer $n$, there are
745
many algorithms available for trying to write $n$ as a product of
746
prime numbers.  First we can apply {\em trial division}, where we
747
simply divide $n$ by each prime $2, 3, 5, 7, 11, 13, \ldots$ in turn,
748
and see what small prime factors  we find (up to a few digits).  After
749
using this method to eliminate as many primes as we have patience to
750
eliminate, we typically next turn to a technique called {\em Lenstra's
751
  elliptic curve method}, which allows us to check $n$ for
752
divisibility by bigger primes (e.g., around 10--15 digits).  Once
753
we've exhausted our patience using the elliptic curve method, we would
754
next hit our number with something called the {\em quadratic sieve},
755
which works well for factoring numbers of the form $n=pq$, with $p$
756
and $q$ primes of roughly equal size, and $n$ having less than 100
757
digits (say, though the 100 depends greatly on the implementation).
758
All of the above algorithms---and then some---are implemented in SageMath,
759
and used by default when you type {\tt factor(n)} into SageMath. Try
760
typing {\tt factor(some number, verbose=8)} to see for yourself.
761

762
If the quadratic sieve fails, a final recourse is to run the {\em
763
  number field sieve} algorithm, possibly on a supercomputer.  To give
764
a sense of how powerful (or powerless, depending on perspective!)  the
765
number field sieve is, a record-setting factorization of a general
766
number using this algorithm is the factorization of a 232 digit number
767
called RSA-768 (see \url{https://eprint.iacr.org/2010/006.pdf}):
768

769
$
770
n = 12301866845301177551304949583849627207728535695953347921973224521\\
771
517264005072636575187452021997864693899564749427740638459251925573263\\
772
034537315482685079170261221429134616704292143116022212404792747377940\\
773
80665351419597459856902143413
774
$
775
\par\noindent{}which factors as $pq$, where\par\noindent{}
776
$
777
 p = 334780716989568987860441698482126908177047949837137685689124313889\\
778
82883793878002287614711652531743087737814467999489
779
$
780
\par\noindent{}and\par\noindent{}
781
$
782
q = 367460436667995904282446337996279526322791581643430876426760322838\\
783
15739666511279233373417143396810270092798736308917.
784
$
785
\par\noindent{}We encourage you to try to
786
factor $n$ in SageMath, and see that it fails.
787
SageMath does not yet include an implementation of the
788
number field sieve algorithm, though there are some free implementations
789
currently available (see \url{http://www.boo.net/~jasonp/qs.html}).
790
}.
791

792
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
793

794

795

796

797
%We illustrate this in Sage:
798
% \begin{lstlisting}[]
799
% sage: n = ZZ.random_element(10^500)
800
% sage: m = ZZ.random_element(10^500)
801
% sage: timeit('n*m')
802
% 1.51 microseconds per loop
803
% sage: timeit('n*m')
804
% 2.33 microseconds per loop
805
% sage: factor(n*m)
806
% [wait forever]
807
% \end{lstlisting}}.
808
The safety of much
809
encryption depends upon this ``guaranteed'' failure!\footnote{Nobody has ever
810
  published a {\em proof} that there is no fast way to factor
811
  integers.  This is an article of ``faith'' among some
812
  cryptographers.}
813

814
If we were latter-day number-phenomenologists we might revel in the
815
discovery and proof that
816
$$
817
  p=2^{43,112,609}-1 = 3164702693\ldots\ldots\text{\small (millions of digits)}\ldots\ldots6697152511
818
$$
819
is a prime number\index{prime number}, this number having $12{,}978{,}189$ digits!  This
820
prime, which was discovered on August 23, 2008 by the
821
GIMPS project,\footnote{The GIMPS project website is \url{http://www.mersenne.org/}.}
822
is the first prime ever found with more than ten million digits,
823
though it is not the largest prime currently known.
824

825
Now $2^{43,112,609}-1$ is quite a hefty number! Suppose someone came
826
up to you saying ``surely $p = 2^{43,112,609}-1$ is the largest prime
827
number!'' (which it is not). How might you convince that person that
828
he or she is wrong without explicitly exhibiting a larger prime?
829
\bibnote{We can use SageMath (at \url{http://sagemath.org}) to quickly compute the ``hefty
830
number'' $p = 2^{43,112,609}-1$.  Simply type
831
{\tt p = 2\^{}43112609 - 1} to instantly compute $p$.
832
In what sense have we {\em computed} $p$?  Internally, $p$ is now
833
stored in base $2$ in the computer's memory; given the special form of
834
$p$ it is not surprising that it took little time to compute.  Much more
835
challenging is to compute all the base 10 digits of $p$, which takes
836
a few seconds: {\tt d = str(p)}.
837
Now type {\tt d[-50:]} to see the last 50 digits of $p$.
838
To compute the sum $58416637$ of the digits of $p$ type {\tt sum(p.digits())}.
839
}
840

841

842
Here is a neat---and, we hope, convincing---strategy to show there are
843
prime numbers\index{prime number} larger than $p = 2^{43,112,609} - 1$. Imagine
844
forming the following humongous number: let $M$ be the product of all
845
prime numbers\index{prime number} up to and including $p = 2^{43,11,2609} - 1$.  Now go
846
one further than $M$ by taking the next number $N=M+1$.
847

848

849
OK, even though this number $N$ is wildly large, it is either a prime
850
number itself---which would mean that there would indeed be a prime
851
number larger than $p=2^{43,112,609} - 1$, namely $N$; or in any event it is
852
surely divisible by some prime number\index{prime number}, call it $P$.
853

854
Here, now, is a way of seeing that this $P$ is bigger than $p$: Since
855
every prime number smaller than or equal to $p$ divides $M$, these
856
prime numbers cannot divide $N= M+1$ (since they divide $M$ evenly, if
857
you tried to divide $N=M+1$ by any of them you would get a remainder
858
of $1$).  So, since $P$ does divide $N$ it must not be any of the
859
smaller prime numbers: $P$ is therefore a prime number bigger than $p=
860
2^{43,112,609}-1$.
861

862
This strategy, by the way, is not very new: it is, in fact, well over
863
two thousand years old, since it already occurred in Euclid's\index{Euclid} {\em
864
  Elements}\index{Euclid}. The Greeks did know that there are infinitely many prime
865
numbers and they showed it via the same method as we showed that our
866
$p = 2^{43,112,609} - 1$ is not the largest prime number.
867

868
Here is the argument again, given very succinctly:
869
Given primes $p_1, \ldots, p_m$, let $n=p_1 p_2 \cdots p_m +
870
1$.  Then $n$ is divisible by some prime not equal to any $p_i$,
871
so there are more than $m$ primes.
872

873
You can think of this strategy as a simple game that you can
874
play. Start with the bag of prime numbers\index{prime number}
875
that contains just the two primes $2$ and $3$. Now each ``move'' of the game
876
consists of multiplying together all the primes you have in your bag
877
to get a number $M$, then adding $1$ to $M$ to get the larger
878
number $N=M+1$, then factoring $N$\index{factor} into prime number factors, and then
879
including all those new prime numbers\index{prime number} in your bag. Euclid's\index{Euclid} proof
880
gives us that we will---with each move of this game---be finding more
881
prime numbers: the contents of the bag will increase. After, say,
882
a million moves our bag will be guaranteed to contain more than
883
a million prime numbers.\index{prime number}
884

885
For example, starting the game with your bag containing
886
only one prime number $2$, here is how your bag grows with
887
successive moves of the game:
888

889
$\mbox{}\qquad\{2\}$
890
\newline
891
$\mbox{}\qquad\{2,3\}$
892
\newline
893
$\mbox{}\qquad\{2,3, 7\}$
894
\newline
895
$\mbox{}\qquad\{2,3, 7, 43\}$
896
\newline
897
$\mbox{}\qquad\{2,3, 7, 43, 13, 139\}$
898
\newline
899
$\mbox{}\qquad\{2,3, 7, 43, 13, 139, 3263443\}$
900
\newline
901
$\mbox{}\qquad\{2,3, 7, 43, 13, 139, 3263443,  547, 607, 1033, 31051\}$
902
\newline
903
$\mbox{}\qquad\{2,3, 7, 43, 13, 139, 3263443,  547, 607, 1033, 31051, 29881, 67003,\\
904
\mbox{}\qquad\qquad\qquad 9119521, 6212157481\}$
905
\newline
906
\mbox{}\qquad{}etc.\footnote{The sequence of prime numbers we find by this
907
procedure is discussed in more detail with references
908
in the Online Encyclopedia of Integer Sequences \url{http://oeis.org/A126263}.}
909

910
Though there are infinitely many primes, explicitly finding large primes is a
911
major challenge.  In the 1990s, the Electronic Frontier Foundation\index{Electronic Frontier Foundation}
912
\url{http://www.eff.org/awards/coop} offered a \$100{,}000 cash reward
913
to the first group to find a prime with at least 10{,}000{,}000 decimal
914
digits (the group that found the record prime $p$ above won this prize\footnote{%
915
See \url{http://www.eff.org/press/archives/2009/10/14-0}.
916
Also the 46th Mersenne prime was declared by {\em Time Magazine} to be
917
one of the top 50 best ``inventions'' of 2008: \url{http://www.time.com/time/specials/packages/article/0,28804,1852747_1854195_1854157,00.html}.}), and offers
918
another \$150{,}000 cash prize to the first group to find a prime with
919
at least 100{,}000{,}000 decimal digits.
920

921

922
The number $p= 2^{43,112,609} - 1$  was for a time
923
the largest prime known, where by
924
``know'' we mean that we know it so explicitly that we can {\em
925
  compute} things about it.  For example, the last two digits of $p$
926
are both 1 and the sum of the digits of $p$ is 58,416,637.\label{sumdigits}  Of course $p$ is not
927
the largest prime number\index{prime number} since there are infinitely many primes, e.g.,
928
the next prime $q$ after $p$ is a prime.  But there is no known way
929
to efficiently compute anything interesting about $q$.  For example,
930
what is the last digit of $q$ in its decimal expansion?
931

932

933
\chapter{``Named'' prime numbers}\label{ch:namedprimes}\index{prime number}
934
Prime numbers come in all sorts of shapes, some more convenient to
935
deal with than others.  For example, the number we have been talking
936
about, $$p = 2^{43,112,609}-1,$$ is given to us, by its very notation,
937
in a striking form; i.e., {\it one less than a power of $2$}. It is no
938
accident that the largest ``currently known'' prime number\index{prime number} has such a
939
form.  This is because there are special techniques we can draw on to
940
show primality of a number, if it is one less than a power of $2$
941
and---of course---if it also happens to be prime.  The primes of that
942
form have a name, {\it Mersenne Primes},\index{Mersenne primes} as do the primes that are
943
{\it one more than a power of $2$}, those being called {\it Fermat
944
  Primes}.
945
\bibnote{
946
In contrast to the situation with factorization, testing integers of
947
this size (e.g., the primes $p$ and $q$) for primality is relatively
948
easy.  There are fast algorithms that can tell whether or not any
949
random thousand digit number is prime in a fraction of
950
second. Try for yourself using the SageMath  command {\tt is\_prime}.
951
For example, if $p$ and $q$ are as in endnote~\ref{bibnote:factor}, then
952
{\tt is\_prime(p)} and {\tt is\_prime(q)} quickly output True
953
and {\tt is\_prime(p*q)} outputs False.  However, if you type
954
{\tt factor(p*q, verbose=8)}  you can watch as SageMath tries
955
forever and fails to factor $pq$.}
956

957
Here are two exercises that you might try to do, if this is your first
958
encounter with primes that differ from a power of $2$ by $1$:
959

960
\begin{enumerate}
961
\item Show that if a number of the form $M=2^n-1$ is prime, then the
962
  exponent $n$ is also prime. [Hint: This is equivalent to proving
963
  that if $n$ is composite, then $2^n-1$ is also composite.]
964
  For example: $ 2^2-1= 3, 2^3-1= 7$ are
965
  primes, but $2^4-1=15$ is not.  So {\it Mersenne primes} are numbers
966
  that are
967
  \begin{itemize}
968
    \item of the form $\displaystyle 2^\text{prime number} -1,$ and
969
    \item are themselves prime numbers.
970
  \end{itemize}
971

972
\item Show that if a number of the form $F=2^n+1$ is prime, then the
973
  exponent $n$ is a power of two.  For example: $ 2^2+1= 5$ is prime,
974
  but $2^3+1= 9$ is not.  So {\it Fermat primes} are numbers that
975
  are
976
  \begin{itemize}
977
     \item of the form $\displaystyle 2^\text{power of two} + 1,$ and
978
     \item are themselves prime numbers.
979
  \end{itemize}
980
\end{enumerate}
981

982

983
Not all numbers of the form $2^{\rm prime\ number} -1$ or of the form
984
$2^{\rm power\ of\ two} +1$ are prime. We currently know only finitely
985
many primes of either of these forms. How we have come to know what we
986
know is an interesting tale.  See, for example, \url{http://www.mersenne.org/}.
987

988
%\ill{sieve_boxes_100}{0.7}{Sieve of Eratosthenes\index{Eratosthenes}\label{fig:erat}}
989

990
\chapter{Sieves}\label{ch:sieves}
991

992
Eratosthenes,\index{Eratosthenes} the mathematician from Cyrene (and later, librarian at
993
Alexandria) explained how to {\em sift} the prime numbers\index{prime number} from the
994
series of all numbers: in the sequence of numbers,
995
$$2\ \ 3\ \ 4 \ \ 5\ \ 6\ \ 7\ \ 8\ \ 9\ \ 10\ \ 11
996
\ \ 12\ \ 13\ \ 14\ \ 15\ \ 16\ \ 17\ \ 18\ \ 19\ \ 20\ \ 21\ \ 22\ \ 23\ \ 24\ \ 25\ \ 26,$$
997
for example, start by circling the $2$ and crossing out all the other
998
multiples of $2$.  Next, go back to the beginning of our sequence of
999
numbers and circle the first number that is neither circled nor
1000
crossed out (that would be, of course, the $3$), then cross out all
1001
the other multiples of $3$.  This gives the pattern: go back again to
1002
the beginning of our sequence of numbers and circle the first number
1003
that is neither circled nor crossed out; then cross out all of its
1004
other multiples.  Repeat this pattern until all the numbers in our
1005
sequence are either circled, or crossed out, the circled ones being
1006
the primes.
1007

1008
\includegraphics[width=\textwidth]{illustrations/circled_primes}
1009

1010
In Figures~\ref{fig:erat2}--\ref{fig:erat2357} we use the primes $2$,
1011
$3$, $5$, and finally $7$ to sieve out the primes up to 100, where instead of
1012
crossing out multiples we grey them out, and instead of circling
1013
primes we color their box red.
1014

1015
\ill{sieve100-2}{.8}{Using the prime 2 to sieve for primes up to 100\label{fig:erat2}}
1016
Since all the even numbers greater than two are
1017
eliminated as being composite numbers and not primes they appear as
1018
gray in Figure~\ref{fig:erat2}, but none of the odd numbers are eliminated so they
1019
still appear in white boxes.
1020

1021
\ill{sieve100-3}{.8}{Using the primes 2 and 3 to sieve for primes up to 100\label{fig:erat23}}
1022
\ill{sieve100-5}{.8}{Using the primes 2, 3, and 5 to sieve for primes up to 100\label{fig:erat235}}
1023
Looking at Figure~\ref{fig:erat235}, we see that for all but
1024
three numbers (49, 77, and 91) up to 100 we have
1025
(after sieving by 2,3, and 5) determined
1026
which are primes and which composite.
1027

1028
\ill{sieve100-7}{.8}{Using the primes 2, 3, 5, and 7 to sieve for primes up to 100\label{fig:erat2357}}
1029

1030
Finally, we see in Figure~\ref{fig:erat2357} that
1031
sieving by 2, 3, 5, and 7 determines  all primes up to $100$.
1032
See \bibnote{In Sage, the
1033
function {\tt prime\_range} enumerates primes in a range. For example,
1034
{\tt prime\_range(50)}
1035
outputs the primes up to $50$
1036
and {\tt prime\_range(50,100)}
1037
outputs the primes between $50$ and $100$.
1038
Typing {\tt prime\_range(10\^{}8)}
1039
in SageMath enumerates the primes up to a hundred million in around a second.
1040
You can also enumerate primes up to a billion by typing {\tt v=prime\_range(10\^{}9)}, but this will use
1041
a large amount of memory, so be careful not to crash your computer
1042
if you try this.  You can see that there are $\pi(10^9) = 50{,}847{,}534$ primes up to a billion
1043
by then typing {\tt len(v)}.
1044
You can also compute $\pi(10^9)$ directly, without enumerating all primes,
1045
using the command {\tt prime\_pi(10\^{}9)}.
1046
This is much faster since it uses some clever counting tricks to find the
1047
number of primes without actually listing them all.
1048

1049
In Chapter~\ref{sec:tinkering} we tinkered with the staircase of
1050
primes by first counting both primes and prime powers.
1051
There are comparatively few prime powers that are not prime.
1052
Up to $10^8$, only $1{,}405$ of the $5{,}762{,}860$ prime powers are not themselves primes.
1053
To see this, first enter
1054
{\tt a~=~prime\_pi(10\^{}8); pp~=~len(prime\_powers(10\^{}8))}.
1055
Typing {\tt (a,~pp,~pp-a)} then outputs the triple
1056
{\tt (5761455, 5762860, 1405)}.} for more about explicitly
1057
enumerating primes using a computer.
1058

1059
% GIVEN WHAT IS ABOVE, THIS IS SILLY.
1060
%Especially if you have had little experience with math, may I suggest
1061
%that you actually follow Eratosthenes'\index{Eratosthenes} lead, and perform the repeated
1062
%circling and crossing-out to watch the primes up to 100 emerge, intriguingly
1063
%staggered through our sequence of numbers,
1064
%$$2\ \ 3\ \ \bullet \ \ 5\ \ \bullet\ \ 7\ \ \bullet\ \ \bullet\ \ \bullet\ \ 11
1065
%\ \ \bullet\ \ 13\ \ \bullet\ \ \bullet\ \ \bullet\ \ 17\ \ \bullet\ \ 19\ \ \bullet\ \ \bullet\ \
1066
%\bullet\
1067
%\ 23\ \ \bullet\ \ \bullet\ \ \bullet\ \  \bullet\ \ \bullet\ 29,\dots $$
1068
%
1069

1070
%
1071

1072

1073
\chapter[Questions about primes]{Questions about primes that any person might ask}\label{ch:questions}
1074

1075
We become quickly stymied when we ask quite elementary questions about
1076
the spacing of the infinite series of prime numbers.\index{prime number}
1077

1078
For example, {\em are there infinitely many pairs of primes whose
1079
  difference is $2$?}  The sequence of primes seems to be rich in such
1080
pairs $$5-3 =2,\ \ \ 7-5=2,\ \ \ 13-11=2,\ \ \ 19-17 =2,$$ and we know
1081
that there are loads more such pairs\footnote{For example, according to
1082
\url{http://oeis.org/A007508} there are
1083
$10{,}304{,}185{,}697{,}298$ such pairs less than
1084
$10{,}000{,}000{,}000{,}000{,}000$.} but the answer to our question,
1085
{\em are there infinitely many?}, is not known. The conjecture that there are infinitely many such pairs of primes (``twin primes" as they are called) is known as the {\it Twin Primes Conjecture}.
1086
{\em Are there
1087
  infinitely many pairs of primes whose difference is $4$, $6$?}  Answer:
1088
equally unknown.
1089
Nevertheless there is very exciting recent work in this direction, specifically,
1090
Yitang Zhang\index{Zhang, Yitang} proved that there are infinitely many pairs of primes that differ
1091
by no more than $7\times 10^7$. For a brief account of
1092
Zhang's\index{Zhang, Yitang} work, see the Wikipedia entry \url{http://en.wikipedia.org/wiki/Yitang_Zhang}.
1093
Many exciting results have followed Zhang's\index{Zhang, Yitang} breakthrough;
1094
we know now, thanks to results\footnote{See \url{https://www.simonsfoundation.org/quanta/20131119-together-and-alone-closing-the-prime-gap/} and for further work
1095
\url{http://michaelnielsen.org/polymath1/index.php?title=Bounded_gaps_between_primes}.} of James Maynard and
1096
others, that there are infinitely many pairs of primes
1097
that differ by no more than $246$.
1098

1099
{\em Is every even number greater than $2$ a sum of
1100
  two primes?}  Answer: unknown. {\em Are there infinitely many primes
1101
  which are $1$ more than a perfect square?}  Answer: unknown.
1102

1103
\ill{zhang}{0.25}{Yitang\index{Zhang, Yitang} Zhang\label{fig:zhang}}
1104

1105

1106

1107
Remember the Mersenne prime $p= 2^{43,112,609}-1$ from
1108
Chapter~\ref{ch:namedprimes} and how we
1109
proved---by pure thought---that there must be
1110
a prime $P$ larger than $p$? Suppose, though, someone
1111
asked us whether there was a {\it Mersenne Prime} larger than this
1112
$p$: that is, {\em is there a prime number\index{prime number} of the form $$2^{\rm some\
1113
  prime\ number}-1$$ bigger than $p= 2^{43,112,609}-1$?} Answer:
1114
For many years we did not know; however, in 2013 Curtis Cooper discovered
1115
the even bigger Mersenne prime $2^{57,885,161}-1$, with a whopping
1116
17,425,170 digits!  Again we can ask if there is a Mersenne
1117
prime larger than Cooper's.  Answer: we do not know.
1118
It is possible that there are infinitely many Mersenne primes
1119
but we're far from being able to answer such questions.
1120

1121
% from http://www.york.ac.uk/depts/maths/histstat/people/mersenne.gif
1122
\ill{mersenne}{.3}{Marin Mersenne (1588--1648)}
1123

1124

1125

1126
{\em Is there some neat formula giving the next prime?} More
1127
specifically, {\em if I give you a number $N$, say $N=$ one million,
1128
  and ask you for the first number after $N$ that is prime, is there a
1129
  method that answers that question without, in some form or other,
1130
  running through each of the successive odd numbers after $N$ rejecting
1131
  the nonprimes until the first prime is encountered?}  Answer:
1132
unknown.
1133

1134

1135
%%%%%%%%%%%%%%%
1136

1137
One can think of many ways of ``getting at'' some understanding of the
1138
placement of prime numbers\index{prime number} among all numbers.  Up to this point we have
1139
been mainly just counting them, trying to answer the question ``how
1140
many primes are there up to $X$?''  and we have begun to get some feel
1141
for the numbers behind this question, and especially for the current
1142
``best guesses'' about estimates.
1143

1144

1145
What is wonderful about this subject is that people attracted to it
1146
cannot resist asking questions that lead to interesting, and sometimes
1147
surprising numerical experiments. Moreover, given our current state of
1148
knowledge, many of the questions that come to mind are still
1149
unapproachable: we don't yet know enough about numbers to answer them.
1150
But {\it asking interesting questions} about the mathematics that you
1151
are studying is a high art, and is probably a necessary skill to
1152
acquire, in order to get the most enjoyment---and understanding---from
1153
mathematics.  So, we offer this challenge to you:
1154

1155
Come up with your own question about primes that
1156
 \begin{itemize}
1157
 \item     is interesting to you,
1158
  \item    is not a question whose answer is known to you,
1159
 \item     is not a question that you've seen before; or at least not exactly,
1160
  \item    is a question about which you can begin to make numerical investigations.
1161
 \end{itemize}
1162
If you are having trouble coming up with a question, read on for more
1163
examples that  provide further motivation.
1164

1165
\chapter{Further questions about primes\label{ch:further}}
1166

1167
In celebration of Yitang Zhang's\index{Zhang, Yitang} recent result, let us consider more of the numerics
1168
regarding {\em gaps} between one prime and the next, rather than the tally
1169
of all primes. Of course, it is no fun at all to try to guess how many
1170
pairs of primes $p, q$ there are with gap $q-p$ equal to a fixed odd
1171
number, since the difference of two odd numbers is even, as
1172
in Chapter~\ref{ch:questions}.  The fun,
1173
though, begins in earnest if you ask for pairs of primes with
1174
difference equal to $2$ (these being called {\em twin primes}) for it
1175
has long been guessed that there are infinitely many such pairs of
1176
primes, but no one has been able to prove this yet.
1177

1178
%NOTE: I omitted the commas in the numbers in the displayed formula.
1179
As of 2014, the largest known twin primes are
1180
$$3756801695685\cdot 2^{666669} \pm 1.$$
1181
These enormous primes, which were found in 2011, have $200{,}700$ digits each.\footnote{See \url{http://primes.utm.edu/largest.html\#twin} for the top ten
1182
largest known twin primes.}
1183

1184

1185
Similarly, it is interesting to consider primes $p$ and $q$
1186
with difference $4$, or $8$, or---in fact---any even number
1187
$2k$. That is, people have guessed that there are infinitely many
1188
pairs of primes with difference $4$, with difference $6$, etc.\ but
1189
none of these guesses have yet been proved.
1190

1191

1192

1193

1194

1195
So, define
1196
$$
1197
  \Gap_{k}(X)
1198
$$
1199
to be the number of pairs of {\em consecutive} primes $(p,q)$ with
1200
$q<X$ that have ``gap $k$'' (i.e., such that their difference $q-p$ is
1201
$k$).  Here $p$ is a prime, $q>p$ is a prime, and there are no primes
1202
between $p$ and $q$.  For example, $\Gap_2(10) = 2$, since the pairs
1203
$(3,5)$ and $(5,7)$ are the pairs less than $10$ with gap $2$,
1204
and $\Gap_{4}(10)=0$ because despite $3$ and $7$ being separated
1205
by $4$, they are not consecutive primes.
1206
See Table~\ref{tab:gap} for various values of $\Gap_{k}(X)$ and
1207
Figure~\ref{fig:primegapdist} for the distribution\index{distribution} of prime gaps for
1208
$X=10^7$.
1209

1210

1211
\begin{table}[H]\centering
1212
\caption{Values of $\Gap_{k}(X)$ \label{tab:gap}}
1213
\vspace{1em}
1214

1215
{\small
1216
\begin{tabular}{|l|c|c|c|c|c|c|}\hline
1217
$X$ & $\Gap_{2}(X)$ & $\Gap_{4}(X)$& $\Gap_{6}(X)$ & $\Gap_{8}(X)$ &
1218
 $\Gap_{100}(X)$ &   $\Gap_{246}(X)$\\\hline
1219

1220
$10$ & 2 & 0 & 0 & 0 & 0 & 0\\\hline
1221
$10^{2}$ & 8 & 7 & 7 & 1 & 0 & 0\\\hline
1222
$10^{3}$ & 35 & 40 & 44 & 15 & 0 & 0\\\hline
1223
$10^{4}$ & 205 & 202 & 299 & 101 & 0 & 0\\\hline
1224
$10^{5}$ & 1224 & 1215 & 1940 & 773 & 0 & 0\\\hline
1225
$10^{6}$ & 8169 & 8143 & 13549 & 5569 & 2 & 0\\\hline
1226
$10^{7}$ & 58980 & 58621 & 99987 & 42352 & 36 & 0\\\hline
1227
$10^{8}$ & 440312 & 440257 & 768752 & 334180 & 878 & 0\\\hline
1228

1229
\end{tabular}
1230
}
1231
\end{table}
1232

1233
 The recent results of Zhang\index{Zhang, Yitang} as sharpened by Maynard (and others) we mentioned above tell us that for  at least one even number $k$ among the  even numbers $k \le 246$,    $\Gap_{k}(X)$ goes to infinity as $X$ goes to infinity. One expects that this happens for {\it all} even numbers $k$. We expect this as well, of course, for $\Gap_{246}(X)$ despite what might be misconstrued as discouragement by the above data.
1234
% NOTE: the biggest gap up to $10^{8}$ is 220, as this 30s comp shows:
1235
% v = prime_range(10^8)
1236
% gaps = [v[i]-v[i-1] for i in range(1,len(v))]
1237
% max(gaps)
1238

1239
\ill{primegapdist}{1}{Frequency histogram showing the distribution\index{distribution} of
1240
  prime gaps of size $\leq 50$ for all primes up to $10^7$.  Six is
1241
  the most popular gap in this data.
1242
\label{fig:primegapdist}}
1243

1244

1245
\ill{primegap_race}{1}{Plots of $\Gap_k(X)$ for $k=2,4,6,8$.  Which wins?}
1246

1247
Here is yet another question that deals with the spacing of prime
1248
numbers that we do not know the answer to:
1249

1250
{\em Racing Gap $2$,  Gap $4$, Gap $6$, and Gap $8$ against each other:}
1251

1252
\begin{quote}
1253
  Challenge: As $X$ tends to infinity which of $\Gap_2(X)$,
1254
  $\Gap_4(X)$,
1255
  $\Gap_6(X),$ or $\Gap_8(X)$ do you think will grow faster? How
1256
  much would you bet on the truth of your guess? \bibnote{%
1257
    Hardy and Littlewood give a nice conjectural answer to such
1258
    questions about gaps between primes.  See Problem {\bf A8} of
1259
    Guy's book {\em Unsolved Problems in Number Theory} (2004).
1260
Note that Guy's book discusses counting the number $P_k(X)$ of pairs of
1261
primes up to $X$ that differ by a fixed even number $k$; we have
1262
$P_k(X)\geq \Gap_k(X)$, since for $P_k(X)$ there is no requirement
1263
that the pairs of primes be consecutive.}
1264
\end{quote}
1265

1266

1267

1268

1269

1270

1271

1272

1273

1274
Here is a curious question that you can easily begin to check out for
1275
small numbers. We know, of course, that the {\em even} numbers and the
1276
{\em odd} numbers are nicely and simply distributed: after every odd
1277
number comes an even number, after every even, an odd. There are an
1278
equal number of positive odd numbers and positive
1279
even numbers less than any given odd
1280
number, and there may be nothing else of interest to say about the
1281
matter.  Things change considerably, though, if we focus our
1282
concentration on {\em multiplicatively even} numbers and {\em
1283
  multiplicatively odd} numbers.
1284

1285

1286
A {\bf multiplicatively even} number is one that can be expressed as a
1287
product of {\em an even number of} primes; and a {\bf multiplicatively
1288
  odd} number is one that can be expressed as a product of {\em an odd
1289
  number of} primes.  So, any prime is multiplicatively odd, the
1290
number $4 = 2\cdot 2$ is multiplicatively even, and so is $6=2\cdot
1291
3$, $9=3\cdot 3$, and $10= 2\cdot 5$; but $12=2\cdot 2\cdot 3$ is
1292
multiplicatively odd.   Below we list the numbers up to 25, and underline
1293
and bold  the multiplicatively odd numbers.
1294
\begin{center}
1295
1\, {\bf \underline{2}}\, {\bf \underline{3}}\, 4\, {\bf
1296
\underline{5}}\, 6\, {\bf \underline{7}}\, {\bf \underline{8}}\, 9\,
1297
10\, {\bf \underline{11}}\, {\bf \underline{12}}\, {\bf
1298
\underline{13}}\, 14\, 15\, 16\, {\bf \underline{17}}\, {\bf
1299
\underline{18}}\, {\bf \underline{19}}\, {\bf \underline{20}}\, 21\,
1300
22\, {\bf \underline{23}}\, 24\, 25
1301
\end{center}
1302

1303

1304
 Table~\ref{tab:evenodddata} gives some data:
1305

1306
 \begin{table}[H] \caption{Count of multiplicatively odd and
1307
    even positive numbers $\le X$\label{tab:evenodddata}}
1308
\vspace{1ex}
1309
\centering
1310
 {\small
1311
\begin{tabular}{|l|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|}
1312
\hline
1313
$X$ & 1 & 2 & 3 & 4 & 5  & 6 & 7 & 8 & 9 & 10 & 11 & 12 & 13 & 14 & 15 & 16  \\ \hline\hline
1314
m. odd & 0 & 1 & 2 & 2 & 3  & 3 & 4 & 5 & 5 & 5 & 6 & 7 & 8 & 8 & 8 & 8 \\ \hline
1315
m. even & 1 & 1 & 1 & 2 & 2  & 3 & 3 & 3 & 4 & 5 & 5 & 5 & 5 & 6 & 7 & 8 \\ \hline
1316
\end{tabular}}
1317
\end{table}
1318

1319

1320
   Now looking at this data, a natural, and simple, question to ask about the concept of multiplicative {\em oddness} and {\em evenness} is:
1321

1322
   \noindent {\em Is there some $X\ge 2$ for which there are more
1323
     multiplicatively even numbers less than or equal to $X$ than
1324
     multiplicatively odd ones?}
1325

1326
   Each plot in Figure~\ref{fig:liouville} gives the number of
1327
   multiplicatively even numbers between $2$ and $X$ minus the number
1328
   of multiplicatively odd numbers between $2$ and $X$, for $X$ equal
1329
   to 10, 100, 1000, 10000, 100000, and 1000000. The above question
1330
   asks whether these graphs would, for sufficiently large $X$, ever
1331
   cross the $X$-axis.
1332

1333
 \begin{figure}[H]
1334
\centering
1335
\includegraphics[width=.45\textwidth]{illustrations/liouville-17}
1336
\includegraphics[width=.45\textwidth]{illustrations/liouville-100}\\
1337
\includegraphics[width=.45\textwidth]{illustrations/liouville-1000}
1338
\includegraphics[width=.45\textwidth]{illustrations/liouville-10000}\\
1339
\includegraphics[width=.45\textwidth]{illustrations/liouville-100000}
1340
\includegraphics[width=.45\textwidth]{illustrations/liouville-1000000}\\
1341
\caption{Racing Multiplicatively Even and Odd Numbers.\label{fig:liouville}}
1342
\end{figure}
1343

1344
A {\em negative} response to this question---i.e., a proof that any
1345
plot as drawn in Figure~\ref{fig:liouville} never crosses
1346
the $X$-axis---would imply the Riemann Hypothesis!  In contrast to the list of
1347
previous questions, the answer to this question is known\footnote{For more
1348
  details, see P.~Borwein, ``Sign changes in sums of the Liouville
1349
    Function'' and the nice short paper of Norbert Wiener ``Notes on
1350
    Polya's and Turan's hypothesis concerning Liouville's factor''
1351
  (page 765 of volume II of Wiener's Collected Works); see also:
1352
  G. P\'{o}lya ``Verschiedene Bemerkungen zur Zahlentheorie,''
1353
  {\em Jahresbericht der Deutschen Mathematiker-Vereinigung},
1354
  {\bf 28} (1919)
1355
  31--40.}: alas, there is such an $X$.  In 1960, Lehman showed that
1356
for $X=906{,}400{,}000$ there are $708$ more multiplicatively even numbers
1357
up to $X$ than multiplicatively odd numbers (Tanaka found in 1980 that
1358
the smallest $X$ such that there are more multiplicative even than odd
1359
numbers is $X=906{,}150{,}257$).
1360

1361
%%%%%%%%%%%%%%%%%
1362
%\begin{wrapfigure}{r}{0.2\textwidth}
1363
%    \includegraphics[width=0.2\textwidth]{illustrations/questions}
1364
%\end{wrapfigure}
1365
These are questions that have been asked about primes (and we could
1366
give bushels more\footnote{See, e.g., Richard Guy's book {\em Unsolved Problems in Number Theory} (2004).}),
1367
questions expressible in simple vocabulary, that
1368
we can't answer today. We have been studying numbers for over two
1369
millennia and yet we are indeed in the infancy of our understanding.
1370

1371

1372
We'll continue our discussion by returning to the simplest counting
1373
question about prime numbers.\index{prime number}
1374

1375
  \chapter{How many primes are there?}
1376

1377
\ill{sieve200}{.8}{Sieving primes up to 200}
1378

1379
Slow as we are to understand primes, at the very least we can try to
1380
count them. You can see that there are $10$ primes less than $30$, so
1381
you might encapsulate this by saying that the chances that a number
1382
less than $30$ is prime is $1$ in $3$.  This frequency does not
1383
persist, though; here is some more data: There are $25$ primes less
1384
than $100$ (so $1$ in $4$ numbers up to $100$ are prime), there are
1385
$168$ primes less than a thousand (so we might say that among the
1386
numbers less than a thousand the chances that one of them is prime is
1387
roughly $1$ in $6$).
1388

1389
\ill{proportion_primes_100}{1}{Graph of the proportion of primes up to $X$ for each integer $X\leq 100$}
1390

1391
There are 78,498 primes less than a million (so we might say that
1392
the chances that a random choice among the first million numbers is
1393
prime have dropped to roughly $1$ in $13$).
1394

1395
\ill{proportion_primes_1000}{1}{Proportion of primes for $X$ up to $1{,}000$}
1396
\ill{proportion_primes_10000}{1}{Proportion of primes for $X$ up to $10{,}000$}
1397

1398
There are 455,052,512 primes less than ten billion; i.e.,
1399
10{,}000{,}000{,}000 (so we might say that the chances are down to roughly
1400
$1$ in $22$).
1401

1402
Primes, then, seem to be thinning out.  We return to the sifting process
1403
we carried out earlier, and take a look at a few graphs, to get a sense of why
1404
that might be so. There are a $100$ numbers less than or equal to
1405
$100$, a thousand numbers less than or equal to $1000$, etc.: the
1406
graph in Figure~\ref{fig:sieve_2_100} that looks like a regular staircase, each step the
1407
same length as each riser, climbing up at, so to speak, a 45 degree
1408
angle, counts all numbers up to and including~$X$.
1409

1410
Following Eratosthenes,\index{Eratosthenes} we have sifted those numbers, to pan for
1411
primes. Our first move was to throw out roughly half the numbers (the
1412
even ones!) after the number $2$. The graph that is labeled ``Sieve by 2" in Figure~\ref{fig:sieve_2_100} that is, with one hiccup, a regular staircase climbing at a
1413
smaller angle, each step twice the length of each riser, illustrates
1414
the numbers that are left after one pass through Eratosthenes'\index{Eratosthenes} sieve,
1415
which includes, of course, all the primes. So, the chances that a
1416
number bigger than $2$ is prime is {\em at most} $1$ in $2$.  Our
1417
second move was to throw out a good bunch of numbers bigger than $3$.
1418
So, the chances that a number bigger than $3$ is prime is going to be
1419
even less.  And so it goes: with each move in our
1420
sieving process, we are winnowing the field more extensively, reducing
1421
the chances that the later numbers are prime.
1422

1423
\ill{sieve_2_100}{.9}{Sieving by removing multiples of $2$ up to $100$\label{fig:sieve_2_100}}
1424
\ill{sieve1000}{.9}{Sieving for primes up to $1000$}
1425

1426
The red curve in these figures actually counts the primes: it is the
1427
beguilingly irregular {\em staircase of primes.}  Its height above any
1428
number $X$ on the horizontal line records the number of primes less
1429
than or equal to $X$, the accumulation of primes up to $X$.  Refer to
1430
this number as $\pi(X)$. So $\pi(2)=1$, $\pi(3) = 2$, $\pi(30) = 10$;
1431
of course, we could plot a few more values of $\pi(X)$, like
1432
$\pi(\text{ten billion}) = 455,052,512$.
1433

1434

1435
Let us accompany Eratosthenes\index{Eratosthenes} for a few further steps in his sieving
1436
process.  Figure~\ref{fig:sieve3_100} contains a graph of all whole
1437
numbers up to 100 after we have removed the even numbers greater than
1438
$2$, and the multiples of $3$ greater than $3$ itself.
1439

1440

1441
\ill{sieves3_100}{.7}{Sieving out multiples of $2$ and $3$.\label{fig:sieve3_100}}
1442

1443

1444
From this graph you can see that if you go ``out a way'' the
1445
likelihood that a number is a prime is less than $1$ in $3
1446
$. Figure~\ref{fig:sieve7_100} contains a graph of what Eratosthenes\index{Eratosthenes}
1447
sieve looks like up to 100 after sifting $2,3,5,$ and $7$.
1448

1449

1450

1451
\ill{sieves7_100}{.7}{Sieving out multiples of $2$, $3$, $5$, and $7$.\label{fig:sieve7_100}}
1452

1453

1454
This data may begin to suggest to you that as you go further and
1455
further out on the number line the percentage of prime numbers\index{prime number} among
1456
all whole numbers tends towards $0\%$ (it does).
1457

1458

1459
To get a sense of how the primes accumulate, we will take a look at
1460
the staircase of primes for $X= 25$ and $X=100$ in Figures~\ref{fig:staircase25}
1461
and \ref{fig:staircase100a}.
1462

1463
\ill{prime_pi_25_aspect1}{.8}{Staircase of primes up to 25\label{fig:staircase25}}
1464
\ill{prime_pi_100_aspect1}{.8}{Staircase of primes up to 100\label{fig:staircase100a}}
1465

1466

1467

1468
\chapter{Prime numbers viewed from a distance}\index{prime number}
1469
The striking thing about these figures is that as the numbers get
1470
large enough, the jagged accumulation of primes, those
1471
quintessentially discrete entities, becomes smoother and smoother to
1472
the eye. How strange and wonderful to watch, as our viewpoint zooms
1473
out to larger ranges of numbers, the accumulation of primes taking on
1474
such a smooth and elegant shape.
1475

1476
\ill{prime_pi_1000}{.8}{Staircases of primes up to 1,000\label{fig:staircases2}}
1477
\ill{prime_pi_10000}{.8}{Staircases of primes up to  10{,}000\label{fig:staircases2b}}
1478

1479
\ill{prime_pi_100000}{.8}{Primes up to 100{,}000\label{fig:pn100000}.}
1480

1481

1482

1483
But don't be fooled by the seemingly smooth shape of the curve in the
1484
last figure above: it is just as faithful a reproduction of the
1485
staircase of primes as the typographer's art can render, for there are
1486
thousands of tiny steps and risers in this curve, all hidden by the
1487
thickness of the print of the drawn curve in the figure.  It is
1488
already something of a miracle that we can approximately describe the
1489
build-up of primes, somehow, using a {\em smooth curve}.  But {\em
1490
  what} smooth curve?
1491

1492

1493
% NOTE about myriad below: After reading http://english.stackexchange.com/questions/20133/what-is-the-correct-usage-of-myriad, I'm not changing "myriad of smooth" to "myriad smooth".   The of version, which we use, is correct when myriad is used as a noun, which is now a legal usage in english, according to the above page.
1494
That last question is {\em not} rhetorical. If you draw a curve with
1495
chalk on the blackboard, this can signify a myriad of smooth
1496
(mathematical) curves all encompassed within the thickness of the
1497
chalk-line, all---if you wish---reasonable approximations of one
1498
another. So, there are many smooth curves that fit the chalk-curve.
1499
With this warning, but very much fortified by the data of Figure~\ref{fig:pn100000},
1500
let us ask: {\em what is a smooth curve that is a reasonable
1501
  approximation to the staircase of primes?}
1502

1503
\chapter{Pure and applied  mathematics}\label{ch:pureapplied}
1504

1505
Mathematicians seems to agree that, loosely speaking, there are two
1506
types of mathematics: {\em pure} and {\em applied}. Usually---when we
1507
judge whether a piece of mathematics is pure or applied---this
1508
distinction turns on whether or not the math has application to the
1509
``outside world,'' i.e., that {\em world} where bridges are built,
1510
where economic models are fashioned, where computers churn away on the
1511
Internet (for only then do we unabashedly call it {\em applied math}),
1512
or whether the piece of mathematics will find an important place
1513
within the context of mathematical theory (and then we label it {\em
1514
  pure}).  Of course, there is a great overlap (as we will see later,
1515
Fourier analysis\index{Fourier analysis} plays a major role both in data compression and in
1516
pure mathematics).
1517

1518
 Moreover, many questions in
1519
mathematics are ``hustlers'' in the sense that, at first view, what is
1520
being requested is that some simple task be done (e.g., the question
1521
raised in this book, {\em to find a smooth curve that is a reasonable
1522
  approximation to the staircase of primes}).  And only as things
1523
develop is it discovered that there are payoffs in many unexpected
1524
directions, some of these payoffs being genuinely applied (i.e., to
1525
the practical world), some of these payoffs being pure (allowing us
1526
to strike behind the mask of the mere appearance of the mathematical
1527
situation, and get at the hidden fundamentals that actually govern the
1528
phenomena), and some of these payoffs defying such simple
1529
classification, insofar as they provide powerful techniques in other
1530
branches of mathematics.  The \RH{}---even in its current
1531
unsolved state---has already shown itself to have all three types of
1532
payoff.
1533

1534
The particular issue before us is, in our opinion, twofold, both
1535
applied, and pure: can we curve-fit the ``staircase of primes'' by a
1536
well approximating smooth curve given by a simple analytic formula?
1537
The story behind this alone is
1538
marvelous, has a cornucopia of applications, and we will be telling it
1539
below. But our curiosity here is driven by a question that is pure,
1540
and less amenable to precise formulation: are there mathematical
1541
concepts at the root of, and more basic than (and ``prior to,'' to
1542
borrow Aristotle's use of the phrase) {\em prime numbers}---concepts\index{prime number}
1543
that account for the apparent complexity of the nature of primes?
1544

1545

1546
\chapter{A probabilistic first guess\label{sec:firstguess} }
1547

1548
\ill{gauss}{.3}{Gauss\index{Gauss, Carl Friedrich}}
1549

1550
The search for such approximating curves began, in fact, two centuries
1551
ago when Carl Friedrich Gauss\index{Gauss, Carl Friedrich} defined a certain beautiful curve that,
1552
experimentally, seemed to be an exceptionally good fit for the
1553
staircase of primes.
1554

1555
\ill{pi_Li}{.8}{Plot of $\pi(X)$ and Gauss's smooth curve $G(X)$}
1556

1557
Let us denote Gauss's curve $G(X)$; it has an
1558
elegant simple formula comprehensible to anyone who has had a tiny bit
1559
of calculus.\index{calculus}  If you make believe that the chances that a number $X$ is
1560
a prime is inversely proportional to the number of digits of $X$ you
1561
might well hit upon Gauss's curve.
1562
That is,
1563
\vskip10pt
1564
$$G(X)\hskip40pt  {\rm is\ roughly\ proportional\ to} \hskip40pt  {\frac{X}{{\rm the\ number\ of\ digits\ of\ } X}}.$$
1565
\vskip10pt
1566
But to describe Gauss's\index{Gauss, Carl Friedrich} guess precisely we need to discuss the {\it natural logarithm}\footnote{In advanced mathematics, ``common'' logarithms are sufficiently uncommon that ``log'' almost always denotes natural
1567
log and the notation $\ln(X)$ is not used.} ``$\log(X)$'' which is an elegant
1568
smooth function of a real number $X$ that is roughly proportional
1569
to the number of digits of the whole number part of $X$.
1570

1571

1572

1573
\ill{log}{.8}{Plot of the natural logarithm $\log(X)$}
1574

1575
%NB: the sequence below converges very, very slowly to e.
1576
 Euler's\index{Euler, Leonhard} famous constant $e=2.71828182\ldots$, which is the limit
1577
 of the sequence
1578
 $$\left(1+{\frac{1}{2}}\right)^2,
1579
       \left(1+{\frac{1}{3}}\right)^3,
1580
       \left(1+{\frac{1}{4}}\right)^4, \dots,$$
1581
is used in the definition of $\log$:
1582
\begin{center}
1583
{\em $A = \log(X)$ is the number $A$ for which $e^A = X$.}
1584
\end{center}
1585
Before electronic calculators, logarithms were frequently used to
1586
speed up calculations, since logarithms translate difficult multiplication
1587
problems into easier addition problems which can be done mechanically.
1588
Such calculations use that the logarithm of a product is the sum of the logarithms
1589
of the factors\index{factor}; that is, $$\log(X Y) = \log(X) + \log(Y).$$
1590
%FROM -- YURI TSCHINKEL's:  ''ABOUT THE COVER: ON THE DISTRIBUTION\index{distribution} OF PRIMESÑGAUSSÕ TABLES ''
1591

1592
% From: http://en.wikipedia.org/wiki/File:Slide_rule_scales_back.jpg
1593
\ill{slide_rule}{.8}{A slide rule  computes $2X$ by using that $\log(2X)=\log(2)+\log(X)$\label{fig:slide_rule}}
1594

1595
In Figure~\ref{fig:slide_rule} the numbers printed (on each of the slidable pieces of the rule)
1596
are spaced according to their logarithms, so that when one slides the
1597
rule arranging it so that the printed  number $X$ on one piece lines up
1598
with the printed number $1$ on the other, we get that for every number $Y$
1599
printed on the first piece, the printed number on the other piece that
1600
is  aligned with it is the product $XY$; in effect the ``slide'' adds
1601
$\log(X)$ to $\log(Y)$ giving $\log(XY)$.
1602

1603
% I found this here: http://kr.blog.yahoo.com/kimxx168/498
1604
% It is referenced in http://www.ams.org/bull/2006-43-01/S0273-0979-05-01096-7/home.html
1605
% but that link is broken.
1606
\ill{gauss_tables_half}{.9}{A Letter of Gauss\label{fig:gauss_letter}}
1607

1608
In 1791, when Gauss\index{Gauss, Carl Friedrich} was 14 years old, he received a book that contained
1609
logarithms of numbers up to $7$ digits and a table of primes up to 10{,}009.
1610
Years later, in a letter
1611
written in 1849 (see Figure~\ref{fig:gauss_letter}), Gauss\index{Gauss, Carl Friedrich}
1612
claimed that as early as 1792 or 1793 he had already observed that the
1613
density of prime numbers\index{prime number} over intervals of numbers of a given rough
1614
magnitude $X$ seemed to average $1/\log(X)$.
1615

1616
 Very {\em very} roughly speaking, this
1617
means that {\em the number of primes up to $X$ is approximately $X$ divided by
1618
twice the number of digits of $X$}.  For example,
1619
the number of primes less than $99$ should be roughly
1620
$$
1621
   \frac{99}{2\times 2} = 24.75 \approx  25,
1622
$$
1623
which is pretty amazing, since the correct number of
1624
primes up to $99$ is $25$.  The number of primes up to $999$ should
1625
be roughly
1626
$$
1627
   \frac{999}{2\times 3} = 166.5 \approx  167,
1628
$$
1629
which is again close, since there are 168 primes up to $1000$.
1630
The number of primes up to $999{,}999$ should be roughly
1631
$$
1632
   \frac{999999}{2\times 6} = 83333.25 \approx  83{,}333,
1633
$$
1634
which is close to the correct count of $78{,}498$.
1635

1636
Gauss\index{Gauss, Carl Friedrich} guessed that the expected number of primes up to $X$
1637
is approximated by the area under the
1638
graph of $1/\log(X)$ from $2$ to $X$ (see Figure~\ref{fig:G}).
1639
The area under $1/\log(X)$ up to $X=999{,}999$ is $78{,}626.43\ldots$, which
1640
is remarkably close to the correct count $78{,}498$ of the primes
1641
up to $999{,}999$.
1642

1643
\illthree{area_under_log_graph_30}{area_under_log_graph_100}{area_under_log_graph_1000}{.27}{The
1644
  expected tally of the number of primes $\leq X$ is approximated by the
1645
  area underneath the graph of $1/\log(X)$ from $1$ to $X$.\label{fig:G}}
1646

1647

1648
Gauss\index{Gauss, Carl Friedrich} was an inveterate computer:
1649
he wrote in his 1849 letter  that
1650
there are $216{,}745$ prime numbers\index{prime number} less than three million.  This is
1651
wrong: the actual number of these primes is $216{,}816$. Gauss's curve $G(X)$
1652
predicted that there would be $216{,}970$ primes---a miss, Gauss\index{Gauss, Carl Friedrich}
1653
thought, by
1654
$$225 = 216970 - 216745.$$
1655
But actually he was closer than he thought: the
1656
prediction of the curve $G(X)$ missed by a mere $154 = 216970  - 216816$.
1657
%; not as close as the 2004 US
1658
%elections, but pretty close nevertheless).
1659
Gauss's computation brings up two queries: will this spectacular ``good
1660
fit'' continue for arbitrarily large numbers? and, the (evidently
1661
prior) question: what counts as a good fit?
1662

1663

1664

1665
\chapter{What is a ``good approximation''?}\label{sec:sqrterror}
1666

1667
If you are trying to estimate a number, say, around ten thousand, and
1668
you get it right to within a hundred, let us celebrate this kind of
1669
accuracy by saying that you have made an approximation with {\em
1670
  square-root error}\index{square-root error} (${\sqrt{10{,}000}}=100$). Of course, we should
1671
really use the more clumsy phrase ``an approximation with at worst
1672
{\em square-root error.}''\index{square-root error}  Sometimes we'll simply refer to such
1673
approximations as {\em good approximations.} If you are trying to
1674
estimate a number in the millions, and you get it right to within a
1675
thousand, let's agree that---again---you have made an approximation
1676
with {\em square-root error}\index{square-root error} (${\sqrt{1{,}000{,}000}}=1{,}000$).
1677
Again, for short, call this a {\em good approximation.} So, when Gauss\index{Gauss, Carl Friedrich}
1678
thought his curve missed by $226$ in estimating the number of primes
1679
less than three million, it was well within the margin we have given
1680
for a ``good approximation.''
1681

1682

1683
More generally, if you are trying to estimate a number that has $D$
1684
digits and you get it almost right, but with an error that has no more
1685
than, roughly, half that many digits, let us say, again, that you have
1686
made an approximation with {\em square-root error}\index{square-root error} or synonymously, a
1687
{\em good approximation}.
1688

1689

1690
This rough account almost suffices for what we will be discussing
1691
below, but to be more precise, the specific {\em gauge of accuracy}
1692
that will be important to us is not for a mere {\em single} estimate
1693
of a {\em single} error term,
1694
$$
1695
\text{ Error term } = \text{ Exact value } - \text{  Our ``good  approximation" }
1696
$$
1697
but rather for {\em infinite sequences} of estimates of
1698
error terms. Generally, if you are interested in a numerical quantity
1699
$q(X)$ that depends on the real number parameter $X$ (e.g., $q(X)$
1700
could be $\pi(X)$, ``the number of primes $\leq{}X$'') and if you have an
1701
explicit candidate ``approximation,'' $q_{\rm approx}(X)$, to this
1702
quantity, let us say that $q_{\rm approx}(X)$ is {\bf essentially a
1703
  square-root accurate\index{square-root accurate} approximation to $q(X)$} if for {\em any}
1704
given exponent greater than $0.5$ (you choose it: $0.501$, $0.5001$,
1705
$0.50001$, $\dots$ for example) and for large enough $X$---where the
1706
phrase ``large enough'' depends on your choice of exponent---the {\bf
1707
  error term}---i.e., the difference between $q_{\rm approx}(X)$ and
1708
the {\em true} quantity, $q(X)$, is, in absolute value, less than
1709
$X$ raised to that exponent (e.g. $< X^{0.501}$, $<
1710
X^{0.5001}$, etc.). Readers who know calculus\index{calculus} and wish to have a
1711
technical formulation of this definition of {\em good approximation}
1712
might turn to the endnote \bibnote{If $f(x)$ and
1713
$g(x)$ are real-valued functions of a real variable $x$ such that
1714
for any
1715
$\epsilon >0$ both of them take their values between $ x^{1-\epsilon}$
1716
and $ x^{1+\epsilon}$ for
1717
$x$
1718
sufficiently large, then say that $f(x)$ and $g(x)$ are {\bf good
1719
approximations of one another} if,
1720
for any positive $\epsilon$ the absolute value of their difference
1721
is less than $ x^{{1\over
1722
2}+\epsilon}$ for
1723
$x$
1724
sufficiently large. The functions $\Li(X)$ and $R(X)$ are good approximations of
1725
one another.} for a precise statement.
1726

1727

1728
If you found the above confusing, don't worry: again, a
1729
square-root accurate\index{square-root accurate} approximation is one in which at least roughly
1730
half the digits are correct.
1731

1732

1733
\begin{remark}
1734
  To get a feel for how basic the notion of {\em approximation to data
1735
    being square root close to the true values of the data} is---and
1736
  how it represents the ``gold standard'' of accuracy for
1737
  approximations, consider this fable.
1738

1739
  Imagine that the devil had the idea of saddling a large committee of
1740
  people with the task of finding values of $\pi(X)$ for various large
1741
  numbers $X$.  This he did in the following manner, having already
1742
  worked out which numbers are prime himself. Since the devil is, as
1743
  everyone knows, {\em in the details,} he has made no mistakes: his
1744
  work is entirely correct.  He gives each committee member a copy of
1745
  the list of all {\em prime numbers}\index{prime number} between $1$ and one of the large
1746
  numbers $X$ in which he was interested.  Now each committee member
1747
  would count the number of primes by doing nothing more than
1748
  considering each number, in turn, on their list and tallying them
1749
  up, much like a canvasser counting votes; the committee members  needn't even know that these numbers are prime, they just think of these numbers as items on their list. But since they are human,
1750
  they will indeed be making mistakes, say $1\%$ of the time.
1751
  Assume further that it is just as likely for them to make the
1752
  mistake of undercounting or overcounting.  If many people are
1753
  engaged in such a pursuit, some of them might overcount $\pi(X)$;
1754
  some of them might undercount it. The average error (overcounted
1755
  or undercounted) would be proportional to~${\sqrt X}$.
1756

1757
  In the next chapter we'll view these undercounts and overcounts as analogous to a random walk\index{random walk}.
1758

1759

1760
\end{remark}
1761

1762

1763
\chapter{Square root error and random walk\index{random walk}s}
1764

1765
 To take a random walk\index{random walk} along a (straight) east--west path  you would start at your home base, but every minute, say, take a step along the path, each step being of the same length, but randomly either east or west. After $X$ minutes, how far are you from your home base?
1766

1767
  The answer to this {\it cannot} be a specific number, precisely because you're making a random decision that affects that number for each of the $X$ minutes of your journey. It is more reasonable to ask a statistical version of that question. Namely, if you took {\it many} random walks\index{random walk} $X$ minutes long, then---on the average---how far would you be from your home base?  The answer, as is illustrated by the figures below, is that the average distance you will find yourself from home base after (sufficiently many of) these excursions is proportional to ${\sqrt X}$. (In fact, the average is equal to $\sqrt{\frac{2}{\pi}}\cdot {\sqrt X}$.)
1768

1769
  To connect this with the committee members' histories of errors, described in the fable in Chapter~\ref{sec:sqrterror}, imagine  every  error (undercount or overcount by $1$) the committee member makes, as a ``step" East for undercount and West for overcount. Then if such errors were made, at a constant frequency over the duration of time spent counting, and if the over and undercounts were equally likely and random, then one can model the committee members' computational  accuracy by a random walk.\index{random walk} It would be---in the terminology we have already discussed---no better than {\it square-root accurate}\index{square-root accurate}; it would be subject to {\it square-root error\index{square-root error}}.
1770

1771
   To get a real sense of how constrained random walks\index{random walk} are by this ``square-root law," here are a few numerical experiments of random walks. The left-hand squiggly (blue) graphs in Figures~\ref{fig:random_walks_3}--\ref{fig:random_walks_1000} below are computer-obtained random walk trials (three, ten, a hundred, and a thousand random walks). The blue curve in the right-hand graphs of those four figures is the average distance from home-base of the corresponding (three, ten, a hundred, and a thousand) random walks.
1772
The red curve in each figure below is the graph of the quantity $\sqrt{\frac{2}{\pi}}\cdot {\sqrt X}$ over the $X$-axis.
1773
As the number of random walks\index{random walk} increases, the red curve
1774
better and better approximates the average distance.
1775

1776
\illtwo{random_walks-3}{random_walks-3-mean}{.45}{Three Random Walk\index{random walk}s\label{fig:random_walks_3}}
1777

1778
\illtwo{random_walks-10}{random_walks-10-mean}{.45}{Ten Random Walk\index{random walk}s\label{fig:random_walks_10}}
1779

1780
\illtwo{random_walks-100}{random_walks-100-mean}{.45}{One Hundred Random Walk\index{random walk}s\label{fig:random_walks_100}}
1781

1782
\illtwo{random_walks-1000}{random_walks-1000-mean}{.45}{One Thousand Random Walk\index{random walk}s\label{fig:random_walks_1000}}
1783

1784

1785

1786

1787

1788
\chapter[What is Riemann's Hypothesis?] { What is Riemann's Hypothesis?  ({\em first formulation})\label{sec:rh1}}
1789

1790

1791

1792
Recall from Chapter~\ref{sec:firstguess} that a rough guess for an approximation to
1793
$\pi(X)$, the number of primes $\leq{} X$, is given by the function
1794
$X/\log(X)$. Recall, as well, that a refinement of that guess, offered by Gauss\index{Gauss, Carl Friedrich}, stems from this curious thought:  the ``probability'' that a number $N$ is a prime  is proportional to the reciprocal of its number of digits; more precisely, the probability is $1/\log(N)$.  This would lead us to  guess that the approximate value of $\pi(X)$ would be
1795
the area of the region from $2$ to $X$ under the graph of
1796
$1/\log(X)$, a quantity sometimes referred to as $\Li(X)$.
1797
``Li'' (pronounced L$\overline{\rm i}$, so the same as ``lie" in ``lie down")
1798
is short for {\em logarithmic integral},
1799
because the area of the region from $2$ to $X$ under $1/\log(X)$
1800
is (by definition) the {\em integral} $\int_2^X 1/\log(t) dt$.
1801

1802

1803

1804

1805
Figure~\ref{fig:threeplots} contains a graph of the three functions
1806
$\Li(X)$, $\pi(X)$, and $X/\log X$ for $X\leq 200$.
1807
But data, no matter how impressive, may be deceiving (as we learned in
1808
Chapter~\ref{ch:further}). If you think
1809
that  the three graphs  never cross for all large values of $X$,  and
1810
that we have the simple relationship  $X/\log(X) < \pi(X) < \Li(X)$ for
1811
large $X$, read \url{http://en.wikipedia.org/wiki/Skewes'_number}.
1812

1813

1814
\ill{three_plots}{.9}{Plots of $\Li(X)$ (top), $\pi(X)$ (in the middle), and $X/\log(X)$ (bottom).\label{fig:threeplots}}
1815

1816
It is a {\em major challenge} to
1817
evaluate $\pi(X)$ for large values of $X$.
1818
For example, let $X=10^{24}$.
1819
Then \label{pili_vals} (see \bibnote{This computation of $\pi(X)$ was done by David J. Platt in 2012,
1820
and is the largest value of $\pi(X)$ ever computed.  See
1821
\url{http://arxiv.org/abs/1203.5712} for more details.}) we have:
1822
\begin{align*}
1823
  \pi(X) &= {\color{dredcolor} 18{,}435{,}599{,}767{,}3}49{,}200{,}867{,}866\\
1824
  \Li(X) &= {\color{dredcolor}18{,}435{,}599{,}767{,}3}66{,}347{,}775{,}143.10580\ldots \\
1825
  X/(\log(X)-1)  &=
1826
            18{,}429{,}088{,}896{,}563{,}917{,}716{,}962.93869\ldots\\
1827
  \Li(X) - \pi(X) &= \hspace{7em}
1828
     17{,}146{,}907{,}277.105803\ldots\\
1829
  \sqrt{X}\cdot \log(X) &= \hspace{5.2em}
1830
     55{,}262{,}042{,}231{,}857.096416 \ldots
1831
\end{align*}
1832
Note that several of the left-most digits of $\pi(X)$ and $\Li(X)$ are the
1833
same (as indicated in red), a point we will return to on page~\pageref{page:leftmost}.
1834

1835

1836
More fancifully, we can think of the error in this approximation to $\pi(X)$, i.e., $|\Li(X)-\pi(X)|$, (the absolute value) of the difference between $\Li(X)$ and $\pi(X)$, as (approximately) the result of  a walk having roughly $X$ steps where you move by the following rule:  go  east by a distance of $1/\log N$ feet if $N$ is not a prime and west by a distance of $1-\frac{1}{\log N}$ feet if $N$ is a prime. Your distance, then, from home base after $X$ steps is approximately $|\Li(X)-\pi(X)|$ feet.
1837
%% Computer code to double check the above claim without using
1838
%% your brain (will be useful for a SageMath version of the book):
1839
% def f(X):
1840
%     t = 0
1841
%     from math import log
1842
%     for N in [2..floor(X)]:
1843
%         if is_prime(N):
1844
%             t -= 1-1/log(N)
1845
%         else:
1846
%             t += 1/log(N)
1847
%     return t
1848
%
1849
% def g(X):
1850
%     return N(Li(X) - prime_pi(X))
1851

1852

1853
We have no idea if this picture of things resembles a truly {\it random walk}\index{random walk} but at least it makes it reasonable to ask the question: is $\Li(X)$  {\it essentially a square root approximation} to
1854
$\pi(X)$? Our first formulation of Riemann's
1855
Hypothesis says yes:
1856

1857
      \begin{center}
1858
       \shadowbox{ \begin{minipage}{0.9\textwidth}
1859
\mbox{}       \vspace{0.2ex}
1860
       \begin{center}{\bf\large The {\bf \RH{}} (first formulation)}\end{center}
1861
       \medskip
1862

1863
For any real number $X$ the number of prime numbers\index{prime number} less than $X$ is
1864
approximately $\Li(X)$ and this approximation is essentially square
1865
root accurate (see \bibnote{In fact, the Riemann Hypothesis is equivalent to $|\Li(X) - \pi(X)| \leq \sqrt{X}\log(X)$ for all $X\geq 2.01$.  See Section 1.4.1 of Crandall and Pomerance's book {\em Prime Numbers: A Computational Perspective}.}).
1866
\label{rh:first}
1867

1868
\vspace{1ex}
1869
\end{minipage}}
1870
      \end{center}
1871

1872
\chapter{The mystery moves to  the error term}
1873

1874
 Let's think of what happens when you have a {\it mysterious quantity} (say,  a function of a real number $X$) you wish to understand. Suppose you manage to approximate that quantity by  an easy to understand  expression---which we'll call the  ``dominant term"---that is also simple to compute, but only approximates your mysterious quantity. The approximation is not exact; it has a possible error, which happily is significantly smaller than the size of the dominant term. ``Dominant" here just means exactly that: it  is of size significantly larger than the error of approximation.
1875

1876
 $${\it Mysterious\  quantity}(X)\ \ = \ \ {\it Simple, \ but \ dominant\ quantity}(X) \ + \ {\it Error}(X).$$
1877

1878
 If all you are after is a general estimate of {\it size}  your job is done. You might declare victory and ignore ${\it Error}(X)$ as being---in size---negligible. But if you are interested in the deep structure of your {\it mysterious quantity} perhaps all you have done is to manage to transport most questions about it to  ${\it Error}(X)$. In conclusion, ${\it Error}(X)$ is now your new mysterious quantity.
1879

1880
 Returning to the issue of $\pi(X)$ (our {\it mysterious quantity}) and $\Li(X)$ (our {\it dominant term}) the first formulation of the Riemann Hypothesis  (as in Chapter \ref{sec:rh1} above)  puts the spotlight on the {\it Error\ term} $|\Li(X) - \pi(X)|$, which therefore deserves our scrutiny, since---after all---we're not interested in merely counting the primes: we want to understand as much as we can of their structure.
1881

1882
   To get a feel for this error term, we shall smooth it out a bit, and look at a few of its graphs.
1883

1884

1885
\chapter{Ces\`aro smoothing\index{Ces\`aro Smoothing}}\label{ces}
1886

1887
Often when you hop in a car and reset the trip counter, the car
1888
will display information about your trip.  For instance, it might show you the
1889
average speed up until now, a number that is
1890
``sticky'', changing much less erratically than your
1891
actual speed, and you might use it to make a rough estimate of
1892
how long until you will reach your destination.
1893
Your car is computing the {\em Ces\`aro smoothing\index{Ces\`aro Smoothing} }of your speed.
1894
We can use this same idea to better understand the behavior
1895
of other things, such as the sums appearing in
1896
the previous chapter.
1897
\ill{camaro}{.7}{The ``Average Vehicle Speed,'' as displayed on the dashboard of the 2013 Camaro SS car that one of us drove during the writing of this chapter.}
1898
%I took this picture of my rental car -- William Stein.
1899

1900
 Suppose you are trying to say something intelligent about the behavior of a certain quantity that varies with time in what seems to be a somewhat erratic, volatile pattern. Call the quantity $f(t)$ and think of it as a function defined for positive real values of ``time'' $t$.  The natural impulse might be to take some sort of ``average value''\footnote{For readers who know calculus: that  average would be ${\frac{1}{T}}\int_0^Tf(t)dt$.} of  $f(t)$ over a time interval, say from $0$ to $T$.   This would indeed be an intelligent thing to do if this average stabilized and depended relatively little on  the interval of time over which one is computing this average, e.g., if that interval were large enough.  Sometimes, of course, these averages themselves are relatively sensitive to the times $T$ that are chosen, and don't stabilize. In such a case, it usually pays to consider {\it all} these averages  as  your chosen $T$ varies, as a function (of $T$) in its own right{\footnote{ For readers who know calculus: this is
1901
  $$F(T):= {\frac{1}{T}}\int_0^Tf(t)dt.$$}.
1902
   This new function $F(T)$, called the {\bf Ces\`aro Smoothing}\index{Ces\`aro Smoothing} of the function $f(t)$, is a good indicator of certain eventual trends of the original function $f(t)$ that is less visible directly from the function $f(t)$.  The effect of passing from $f(t)$ to $F(T)$ is to ``smooth out" some of the volatile local behavior of the original function, as can be seen in
1903
Figure~\ref{fig:cesaro-smoothing}.
1904

1905

1906
\ill{cesaro}{.9}{The red plot is the Ces\`aro smoothing\index{Ces\`aro Smoothing} of the blue plot\label{fig:cesaro-smoothing}}
1907
\chapter{ A view of $|\Li(X) - \pi(X)|$}
1908
 Returning to our mysterious error term,  consider Figure \ref{fig:li-minus-pi-250000} where the volatile blue curve in the middle
1909
 is $\Li(X) - \pi(X)$, its Ces\`aro smoothing\index{Ces\`aro Smoothing} is the red relatively smooth curve on the bottom, and the curve on the top
1910
 is the graph of $\sqrt{\frac{2}{\pi}}\cdot \sqrt{X/\log(X)}$ over the range of $X\le 250{,}000$.
1911

1912
\ill{li-minus-pi-250000}{.9}{$\Li(X)-\pi(X)$ (blue middle), its Ces\`aro smoothing\index{Ces\`aro Smoothing} (red bottom), and
1913
$\sqrt{\frac{2}{\pi}}\cdot \sqrt{X/\log(X)}$ (top), all for $X\leq 250{,}000$\label{fig:li-minus-pi-250000}}
1914

1915
% TODO: this is from http://seriesdivergentes.wordpress.com/2011/07/21/john-e-littlewood\index{Littlewood, John Edensor }/
1916
Data such as this graph can be revealing and misleading at the same time.  For example, the volatile blue graph (of $\Li(X)-\pi(X)$) {\it seems} to be roughly sandwiched between two rising functions, but this will not continue for all values of $X$.  The Cambridge University mathematician, John Edensor Littlewood\index{Littlewood, John Edensor } (see
1917
Figure~\ref{fig:littlewood}),\index{Littlewood, John Edensor } proved in 1914 that there {\it exists} a real number $X$ for which the quantity $\Li(X)-\pi(X)$ vanishes, and then for slightly larger $X$ crosses over into negative values.  This theorem attracted lots of attention at the time (and continues to do so) because of the inaccessibility of achieving good estimates (upper or lower bounds) for the first such number.  That ``first" $X$ for which   $\Li(X) =\pi(X)$ is called {\bf Skewes Number} in honor of the South African mathematician (a student of Littlewood)  Stanley Skewes, who (in 1933) gave the  first---fearfully large---upper bound for that number (conditional on the Riemann Hypothesis!).  Despite a steady stream of subsequent improvements, we currently can only  locate Skewes Number\index{Skewes Number}  as being in the range:
1918

1919
$$
1920
10^{14}\ \ \ \le\ \ \ {\rm Skewes\ Number}\ \ \ <\ \ \    10^{317},
1921
$$ and it is proven that at some  values of $X$ fairly close to the upper bound $10^{317}$   $\pi(X)$ is greater than $\Li(X)$. So  the trend suggested in Figure \ref{fig:li-minus-pi-250000} will not continue indefinitely.
1922

1923
%\begin{wrapfigure}{l}{0.3\textwidth}
1924
%    \includegraphics[width=0.3\textwidth]{illustrations/littlewood\index{Littlewood, John Edensor }}\\
1925
%    John Edensor Littlewood\index{Littlewood, John Edensor } (1885--1977)
1926
%\end{wrapfigure}
1927

1928
\ill{littlewood}{0.35}{John\index{Littlewood, John Edensor } Edensor Littlewood (1885--1977)\label{fig:littlewood}}
1929

1930

1931

1932

1933
\chapter{The Prime Number Theorem\label{sec:pnt}}\index{prime number}
1934

1935
Take a look at Figure~\ref{fig:threeplots} again.
1936
All three functions, $\Li(X)$, $\pi(X)$ and $X/\log(X)$
1937
are ``going to infinity with $X$'' (this means
1938
that for any real number $R$, for all sufficiently large $X$,
1939
the values of these functions at $X$ exceed $R$).
1940

1941
Are these functions ``going to infinity'' at {\it the same rate}?
1942

1943
To answer such a question, we have to know what we mean by {\it going
1944
  to infinity at the same rate}. So, here's a definition. Two
1945
functions, $A(X)$ and $B(X)$, that each go to infinity will be said to
1946
{\bf go to infinity at the same rate} if their {\it ratio}
1947
$$A(X)/B(X)$$
1948
tends to $1$ as $X$ goes to infinity.
1949

1950
If for example  two functions, $A(X)$ and $B(X)$ that take positive whole number values, have the same number of digits for large $X$  and if,  for any
1951
number you give us, say: a million (or a billion, or a trillion) and if $X$ is large enough, then the
1952
``leftmost'' million (or billion, or trillion) digits of $A(X)$ and $B(X)$ are the same,  then  $A(X)$ and $B(X)$  {\it go to infinity at the same rate}.  For example,
1953
$$
1954
\frac{A(X)}{B(X)} =
1955
\frac{{\color{dredcolor}2810675971}83743525105611755423}{{\color{dredcolor}2810675971}61361511527766294585}
1956
= 1.00000000007963213762060\ldots
1957
$$
1958

1959

1960
While we're defining things, let us say that two functions, $A(X)$
1961
and $B(X)$, that each go to infinity {\bf go to infinity at
1962
similar rates} if there are two positive constants $c$ and $C$
1963
such that for $X$ sufficiently large the {\it ratio}
1964
$$
1965
      A(X)/B(X)
1966
$$
1967
is greater than $c$ and smaller than $C$.
1968

1969
    \ill{similar_rates}{1.0}{The polynomials $A(X)=2 X^{2} + 3 X - 5$ (bottom)
1970
and $B(X)=  3 X^{2} - 2 X + 1$ (top) go to infinity at similar rates.\label{fig:simrates}}
1971

1972

1973

1974
For example, two polynomials in $X$ with positive leading coefficients
1975
{\it go to infinity at the same rate} if and only if they have the
1976
same degrees and the same leading coefficient; they {\it go to
1977
  infinity at similar rates} if they have the same degree.
1978
See Figures~\ref{fig:simrates} and~\ref{fig:samerate}. %where we may take $c=1/2$ and $C=1$.
1979

1980
 \ill{same_rates}{1.0}{The polynomials $A(X)=X^{2} + 3 X - 5$ (top)
1981
and $B(X)= X^{2} - 2 X + 1$ (bottom) go to infinity
1982
at the same rate.\label{fig:samerate}}
1983

1984

1985

1986
Now a theorem from elementary calculus\index{calculus} tells us that the ratio of
1987
$\Li(X)$ to $X/\log(X)$ tends to $1$ as $X$ gets larger and larger.
1988
That is---using the definition we've just introduced---$\Li(X)$ and
1989
$X/\log(X)$ go to infinity at the same rate (see
1990
\bibnote{For a proof of this here's a hint. Compute the difference
1991
between the derivatives of $\Li(x)$ and of  $x/\log x$. The answer
1992
is $1/\log^2(x)$.  So you must show that the ratio of
1993
$\int_2^X dx/\log^2(x)$ to $\Li(x)= \int_2^Xdx/\log(x)$
1994
tends to zero as~$x$ goes to infinity, and this is a good
1995
calculus exercise.}).
1996

1997

1998
Recall (on page~\pageref{pili_vals} above in Chapter~\ref{sec:rh1})  that
1999
if $X = 10^{24}$, the left-most\label{page:leftmost}
2000
twelve digits of $\pi(X)$ and $\Li(X)$ are the same:
2001
both numbers start
2002
$18{,}435{,}599{,}767{,}3\ldots$. Well, that's a good start. Can we guarantee that for  $X$ large enough, the ``left-most'' million (or billion, or trillion) digits of $\pi(X)$ and $\Li(X)$ are the same, i.e., that these two functions go to infinity at the same rate?
2003

2004
The \RH{}, as we have just formulated it, would tell us
2005
that the {\it difference} between $\Li(X)$ and $\pi(X)$ is pretty small
2006
in comparison with the size of $X$. This information would imply (but
2007
would be {\it much} more precise information than) the statement that
2008
the {\it ratio} $\Li(X)/\pi(X)$ tends to $1$, i.e., that $\Li(X)$ and
2009
$\pi(X)$ go to infinity at the same rate.
2010

2011
This last statement gives, of course, a far less precise relationship
2012
between $\Li(X)$ and $\pi(X)$ than the \RH{} (once it is proved!)
2013
would give us.  The advantage, though, of the less precise statement
2014
is that it is currently known to be true, and---in fact---has been
2015
known for over a century. It goes under the name of
2016

2017

2018
 \noindent {\bf The Prime Number Theorem:\ \ } $\Li(X)$ and $\pi(X)$ go to infinity at the same rate.\index{prime number}
2019

2020

2021
   Since $\Li(X)$ and $X/\log(X)$ go to infinity at the same rate, we could
2022
   equally well have expressed the ``same'' theorem by saying:
2023

2024

2025
  \noindent {\bf The Prime Number Theorem:\ \ } $X/\log(X)$ and $\pi(X)$ go to infinity at the same rate.\index{prime number}
2026

2027

2028
   This fact is a very hard-won piece of mathematics!  It was proved
2029
   in 1896 independently by Jacques Hadamard\index{Hadamard, Jacques} and Charles de la Vall\'{e}e Poussin.\index{de la Vall\'{e}e Poussin, Charles}
2030

2031

2032
   A milestone in the history leading up to the proof of the Prime Number\index{prime number}
2033
   Theorem is the earlier work of Pafnuty Lvovich  Chebyshev\index{Chebyshev, Pafnuty Lvovich} (see
2034
   \url{http://en.wikipedia.org/wiki/Chebyshev_function})\index{Chebyshev, Pafnuty Lvovich} showing that
2035
   (to use the terminology we introduced) $X/\log(X)$ and $\pi(X)$ go
2036
   to infinity at similar rates.
2037

2038

2039
The elusive \RH{}, however, is much deeper than the Prime
2040
Number Theorem, and takes its origin from some awe-inspiring,
2041
difficult to interpret, lines in Bernhard Riemann's\index{Riemann, Bernhard} magnificent 8-page
2042
paper, ``On the number of primes less than a given magnitude,''
2043
published in 1859
2044
(see \bibnote{See \url{http://www.maths.tcd.ie/pub/HistMath/People/Riemann/Zeta/}
2045
for the original German version and an English translation.}).
2046

2047

2048
\ill{riemann_zoom}{1}{From a manuscript of Riemann's 1859 paper written by a contem- porary of Riemann (possibly the mathematician Alfred Clebsch). \label{fig:riemamn}}
2049

2050
Riemann's hypothesis, as it is currently interpreted, turns up as
2051
relevant, as a key, again and again in different parts of the subject:
2052
if you accept it as {\em hypothesis} you have an immensely powerful
2053
tool at your disposal: a mathematical magnifying glass that sharpens
2054
our focus on number theory. But it also has a wonderful protean
2055
quality---there are many ways of formulating it, any of these
2056
formulations being provably equivalent to any of the others.
2057

2058
\ill{riemann}{.3}{Bernhard Riemann (1826--1866)\index{Riemann, Bernhard}}
2059

2060

2061
The \RH{} remains unproved to this day, and therefore is ``only a
2062
hypothesis,'' as Osiander said of Copernicus's theory, but one for
2063
which we have overwhelming theoretical and numerical evidence in its
2064
support.  It is the kind of conjecture that contemporary Dutch
2065
mathematician Frans Oort might label a {\em suffusing conjecture} in
2066
that it has unusually broad implications: many, many results are now
2067
known to follow, if the conjecture, familiarly known as RH, is true.
2068
A proof of RH would, therefore, fall into the {\em applied} category,
2069
given our discussion above in Chapter~\ref{ch:pureapplied}.  But
2070
however you classify RH, it is a central concern in mathematics to
2071
find its proof (or, a counter-example!).  RH is one of the weightiest
2072
statements in all of mathematics.
2073

2074

2075
\chapter[The staircase of primes]{The {\em information} contained in the staircase of primes\label{sec:information}}
2076

2077

2078

2079
We have borrowed the phrase ``staircase of primes'' from the popular
2080
book {\em The Music of the Primes} by Marcus du Sautoy, for we feel that
2081
it captures the sense that there is a deeply hidden architecture to
2082
the graphs that compile the number of primes (up to $N$) and also
2083
because---in a bit---we will be tinkering with this carpentry.  Before
2084
we do so, though, let us review in Figure~\ref{fig:staircases}
2085
what this staircase looks like for different ranges.
2086

2087
\begin{figure}[H]
2088
\centering
2089
\includegraphics[width=.4\textwidth]{illustrations/PN_25}
2090
\includegraphics[width=.4\textwidth]{illustrations/PN_100}\\
2091

2092
\includegraphics[width=.4\textwidth]{illustrations/PN_1000}
2093
\includegraphics[width=.4\textwidth]{illustrations/PN_10000}\\
2094

2095
\includegraphics[width=.8\textwidth]{illustrations/PN_100000}
2096

2097

2098
\caption{The Staircase of Primes\label{fig:staircases}}
2099
\end{figure}
2100

2101
The mystery of this staircase is that the {\em information} contained
2102
within it is---in effect---the full story of where the primes are
2103
placed. This story seems to elude any simple description.  Can we
2104
``tinker with'' this staircase without destroying this valuable
2105
information?
2106

2107

2108

2109

2110
\chapter[Tinkering with the staircase of primes]{Tinkering with the carpentry of the staircase of primes\label{sec:tinkering}}
2111

2112

2113
For starters, notice that all the (vertical) {\em risers} of this staircase (Figure~\ref{fig:staircases} above) have
2114
unit height. That is, they contain no numerical information except for
2115
their placement on the $x$-axis. So, we could distort our staircase by
2116
changing (in any way we please) the height of each riser; and as long
2117
as we haven't brought new risers into---or old risers out
2118
of---existence, and have not modified their position over the
2119
$x$-axis, we have retained all the information of our original
2120
staircase.
2121

2122

2123
A more drastic-sounding thing we could do is to judiciously add new
2124
steps to our staircase. At present, we have a step at each prime
2125
number $p$, and no step anywhere else. Suppose we built a staircase
2126
with a new step not only at $x=p$ for $p$ each prime number\index{prime number} but also at
2127
$x =1$ and $x=p^n$ where $p^n$ runs through all powers of prime numbers as
2128
well. Such a staircase would have, indeed, many more steps than our
2129
original staircase had, but, nevertheless, would retain much of the
2130
quality of the old staircase: namely it contains within it the full
2131
story of the placement of primes {\em and their powers}.
2132

2133
A final thing we can do is to perform a distortion of the $x$-axis
2134
(elongating or shortening it, as we wish) in any specific way, as long
2135
as we can perform the inverse process, and ``undistort'' it if we wish.
2136
Clearly such an operation may have mangled the staircase, but hasn't destroyed
2137
information irretrievably.
2138

2139
We shall perform all three of these kinds of operations eventually,
2140
and will see some great surprises as a result.  But for now, we will
2141
perform distortions only of the first two types.  We are about to
2142
build a new staircase that retains the precious information we need,
2143
but is constructed according to the following architectural plan.
2144

2145
 \begin{itemize}
2146

2147
 \item We first build a staircase that has a new step precisely at $x
2148
   =1$, and $ x= p^n$ for every {\em prime power} $p^n$ with $n\geq
2149
   1$. That is, there will be a new step at $x= 1,2,3,4,5,7,8,9,11,
2150
   \dots$
2151

2152
 \item Our staircase starts on the ground at $x=0$ and the height of the
2153
   riser of the step at $x=1$ will be $\log(2\pi)$. The height of the
2154
   riser of the step at $x=p^n$ will not be $1$
2155
   (as was the height of all risers in the old staircase of primes)
2156
   but rather: the step at $x=p^n$ will have the height of its riser
2157
   equal to $\log p$.  So for the first few steps listed in the
2158
   previous item, the risers will be of height $\log(2\pi), \log
2159
   2,\log 3,\log 2,\log 5,\log 7, \log 2,\log 3,\log 11, \dots$
2160
   Since $\log(p)>1$,
2161
  these vertical dimensions lead to a steeper ascent but no great loss
2162
  of {\em information}.
2163

2164
   Although we are not quite done with our architectural work, Figure~\ref{fig:psi} shows
2165
   what our new staircase looks like, so far.
2166

2167
\illtwoh{psi_9}{psi_100}{.27}{The newly constructed staircase that counts prime powers\label{fig:psi}}
2168

2169

2170
  \end{itemize}
2171

2172
  Notice that this new staircase looks, from afar, as if it were
2173
  nicely approximated by the $45$ degree straight line, i.e., by the
2174
  simple function $X$. In fact, we have---by this new
2175
  architecture---a second {\em equivalent} way of formulating
2176
  Riemann's hypothesis.   For this, let $\psi(X)$ denote the
2177
  function
2178
  of $X$ whose graph is
2179
depicted in Figure~\ref{fig:psi}
2180
(see \bibnote{We have $$\psi(X) =\sum_{p^n \le X}\log p$$ where the summation is over prime
2181
powers $p^n$ that are $\le X$.}).
2182

2183
          \begin{center}
2184
       \shadowbox{ \begin{minipage}{0.9\textwidth}
2185
\mbox{}       \vspace{0.2ex}
2186
       \begin{center}{\bf\large The {\bf \RH{}} (second formulation)}\end{center}
2187
       \medskip
2188

2189
       This new staircase is essentially square root close to the 45 degree
2190
straight line; i.e., the function $\psi(X)$ is essentially square root
2191
close to the function $f(X)=X$. See Figure~\ref{fig:psi_diag_1000}.
2192
          \vspace{1ex}
2193
   \end{minipage}}
2194
\end{center}
2195

2196
\ill{psi_diag_1000}{0.75}{The newly constructed staircase is close to the 45 degree line.\label{fig:psi_diag_1000}}
2197

2198
Do not worry if you do not understand why our first and second
2199
formulations of Riemann's Hypothesis are equivalent. Our aim, in
2200
offering the second formulation---a way of phrasing Riemann's\index{Riemann, Bernhard} guess
2201
that mathematicians know to be equivalent to the first one---is to
2202
celebrate the variety of equivalent ways we have to express Riemann's
2203
\index{Riemann, Bernhard} proposed answers to the question ``How many primes are there?'', and to
2204
point out that some formulations would reveal a startling
2205
simplicity---not immediately apparent---to the behavior of prime
2206
numbers, no matter how erratic primes initially appear to us to
2207
be. After all, what could be simpler than a 45 degree straight line?
2208

2209

2210

2211

2212
\chapter[Computer music files and prime numbers]{What do  computer music files,
2213
data compression, and prime numbers have to do with each other?\label{sec:fourier1}}\index{prime number}
2214

2215

2216

2217

2218
Sounds of all sorts---and in particular the sounds of music---travel
2219
as vibrations of air molecules at roughly 768 miles per hour. These
2220
vibrations---fluctuations of pressure---are often represented, or
2221
``pictured,'' by a graph where the horizontal axis corresponds to
2222
time, and the vertical axis corresponds to pressure at that time.  The
2223
very purest of sounds---a single sustained note---would look
2224
something like this (called a ``sine wave'') when pictured (see
2225
Figure~\ref{fig:sine}), so that if you fixed your position somewhere
2226
and measured air pressure due to this sound at that position, the
2227
peaks correspond to the times when the varying air pressure is maximal
2228
or minimal and the zeroes to the times when it is normal pressure.
2229

2230
\ill{sin}{.7}{Graph of a sine wave\label{fig:sine}}
2231

2232
You'll notice that there are two features to the graph in Figure~\ref{fig:sine}.
2233

2234
\begin{enumerate}
2235
\item {\em The height of the peaks of this sine wave:} This height is
2236
  referred to as the {\bf amplitude} and corresponds to the {\em
2237
    loudness} of the sound.
2238
\item {\em The number of peaks per second:} This number is referred to
2239
  as the {\bf frequency} and corresponds to the {\em pitch} of the
2240
  sound.
2241
\end{enumerate}
2242

2243

2244
Of course, music is rarely---perhaps never---just given by a single
2245
pure sustained note and nothing else. A next most simple example of a
2246
sound would be a simple chord (say a C and an E played together on
2247
some electronic instrument that could approximate pure notes). Its
2248
graph would be just the {\em sum} of the graphs of each of the pure
2249
notes (see Figures~\ref{fig:sinetwofreq} and \ref{fig:sinetwofreqsum}).
2250

2251

2252
\ill{sin-twofreq}{0.6}{Graph of two sine waves with different frequencies\label{fig:sinetwofreq}}
2253

2254
\ill{sin-twofreq-sum}{.6}{Graph of sum of the two sine waves with different frequencies\label{fig:sinetwofreqsum}}
2255

2256
So the picture of the changing frequencies of this chord would be
2257
already a pretty complicated configuration.  What we have described in
2258
these graphs are two sine waves (our C and our E) when they are played
2259
{\em in phase} (meaning they start at the same time) but we could
2260
also ``delay'' the onset of the E note and play them with some
2261
different phase relationship, for example, as illustrated
2262
in Figures~\ref{fig:sin-twofreq-phase} and \ref{fig:sum-sin-phase}.
2263

2264
\ill{sin-twofreq-phase}{.6}{\label{fig:sin-twofreq-phase}Graph of two ``sine'' waves with different phases.}
2265

2266
\ill{sin-twofreq-phase-sum}{.6}{Graph of the sum of the two ``sine'' waves with different frequencies
2267
and phases.\label{fig:sum-sin-phase}}
2268

2269

2270
  So, {\em all you need} to reconstruct the chord graphed above is to
2271
  know five numbers:
2272
  \begin{itemize}
2273
  \item the two frequencies---the collection of frequencies that make
2274
    up the sound is called the {\em spectrum}\index{spectrum} of the sound,
2275
  \item the two {\em amplitudes} of each of these two frequencies,
2276
  \item the {\em phase} between them.
2277

2278
 \end{itemize}
2279

2280
 Now suppose you came across such a sound as pictured in
2281
 Figure~\ref{fig:sum-sin-phase} and wanted to ``record it.''  Well,
2282
 one way would be to sample the amplitude of the sound at many
2283
 different times, as for example in Figure~\ref{fig:sum-sin-phase-sample}.
2284

2285
\ill{sin-twofreq-phase-sum-points}{.6}{Graph of sampling of a sound wave\label{fig:sum-sin-phase-sample}}
2286

2287
Then, fill in the rest of the points to obtain Figure~\ref{fig:sum-sin-phase-sample-fill}.
2288

2289
\ill{sin-twofreq-phase-sum-fill}{0.6}{Graph obtained from Figure~\ref{fig:sum-sin-phase-sample} by
2290
filling in the rest of the points\label{fig:sum-sin-phase-sample-fill}}
2291

2292
But this sampling would take an enormous amount of storage space, at
2293
least compared to storing five numbers, as explained above!
2294
Current audio compact discs do their sampling 44,100 times a second to
2295
get a reasonable quality of sound.
2296

2297
Another way is to simply record the {\em five} numbers: the {\em
2298
  spectrum\index{spectrum}, amplitudes,} and {\em phase}.  Surprisingly, this seems to
2299
be roughly the way our ear processes such a sound when we hear it.\footnote{%
2300
We recommend downloading Dave Benson's marvelous book
2301
{\em Music: A Mathematical Offering} from
2302
\url{https://homepages.abdn.ac.uk/mth192/pages/html/maths-music.html}.
2303
This is free, and gives a beautiful account of the superb
2304
mechanism of hearing, and of the mathematics of music.}
2305

2306
  Even in this simplest of examples (our pure chord: the pure note C
2307
  played simultaneously with pure note E) the {\em efficiency of data
2308
    compression} that is the immediate bonus of analyzing the picture
2309
  of the chords as built {\em just} with the five numbers giving {\em
2310
    spectrum\index{spectrum}, amplitudes,} and {\em phase} is staggering.
2311

2312
\ill{fourier}{0.3}{Jean Baptiste Joseph Fourier (1768--1830)}
2313

2314
This type of analysis, in general, is called {\em Fourier Analysis}
2315
and is one of the glorious chapters of mathematics.  One way of
2316
picturing {\em spectrum}\index{spectrum} and {\em amplitudes} of a sound is by a bar
2317
graph which might be called the {\em spectral picture} of the sound,
2318
the horizontal axis depicting frequency and the vertical one depicting
2319
amplitude: the height of a bar at any frequency is proportional to the
2320
amplitude of that frequency ``in'' the sound.
2321

2322
So our CE chord would have the spectral picture in
2323
Figure~\ref{fig:ce-spectral}.
2324

2325

2326
\illtwo{sound-ce-general_sum}{sound-ce-general_sum-blips}{.45}
2327
       {Spectral picture of a CE chord\label{fig:ce-spectral}}
2328

2329

2330
This spectral picture ignores the phase but is nevertheless a very
2331
good portrait of the sound.  The spectral picture of a graph gets us
2332
to think of that graph as ``built up by the superposition of a bunch
2333
of pure waves,'' and if the graph is complicated enough we may very well
2334
need {\em infinitely} many pure waves to build it up!  Fourier analysis\index{Fourier analysis} is a
2335
mathematical theory that allows us to start with any graph---we are
2336
thinking here of graphs that picture sounds, but any graph will
2337
do---and actually compute its spectral picture (and even keep track of
2338
phases).
2339

2340

2341
The operation that starts with a graph and goes to its spectral
2342
picture that records the frequencies, amplitudes, and phases of the
2343
pure sine waves that, together, compose the graph is called the {\em
2344
  Fourier transform} and nowadays there are very fast procedures for
2345
getting accurate {\em Fourier transforms} (meaning accurate spectral
2346
pictures including information about phases) by
2347
computer \bibnote{See  \url{http://en.wikipedia.org/wiki/Fast_Fourier_transform}}.
2348

2349

2350
%If you pass to the worksheet at this point, you'll be able to see the
2351
%Fourier transforms of many different sounds, and experiment for
2352
%yourself.
2353
The theory behind this operation (Fourier transform giving
2354
us a spectral analysis of a graph) is quite beautiful, but equally
2355
impressive is how---given the power of modern computation---you can
2356
immediately perform this operation for yourself to get a sense of how
2357
different wave-sounds can be constructed from the superposition of
2358
pure tones.
2359

2360
The {\em sawtooth} wave in Figure~\ref{fig:sawtooth} has a spectral picture, its Fourier transform, given in Figure~\ref{fig:sawtooth-spectrum}:
2361

2362
   \ill{sawtooth}{.7}{Graph of sawtooth wave\label{fig:sawtooth}}
2363
   \ill{sawtooth-spectrum}{.7}{The Spectrum\index{spectrum} of the sawtooth wave has a spike of height $1/k$ at
2364
each integer $k$\label{fig:sawtooth-spectrum}}
2365

2366

2367

2368
 \ill{complicated-wave}{.7}{A Complicated sound wave\label{fig:complicated-wave}}
2369

2370
Suppose you have a complicated sound wave, say as in
2371
Figure~\ref{fig:complicated-wave}, and you want to record it.
2372
Standard audio CDs record their data by intensive sampling as we
2373
mentioned. In contrast, current MP3 audio compression technology uses
2374
Fourier transforms plus sophisticated algorithms based on
2375
knowledge of which frequencies the human ear can hear.
2376
With this, MP3 technology manages to
2377
get a compression factor of 8--12 with little {\em perceived} loss in
2378
quality, so that you can fit your entire music collection on your
2379
phone, instead of just a few of your favorite CDs.
2380

2381
\chapter{The word ``Spectrum"}
2382

2383
% TODO: this is from wikipedia -- requires attribution.
2384
\begin{wrapfigure}{r}{0.2\textwidth}
2385
    \includegraphics[width=0.2\textwidth]{illustrations/rainbow}
2386
\end{wrapfigure}
2387

2388
  It is worth noting how often this word appears in scientific literature, with an array of different uses and meanings.   It  comes from Latin where its meaning is ``image," or ``appearance," related to the verb meaning {\it to look} (the older form being  {\it specere} and the later form {\it spectare}).  In most of its meanings, nowadays, it has to do with some procedure, an analysis, allowing one to see clearly the  constituent parts of something to be analyzed, these constituent parts often organized in some continuous scale, such as in the discussion of the previous chapter.
2389

2390
  The Oxford Dictionary lists, as one of its many uses:
2391

2392
\begin{quote} Used to classify something, or suggest that it can be classified, in terms of its position on a scale between two extreme or opposite points.\end{quote}
2393

2394
This works well for the color spectrum\index{spectrum}, as initiated by Newton (as in the figure above, sunlight is separated by a prism into a rainbow  continuum of colors): an analysis of white light into its components. Or in mass spectrometry, where
2395
 beams of ions are separated (analyzed) according to their
2396
mass/charge ratio and the mass spectrum\index{spectrum} is recorded on a photographic plate or film.  Or in the recording of the various component frequencies, with their  corresponding intensities of some audible phenomenon.
2397

2398
 In mathematics the word  has found its use in many different fields, the most basic use occurring in Fourier analysis\index{Fourier analysis}, which has as its goal the aim of either {\it analyzing}  a function $f(t)$ as being  comprised of simpler (specifically: sine and cosine) functions, or {\it synthesizing} such a function by combining simpler functions to produce it. The understanding here and in the previous chapter, is that  an analysis of $f(t)$ as built up of simpler functions is meant to provide a significantly clearer image of the constitution of $f(t)$.  If, to take a very particular example, the simpler functions  that are needed for the synthesis of $f(t)$ are of the form $a\cos(\theta t)$  (for $a$ some real number which is the {\it amplitude} or size of the peaks of this periodic function)---and  if $f(t)$ is given as the limit of $$a_1\cos(\theta_1t) +  a_2\cos(\theta_2t) + a_3cos(\theta_3t) \dots$$  for a sequence of real numbers $\theta_1, \theta_2,
2399
 \theta_3,
2400
 \dots$  (i.e., these simpler functions are functions of {\it periods} ${\frac{2\pi}{\theta_1}}, {\frac{2\pi}{\theta_2}}, {\frac{2\pi}{\theta_3}}, \dots$) it is natural to call these $\theta_i$ the  {\bf spectrum}\index{spectrum} of $f(t)$.
2401
   This will eventually show up in our discussion  below of trigonometric sums and the {\it Riemann spectrum}.
2402

2403

2404
 \chapter{Spectra and trigonometric sums \label{sec:trigsums}}
2405

2406
As we saw in Chapter~\ref{sec:fourier1}, a pure tone can be represented by a periodic {\it sine wave}---a function of  time $f(t)$---the equation of which might be:
2407
$$f(t)\ \ = \ \ a\cdot \cos(b +\theta t).$$
2408

2409
\ill{pure_tone}{.7}{Plot of the periodic sine wave $f(t) = 2\cdot \cos(1+t/2)$}
2410

2411
The $\theta$ determines the {\it frequency} of the periodic wave, the
2412
larger $\theta$ is the higher the ``pitch.''  The coefficient $a$
2413
determines the envelope of size of the periodic wave, and we call it
2414
the {\it amplitude} of the periodic wave.
2415

2416
Sometimes we encounter functions $F(t)$ that are not pure tones, but
2417
that can be expressed as (or we might say ``decomposed into'') a finite
2418
sum of pure tones, for example three of them:
2419

2420
$$F(t)  = a_1\cdot \cos(b_1 +\theta_1 t) + a_2\cdot \cos(b_2 +\theta_2 t) + a_3\cdot \cos(b_3 +\theta_3 t)$$
2421

2422
\ill{mixed_tone3}{.7}{Plot of the sum $5  \cos\left(-t - 2\right) + 2 \cos\left(t/2 + 1\right) + 3  \cos\left(2  t + 4\right)$\label{fig:mixed_tone3}}
2423

2424
We refer to such functions $F(t)$ as in Figure~\ref{fig:mixed_tone3}
2425
as {\it finite trigonometric
2426
  sums}, because---well---they are.  In this example, there are three
2427
frequencies involved---i.e., $\theta_1,\theta_2,\theta_3$---and we
2428
say that {\it the spectrum\index{spectrum} of $F(t)$} is the set of these frequencies,
2429
i.e.,
2430

2431
$$
2432
  {\rm The\ spectrum \ of \ } F(t) \ \ = \ \ \{\theta_1,\theta_2,\theta_3\}.
2433
$$
2434

2435
More generally we might consider a sum of any finite number of pure
2436
cosine waves---or in a moment we'll also see some infinite ones as
2437
well. Again, for these more general trigonometric sums, their {\it
2438
  spectrum}\index{spectrum} will denote the set of frequencies that compose them.
2439

2440
\chapter{The spectrum and the staircase of primes\label{sec:fourier_staircase}}
2441

2442
\ill{prime_pi_100_aspect1}{0.95}{The Staircase of primes\label{fig:staircase100}}
2443

2444

2445
In view of the amazing data-compression virtues of Fourier analysis\index{Fourier analysis},
2446
it isn't unnatural to ask these questions:
2447

2448
\begin{itemize}
2449
\item Is there a way of using Fourier analysis\index{Fourier analysis} to better understand
2450
  the complicated picture of the staircase of primes?
2451

2452
\item Does this staircase of primes (or, perhaps, some tinkered
2453
  version of the staircase that contains the same basic information)
2454
  have a {\em spectrum}\index{spectrum}?
2455

2456
\item If such a {\em spectrum}\index{spectrum} exists, can we compute it conveniently,
2457
  just as we have done for the saw-tooth wave above, or for the major
2458
  third CE chord?
2459

2460

2461
\item Assuming the spectrum\index{spectrum} exists, and is computable, will our
2462
  understanding of this spectrum\index{spectrum} allow us to reproduce all the
2463
  pertinent information about the placement of primes among all whole
2464
  numbers, elegantly and faithfully?
2465

2466
\item And here is a most important question: will that spectrum\index{spectrum} show
2467
  us order and organization lurking within the staircase that we would
2468
  otherwise be blind to?
2469

2470
\end{itemize}
2471

2472

2473
Strangely enough, it is towards questions like these that Riemann's
2474
Hypothesis takes us. We began with the simple question about primes:
2475
how to count them, and are led to ask for profound, and hidden,
2476
regularities in structure.
2477

2478
\chapter{To our readers of Part~\ref{part1}}
2479
The statement of the \RH{}---admittedly as elusive as
2480
before---has, at least, been expressed elegantly and more simply,
2481
given our new staircase that approximates (conjecturally with {\em
2482
  essential square root accuracy}) a 45 degree straight line.
2483

2484
We have offered two equivalent formulations of the \RH{},
2485
both having to do with the manner in which the prime numbers\index{prime number} are
2486
situated among all whole numbers.
2487

2488
In doing this, we hope that we have convinced you that---in the words
2489
of Don Zagier---primes seem to obey no other law than that of chance
2490
and yet exhibit stunning regularity.  This is the end of Part~\ref{part1} of our
2491
book, and is largely the end of our main mission, to explain---in
2492
elementary terms---{\em what is Riemann's Hypothesis?}
2493

2494

2495
For readers who have at some point studied differential calculus,\index{calculus} in
2496
Part~\ref{part2} we shall discuss Fourier analysis\index{Fourier analysis}, a fundamental tool that will be used in Part~\ref{part3}
2497
where we show how
2498
Riemann's hypothesis provides a key to some deeper structure of the
2499
prime numbers\index{prime number}, and to the nature of the laws that they obey. We will---if not explain---at least
2500
hint at how the above series of questions have been answered so far,
2501
and how the \RH{} offers a surprise for the last question in this
2502
series.
2503

2504

2505

2506
%\chapter{To our readers of Part~\ref{part1}}
2507
%The statement of the \RH{}---admittedly as elusive as
2508
%before---has, at least, been expressed elegantly and more simply,
2509
%given our new staircase that approximates (conjecturally with {\em
2510
 % essential square root accuracy}) a 45 degree straight line.
2511

2512
%We have offered two equivalent formulations of the \RH{},
2513
%both having to do with the manner in which the prime numbers are
2514
%situated among all whole numbers.
2515

2516
%In doing this, we hope that we have convinced you that---in the words
2517
%of Don Zagier---primes seem to obey no other law than that of chance
2518
%and yet exhibit stunning regularity.  This is the end of Part~\ref{part1} of our
2519
%book, and is largely the end of our main mission, to explain---in
2520
%elementary terms---{\em what is Riemann's Hypothesis?}
2521

2522

2523
%For readers who have at some point studied Differential calculus,\index{calculus} in
2524
%Part~\ref{part2} we shall
2525
%go further and explain how the pursuit of
2526
%Riemann's hypothesis may provide a key to some deeper structure of the
2527
%prime numbers, and to the nature of the laws that they obey.
2528

2529

2530
\part{Distribution\index{distribution}s\label{part2}}
2531

2532

2533
\chapter[Slopes of graphs that have no slopes]{How Calculus\index{calculus} manages to
2534
  find the slopes of graphs that have no slopes}\label{ch:calculusmanages}
2535

2536
Differential calculus,\index{calculus} initially the creation of Newton and Leibniz
2537
in the 1680s, acquaints us with {\it slopes} of graphs of functions of
2538
a real variable.  So, to discuss this we should say a word about what
2539
a {\it function} is, and what its {\it graph} is.
2540

2541
 \illtwo{newton}{leibniz}{0.25}{Isaac Newton and Gottfried Leibniz created calculus\index{calculus}}
2542

2543

2544
 A {\bf function} (let us refer to it in this discussion as $f$) is
2545
 often described as a {\it kind of machine} that for any specific
2546
 input numerical value $a$ will give, as output, a well-defined
2547
 numerical value.
2548

2549
 This ``output number'' is denoted $f(a)$ and is called {\it the
2550
   ``value'' of the function $f$ at $a$}.  For example, the {\it
2551
   machine that adds $1$ to any number} can be thought of as the
2552
 function $f$ whose value at any $a$ is given by the equation $f(a) =
2553
 a+1$.  Often we choose a letter---say, $X$---to stand for a ``general
2554
 number'' and we denote the function $f$ by the symbol $f(X)$ so that
2555
 this symbolization allows to ``substitute for $X$ any specific number
2556
 $a$'' to get its value $f(a)$.
2557

2558
 The {\bf graph} of a function provides a vivid visual representation of the
2559
 function in the Euclidean\index{Euclid} plane where over every point $a$ on the
2560
 $x$-axis you plot a point above it of ``height'' equal to the value of
2561
 the function at $a$, i.e., $f(a)$. In Cartesian coordinates, then,
2562
 you are plotting points $(a, f(a))$ in the plane where $a$ runs
2563
 through all real numbers.
2564

2565
 \ill{graph_aplusone}{0.6}{Graph of the function $f(a)=a+1$\label{fig:graph_aplusone}}
2566

2567
 In this book we will very often be talking about ``graphs'' when we
2568
 are also specifically interested in the functions---of which they are
2569
 the graphs. We will use these words almost synonymously since we like
2570
 to adopt a very visual attitude towards the behavior of the functions
2571
 that interest us.
2572

2573

2574
 \ill{graph_slope_deriv}{1}{\label{fig:graph_slope_deriv}Calculus\index{calculus}}
2575

2576

2577

2578
 Figure~\ref{fig:graph_slope_deriv} illustrates a function (blue), the
2579
 slope at a point (green straight line), and the derivative (red) of
2580
 the function; the red derivative is the function whose value at a point
2581
 is the slope of the blue function at that point.  Differential
2582
 calculus\index{calculus} explains to us how to calculate slopes of graphs, and
2583
 finally, shows us the power that we then have to answer problems we
2584
 could not answer if we couldn't compute those slopes.
2585

2586
Usually, in elementary calculus\index{calculus} classes we are called upon to compute
2587
slopes only of smooth graphs, i.e., graphs that actually {\em have}
2588
slopes at each of their points, such as in the illustration just
2589
above.  What could calculus\index{calculus} possibly do if confronted with a graph
2590
that has {\em jumps}, such as in Figure~\ref{fig:jump}:
2591
$$f(x) = \begin{cases}1 & x \leq 3\\ 2 & x > 3?\end{cases}$$
2592
(Note that for purely aesthetic reasons, we draw a vertical line at the point where the jump occurs, though technically that vertical line is not part of the graph of the function.)
2593

2594
\ill{jump}{0.5}{The graph of the function $f(x)$ above that jumps---it is $1$ up to $3$ and then $2$ after that point.\label{fig:jump}}
2595

2596

2597
The most comfortable way to deal with the graph of such a function is
2598
to just approximate it by a differentiable function as in
2599
Figure~\ref{fig:jumpsmooth}. % Note that the function in
2600
%Figure~\ref{fig:jumpsmooth} is not smooth, since the
2601
%derivative is continuous but not differentiable; in our
2602
%discussion all
2603
%we will need is that our approximating function is once
2604
%differentiable.
2605

2606

2607
\ill{jump-smooth}{0.5}{A picture of a smooth graph approximating the
2608
  graph that is $1$ up to some point $x$ and then $2$ after that
2609
  point, the smooth graph being flat mostly.\label{fig:jumpsmooth}}
2610

2611

2612
Then take the {\em derivative} of that smooth function.  Of course,
2613
this is just an approximation, so we might try to make a better
2614
approximation, which we do in each successive graph starting
2615
with Figure~\ref{fig:djump1} below.
2616

2617

2618
\ill{jump-smooth-deriv-7}{0.6}{A picture of the derivative of
2619
a smooth approximation to a function that jumps.\label{fig:djump1}}
2620

2621
Note that---as you would expect---in the range where the initial
2622
function is constant, its derivative is zero. In the subsequent
2623
figures, our initial function will be {\it nonconstant} for smaller
2624
and smaller intervals about the origin. Note also that, in our series
2625
of pictures below, we will be successively rescaling the $y$-axis; all
2626
our initial functions have the value $1$ for ``large'' negative numbers
2627
and the value $2$ for large positive numbers.
2628

2629
\ill{jump-smooth-deriv-2}{0.5}{Second picture of the derivative of
2630
a smooth approximation to a function that jumps.\label{fig:djump2}}
2631
\ill{jump-smooth-deriv-05}{0.5}{Third picture of the derivative of
2632
a smooth approximation to a function that jumps.\label{fig:djump3}}
2633
\ill{jump-smooth-deriv-01}{0.5}{Fourth picture of the derivative of
2634
a smooth approximation to a function that jumps.\label{fig:djump4}}
2635

2636

2637
Notice what is happening: as the approximation gets better and
2638
better, the derivative will be zero mostly, with a blip at the point
2639
of discontinuity, and the blip will get higher and higher.
2640
In each of these pictures,  for any interval of real numbers $[a,b]$ the total area under the red graph over that interval is equal to
2641
\begin{center}
2642
{\it the height of the blue graph at $x=b$}\\
2643
minus\\
2644
{\it  the height of the blue graph  at $x=a$}.
2645
\end{center}
2646
This is a manifestation of one of the fundamental facts of life of
2647
calculus\index{calculus} relating a function to its derivative:
2648

2649
\begin{quote} Given any real-valued function $F(x)$---that has a
2650
  derivative---for any interval of real numbers $[a,b]$ the total
2651
  signed\footnote{When $F(x)<0$ we count that area as negative.} area
2652
  between the graph and the horizontal axis
2653
  of the derivative of $F(x)$ over that interval is
2654
  equal to $F(b)-F(a)$.
2655
  \end{quote}
2656
    What happens if we take the series of figures \ref{fig:djump1}--\ref{fig:djump4}, etc.,
2657
    {\it to the limit}?  This is quite curious:
2658

2659
  \begin{itemize}
2660
  \item {\bf the series of red graphs:} these are getting thinner and
2661
    thinner and higher and higher: can we make any sense of what the
2662
    red graph might mean in the limit (even though the only picture of
2663
    it that we have at present makes it infinitely thin and infinitely
2664
    high)?
2665

2666
  \item {\bf the series of blue graphs:} these are happily looking
2667
    more and more like the tame Figure~\ref{fig:jump}.
2668
   \end{itemize}
2669

2670
   Each of our red graphs is the derivative of the corresponding blue
2671
   graph. It is tempting to think of the limit of the red
2672
   graphs---whatever we might construe this to be---as standing for
2673
   the derivative of the limit of the blue graphs, i.e., of the graph
2674
   in Figure~\ref{fig:jump}.
2675

2676
   Well, the temptation is so great that, in fact, mathematicians and
2677
   physicists of the early twentieth century struggled to give a
2678
   meaning to things like {\it the limit of the red graphs}---such
2679
   things were initially called {\bf generalized functions}, which
2680
   might be considered the derivative of {\it the limit of the blue
2681
     graphs}, i.e., of the graph of Figure~\ref{fig:jump}.
2682

2683

2684
   Of course, to achieve progress in mathematics, all the concepts
2685
   that play a role in the theory have to be unambiguously defined,
2686
   and it took a while before {\it generalized functions} such as the
2687
   limit of our series of red graphs had been rigorously introduced.
2688

2689
   But many of the great moments in the development of mathematics
2690
   occur when mathematicians---requiring some concept not yet
2691
   formalized---work with the concept tentatively, dismissing---if
2692
   need be---mental tortures, in hopes that the experience they
2693
   acquire by working with the concept will eventually help to put
2694
   that concept on sure footing. For example, early mathematicians
2695
   (Newton, Leibniz)|in replacing approximate speeds by instantaneous
2696
   velocities by passing to limits|had to wait a while before later
2697
   mathematicians (e.g., Weierstrass) gave a rigorous foundation for
2698
   what they were doing.
2699

2700

2701
 % From http://en.wikipedia.org/wiki/Karl_Weierstrass
2702

2703
% From http://en.wikipedia.org/wiki/Laurent_Schwartz
2704
 \illtwo{weierstrass}{schwartz}{.3}{Karl Weierstrass (1815--1897) and Laurent Schwartz (1915--2002)\index{Schwartz, Laurent}}
2705

2706
 Karl Weierstrass\index{Weierstrass, Karl}, who worked during the latter part of the nineteenth
2707
 century, was known as the ``father of modern analysis.'' He oversaw
2708
 one of the glorious moments of rigorization of concepts that were
2709
 long in use, but never before systematically organized.  He, and
2710
 other analysts of the time, were interested in providing a rigorous
2711
 language to talk about {\it functions}, and more specifically {\it
2712
   continuous functions} and {\it smooth} (i.e., {\it differentiable})
2713
 functions. They wished to have a firm understanding of limits (i.e.,
2714
 of sequences of numbers, or of functions).
2715

2716

2717
 For Weierstrass\index{Weierstrass, Karl} and his companions, even though the functions they
2718
 worked with needn't be smooth, or continuous, at the very least, the
2719
 functions they studied had {\it well-defined---and usually
2720
   finite---values}.  But our ``limit of red graphs'' is not so easily
2721
 formalized as the concepts that occupied the efforts of
2722
 Weierstrass.
2723

2724
 Happily however, this general process of approximating
2725
 discontinuous functions more and more exactly by smooth functions,
2726
 and taking their derivatives to get the blip-functions as we have
2727
 just seen in the red graphs above was eventually given a
2728
 mathematically rigorous foundation; notably, by the French
2729
 mathematician, Laurent Schwartz\index{Schwartz, Laurent} who provided a beautiful theory that
2730
 we will not go into here, that made perfect sense of ``generalized
2731
 functions'' such as our limit of the series of red graphs, and that
2732
 allows mathematicians to work with these concepts with ease. These
2733
 ``generalized functions'' are called {\it distributions}\index{distribution} in
2734
 Schwartz's\index{Schwartz, Laurent} theory \bibnote{
2735
   \url{http://en.wikipedia.org/wiki/Distribution_\%28mathematics\%29}
2736
   contains a more formal definition and treatment of distributions.  Here is Schwartz's explanation for his choice of the word {\it distribution:} \begin{quote}``Why did we choose the name distribution?
2737
   Because, if $\mu$ is a measure, i.e., a particular kind of
2738
   distribution, it can be considered as a distribution of electric
2739
   charges in the universe.  Distributions give more general types of
2740
   electric charges, for example dipoles and magnetic distributions.
2741
   If we consider the dipole placed at the point $a$ having magnetic
2742
   moment $M$, we easily see that it is defined by the distribution
2743
   $-D_M \delta_{(a)}$.  These objects occur in physics.  Deny's
2744
   thesis, which he defended shortly after, introduced electric
2745
   distributions of finite energy, the only ones which really occur in
2746
   practice; these objects really are distributions, and do not
2747
   correspond to measures.  Thus, distributions have two very
2748
   different aspects: they are a generalization of the notion of
2749
   function, and a generalization of the notion of distribution of
2750
   electric charges in space. [...] Both these interpretations of
2751
   distributions are currently used.''\end{quote}}.
2752

2753

2754

2755

2756
\chapter[Distributions]{Distributions:\index{distribution} sharpening our approximating functions even if
2757
  we have to let them shoot out to infinity\label{sec:dist}}
2758

2759

2760

2761
The curious {\it limit of the red graphs} of the previous section,
2762
which you might be tempted to think of as a ``blip-function'' $f(t)$ that
2763
vanishes for $t$ nonzero and is somehow ``infinite'' (whatever that
2764
means) at $0$ is an example of a {\it generalized function} (in the
2765
sense of the earlier mathematicians) or a {\it distribution}\index{distribution} in the
2766
sense of Laurent Schwartz.\index{Schwartz, Laurent}
2767

2768
This particular {\it limit of the red graphs} also goes by another
2769
name (it is officially called a
2770
Dirac $\delta$-function\index{$\delta$-function} (see \bibnote{
2771
David Mumford suggested that we offer the following paragraph from\\
2772
\url{http://en.wikipedia.org/wiki/Dirac_delta_function}\\
2773
on the Dirac  delta function:
2774
\begin{quote}
2775
An infinitesimal formula for an infinitely tall, unit impulse delta function (infinitesimal version of Cauchy distribution) explicitly appears in an 1827 text of Augustin Louis Cauchy. Sim\'{e}on Denis Poisson considered the issue in connection with the study of wave propagation as did Gustav Kirchhoff somewhat later. Kirchhoff and Hermann von Helmholtz also introduced the unit impulse as a limit of Gaussians, which also corresponded to Lord Kelvin's notion of a point heat source. At the end of the 19th century, Oliver Heaviside used formal Fourier series to manipulate the unit impulse. The Dirac delta function as such was introduced as a ``convenient notation'' by Paul Dirac in his influential 1930 book {\em Principles of Quantum Mechanics}. He called it the ``delta function'' since he used it as a continuous analogue of the discrete Kronecker delta.
2776
\end{quote}}),
2777
the adjective ``Dirac'' being in honor of the physicist who first worked with this
2778
concept, the ``$\delta$'' being the symbol he assigned to these
2779
objects). The noun ``function'' should be in quotation marks for,
2780
properly speaking, the Dirac $\delta$-function\index{$\delta$-function} is not---as we have
2781
explained above---a bona fide function but rather a distribution\index{distribution}.
2782

2783
      \ill{dirac}{0.3}{Paul\index{Dirac, Paul} Adrien Maurice Dirac (1902--1984)}
2784

2785

2786
      Now may be a good time to summarize what the major difference is
2787
      between {\it honest functions} and {\it generalized functions}
2788
      or {\it distribution\index{distribution}s}.
2789

2790
      An honest (by which we mean {\it integrable}) function of a real
2791
      variable $f(t)$ possesses two ``features.''
2792

2793

2794
\begin{itemize}
2795
\item {\bf It has values.}  That is, at any real number $t$, e.g., $t
2796
  =2$ or $t =0$ or $t =\pi$ etc., our function has a definite real
2797
  number value ($f(2)$ or $f(0)$ or $f(\pi)$ etc.) {\it and if we know
2798
    all those values we know the function.}
2799

2800
\item {\bf It has areas under its graph.} If we are given any interval
2801
  of real numbers, say the interval between $a$ and $b$, we can talk
2802
  unambiguously about the area ``under'' the graph of the function
2803
  $f(t)$ over the interval between $a$ and $b$.  That is, in the
2804
  terminology of integral calculus,\index{calculus} we can talk of {\it the integral of $f(t)$
2805
    from $a$ to $b$}.  And in the notation of calculus,\index{calculus} this---thanks
2806
  to Leibniz---is elegantly denoted
2807
$$\int_a^bf(t)dt.$$
2808

2809
\end{itemize}
2810

2811
   \ill{oo_integral}{.8}{This figure illustrates
2812
      $\int_{-\infty}^{\infty} f(x) dx$, which is the signed area
2813
      between the graph of $f(x)$ and the $x$-axis, where area below
2814
      the $x$-axis (yellow) counts negative, and area above (grey) is
2815
      positive.}
2816

2817

2818
In contrast, a {\it generalized function}  or  {\it distribution\index{distribution}}~\begin{itemize}
2819
\item {\bf may not have ``definite values''} at all real numbers if it
2820
  is not an honest function. Nevertheless,
2821

2822
\item {\bf It has well-defined areas under portions of its ``graph.''}
2823
  If we are given any interval of real numbers, say the (open)
2824
  interval between $a$ and $b$, we can still talk unambiguously about
2825
  the {\it area ``under'' the graph of the generalized function $D(t)$
2826
    over the interval between $a$ and~$b$} and we will denote
2827
  this--extending what we do in ordinary calculus---by\index{calculus} the symbol
2828
$$\int_a^bD(t)dt.$$
2829
\end{itemize}
2830

2831
This description is important to bear in mind and it gives us a handy
2832
way of thinking about ``generalized functions'' (i.e., distribution\index{distribution}s)
2833
as opposed to functions: when we consider an (integrable) function of
2834
a real variable, $f(t)$, we may invoke its {\it value} at every real
2835
number and for every interval $[a,b]$ we may consider the quantity
2836
$\int_a^bf(t)dt$. BUT when we are given a generalized function $D(t)$
2837
we {\it only} have at our disposal the latter quantities.  In fact, a
2838
generalized function of a real variable $D(t)$ is (formally) nothing
2839
more than a {\it rule} that assigns to any finite interval $(a,b]$ ($a
2840
\le b$) a quantity that we might denote $\int_a^bD(t)dt$ and that {\it
2841
  behaves as if it were the integral of a function} and in
2842
particular---for three real numbers $a\le b\le c$ we have the
2843
additivity relation
2844
 $$  \int_a^cD(t)dt \ = \ \int_a^bD(t)dt \ + \ \int_b^cD(t)dt.$$
2845

2846
 SO, any honest function integrable over finite intervals clearly {\it
2847
   is} a distribution\index{distribution} (forget about its values!) but $\dots$ there are
2848
 many more generalized functions, and including them in our sights
2849
 gives us a very important tool.
2850

2851

2852
 It is natural to talk, as well, of Cauchy sequences, and limits, of
2853
 distributions.\index{distribution} We'll say that such a sequence $D_1(t), D_2(t),
2854
 D_3(t),\dots$ is a {\bf Cauchy sequence} if for every interval
2855
 $[a,b]$ the quantities $$\int_a^bD_1(t)dt,\quad
2856
 \int_a^bD_2(t)dt,\quad\int_a^bD_3(t)dt,\dots$$ form a Cauchy sequence
2857
 of real numbers (so for any $\varepsilon>0$ eventually all terms
2858
 in the sequence of real numbers
2859
 are within $\varepsilon$ of each other).
2860
 Now, any Cauchy sequence of distributions\index{distribution} {\it
2861
   converges to a limiting distribution}\index{distribution} $D(t)$ which is defined by
2862
 the rule that for every interval $[a,b]$,
2863

2864
  $$ \int_a^bD(t)dt\ =\ \lim_{i \to \infty} \int_a^bD_i(t)dt.$$
2865

2866

2867
  If, by the way, you have an infinite sequence---say---of honest,
2868
  continuous, functions that converges uniformly to a limit (which
2869
  will again be a continuous function) then that sequence certainly
2870
  converges---in the above sense---to the same limit when these
2871
  functions are viewed as generalized functions. BUT, there are many
2872
  important occasions where your sequence of honest continuous
2873
  functions doesn't have that convergence property and {\it yet} when
2874
  these continuous functions are viewed as generalized functions they do converge to some
2875
  generalized function as a limit. We will see this soon when we get
2876
  back to the ``sequence of the red graphs.'' This sequence {\bf does}
2877
  converge (in the above sense) to the Dirac $\delta$-function\index{$\delta$-function} when
2878
  these red graphs are thought of as a sequence of generalized
2879
  functions.
2880

2881

2882

2883

2884
  The integral notation for distribution\index{distribution} is very useful, and allows us
2885
  the flexibility to define, for nice enough---and honest---functions
2886
  $c(t)$ useful expressions such as $$\int_a^bc(t)D(t).$$
2887

2888
 \ill{dirac_delta}{0.5}{The Dirac $\delta$-``function'' (actually
2889
   distribution),\index{distribution} where we draw a vertical arrow to illustrate the
2890
   delta function with support at a given point.}
2891

2892
 For example, the Dirac $\delta$-function\index{$\delta$-function} we have been discussing
2893
 (i.e., the limit of the red graphs of Chapter~\ref{ch:calculusmanages})\index{calculus} {\it is}
2894
 an honest function away from $t=3$ and---in fact---is the ``trivial
2895
 function'' zero away from $3$.  And at $3$, we may {\it say}
2896
 that it has the ``value'' infinity, in honor of it being the limit of
2897
 blip functions getting taller and taller at $3$. The feature that
2898
 pins it down as a distribution\index{distribution} is given by its behavior relative to
2899
 the second feature above, the area of its graph over the open
2900
 interval $(a,b)$ between $a$ and $b$:
2901

2902
\begin{itemize}
2903
\item If $3$ is
2904
  not in the open interval spanned by $a$ and $b$, then the ``area
2905
  under the graph of our Dirac $\delta$-function''\index{$\delta$-function} over
2906
  the interval $(a,b)$
2907
  is $0$.
2908
\item If $3$ is in the open interval $(a,b)$, then the ``area under the
2909
  graph of our Dirac $\delta$-function''\index{$\delta$-function} is $1$---in
2910
  notation $$\int_a^b\delta = 1.$$
2911
\end{itemize}
2912

2913

2914

2915

2916
We sometimes summarize the fact that these areas vanish so long as $3$
2917
is not included in the interval we are considering by saying
2918
that the {\bf support} of this $\delta$-function\index{$\delta$-function} is ``at $3$.''
2919

2920

2921
Once you're happy with {\it this} Dirac $\delta$-function,\index{$\delta$-function} you'll also
2922
be happy with a Dirac $\delta$-function---call\index{$\delta$-function} it $\delta_x$---with
2923
support concentrated at any specific real number~$x$.
2924
This $\delta_x$ vanishes for $t \ne x$ and intuitively speaking, has an
2925
{\it infinite blip} at $t=x$.
2926

2927
So, the original delta-function we were discussing, i.e., $\delta(t)$
2928
would be denoted $\delta_3(t)$.
2929

2930
 \noindent {\bf A question:} If you've never seen distribution\index{distribution}s
2931
 before, but know the Riemann integral, can you guess at what the
2932
 definition of $\int_a^bc(t)D(t)$ is, and can you formulate hypotheses
2933
 on $c(t)$ that would allow you to endow this expression with a
2934
 definite meaning?
2935

2936

2937
\noindent {\bf A second question:} If you have not seen distribution\index{distribution}s
2938
before, and have answered the first question above, let
2939
$c(t)$ be an honest function for which your definition
2940
of $$\int_a^bc(t)D(t)$$ applies. Now let $x$ be a real number.
2941
 Can you use your definition to compute
2942

2943
 $$\int_{-{\infty}}^{+{\infty}}c(t)\delta_x(t)?$$
2944

2945
 The answer to this second question, by the way, is:
2946
 $\int_{-{\infty}}^{+{\infty}}c(t)\delta_x(t)=c(x).$ This will be useful in the later sections!
2947

2948

2949
The theory of distributions\index{distribution} gives a partial answer to the following funny question:
2950

2951

2952
\begin{quote} How in the world can you ``take the derivative'' of a
2953
  function $F(t)$ that doesn't have a derivative?
2954
\end{quote}
2955

2956
The short answer to this question is that {\it this derivative $F'(t)$
2957
  which doesn't exist as a function may exist as a distribution\index{distribution}.}
2958
What then is the integral of that distribution?\index{distribution} Well, it is given by
2959
the original function!
2960

2961
$$\int_a^bF'(t)dt \ = \ F(b) -F(a).$$
2962

2963
Let us practice this with simple staircase functions. For example,
2964
what is the {\it derivative}---in the sense of the theory of
2965
distributions---of\index{distribution} the function in Figure~\ref{fig:simple_staircase}?
2966
{\bf Answer:} $\delta_0 + 2 \delta_1$.
2967

2968

2969
\ill{simple_staircase}{.6}{The staircase function that is $0$ for $t
2970
  \le 0$, $1$ for $0 <t \le 1$ and $3$ for $1< t \le 2$ has derivative
2971
  $\delta_0 + 2\delta_1$.\label{fig:simple_staircase}}
2972

2973

2974
We'll be dealing with much more complicated staircase functions in the
2975
next chapter, but the general principles discussed here will nicely
2976
apply there \bibnote{As discussed in\\ \url{http://en.wikipedia.org/wiki/Distribution_\%28mathematics\%29},
2977
``generalized functions'' were introduced by Sergei
2978
Sobolev in the 1930s, then later
2979
independently introduced in the late
2980
1940s by Laurent Schwartz,  who developed a comprehensive theory of
2981
distributions.
2982
}.
2983

2984

2985
\chapter{Fourier transforms: second visit}
2986

2987
In Chapter~\ref{sec:fourier1} above we wrote:
2988

2989
\begin{quote} The operation that starts with a graph and goes to its
2990
  spectral picture that records the frequencies, amplitudes, and
2991
  phases of the pure sine waves that, together, compose the graph is
2992
  called the {\bf Fourier transform}.
2993
\end{quote}
2994

2995

2996
  Now let's take a closer look at this operation {\it Fourier transform}.
2997

2998
  We will focus our discussion on an {\bf even} function $f(t)$ of a
2999
  real variable $t$.  ``{\bf Even}'' means that its graph is symmetric
3000
  about the $y$-axis; that is, $f(-t)= f(t)$.  See
3001
  Figure~\ref{fig:even_function}.
3002

3003
  \ill{even_function}{.8}{The graph of an even function is symmetrical
3004
    about the $y$-axis.\label{fig:even_function}}
3005

3006
  When we get to apply this discussion to the {\it staircase of
3007
    primes} $\pi(t)$ or the {\it tinkered staircase of primes}
3008
  $\psi(t)$, both of which being defined only for positive values of
3009
  $t$, then we would ``lose little information'' in our quest to
3010
  understand them if we simply ``symmetrized their graphs'' by
3011
  defining their values on negative numbers $-t$ via the formulas
3012
  $\pi(-t)=\pi(t)$ and $\psi(-t)=\psi(t)$ thereby turning each of them
3013
  into {\it even functions}.
3014

3015
\ill{even_pi}{.8}{Even extension of the staircase of primes.}
3016

3017
The idea behind the Fourier transform is to express $f(t)$ as {\it
3018
  made up out of sine and cosine wave functions}.  Since we have
3019
agreed to consider only even functions, we can dispense with the sine
3020
waves---they won't appear in our Fourier analysis\index{Fourier analysis}---and ask how to
3021
reconstruct $f(t)$ as a {\it sum} (with coefficients) of cosine
3022
functions (if only finitely many frequencies occur in the spectrum\index{spectrum} of
3023
our function) or more generally, as an {\it integral} if the spectrum\index{spectrum}
3024
is more elaborate.  For this work, we need a little machine that tells
3025
us, for each real number $\theta$, whether or not $\theta$ is in the
3026
spectrum\index{spectrum} of $f(t)$, and if so, what the amplitude is of the cosine
3027
function $\cos(\theta t)$ that occurs in the Fourier expansion of
3028
$f(t)$---this amplitude answers the awkwardly phrased question:
3029

3030
{\it
3031
  How much $\cos(\theta t)$ ``occurs in'' $f(t)$?}
3032

3033
  We will denote this
3034
amplitude by ${\hat f}(\theta)$, and refer to it as {\bf the Fourier
3035
  transform} of $f(t)$.  The {\bf spectrum},\index{spectrum} then, of $f(t)$ is the
3036
set of all frequencies $\theta$ where the amplitude is nonzero.
3037

3038
\ill{fourier_machine}{.6}{The Fourier transform machine, which transforms $f(t)$ into ${\hat f}(\theta)$\label{fig:fourier_machine}}
3039

3040

3041
Now in certain easy circumstances---specifically, if
3042
$\int_{-{\infty}}^{+{\infty}}|f(t)|dt$ (exists, and)
3043
is finite---integral calculus\index{calculus} provides us with an easy construction of that
3044
machine (see Figure~\ref{fig:fourier_machine}); namely:
3045

3046
    $${\hat f}(\theta) = \int_{-{\infty}}^{+{\infty}}f(t)\cos(-\theta t)dt.$$
3047

3048

3049
    This concise machine manages to ``pick out'' just the part of
3050
    $f(t)$ that has frequency $\theta$!  It provides for us the {\it
3051
      analysis} part of the Fourier analysis\index{Fourier analysis} of our function
3052
    $f(t)$.
3053

3054
    But there is a {\it synthesis} part to our work as well,
3055
    for we can reconstruct $f(t)$ from its Fourier transform, by a
3056
    process intriguingly similar to the analysis part; namely:  if
3057
$\int_{-{\infty}}^{+{\infty}}|{\hat f}(\theta)|d\theta$ (exists, and) is finite, we retrieve $f(t)$ by the integral
3058

3059
      $$f(t)  = {\frac{1}{2\pi}}\int_{-{\infty}}^{+{\infty}}{\hat f}(\theta)\cos(\theta t)d\theta.$$
3060

3061
      We are not so lucky to have
3062
      $\int_{-{\infty}}^{+{\infty}}|f(t)|dt$ finite when we try our
3063
      hand at a Fourier analysis\index{Fourier analysis} of the staircase of primes, but we'll
3064
      work around this!
3065

3066

3067

3068
\chapter[Fourier transform of delta]{What is the Fourier transform of a delta function?\label{sec:ftdelta}}
3069

3070
Consider the $\delta$-function\index{$\delta$-function} that we denoted $\delta(t)$ (or
3071
$\delta_0(t)$). This is also the ``generalized function'' that we
3072
thought of as the ``limit of the red graphs'' in Chapter~\ref{sec:dist}
3073
above. Even though $\delta(t)$ is a distribution\index{distribution} and {\it not} a bona
3074
fide function, it is symmetric about the origin, and
3075
also $$\int_{-{\infty}}^{+{\infty}}|\delta(t)|dt$$ exists, and is
3076
finite (its value is, in fact, $1$). All this means that,
3077
appropriately understood, the discussion of the previous section
3078
applies, and we can {\it feed} this delta-function into our Fourier
3079
Transform Machine (Figure~\ref{fig:fourier_machine}) to see what
3080
frequencies and amplitudes arise in our attempt to express---whatever
3081
this means!---the delta-function as a sum, or an integral, of cosine
3082
functions.
3083

3084

3085
     So what is the Fourier transform,  ${\hat \delta_0}(\theta)$, of the delta-function?
3086

3087

3088
Well, the general formula would give us:
3089
  $$ {\hat \delta_0}(\theta) = \int_{-\infty}^{+\infty}\cos(-\theta t)\delta_0(t)dt.$$
3090
For any nice function $c(t)$ we
3091
  have that the integral of the product of $c(t)$ by the distribution\index{distribution}
3092
  $\delta_x(t)$ is given by the {\it value} of the function $c(t)$ at
3093
  $t=x$.  So:
3094

3095
$$ {\hat \delta_0}(\theta) = \int_{-\infty}^{+\infty}\cos(-\theta t)\delta_0(t)dt = \cos(0) = 1.$$
3096

3097

3098
In other words, the Fourier transform of $\delta_0(t)$ is the constant
3099
function $$ {\hat \delta_0}(\theta)=1.$$ One can think of this
3100
colloquially as saying that the delta-function is a perfect example of
3101
{\it white noise} in that {\it every} frequency occurs in its Fourier
3102
analysis and they all occur in equal amounts.
3103

3104
To generalize this computation, let us consider for any real number $x$
3105
the symmetrized delta-function with support at $x$ and $-x$, given
3106
by $$d_x(t) \ = \ (\delta_x(t) + \delta_{-x}(t))/2$$
3107
in Figure~\ref{fig:two_delta}.
3108

3109
    \ill{two_delta}{0.4}{The sum $(\delta_x(t) + \delta_{-x}(t))/2$, where we draw vertical arrows to illustrate the Dirac delta functions.\label{fig:two_delta}}
3110

3111
    What is the Fourier transform of this $d_x(t)$?  The answer is
3112
    given by making the same computation as we've just made:
3113

3114
\begin{align*}\label{dx}
3115
{\hat d_x}(\theta)  &=  {\frac{1}{2}}\left(\int_{-\infty}^{+\infty}\cos(-\theta t)\delta_x(t)dt + \int_{-\infty}^{+\infty}\cos(-\theta t)\delta_{-x}(t)dt\right)\\
3116
    &= {\frac{1}{2}}\big(\cos(-\theta x)+ \cos(+\theta x)\big)\\
3117
    &= \cos(x\theta)
3118
\end{align*}
3119

3120

3121
To summarize this in ridiculous (!) colloquial terms: {\it for any
3122
  frequency $\theta$ the amount of $\cos(\theta t)$ you need to build
3123
  up the generalized function $(\delta_x(t) + \delta_{-x}(t))/2$ is
3124
  $\cos(x\theta).$ }
3125

3126

3127
So far, so good, but remember that the theory of the Fourier transform
3128
has---like much of mathematics---two parts: an {\it analysis part} and
3129
a {\it synthesis} part.  We've just performed the {\it analysis} part
3130
of the theory for these symmetrized delta functions $(\delta_x(t) +
3131
\delta_{-x}(t))/2$.
3132

3133
Can we synthesize them---i.e., build them up again---from their Fourier transforms?
3134

3135

3136
  We'll leave this, at least for now, as a question for you.
3137

3138
%\chapter{Supports, Spectra, Blips and Spikes}
3139

3140
%(to be written)
3141

3142
% Here is where we will
3143
% be really justifying our heavy discussion of distribution\index{distribution}s,
3144
% trigonometric sums, etc.
3145

3146
\chapter{Trigonometric series}\label{ch:trigseries}
3147
Given our interest in the ideas of Fourier, it is not surprising that
3148
we'll want to deal with things like $$F(\theta) = \sum_{k=1}^{\infty}
3149
a_k\cos(s_k\cdot \theta)$$ where the $s_k$ are real numbers tending
3150
(strictly monotonically) to infinity.  These we'll just call {\bf
3151
  trigonometric series} without asking whether they converge in any
3152
sense for all values of $\theta$, or even for {\it any} value of $\theta$. The
3153
$s_k$'s that occur in such a trigonometric series we will call the
3154
{\bf spectral values} or for short, the {\bf spectrum}\index{spectrum} of the series,
3155
and the $a_k$'s the (corresponding) {\bf amplitudes}.  We repeat that
3156
we impose no convergence requirements at all. But we also think of
3157
these things as providing ``cutoff'' finite trigonometric sums, which
3158
we think of as functions of two variables, $\theta$ and $C$ (the
3159
``cutoff'') where $$F(\theta,C): = \sum_{s_k\le C} a_k\cos(s_k\cdot \theta).$$ These functions $F(\theta,C)$ are finite trigonometric series and therefore ``honest functions" having finite values everywhere.
3160

3161

3162
%\section{Trigonometric Series as Fourier Transforms}
3163

3164

3165
Recall, as in Chapter \ref{sec:ftdelta}, that for any real number $x$, we considered
3166
the symmetrized delta-function with support at $x$ and $-x$, given
3167
by
3168
$$
3169
 d_x(t) \ = \ (\delta_x(t) + \delta_{-x}(t))/2,
3170
$$
3171
and noted that the Fourier transform of this $d_x(t)$  is
3172
$$
3173
   \label{dx2}{\hat d_x}(\theta)  = \cos(x\theta).
3174
$$
3175
\ill{two_delta}{0.4}{The sum $(\delta_x(t) + \delta_{-x}(t))/2$,
3176
where we draw vertical arrows to illustrate the Dirac delta functions.}
3177

3178
It follows, of course, that a cutoff finite trigonometric series,
3179
$F(\theta,C)$ associated to an infinite trigonometric series\index{trigonometric series}
3180
$$
3181
 F(\theta) = \sum_{k=1}^{\infty} a_k\cos(s_k\cdot \theta)
3182
$$
3183
is the Fourier transform of the distribution\index{distribution}
3184
$$
3185
 D(t,C):=\sum_{s_k \le C}a_kd_{s_k}(t).
3186
$$
3187
Given the discreteness of the set of spectral values $s_k$ ($k=1,2,\dots$)    we can
3188
consider the infinite sum
3189
$$
3190
 D(t):=\sum_{k=1}^{\infty}a_kd_{s_k}(t),
3191
$$ viewed as distribution\index{distribution} playing the role of
3192
the ``inverse Fourier transform" of our trigonometric
3193
series $F(t)$.
3194

3195

3196
%\section{\bf Spike values}
3197

3198
\begin{definition} Say
3199
  that a trigonometric series $F(\theta)$ has a {\bf spike} at the real number $\theta=\tau \in {\bf R}$
3200
if the set of absolute values $|F(\tau,C)|$  as $C$ ranges through positive number cutoffs is
3201
  unbounded.  A real number $\tau \in {\bf R}$ is, in contrast, a {\bf
3202
    non-spike} if those values admit a finite upper
3203
  bound. \end{definition}
3204

3205
  % One mission in this book will be furthered just by paying attention to the  %spike values of certain (infinite) trigonometric series.
3206

3207

3208
 % The trigonometric sums we study are
3209
 % going to be particularly nice. Under the assumption of the Riemann
3210
 % Hypothesis, they  will have the
3211
 % property that the set of their spike values are particularly interesting {\it %discrete}
3212
 % subsets of the real line.
3213

3214
 In the chapters that follow we will be exhibiting graphs of trigonometric functions, cutoff at various values of $C$, that (in our opinion) strongly hint that as $C$ goes to infinity, one gets convergence to certain (discrete) sequences of very interesting {\it spike values}. To be sure, no finite computation, exhibited by a graph, can {\it prove} that this is in fact the case. But on the one hand, the vividness of those spikes is in itself worth experiencing, and on the other hand, given RH, there is justification that the {\it strong hints} are not misleading; for some theoretical background see the endnotes.
3215

3216

3217

3218
% WARNING: I had to hard-code the III in the title, since
3219
% otherwise it would be ?? in some places!! (Maybe due to limitations
3220
% of latex.)
3221
%\chapter{A sneak preview of Part~\ref{part3}}\label{snpr}
3222
\chapter{A sneak preview of Part~III}\label{snpr}
3223

3224
   In this chapter, as a striking illustration of the type of phenomena that will be studied in Part~\ref{part3}, we will consider two infinite trigonometric sums---that seem to be related one to the other in that the {\it frequencies} of the terms in the one trigonometric sum give the {\it spike values} of the other, and vice versa:   the {\it frequencies} of the other give the {\it spike values} of the one: a kind of duality as in the theory of Fourier transforms. We show this duality by exhibiting the graphs of more and more accurate finite approximations (cutoffs) of these infinite sums. More specifically,
3225

3226
\begin{enumerate}\item The first infinite trigonometric sum $F(t)$  is a sum{\footnote{Here we
3227
    make use of the Greek symbol $\sum$ as a shorthand way of
3228
    expressing a sum of many terms.  We are not requesting this sum to
3229
    converge.}}  of pure cosine waves with frequencies given by {\it logarithms of powers of primes} and with amplitudes given by the formula $$F(t):=
3230
-\sum_{p^n}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))
3231
$$ the summation being over all powers $p^n$ of all prime numbers\index{prime number} $p$.
3232

3233
The graphs of longer and longer finite truncations of these trigonometric sums, as you will see, have ``higher and higher peaks" concentrated more and more accurately at a  {\it certain infinite discrete set of real numbers} that we will be referring to as {\bf the Riemann spectrum}, indicated in our pictures below (Figures~\ref{fig:pnsum5}--\ref{fig:pnsum500}) by the series of vertical red lines.
3234

3235
\item  In contrast, the second infinite trigonometric sum $H(t)$ is a sum of pure cosine waves with frequencies given by what we have dubbed above {\it the Riemann spectrum} and with amplitudes all equal to $1$.
3236

3237
$$
3238
  H(s):=1+\sum_{\theta}\cos(\theta \log(s)).
3239
 $$ These graphs will have ``higher and higher peaks" concentrated more and more accurately at {\bf the logarithms of powers of primes} indicated in our pictures below (see Figure~\ref{fig:phihat}) by the series of vertical blue spikes.
3240

3241
               \end{enumerate}
3242

3243

3244

3245

3246
That the series of {\it blue lines} (i.e., the logarithms of powers of
3247
primes) in our pictures below determines---via the trigonometric
3248
sums we describe---the series of {\it red lines} (i.e., what we are
3249
calling the spectrum)\index{spectrum} and conversely is a consequence of the Riemann
3250
Hypothesis.
3251
\begin{enumerate}
3252

3253
\item {\bf Viewing the Riemann spectrum as the spike values of a trigonometric series with
3254
frequencies equal to (logs of) powers of the primes:}
3255

3256
  To get warmed up, let's plot the positive values of the following
3257
  sum of (co-)sine waves:
3258

3259
    \begin{align*}
3260
   f(t) =& -{\frac{\log(2)}{2^{1/2}}}\cos(t\log(2))- {\frac{\log(3)}{3^{1/2}}}\cos(t\log(3))\\
3261
     &\qquad -{\frac{\log(2)}{4^{1/2}}}\cos(t\log(4))-{\frac{\log(5)}{5^{1/2}}}\cos(t\log(5))
3262
  \end{align*}
3263

3264

3265

3266
\ill{mini_phihat_even}{1}{Plot of $f(t)$\label{fig:mini_phihat_even}}
3267

3268

3269
Look at the peaks of Figure~\ref{fig:mini_phihat_even}. There is nothing very impressive
3270
about them, you might think; but wait, for $f(t)$ is just a very ``early''
3271
piece of  the infinite trigonometric sum $F(t)$ described above.
3272

3273

3274

3275
Let us truncate the infinite sum $F(t)$ taking only finitely many terms, by
3276
choosing various ``cutoff values'' $C$ and forming the finite sums
3277
$$
3278
F_{\le C}(t):= -\sum_{p^n\leq C}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))
3279
$$
3280
and plotting their positive
3281
values. Figures~\ref{fig:pnsum5}--\ref{fig:pnsum500} show what we get
3282
for a few values of $C$.
3283

3284

3285
In each of the graphs, we have indicated by red vertical arrows the
3286
real numbers that give the values of the {\it Riemann spectrum}\index{Riemann spectrum} that we will
3287
be discussing. These numbers at the red
3288
vertical arrows in Figures~\ref{fig:pnsum5}--\ref{fig:pnsum500},
3289
$$
3290
\theta_1, \theta_2, \theta_3, \dots
3291
$$
3292
are  {\it spike values}---as described in Chapter~\ref{ch:trigseries}---of the infinite trigonometric series\index{trigonometric series}
3293
$$
3294
  -\sum_{p^n<C}{\frac{\log(p)}{p^{n/2}}} \cos(t \log(p^n)).
3295
$$
3296
They constitute what we are calling the Riemann spectrum\index{Riemann spectrum} and are key to the staircase of primes
3297
\bibnote{If the \RH{} holds
3298
they are precisely the  {\it imaginary parts of the
3299
  ``nontrivial'' zeroes of the Riemann zeta function}.}.
3300

3301
\begin{itemize}
3302
\item {\bf The sum with $p^n \le C=5$}
3303

3304
In Figure~\ref{fig:pnsum5} we plot the function $f(t)$ displayed above; it consists of the sum of the first four terms of our infinite sum, and doesn't yet show very much ``structure'':
3305

3306
\ill{phihat_even-5}{1}{Plot of $-\sum_{p^n\leq 5}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with
3307
arrows pointing to the spectrum\index{spectrum} of the primes\label{fig:pnsum5}}
3308

3309
\vskip20pt
3310
\item {\bf The sum with $p^n \le C=20$}
3311

3312
 Something,
3313
 (don't you agree?)  is already beginning to happen
3314
 in the graph in Figure~\ref{phihat_even-20}:
3315
\ill{phihat_even-20}{1}{Plot of $-\sum_{p^n\leq 20}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with arrows pointing to the spectrum\index{spectrum} of the primes\label{phihat_even-20}}\vskip20pt
3316
\item {\bf The sum with $p^n \le C=50$}
3317

3318
  Note that the high peaks in Figure~\ref{fig:phihat_even-50}
3319
  seem to be lining up more accurately with
3320
  the vertical red lines. Note also that the $y$-axis has been
3321
  rescaled.
3322

3323
\ill{phihat_even-50}{1}{Plot of $-\sum_{p^n\leq 50}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with arrows pointing to the spectrum\index{spectrum} of the primes\label{fig:phihat_even-50}}\vskip20pt
3324
\item {\bf The sum with $p^n \le C=500$}
3325

3326
  Here, the peaks are even sharper, and note that again they are
3327
  higher; that is, we have rescaled the $y$-axis.
3328
  \ill{phihat_even-500}{1}{Plot of $-\sum_{p^n\leq
3329
      500}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with arrows
3330
    pointing to the spectrum\index{spectrum} of the primes\label{fig:pnsum500}}
3331
\end{itemize}
3332

3333
  We will pay attention to:
3334

3335
\begin{itemize}
3336
\item how the spikes ``play out'' as we take the sums of longer and longer
3337
  pieces of the infinite sum of cosine waves above, given by larger
3338
  and larger cutoffs $C$,
3339
\item how this spectrum\index{spectrum} of red lines more closely matches the high
3340
  peaks of the graphs of the positive values of these finite sums,
3341
\item how these peaks are climbing higher and higher,
3342
\item what relationship these peaks have to the Fourier analysis\index{Fourier analysis} of the
3343
  staircase of primes,
3344
\item and, equally importantly, what these mysterious red lines
3345
  signify.
3346
\end{itemize}
3347

3348
\item{\bf Toward (logs of) powers of the primes, starting from the Riemann spectrum\index{Riemann spectrum}:}
3349

3350
  Here we will be making use of the series of numbers $$\theta_1,
3351
  \theta_2, \theta_3, \dots$$ comprising what we called the {\it
3352
    spectrum}\index{spectrum}. We consider the infinite trigonometric series\index{trigonometric series}
3353
  $$
3354
  G(t):= 1 + \cos(\theta_1t)+\cos(\theta_2t)+\cos(\theta_3t)+\dots
3355
  $$
3356
  or, using the $\sum$ notation,
3357
 $$
3358
 G(t):=  1+\sum_{\theta}\cos(\theta t)
3359
 $$
3360
 where the summation is over the spectrum\index{spectrum}, $\theta=\theta_1, \theta_2,
3361
 \theta_3, \dots$. Again we will consider finite cutoffs $C$ of this
3362
 infinite trigonometric sum (on a logarithmic scale),
3363
 $$
3364
 H_{\le C}(s):= 1+\sum_{i \leq C}\cos(\log(s) \theta_i)
3365
 $$
3366
 and to see the spikes in $H_{\le 1000}(s)$
3367
 consider Figure~\ref{fig:phihat}.
3368

3369
 \ill{phi_cos_sum_2_30_1000}{.8}{Illustration of $-\sum_{i=1}^{1000}
3370
   \cos(\log(s)\theta_i)$, where $\theta_1 \sim 14.13, \ldots$ are the
3371
   first $1000$ contributions to the spectrum\index{spectrum}.  The red dots
3372
   are at the prime powers $p^n$, whose size is proportional to
3373
   $\log(p)$.\label{fig:phihat}}
3374

3375
\end{enumerate}
3376

3377
This passage---thanks to the Riemann Hypothesis---from spectrum\index{spectrum} to
3378
prime powers and back again via consideration of the ``high peaks'' in
3379
the graphs of the appropriate trigonometric sums provides a kind of
3380
visual duality emphasizing, for us, that the information inherent in
3381
the wild spacing of prime powers, is somehow ``packaged'' in the Riemann spectrum\index{Riemann spectrum}, and reciprocally,
3382
the information given in that series of mysterious numbers is
3383
obtainable from the sequence of prime powers.
3384

3385

3386

3387
\part{The Riemann Spectrum of the Prime Numbers\label{part3}}\index{prime number}
3388

3389

3390
\chapter{On losing no information\label{sec:loseno}}
3391

3392
To manage to repackage the ``same'' data in various ways---where each
3393
way brings out some features that would be kept in the shadows if the
3394
data were packaged in some different way---is a high art, in
3395
mathematics. In a sense {\it every} mathematical equation does this,
3396
for the ``equal sign'' in the middle of the equation tells us that
3397
even though the two sides of the equation may seem different, or have
3398
different shapes, they are nonetheless ``the same data.''  For
3399
example, the equation
3400

3401
  $$\log(XY) = \log(X) + \log(Y)$$
3402

3403
  which we encountered earlier in Chapter~\ref{sec:firstguess}, is
3404
  just two ways of looking at the same thing, yet it was the basis for
3405
  much manual calculation for several centuries.
3406

3407

3408

3409

3410
  Now, the problem we have been concentrating on, in this book, has
3411
  been---in effect---to understand the pattern, if we can call it
3412
  that, given by the placement of prime numbers\index{prime number} among the natural
3413
  line-up of all whole numbers.
3414
\ill{primes_line}{1}{Prime numbers up to $37$}
3415

3416
 There are, of course, many ways for us to present this basic
3417
 pattern. Our initial strategy was to focus attention on the {\it
3418
   staircase of primes} which gives us a vivid portrait, if you wish,
3419
 of the order of appearance of primes among all numbers.
3420
\ill{PN_38}{.9}{Prime numbers up to $37$}
3421

3422
 As we have already hinted in the previous sections, however, there
3423
 are various ways open to us to tinker with---and significantly
3424
 modify---our staircase {\it without losing the essential information
3425
   it contains}. Of course, there is always the danger of modifying
3426
 things in such a way that ``retrieval'' of the original data becomes
3427
 difficult.  Moreover, we had better remember every change we have
3428
 made if we are to have any hope of retrieving the original data!
3429

3430
 With this in mind, let us go back to Chapter~\ref{sec:information}
3431
 (discussing the staircase of primes) and Chapter~\ref{sec:tinkering},
3432
 where we tinkered with the original staircase of primes---alias: the
3433
 graph of $\pi(X)$---to get $\psi(X)$ whose risers look---from
3434
 afar---as if they approximated the 45 degree staircase.% (see Figure~\ref{fig:psi38} below).
3435

3436

3437
   At this point we'll do some further carpentry on $\psi(X)$ {\it without destroying the valuable
3438
   information it contains}. We will be replacing  $\psi(X)$ by a generalized function, i.e., a
3439
   distribution,\index{distribution} which we denote $\Phi(t)$ that has support at all positive integral multiples
3440
     of logs of prime numbers,\index{prime number} and is  zero on the complement of that discrete set.  Recall that
3441
     by definition, a discrete subset $S$ of real numbers is  the  {\bf support} of a function,
3442
     or of a distribution,\index{distribution} if the function vanishes on the complement of  $S$ and doesn't vanish
3443
     on the complement of any proper subset of $S$.
3444

3445
   Given the mission of our book, it may be less important for us to  elaborate on  the construction
3446
   of  $\Phi(t)$  than it is  {\bf (a)} to note that $\Phi(t)$  contains all the valuable information
3447
   that  $\psi(X)$ has and {\bf (b)} to pay close attention to the spike values of the trigonometric
3448
   series that is the Fourier transform of $\Phi(t)$.
3449

3450
     For the definition  of the distribution\index{distribution} $\Phi(t)$ see the end-note
3451
     \bibnote{
3452
 {\it The construction of  $ \Phi(t)$ from $\psi(X)$:}
3453

3454
 Succinctly, for positive real numbers $t$, $$\Phi(t): \ = \ e^{- t/2}\Psi'(t),$$ where  $\Psi(t) = \psi(e^t)$
3455
 (see Figure~\ref{fig:psi_200}),  and $\Psi'$ is the derivative of $\Psi(t)$, viewed as a distribution. We extend this function to all real arguments $t$ by requiring $\Phi(t)$ to be an even function of $t$, i.e., $\Phi(-t)= \Phi(t)$.
3456
 But, to review this at a more leisurely pace,
3457
   \begin{enumerate}
3458

3459
   \item Distort the $X$-axis of our staircase by replacing the
3460
     variable $X$ by $e^t$ to get the function $$\Psi(t):=
3461
     \psi(e^t).$$ No harm is done by this for we can retrieve our
3462
     original $\psi(X)$ as $$\psi(X) = \Psi(\log(X)).$$ Our distorted
3463
     staircase has risers at ($0$ and) all positive integral multiples
3464
     of logs of prime numbers.
3465

3466

3467
%  \illtwo{psi_38}{bigPsi_38}{.4}{The two staircases $\psi$ (left) and $\Psi$ (right).  Notice that the plot of $\Psi$ on the  right is just the plot of $\psi$ but with the $X$-axis plotted on a logarithmic scale.\label{fig:psi38}}
3468

3469

3470
\item Now we'll do something that might seem a bit more brutal: {\it
3471
    take the derivative of this distorted staircase $\Psi(t)$.}  This
3472
  derivative $\Psi'(t)$ is a {\it generalized} function with support
3473
  at all nonnegative integral multiples of logs of prime numbers.
3474

3475
 \vskip20pt
3476
 \ill{bigPsi_prime}{1}{$\Psi'(t)$ is a (weighted) sum of Dirac delta functions at the logarithms of prime powers $p^n$ weighted by $\log(p)$ (and by $\log(2\pi)$ at $0$).  The taller the arrow, the larger the  weight.}
3477

3478
\item Now---for normalization purposes---multiply $\Psi'(t)$ by the
3479
  function $e^{-t/2}$ which has no effect whatsoever on the support.
3480
  \end{enumerate}
3481
  \vskip20pt\ill{psi_200}{.4}{Illustration of the  staircase $\psi(X)$  constructed in Chapter~\ref{sec:tinkering} that counts weighted prime powers.\label{fig:psi_200}}
3482

3483
%So what happens when we take the derivative---in the sense of
3484
%distributions---of a complicated staircase?  For example, see
3485
%%Figure~\ref{fig:psi_200}.  Well, we would have blip-functions (alias:
3486
%%Dirac $\delta$-functions) at each point of discontinuity of $\Psi(X)$;
3487
%that is, at $X=$ any power of a prime.
3488

3489

3490
In summary: The generalized function that resulted from the above carpentry:
3491
  $$\Phi(t) = e^{-t/2}\Psi'(t),$$ retains the information we want (the placement of primes) even if in a slightly different format.
3492
}.% end bibnote
3493

3494

3495
A distribution\index{distribution} that has a discrete set of real numbers as its
3496
  support---as $\Phi(t)$ does---we sometimes like to call {\bf spike
3497
    distributions}\index{distribution} since the pictures of functions approximating it
3498
  tend to look like a series of spikes.
3499

3500
  We have then before us a spike distribution\index{distribution} with support at integral
3501
  multiples of logarithms of prime numbers\index{prime number}, and this generalized
3502
  function retains the essential information about the placement of
3503
  prime numbers among all whole numbers, and will be playing a major
3504
  role in our story: knowledge of the placement of the ``blips'' constituting this distribution\index{distribution} (its
3505
support), being at  integral multiples of logs of prime numbers, would allow us to reconstruct the position of the prime
3506
numbers among all numbers. Of course there are many other ways to package this vital information, so we
3507
must explain our motivation for subjecting our poor initial staircase
3508
to the particular series of brutal acts of distortion that we
3509
described, which ends up with the distribution\index{distribution} $\Phi(t)$.
3510

3511

3512

3513

3514

3515

3516

3517
\chapter[From primes to the Riemann spectrum]{Going from
3518
the primes to the Riemann spectrum}
3519
\label{ch:prime-to-spectrum}\index{Riemann spectrum}
3520
%\todo{This chapter has not been fully written, there are some
3521
%  mathematical issues that we don't yet explain, and we will need to
3522
 % work out in order to do a satisfactory job here.}
3523

3524

3525

3526
We  discussed the nature of the Fourier transform of (symmetrized)
3527
$\delta$-functions\index{$\delta$-function} in Chapter~\ref{sec:ftdelta}. In particular, recall
3528
the ``spike function'' $$d_x(t) \ = \ (\delta_x(t) +
3529
\delta_{-x}(t))/2$$ that has support at the points $x$ and $-x$.  We
3530
mentioned that its Fourier transform, ${\hat d}_x(\theta)$, is equal
3531
to $\cos(x\theta)$ (and gave some hints about why this may be true).
3532

3533

3534
Our next goal is to work with the much more interesting ``spike
3535
function'' $$\Phi(t) \ = \ e^{- t/2}\Psi'(t),$$ which was one of the
3536
generalized functions that we engineered in Chapter~\ref{sec:loseno},
3537
and that has support at all nonnegative integral multiples of
3538
logarithms of prime numbers\index{prime number}.
3539

3540

3541

3542

3543
As with any function---or generalized function---defined for
3544
non-negative values of $t$, we can ``symmetrize it'' (about the
3545
$t$-axis) which means that we can define it on negative real numbers
3546
by the equation
3547
  $$\Phi(-t) = \Phi(t).$$
3548
  Let us make that convention, thereby turning $\Phi(t)$ into an {\it
3549
    even} generalized function, as illustrated in Figure~\ref{fig:bigPhi}. (An {\bf even} function on the real line is a function that takes the same value on any real number and its negative as in the formula above.)
3550

3551
%\vskip20pt{\bf William: sum of Dirac delta functions  Figure goes here}\vskip20pt
3552
\ill{bigPhi}{1}{$\Phi(t)$ is a sum of Dirac delta functions at the logarithms of prime powers $p^n$ weighted by $p^{-n/2}\log(p)$ (and $\log(2\pi)$ at $0$).\label{fig:bigPhi}}
3553

3554

3555
We may want to think of $\Phi(t)$ as a limit of this sequence of distribution\index{distribution}s,
3556
 $$\Phi(t)\ = \ \lim_{C \to {\infty}}\Phi_{\le C}(t)$$
3557
where $\Phi_{\le C}(t)$  is the following finite linear
3558
combination of (symmetrized) $\delta$-functions\index{$\delta$-function}  $d_x(t)$:
3559
 $$\Phi_{\le C}(t)\quad :=\quad  2\sum_{{\rm prime\ powers \ }p^n  \le C} p^{-n/2}\log(p)d_{n\log(p)}(t).$$
3560
 Since the Fourier transform of $d_x(t)$ is  $\cos(x\theta)$, the Fourier transform of  each $d_{n\log(p)}(t)$ is
3561
 $\cos(n\log(p)\theta)$, so the Fourier transform of
3562
 $\Phi_{\le C}(t)$ is
3563

3564
  $${\hat \Phi}_{\leq C}(\theta):= 2\sum_{{\rm prime\ powers\ }p^n  \leq{} C} p^{-n/2} \log(p)\cos(n\log(p)\theta).$$
3565

3566
   So, following the discussion in Chapter~\ref{ch:trigseries} above, we are dealing with the cutoffs at finite points  $C$  of the {\it trigonometric series}{\footnote{The trigonometric series in the text---whose spectral values are the logarithms of prime powers---may also be written as
3567
   $$\sum_{m=2}^{\infty}\Lambda(m)m^{-s} + \sum_{m=2}^{\infty}\Lambda(m)m^{-{\bar s}}$$ for $s={\frac{1}{2}}+i\theta$, where  $\Lambda(m)$ is the von-Mangoldt function.}}
3568

3569
   $${\hat \Phi}(\theta):= 2\sum_{{\rm prime\ powers\ }p^n } p^{-n/2} \log(p)\cos(n\log(p)\theta).$$
3570

3571

3572

3573

3574
For example, when $C=3$, we have the rather severe cutoff of these trigonometric series: ${\hat \Phi}_{\leq 3}(\theta)$ takes account {\it only} of the primes $p=2$ and $p=3$:
3575
$$
3576
  {\hat \Phi}_{\leq 3}(\theta) =
3577
  \frac{2}{\sqrt{2}} \log(2)\cos(\log(2)\theta)
3578
 +
3579
 \frac{2}{\sqrt{3}} \log(3)\cos(\log(3)\theta),
3580
$$
3581
which we plot in Figure~\ref{fig:theta3}.
3582

3583

3584
\ill{theta_3_intro-1}{.85}{Plot of ${\hat \Phi}_{\leq 3}(\theta)$\label{fig:theta3}}
3585

3586

3587
We will be interested in the values of $\theta$ that correspond to higher and higher {\it peaks} of our trigonometric series ${\hat \Phi}_{\leq C}(\theta)$ as $C\to \infty$. For example, the value of $\theta$ that provides the first peak of $
3588
  {\hat \Phi'}_{\leq 3}(\theta)$ such that $
3589
  |{\hat \Phi'}_{\leq 3}(\theta)| > 2$  is $$\theta=14.135375354\ldots.$$
3590

3591
  So in Figure~\ref{fig:theta3} we begin this exploration by plotting ${\hat \Phi}_{\leq 3}(\theta)$, together with its derivative, highlighting the zeroes of the derivative.
3592

3593

3594
\ill{theta_3_intro-2}{.85}{Plot of ${\hat \Phi}_{\leq 3}(\theta)$ in blue and its derivative in grey\label{fig:theta4}}
3595

3596
As we shall see in subsequent figures of this chapter, there seems to be an eventual convergence of the values of $\theta$ that correspond to higher and higher {\it peaks}, the red dots in the figure above,  as $C$ tends to $\infty$. These ``limit" $\theta$-values we now insert as the endpoints of red vertical lines into Figure~\ref{fig:theta5} comparing them with the red dots for our humble cutoff $C=3$.
3597

3598

3599

3600
\ill{theta_C-3}{0.9}{${\hat \Phi}_{\leq 3}(\theta)$ \label{fig:theta5}}
3601

3602

3603

3604
We give a further sample of graphs for a few higher cutoff values $C$ (introducing a few more primes into the game!).
3605

3606
Figures~\ref{fig:theta_C-5-10}--\ref{fig:theta_C-200-500}
3607
contain graphs of various cutoffs of ${\hat \Phi}_{\leq C}(\theta)$.
3608
As $C$ increases a sequence of spikes down emerge
3609
which we indicate with red vertical arrows.
3610

3611

3612

3613
 \illtwo{theta_C-5}{theta_C-10}{0.45}{Plot of ${\hat \Phi}_{\leq C}(\theta)$ for $C=5$ and $10$\label{fig:theta_C-5-10}}
3614

3615
 Given the numerical-experimental approach we have been adopting in this book, it is  a particularly fortunate (and to us: surprising) thing that the convergence to those vertical red lines can already be illustrated using {\it such small} cutoff values $C$.  One might almost imagine making hand computations that exhibit this phenomenon! Following in this spirit, see David Mumford's blog post {\url{http://www.dam.brown.edu/people/mumford/blog/2014/RiemannZeta.html}}.
3616

3617
  To continue:
3618

3619
\illtwo{theta_C-20}{theta_C-100}{0.45}{Plot of ${\hat \Phi}_{\leq C}(\theta)$ for $C=10$ and $100$\label{fig:theta_C-10-100}}
3620

3621
\illtwo{theta_C-200}{theta_C-500}{0.45}{Plot of ${\hat \Phi}_{\leq C}(\theta)$ for $C=200$ and $500$\label{fig:theta_C-200-500}}
3622

3623

3624
For a theoretical discussion of these spikes, see endnote~\bibnote{
3625
 A version of the Riemann--von Mangoldt explicit formula gives some theoretical affirmation of the phenomena we are seeing here. We thank Andrew Granville for a  discussion about this.
3626

3627
 \ill{granville}{0.25}{Andrew Granville}
3628

3629

3630
 Even though the present endnote is not the place to give anything like a full account, we can't resist setting down a few of Granville's comments that might be helpful to people who wish to go further. (This discussion can be modified to analyze what happens unconditionally, but we will be assuming the \RH{} below.) The function ${\hat \Phi}_{\le C}(\theta)$ that we are graphing in this chapter can be written as:
3631
 $${\hat \Phi}_{\le C}(\theta)= \sum_{n\le C}\Lambda(n)n^{-w}$$ where $w = {\frac{1}{2}}+i\theta$. This function, in turn, may be written (by Perron's formula) as
3632
\begin{align*}
3633
&{\frac{1}{2\pi i }}\lim_{T \to \infty}\int_{s=\sigma_o-iT}^{s=\sigma_o+iT}\sum_{n}\Lambda(n)n^{-w}\left({\frac {C}{n}}\right)^{s}{\frac{ds}{s}}\\
3634
&=  {\frac{1}{2\pi i }}\lim_{T \to \infty}\int_{s=\sigma_o-iT}^{s=\sigma_o+iT}\sum_{n}\Lambda(n)n^{-w-s}C^{s}{\frac{ds}{s}}\\
3635
&= -{\frac{1}{2\pi i }}\lim_{T \to \infty}\int_{s=\sigma_o-iT}^{s=\sigma_o+iT}\left({\frac{\zeta'}{\zeta}}\right)(w+s){\frac{C^{s}}{s}}ds. \end{align*}
3636

3637
Here, we assume that $\sigma_o$ is sufficiently large and $C$ is not a prime power.
3638

3639
One proceeds, as  is standard in the derivation of Explicit Formulae, by moving the line of integration to the left, picking up residues along the way. Fix the value of $w= {\frac{1}{2}}+i\theta$ and consider
3640
 $$K_w(s,C):=\ \   {\frac{1}{2\pi i }}\left(\frac{\zeta'}{\zeta}\right)(w+s){\frac{C^{s}}{s}},$$   which has poles at $$s= 0, \ \ \ 1-w,\ \ \ {\rm and }\ \ \ \rho-w, $$  for every zero $\rho$ of $\zeta(s)$.
3641
 We distinguish five cases, giving descriptive names to each:
3642

3643
 \begin{enumerate} \item {\it Singular pole:} $s= 1-w$.
3644
                        \item {\it Trivial poles:}  $s= \rho-w$ with $\rho$ a trivial zero of $\zeta(s)$.
3645
                        \item  {\it Oscillatory poles:} $s= \rho-w = i(\gamma-\theta) \ne 0$ with $\rho= 1/2 + i \gamma (\ne w)$ a nontrivial zero of $\zeta(s)$. (Recall that we are assuming the \RH{}, and our variable $w= {\frac{1}{2}}+i\theta$ runs through complex numbers of real part equal to ${\frac{1}{2}}$. So, in this case, $s$ is purely imaginary.)
3646
                        \item {\it Elementary pole:} $s=0$ when  $ w$ is not a nontrivial zero of $\zeta(s)$---i.e., when $0=s\ne\rho-w$ for any nontrivial zero $\rho$.
3647
                        \item {\it Double pole:}  $s=0$ when  $ w$ is a nontrivial zero of $\zeta(s)$---i.e., when $0=s=\rho-w$ for some nontrivial zero $\rho$. This, when it occurs, is indeed a double pole, and the residue is given by $m\cdot \log C +\epsilon$.  Here $m$ is the multiplicity of the zero $\rho$  (which we expect always---or at least usually---to be equal to $1$) and $\epsilon$ is a constant (depending on $\rho$, but not on $C$).
3648

3649
\end{enumerate}
3650

3651
           The standard technique for the ``Explicit formula'' will provide us with a formula for our function of interest  ${\hat \Phi}_{\le C}(\theta)$. The formula has terms resulting from the residues of each of the first three types of poles, and of the {\it Elementary} or  the  {\it Double} pole---whichever exists.  Here is the shape of the formula, given with terminology that is meant to be  evocative:
3652

3653

3654
           $$ {\bf(1)}\ \ \ {\hat \Phi}_{\le C}(\theta) = {\Sing}_{\le C}(\theta) + {\Triv}_{\le C}(\theta) +  {\Osc}_{\le C}(\theta) + {\Elem}_{\le C}(\theta)$$
3655

3656
           Or:
3657

3658
           $${\bf(2)}\ \ \ {\hat \Phi}_{\le C}(\theta) = {\Sing}_{\le C}(\theta) + {\Triv}_{\le C}(\theta) +  {\Osc}_{\le C}(\theta) +  {\Double}_{\le C}(\theta),$$
3659

3660
           \noindent the first if $w$ is not a nontrivial zero of $\zeta(s)$ and the second if it is.
3661

3662
                     The good news is that  the functions ${\Sing}_{\le C}(\theta),  {\Triv}_{\le C}(\theta)$ (and also ${\Elem}_{\le C}(\theta)$ when it exists) are smooth (easily describable) functions of the two variables $C$ and $\theta$; for us, this means that they are
3663
not that related to the essential information-laden {\it discontinuous} structure of  $ {\hat \Phi}_{\le C}(\theta)$. Let us bunch these three contributions together and call the sum ${\Smooth}(C,\theta)$, and rewrite the above two formulae as:
3664
                       $$ {\bf(1)}\ \ \ {\hat \Phi}_{\le C}(\theta) ={\Smooth}(C,\theta) +  {\Osc}_{\le C}(\theta) $$
3665

3666
           Or:
3667

3668
           $${\bf(2)}\ \ \ {\hat \Phi}_{\le C}(\theta) = {\Smooth}(C,\theta)+  {\Osc}_{\le C}(\theta) +  m\cdot \log C +\epsilon,$$
3669

3670
           depending upon whether or not $w$ is a nontrivial zero of $\zeta(s)$.
3671

3672

3673
 We now focus our attention on the Oscillatory term,  ${\Osc}_{\le C}(\theta) $, approximating it by a truncation:  $$\Osc_w(C,X):=  2\sum_{|\gamma| <
3674
 X} {{\frac{e^{i\log C\cdot (\gamma-\theta)}}{i(\gamma-\theta)}}}.$$
3675
Here if a zero has multiplicity $m$, then we count it $m$ times in the sum.
3676
Also, in this formula we have relegated the ``$\theta$" to the status of  a subscript
3677
(i.e., $w = {\frac{1}{2}} +i\theta$) since we are keeping it constant, and we view the
3678
two variables $X$ and $C$ as linked in that we want the cutoff ``$X$" to be sufficiently
3679
large, say $X \gg C^2$, so that the error term can be controlled.
3680

3681
 At this point, we can perform a ``multiplicative version'' of  a Ces\`aro summation---i.e.,  the operator $F(c) \mapsto ({\Ces} F)(C):= \int_1^CF(c) dc/c.$
3682
This has the effect of forcing the oscillatory term to be bounded as $C$ tends to infinity.
3683

3684
 This implies that for any fixed $\theta$,
3685
\begin{itemize}
3686
 \item ${\Ces}{\hat \Phi}_{\le C}(\theta)$  is bounded independent of $C$  if  $\theta$ is not the imaginary part of a nontrivial zero of $\zeta(s)$  and
3687
 \item ${\Ces}{\hat \Phi}_{\le C}(\theta)$ grows as ${\frac{m}{2}}\cdot (\log C)^2 +O(\log C)$ if $\theta$ is the imaginary part of a nontrivial zero of $\zeta(s)$ of multiplicity $m$,
3688
 % WARNING: above item is wrong; grows as should be different: ``we replace "m log(C)+O(1)" by "m/2  log(C)^2 + m gamma log(C)"?''
3689
\end{itemize}
3690
giving a theoretical confirmation of the striking feature of the
3691
graphs of our Chapter~\ref{ch:prime-to-spectrum}.}.
3692

3693

3694
   %  \ill{phihat_even_all}{1}{Plots of $-\frac{1}{2}{\hat \Phi}_{\le C}(\theta)$ for $C=5$ (top), 20, 50, and 500 (bottom).\label{fig:phic}}
3695

3696

3697

3698
%\vskip20pt
3699
%{\bf William: Figure goes here}
3700
%\vskip20pt
3701
 %  \ill{phihat_even_all}{1}{Plots of $-\frac{1}{2}{\hat \Phi}_{\le C}(\theta)$ for $C=5$ (top), 20, 50, and 500 (bottom).\label{fig:phic}}
3702

3703
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3704

3705
The $\theta$-coordinates of these spikes  {\it seem to be}
3706
vaguely clustered about a discrete set of positive real numbers.
3707
%In
3708
%fact, this is already a hint of what happens to the graphs of these
3709
%functions ${\frac{1}{C}}{\hat\Phi}_{\le C}(\theta)$ as we allow the cut-off $C$ to
3710
%tend to infinity.
3711
These ``spikes" are our first glimpse of a
3712
certain infinite set of positive real numbers
3713
 $$\theta_1,\theta_2,\theta_3,\dots$$ which constitute the {\bf Riemann spectrum\index{Riemann spectrum} of
3714
  the primes}. If the \RH{} holds, these numbers would
3715
be the key to the placement of primes on the number line.
3716

3717

3718

3719

3720

3721
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3722

3723

3724

3725
By tabulating these peaks we compute---at least very approximately---$\dots$
3726
\begin{eqnarray*}
3727
\theta_1 &=& 14.134725 \dots\\
3728
\theta_2 &=& 21.022039 \dots\\
3729
\theta_3 &=& 25.010857 \dots\\
3730
\theta_4 &=& 30.424876 \dots\\
3731
\theta_5 &=& 32.935061 \dots\\
3732
\theta_6 &=& 37.586178 \dots
3733
\end{eqnarray*}
3734

3735
Riemann\index{Riemann, Bernhard} defined this sequence of numbers in his 1859 article in a
3736
manner somewhat different from the treatment we have given.  In that
3737
article these $\theta_i$ appear as ``imaginary parts of the
3738
nontrivial zeroes\index{nontrivial zeroes} of his zeta function;''\index{zeta function} we will discuss this
3739
briefly in Part \ref{part4}, Chapter~\ref{ch:envision} below.
3740

3741

3742

3743

3744
\chapter{How many $\theta_i$'s are there?}
3745

3746
  The Riemann spectrum\index{Riemann spectrum}, $\theta_1, \theta_2, \theta_3, \dots$ clearly deserves to be well understood!  What do we know about this sequence of positive real numbers?
3747

3748
Just as we did with the prime numbers\index{prime number} before, we can count
3749
these numbers $\theta_1=14.1347\ldots$,
3750
$\theta_2=21.0220\ldots$, etc., and form the staircase
3751
of Figure~\ref{fig:staircase-riemann-spectrum-30},
3752
with a step up at $\theta_1$, a step up at $\theta_2$, etc.
3753

3754
\ill{staircase-riemann-spectrum-30}{0.8}{The staircase of the Riemann spectrum\index{Riemann spectrum}\label{fig:staircase-riemann-spectrum-30}}
3755

3756
Again, just as with the staircase of primes, we might hope
3757
that as we plot this staircase from a distance as in
3758
Figures~\ref{fig:staircase-riemann-spectrum-50} and \ref{fig:staircase-riemann-spectrum-1000} that it will
3759
look like a beautiful smooth curve.
3760

3761

3762
In fact, we know, conditional on RH, the staircase of real numbers $\theta_1, \theta_2, \theta_3,\dots$ is very closely approximated by the
3763
curve  $${\frac{T}{2\pi}}\log {\frac{T}{2\pi e}},$$ (the error term being bounded by a constant times $\log T$).
3764

3765

3766

3767
\illtwo{staircase-riemann-spectrum-50}{staircase-riemann-spectrum-100}{0.45}{The staircase of the Riemann spectrum\index{Riemann spectrum} and the curve ${\frac{T}{2\pi}}\log {\frac{T}{2\pi e}}$\label{fig:staircase-riemann-spectrum-50}}
3768

3769
\ill{staircase-riemann-spectrum-1000}{0.95}{The staircase of the Riemann spectrum\index{Riemann spectrum} looks like a smooth curve\label{fig:staircase-riemann-spectrum-1000}}
3770

3771

3772

3773

3774

3775
Nowadays, these mysterious numbers $\theta_i$, these spectral lines for the
3776
staircase of primes, are known in great abundance and to great accuracy.  Here is the smallest
3777
one, $\theta_1$, given with over $1{,}000$ digits of its decimal
3778
expansion:
3779

3780
\vskip20pt
3781
{\small
3782
$14.134725141734693790457251983562470270784257115699243175685567460149\newline
3783
9634298092567649490103931715610127792029715487974367661426914698822545\newline
3784
8250536323944713778041338123720597054962195586586020055556672583601077\newline
3785
3700205410982661507542780517442591306254481978651072304938725629738321\newline
3786
5774203952157256748093321400349904680343462673144209203773854871413783\newline
3787
1735639699536542811307968053149168852906782082298049264338666734623320\newline
3788
0787587617920056048680543568014444246510655975686659032286865105448594\newline
3789
4432062407272703209427452221304874872092412385141835146054279015244783\newline
3790
3835425453344004487936806761697300819000731393854983736215013045167269\newline
3791
6838920039176285123212854220523969133425832275335164060169763527563758\newline
3792
9695376749203361272092599917304270756830879511844534891800863008264831\newline
3793
2516911271068291052375961797743181517071354531677549515382893784903647\newline
3794
4709727019948485532209253574357909226125247736595518016975233461213977\newline
3795
3160053541259267474557258778014726098308089786007125320875093959979666\newline
3796
60675378381214891908864977277554420656532052405$}
3797

3798
\vskip20pt
3799

3800

3801

3802
\noindent{}and if, by any chance, you wish to peruse the first
3803
$2{,}001{,}052$
3804
 of these $\theta_i$'s calculated to an accuracy
3805
within $3\cdot 10^{-9}$, consult Andrew Odlyzko's tables:
3806
\begin{center}
3807
\url{http://www.dtc.umn.edu/~odlyzko/zeta_tables}
3808
\end{center}
3809

3810

3811

3812
\chapter{Further questions about the Riemann spectrum\index{Riemann spectrum}}
3813

3814
Since people have already computed\footnote{See \url{http://numbers.computation.free.fr/Constants/constants.html} for details.}
3815
the first $10$ trillion $\theta$'s
3816
and have never found one with
3817
multiplicity $>1$, it is generally expected that the multiplicity of all
3818
the $\theta$'s in the Riemann spectrum\index{Riemann spectrum} is $1$.
3819

3820
  But, independent of
3821
that expectation, our convention in what follows will be that we {\it
3822
  count} each of the elements in the Riemann spectrum\index{Riemann spectrum} repeated as many
3823
times as their multiplicity. So, if it so happens that $\theta_n$
3824
occurs with multiplicity two, we view the Riemann spectrum\index{Riemann spectrum} as being
3825
the series of numbers
3826
  $$\theta_1, \theta_2, \dots, \theta_{n-1}, \theta_n, \theta_n, \theta_{n+1}, \dots$$
3827

3828
It has been conjectured that there are no infinite arithmetic progressions among these numbers.  More broadly, one might expect that there is no visible  correlation between the $\theta_i$'s and translation, i.e., that the distribution\index{distribution} of $\theta_i$'s modulo any positive number $T$ is random, as in Figure~\ref{fig:zero-spacing}.
3829

3830

3831

3832
\illtwo{zero-spacing-mod2pi}{zero-spacing-mod1}{0.4}{Frequency histogram of Odlyzko's computation of the Riemann spectrum\index{Riemann spectrum} modulo $2\pi$ (left) and modulo 1 (right)\label{fig:zero-spacing}}
3833

3834
        In analogy with the discussion of prime gaps in Chapter~\ref{ch:further}, we might compute {\it pair correlation functions} and the statistics of gaps between successive $\theta$'s in the Riemann spectrum\index{Riemann spectrum} (see {\url{http://www.baruch.cuny.edu/math/Riemann_Hypothesis/zeta.zero.spacing.pdf}). This study was  begun by H.\thinspace{}L. Montgomery and
3835
F.\thinspace{}J. Dyson.
3836
As  Dyson noticed, the distributions\index{distribution} one gets from the Riemann spectrum\index{Riemann spectrum} bears a similarity to the distributions of eigenvalues of a random unitary matrix.{\footnote{Any connection of the Riemann spectrum to eigenvalues of matrices would indeed be exciting for our understanding of the \RH{}, in view of what is known as the {\it Hilbert-P{\'o}lya Conjecture}  (see {\url{http://en.wikipedia.org/wiki/Hilbert\%E2\%80\%93P\%C3\%B3lya_conjecture}}).}} This has given rise to what is know as the {\it random matrix heuristics},  a powerful source of conjectures for number theory and other branches of mathematics.
3837

3838
        Here is a histogram of the distribution\index{distribution} of differences $\theta_{i+1}-\theta_i$:
3839
\
3840

3841

3842
\ill{riemann_spectrum_gaps}{0.95}{Frequency histogram of the
3843
first 99{,}999 gaps in the Riemann spectrum\label{fig:riemann_spectrum_gaps}}
3844

3845
\chapter[From the Riemann spectrum to primes]{Going
3846
from the Riemann spectrum to the primes}\index{Riemann spectrum}
3847

3848
To justify the name {\it Riemann spectrum of primes} we will
3849
investigate graphically whether in an analogous manner we can use this
3850
spectrum to get information about the placement of prime numbers\index{prime number}. We
3851
might ask, for example, if there is a trigonometric function with
3852
frequencies given by this collection of real
3853
numbers, $$\theta_1,\theta_2,\theta_3,\dots$$ that somehow pinpoints
3854
the prime powers, just as our functions $${\hat \Phi}(\theta)_{\le C}=
3855
2\sum_{{\rm prime\ powers\ } p^n \le C}
3856
p^{-m/2}\log(p)\cos(n\log(p)\theta)$$ for large $C$ pinpoint the
3857
spectrum\index{spectrum} (as discussed in the previous two chapters).
3858

3859
  To start the return game, consider this sequence of trigonometric functions that have ($zero$ and)  the $\theta_i$ as  spectrum\index{spectrum}
3860

3861
   $$G_C(x):= 1+ \sum_{i < C}\cos(\theta_i\cdot x).$$
3862

3863
    As we'll see  presently it is best to view these functions on a logarithmic scale so we will make the substitution of variables  $x = \log(s)$ and write
3864

3865
    $$H_C(s):= G_C(\log(s))= 1+ \sum_{i < C}\cos(\theta_i\cdot \log(s)).$$
3866

3867

3868
   The theoretical story behind the phenomena that we will
3869
     see graphically in this chapter is a manifestation of
3870
     Riemann's explicit formula.  For modern text references that discuss this general subject, see endnote \bibnote{  A reference for this is:
3871

3872
\vskip10pt
3873
 {\bf [I-K]:  }H. Iwaniec; E. Kowalski,  {\bf Analytic Number Theory}, {\em American Mathematical Society Colloquium Publications} {\bf 53} (2004).
3874

3875
      (See also the bibliography there.)
3876
      \vskip10pt
3877
       Many
3878
     ways of seeing the explicit relationship are given in Chapter 5
3879
     of {\bf [I-K]}. For example, consider Exercise 5 on page 109.
3880

3881
$$\sum_{\rho}{\hat \phi}(\rho) = -\sum_{n \ge 1}\Lambda(n)\phi(n) + I(\phi),$$
3882

3883
where \begin{itemize} \item $\phi$ is any smooth complex-valued
3884
  function on $[1,+\infty)$  with compact support,\item ${\hat \phi}$ is
3885
  its Mellin transform $${\hat \phi}(s):=
3886
  \int_0^{\infty}\phi(x)x^{s-1}dx,$$ \item the last term on the right,
3887
  $I(\phi)$, is just
3888
 $$I(\phi):= \int_1^{\infty}(1-{\frac{1}{x^3-x}})\phi(x)dx$$ (coming from the pole at $s=1$ and the ``trivial zeroes''). \item The more serious  summation on the left hand side of the equation is over the nontrivial zeroes $\rho$, noting that if $\rho$ is a nontrivial zero so is~${\bar \rho}$. \end{itemize}
3889

3890

3891

3892
Of course, this ``explicit formulation'' is not {\it immediately}
3893
applicable to the graphs we are constructing since we cannot naively
3894
take ${\hat \phi}$ to be a function forcing the left hand side to be
3895
$G_C(x)$.
3896

3897
   See also Exercise 7 on page 112,  which discusses the sums
3898
$$
3899
x - \sum_{|\theta| \le C} {\frac{x^{{\frac{1}{2}} +i\theta}-1}{{\frac{1}{2}} +i\theta}}.
3900
$$}
3901

3902

3903
 \ill{phi_cos_sum_2_30_1000}{.8}{Illustration of $-\sum_{i=1}^{1000}
3904
   \cos(\log(s)\theta_i)$, where $\theta_1 \sim 14.13, \ldots$ are the
3905
   first $1000$ contributions to the Riemann spectrum\index{Riemann spectrum}.  The red dots
3906
   are at the prime powers $p^n$, whose size is proportional to
3907
   $\log(p)$.}
3908

3909
 \ill{phi_cos_sum_26_34_1000}{.8}{Illustration of $-\sum_{i=1}^{1000}
3910
   \cos(\log(s)\theta_i)$ in the neighborhood of a twin prime.  Notice
3911
   how the two primes $29$ and $31$ are separated out by the Fourier
3912
   series, and how the prime powers $3^3$ and $2^5$ also appear.}
3913

3914
 \ill{phi_cos_sum_1010_1026_15000}{.7}{Fourier series from $1,000$ to
3915
   $1,030$ using 15,000 of the numbers $\theta_i$.  Note the twin
3916
   primes $1{,}019$ and $1{,}021$ and that $1{,}024=2^{10}$.}
3917

3918

3919
\part{Back to Riemann\label{part4}}
3920

3921

3922
\chapter[Building $\pi(X)$ from the spectrum]{How to build $\pi(X)$ from the spectrum (Riemann's way)}
3923
  We have been dealing in Part~\ref{part3} of our book with $\Phi(t)$, a
3924
  distribution\index{distribution} that---we said---contains all the essential information
3925
  about the placement of primes among numbers. We have given a clean
3926
  restatement of Riemann's hypothesis, the third restatement so far,
3927
  in term of this $\Phi(t)$.  But $\Phi(t)$ was the effect of a series
3928
  of recalibrations and reconfigurings of the original untampered-with
3929
  staircase of primes.  A test of whether we have strayed from our
3930
  original problem---to understand this staircase---would be whether
3931
  we can return to the original staircase, and ``reconstruct it'' so to
3932
  speak, solely from the information of $\Phi(t)$---or equivalently,
3933
  assuming the \RH{} as formulated in Chapter~\ref{sec:tinkering}---can
3934
  we construct the staircase of primes $\pi(X)$ solely
3935
  from knowledge of the sequence of real numbers $\theta_1,
3936
  \theta_2,\theta_3,\dots$?
3937

3938

3939

3940

3941
  The answer to this is yes (given the \RH{}), and is discussed very
3942
  beautifully by Bernhard Riemann\index{Riemann, Bernhard} himself in his famous 1859 article.
3943

3944
  Bernhard Riemann used the spectrum\index{spectrum} of the prime numbers\index{prime number} to provide
3945
  an exact analytic formula that analyzes and/or synthesizes the
3946
  staircase of primes.  This formula is motivated by Fourier's
3947
  analysis of functions as constituted out of cosines.
3948
  %Riemann\index{Riemann, Bernhard} started
3949
  %with a specific smooth function, which we will refer to as $R(X)$, a
3950
 % function that Riemann offered, just as Gauss\index{Gauss, Carl Friedrich} offered his $\Li(X)$,
3951
 % as a candidate smooth function approximating the staircase of
3952
  %primes.
3953
  Recall from Chapter~\ref{sec:rh1} that Gauss's guess is
3954
  $\Li(X)= \int_2^{X}dt/{\rm log}(t).$ To continue this discussion, we do need some familiarity with complex numbers, for the definition of Riemann's\index{Riemann, Bernhard} exact formula
3955
requires extending
3956
the definition of the function $\Li(X)$ to make sense for
3957
complex numbers $X=a+bi$. In fact, more naturally, one might work with the path integral $\li(X):= \int_0^{X}dt/{\rm log}(t).$
3958

3959
  Riemann \index{Riemann, Bernhard} begins his discussion
3960
  (see Figure~\ref{fig:riemann_RX}) by defining
3961
  $$
3962
     R(X) = \sum_{n=1}^{\infty}{\mu(n)\over n} \li(X^{1\over n})= \lim_{N\to \infty} R^{(N)}(X):= \lim_{N\to \infty} \sum_{n=1}^{N}{\mu(n)\over n} \li(X^{1\over n}),
3963
  $$
3964
where  $R^{(N)}(X)$ denotes the truncated sum, which one can compute as an approximation.
3965
\ill{riemann_RX}{1}{Riemann\index{Riemann, Bernhard} defining $R(X)$ in his manuscript\label{fig:riemann_RX}}
3966
In all the discussion of this section the order of summation is
3967
important. For such considerations and issues regarding actual
3968
computation we refer to Riesel-Gohl (see \url{http://wstein.org/rh/rg.pdf}).
3969

3970
Here $\mu(n)$ is the M\"{o}bius function\index{M\"{o}bius function}
3971
 which is defined by
3972
$$
3973
 \mu(n) = \begin{cases}
3974
    1 &
3975
       \mbox{\begin{minipage}{0.6\textwidth}if $n$ is a square-free
3976
       positive integer with an even number of distinct prime
3977
       factors,\end{minipage}}\vspace{1em}\\
3978
    -1& \mbox{\begin{minipage}{0.6\textwidth}if $n$ is a square-free
3979
    positive integer with an odd number of distinct
3980
    prime factors,\end{minipage}}\vspace{1em}\\
3981
    0 & \mbox{if $n$ is not square-free.}
3982
 \end{cases}
3983
 $$
3984
 See Figure~\ref{fig:moebius} for a plot of the M\"{o}bius function.\index{M\"{o}bius function}
3985

3986
\ill{moebius}{1}{The blue dots plot the values of the M\"{o}bius function\index{M\"{o}bius function} $\mu(n)$, which is only defined at integers.\label{fig:moebius}}
3987

3988

3989
In Chapter~\ref{sec:pnt} we encountered the Prime Number Theorem,\index{prime number} which
3990
asserts that $X/\log(X)$ and $\Li(X)$ are both approximations for
3991
$\pi(X)$, in the sense that both go to infinity at the same rate.  That is, the ratio of any two of these three functions tends to $1$ as $X$ goes to $\infty$.
3992
Our first formulation of the \RH{} (see page~\pageref{rh:first}) was
3993
that $\Li(X)$ is an essentially square root accurate approximation of
3994
$\pi(X)$.  Figures~\ref{fig:guess100}--\ref{fig:guess10000} illustrate
3995
that Riemann's function $R(X)$ appears to be an even better
3996
approximation to $\pi(X)$ than anything we have seen before.
3997

3998
\illtwo{pi_riemann_gauss_100}{pi_riemann_gauss_1000}{0.47}{Comparisons of $\Li(X)$ (top), $\pi(X)$ (middle), and $R(X)$ (bottom, computed using 100 terms)\label{fig:guess100}}
3999

4000

4001
\ill{pi_riemann_gauss_10000-11000}{0.8}{Closeup comparison of  $\Li(X)$ (top), $\pi(X)$ (middle), and $R(X)$ (bottom, computed using 100 terms)\label{fig:guess10000}}
4002

4003

4004
Think of Riemann's smooth curve $R(X)$ as the {\em fundamental}
4005
approximation to $\pi(X)$.
4006
Riemann\index{Riemann, Bernhard} offered much more than just a (conjecturally) better
4007
approximation to $\pi(X)$ in his wonderful 1859 article
4008
(see Figure~\ref{fig:riemann_Rk}).
4009
He found a way to construct what looks vaguely like a Fourier series,
4010
but with $\sin(X)$ replaced by $R(X)$ and its spectrum\index{spectrum} the $\theta_i$, which
4011
conjecturally  equals $\pi(X)$ (with a slight correction if the number $X$ is itself a prime).
4012
\ill{riemann_Rk}{1}{Riemann's\index{Riemann, Bernhard} analytic formula for $\pi(X)$\label{fig:riemann_Rk}}
4013
In this manner, Riemann gave an infinite sequence of improved guesses, beginning with $R_0(X)$ (see equation (18)  of Riesel-Gohl at  \url{http://wstein.org/rh/rg.pdf})  a modification of $R(X)$ that takes account of the pole and the trivial zeroes of the Riemann zeta-function, and then considered a sequence:
4014

4015

4016
$$
4017
R_0(X),\quad R_1(X), \quad R_2(X), \quad R_3(X), \quad \ldots
4018
$$
4019
and he hypothesized that one and all of them were all
4020
essentially square root approximations to $\pi(X)$,
4021
and that the sequence of these better and better approximations converge to give an exact formula
4022
for $\pi(X)$.
4023

4024
Thus not only did Riemann\index{Riemann, Bernhard} provide a ``fundamental'' (that is, a smooth curve
4025
that is astoundingly close to $\pi(X)$) but he viewed this as just a
4026
starting point, for he gave the recipe for providing an infinite
4027
sequence of corrective terms---call them Riemann's {\em harmonics}\index{harmonics}; we
4028
will denote the first of these ``harmonics'' $C_1(X)$, the second
4029
$C_2(X)$, etc.  Riemann\index{Riemann, Bernhard} gets his first corrected curve, $R_1(X)$, from
4030
$R_0(X)$ by adding this first harmonic to the fundamental, $$R_1(X) =
4031
R_0(X) + C_1(X),$$ he gets the second by correcting $R_1(X)$ by adding
4032
the second harmonic $$R_2(X) = R_1 (X) + C_2(X),$$ and so on $$R_3(X)
4033
= R_2 (X) + C_3(X),$$ and in the limit provides us with an exact fit.
4034

4035

4036
The \RH{}, if true, would tell us that these correction
4037
terms $C_1(X), C_2(X), C_3(X),\dots$ are all {\em square-root small}
4038
%,
4039
%and all the successively corrected smooth curves $$R_0(X), R_1(X),
4040
%R_2(X), R_3(X),\dots$$ are good approximations to $\pi(X)$.
4041
%Moreover,
4042
%$$
4043
 %\pi(X) = R_0(X) + \sum_{k=1}^{\infty} C_k(X).
4044
%$$
4045

4046
The elegance of Riemann's\index{Riemann, Bernhard} treatment of this problem is that the
4047
corrective terms $C_k(X)$ are all {\em modeled on} the fundamental
4048
$R(X)$ and are completely described if you know the sequence of real
4049
numbers $\theta_1, \theta_2, \theta_3,\dots$ of the last section.
4050

4051

4052
  Assuming the \RH{}, the Riemann\index{Riemann, Bernhard} correction
4053
terms $C_k(X)$ are defined to be
4054
$$
4055
   C_k(X)= -R(X^{\frac{1}{2} + i\theta_k}) -R(X^{\frac{1}{2} - i\theta_k}),
4056
$$
4057
where $\theta_1 = 14.134725\dots, \theta_2 = 21.022039\dots$, etc.,
4058
is the spectrum\index{spectrum} of the prime numbers\index{prime number} \bibnote{You may well ask how we propose to order these correction terms if RH
4059
  is false. Order them in terms of
4060
  (the absolute value of) their imaginary part, and in the unlikely
4061
  situation that there is more than one zero with the same imaginary
4062
  part, order zeroes of the same imaginary part by their real parts,
4063
  going from right to left.}.
4064

4065
In sum, Riemann\index{Riemann, Bernhard} provided an extraordinary recipe that allows us to work
4066
out the harmonics\index{harmonics}, $$C_1(X),\quad C_2(X),\quad C_3(X),\quad \dots$$ without our having
4067
to consult, or compute with, the actual staircase of primes. As with
4068
Fourier's modus operandi where both {\em fundamental} and all {\em
4069
  harmonics}\index{harmonics} are modeled on the sine wave, but appropriately
4070
calibrated, Riemann\index{Riemann, Bernhard} fashioned his higher harmonics\index{harmonics}, modeling them all
4071
on a single function, namely  $R(X)$.
4072

4073
The convergence of $R_k(X)$ to $\pi(X)$ is strikingly illustrated
4074
in the plots in Figures~\ref{fig:rkfirst}--\ref{fig:rklast} of $R_k$ for various values of $k$.
4075

4076

4077
\ill{Rk_1_2_100}{.9}{The function $R_{1}$ approximating the staircase of primes up to $100$\label{fig:rkfirst}}
4078

4079
\ill{Rk_10_2_100}{.9}{The function $R_{10}$ approximating the staircase of primes up to $100$}
4080

4081
\ill{Rk_25_2_100}{.9}{The function $R_{25}$ approximating the staircase of primes up to $100$}
4082

4083
\ill{Rk_50_2_100}{.9}{The function $R_{50}$ approximating the staircase of primes up to $100$}
4084

4085

4086
\ill{Rk_50_2_500}{.9}{The function $R_{50}$ approximating the staircase of primes up to $500$}
4087

4088
\ill{Rk_50_350_400}{.9}{The function $\Li(X)$ (top, green), the function $R_{50}(X)$ (in blue), and the staircase of primes on the interval from 350 to 400.\label{fig:rklast}}
4089

4090

4091
\chapter[As Riemann envisioned it]{As Riemann envisioned it, the zeta function relates the staircase of primes to its Riemann\index{Riemann, Bernhard} spectrum\index{Riemann spectrum}\label{ch:envision}}
4092
In the previous chapters we have described---using Riemann's
4093
Hypothesis---how to obtain the {\it
4094
  spectrum}\index{spectrum} $$\theta_1,\theta_2,\theta_3,\dots$$ from the staircase of
4095
primes, and hinted at how to go back. Roughly speaking, we were
4096
performing ``Fourier transformations'' to make this transit. But
4097
Riemann\index{Riemann, Bernhard}, on the very first page of his 1859 memoir, construes the
4098
relationship we have been discussing, between spectrum\index{spectrum} and staircase,
4099
in an even more profound way.
4100

4101
To talk about this extraordinary insight of Riemann\index{Riemann, Bernhard}, one would need to
4102
do two things that might seem remote from our topic, given our
4103
discussion so far.
4104

4105
\begin{itemize} \item We will discuss a key idea that Leonhard Euler\index{Euler, Leonhard}
4106
  had (circa 1740).
4107
 \item To follow the evolution of this idea in the hands of Riemann\index{Riemann, Bernhard}, we would have to assume familiarity with basic complex analysis.
4108
 \end{itemize}
4109

4110
 We will say only a few words here about this, in hopes of giving at
4111
 least a shred of a hint of how marvelous Riemann's idea is.  We will be drawing, at this point, on some further mathematical background.  For
4112
 readers who wish to pursue the themes we discuss, here is a list of sources,
4113
 that are our favorites among those meant to be read by a somewhat
4114
 broader audience than people very advanced in the subject. We list
4115
 them in order of ``required background.''
4116

4117
 \begin{enumerate}
4118
 \item John Derbyshire's {\it Prime Obsession: Bernhard Riemann and
4119
     the Greatest Unsolved Problem in Mathematics} (2003).  We have already
4120
   mentioned this book in our foreword, but feel that it is so
4121
   good, that it is worth a second mention here.
4122
 \item The Wikipedia entry for Riemann's Zeta Function\index{zeta function}
4123
   (\url{http://en.wikipedia.org/wiki/Riemann\_zeta\_function}). It is
4124
   difficult to summarize who wrote this, but we feel that it is a
4125
   gift to the community in its clarity. Thanks authors!
4126
 \item Enrico Bombieri's article \bibnote{Bombieri, {\em The Riemann
4127
       Hypothesis}, available at
4128
     \url{http://www.claymath.org/sites/default/files/official_problem_description.pdf}}.
4129
   To comprehend all ten pages of this excellent and fairly thorough
4130
   account may require significant background, but try your hand at
4131
   it; no matter where you stop, you will have seen good things in
4132
   what you have read.
4133
\end{enumerate}
4134
\vskip20pt
4135
{\bf Leonhard Euler's\index{Euler, Leonhard} idea ($\simeq$1740):}
4136
As readers of Jacob Bernoulli's {\it Ars Conjectandi} (or of the works
4137
of John Wallis) know, there was in the early 18th century
4138
already a rich mathematical theory of
4139
finite sums of (non-negative $k$-th powers) of consecutive integers.
4140
This sum, $$S_k(N) = 1^k+2^k+3^k+\cdots +(N-1)^k,$$ is a polynomial
4141
in $N$ of degree $k+1$ with no constant term, a leading term equal to
4142
${1\over k+1}N^{k+1}$, and a famous linear term.  The coefficient of
4143
the linear term of the polynomial $S_k(N)$ is the {\it Bernoulli
4144
  number} $B_{k}$:
4145
\begin{align*}
4146
S_1(N) &= 1+2+3+ \dots + (N-1) \ = \ {N(N-1)\over 2}  \ = \  {N^2\over 2}\ -\  {1\over 2}\cdot N,\\
4147
S_2(N) &= 1^2+2^2+3^2+ \dots + (N-1)^2 \ = \   {N^3\over 3} +\cdots -\ {1\over 6}\cdot N,\\
4148
S_3(N) &= 1^3+2^3+3^3+ \dots + (N-1)^3 \ = \  {N^4\over 4} +\cdots  -\ 0\cdot N,\\
4149
S_4(N) &= 1^4+2^4+3^4+ \dots + (N-1)^4 \ = \  {N^5\over 5} +\cdots  -\ {1\over 30}\cdot N,
4150
\end{align*}
4151
etc.
4152
For odd integers $k
4153
> 1$ this linear term vanishes.  For even integers $2k$ the  Bernoulli number
4154
$B_{2k}$ is the rational number given by the coefficient
4155
of $\frac{x^{2k}}{(2k)!}$ in the power series expansion
4156
\    \newline \bigskip \     \newline
4157
$${x\over e^x-1} \ = \  1 - {x\over 2} +  \sum_{k=1}^{\infty}
4158
(-1)^{k+1}B_{2k}{x^{2k}\over{(2k)}!}.$$
4159
\    \newline \bigskip \     \newline
4160
So
4161
  $$ B_2 = {1\over 6},\ \ \ \ \ \ \ \   B_4 = {1\over 30},\ \ \ \ \ \ \ \   B_6 = {1\over 42},\ \ \ \ \ \ \
4162
\   B_8 = {1\over 30},$$ and to convince you that the numerator of these numbers is not always $1$, here are
4163
a few more:
4164
$$B_{10} = {5\over 66},\ \ \ \ \ \ \ \   B_{12}= {691\over 2730},\ \ \ \ \ \ \ \  B_{14} = {7\over 6}.$$
4165
\    \newline \bigskip \     \newline
4166
If you turn attention to sums of negative $k$-th powers of consecutive
4167
integers, then when $k= -1$, $$S_{-1}(N)= {\frac{1}{1}}+
4168
{\frac{1}{2}}+ {\frac{1}{3}}+ \cdots+ {\frac{1}{N}}$$ tends to infinity
4169
like $\log(N)$, but for $k < -1$ we are facing the sum of reciprocals
4170
of powers (of exponent $>1$) of consecutive whole numbers, and
4171
$S_k(N)$ converges. This is the first appearance of the zeta function\index{zeta function}
4172
$\zeta(s)$ for arguments $s=2,3,4,\dots$ So let us denote these limits
4173
by notation that has been standard, after Riemann\index{Riemann, Bernhard}:
4174
$$\zeta(s):=  {\frac{1}{1^s}}+ {\frac{1}{2^s}}+ {\frac{1}{3^s}}+ \cdots$$
4175
The striking reformulation that Euler\index{Euler, Leonhard} discovered was the expression of
4176
this infinite sum as an infinite product of factors associated to the
4177
prime numbers:\index{prime number}
4178
$$
4179
\zeta(s)=\sum_n{\frac{1}{n^s}} =
4180
\prod_{p \ {\rm prime}}{\frac{1}{1-p^{-s}}},
4181
$$
4182
where the infinite sum on the left and the infinite product on the
4183
right both converge (and are equal) if $s > 1$. He also evaluated these
4184
sums at even positive integers, where---surprise---the Bernoulli
4185
numbers come in again; and they and $\pi$ combine to yield the values of the zeta function\index{zeta function} at even positive integers:
4186

4187
\begin{align*}
4188
\zeta(2) &= {\frac{1}{1^2}}+{\frac{1}{2^2}}+\cdots = \pi^2/6 \simeq 1.65\dots\\
4189
\zeta(4) &= {\frac{1}{1^4}}+{\frac{1}{2^4}}+\cdots = \pi^4/90\simeq 1.0823\dots
4190
\end{align*}
4191
and, in general,
4192
  $$\zeta(2n) = {\frac{1}{1^{2n}}}+{\frac{1}{2^{2n}}}+\cdots \ \ \ = \ \ \ (-1)^{n+1}B_{2n}\pi^{2n}\cdot {\frac{2^{2n-1}}{(2n)!}}.$$
4193

4194
  A side note to Euler's\index{Euler, Leonhard} formulas comes from the fact (only known much
4195
  later) that no power of $\pi$ is rational: do you see how to use
4196
  this to give a proof that there are infinitely many primes,
4197
  combining Euler's\index{Euler, Leonhard} infinite product expansion displayed above with
4198
  the formula for $\zeta(2)$, or with the formula for $\zeta(4)$, or,
4199
  in fact, for the formulas for $\zeta(2n)$ for {\it any} $n$ you
4200
  choose?
4201

4202
  {\bf Pafnuty Lvovich Chebyshev's\index{Chebyshev, Pafnuty Lvovich} idea ($\simeq$1845):} The second moment
4203
  in the history of evolution of this function $\zeta(s)$ is when
4204
  Chebyshev\index{Chebyshev, Pafnuty Lvovich} used the {\it same formula} as above in the extended range
4205
  where $s$ is allowed now to be a real variable---not just an
4206
  integer---greater than $1$.  Making use of this extension of the
4207
  range of definition of Euler's\index{Euler, Leonhard} sum of reciprocals of powers of
4208
  consecutive whole numbers, Chebyshev\index{Chebyshev, Pafnuty Lvovich} could prove that for large $x$
4209
  the ratio of $\pi(x)$ and $x/\log(x)$ is bounded above and below by
4210
  two explicitly given constants. He also proved that there exists a
4211
  prime number\index{prime number} in the interval bounded by $n$ and $2n$ for any positive
4212
  integer $n$ (this was called {\it Bertrand's postulate}; see \url{http://en.wikipedia.org/wiki/Proof_of_Bertrand%27s_postulate}).
4213
  \vskip20pt
4214

4215
  {\bf Riemann's idea (1859):} It is in the third step of the
4216
  evolution of $\zeta(s)$ that something quite surprising
4217
  happens. Riemann\index{Riemann, Bernhard} extended the range of Chebyshev's\index{Chebyshev, Pafnuty Lvovich} sum of
4218
  reciprocals of positive real powers of consecutive whole numbers
4219
  allowing the argument $s$ to range over the entire complex plane $s$
4220
  (avoiding $s=1$).  Now this is a more mysterious extension of
4221
  Euler's\index{Euler, Leonhard} function, and it is deeper in two ways:
4222

4223
\begin{itemize}
4224
\item The formula $$\zeta(s):= {\frac{1}{1^s}}+ {\frac{1}{2^s}}+
4225
  {\frac{1}{3^s}}+ \cdots$$ does converge when the real part of the
4226
  exponent $s$ is greater than $1$ (i.e., this allows us to use the
4227
  same formula, as Chebyshev\index{Chebyshev, Pafnuty Lvovich} had done, for the right half plane
4228
  in the complex plane determined by the condition $s=x+iy$ with $x>1$
4229
  but not beyond this).  You can't simply use the same formula for the
4230
  extension.\item So you must face the fact that if you wish to
4231
  ``extend'' a function beyond the natural range in which its defining
4232
  formula makes sense, there may be many ways of doing it.
4233
\end{itemize}
4234

4235
To appreciate the second point, the theory of a complex variable is
4236
essential. The {\it uniqueness} (but not yet the {\it existence}) of
4237
Riemann's extension of $\zeta(s)$ to the entire complex plane is
4238
guaranteed by the phenomenon referred to as {\it analytic
4239
  continuation}.  If you are given a function on any infinite subset
4240
$X$ of the complex plane that contains a limit point, and if you are
4241
looking for a function on the entire complex plane{\footnote{ or to
4242
    any connected open subset that contains $X$}} that is
4243
differentiable in the sense of complex analysis, there may be no
4244
functions at all that have that property, but if there is one, that
4245
function is {\it unique}.  But Riemann\index{Riemann, Bernhard} succeeded: he was indeed able
4246
to extend Euler's\index{Euler, Leonhard} function to the entire complex plane except for the
4247
point $s=1$, thereby defining what we now call {\it Riemann's zeta
4248
  function}.  \vskip20pt
4249

4250
Those ubiquitous Bernoulli numbers reappear yet again as
4251
values of this {\it extended} zeta function\index{zeta function} at negative integers:
4252
$$\zeta(-n) = -B_{n+1}/(n+1)$$
4253
so since the Bernoulli numbers indexed by odd integers $>1$ all
4254
vanish, the extended zeta function\index{zeta function} $\zeta(s)$ actually vanishes at
4255
{\it all} even negative integers.
4256

4257
The even integers $-2, -4, -6, \dots$ are often called the {\bf
4258
trivial zeroes} of the Riemann zeta function.\index{zeta function}  There are indeed
4259
other zeroes of the zeta function,\index{zeta function} and those other zeroes could---in no way---be
4260
dubbed ``trivial,'' as we shall shortly see.
4261
\vskip20pt
4262
It is time to consider these facts:
4263

4264
  \begin{enumerate}\item {\bf Riemann's zeta function\index{zeta function} codes the
4265
      placement of prime powers among all numbers.} The key here is to
4266
    take the logarithm and then the derivative of $\zeta(s)$ (this boils
4267
    down to forming ${\frac{d{\zeta}}{ds}}(s)/\zeta(s)$). Assuming
4268
    that the real part of $s$ is $>1$, taking the logarithm of
4269
    $\zeta(s)$---using Euler's\index{Euler, Leonhard} infinite product formulation---gives
4270
    us $$\log\zeta(s)= \sum_{p \ {\rm
4271
        prime}}-\log{\big(1-p^{-s}\big)},$$ and we can do this
4272
    term-by-term, since the real part of $s$ is $>1$.  Then taking the
4273
    derivative gives us:
4274

4275
  \vskip20pt
4276
$${\frac{d{\zeta}}{ds}}(s)/\zeta(s)= -\sum_{n=1}^{\infty}\Lambda(n)n^{-s}$$
4277
where
4278
$$
4279
    \Lambda(n) :=
4280
    \begin{cases}
4281
     \log(p) & \text{when $n= p^k$ for $p$ a prime number and $k >0$, and}\\
4282
       0     & \text{if $n$ is not a power of a prime number.}
4283
    \end{cases}
4284
$$
4285
In particular,  $\Lambda(n)$ ``records" the placement of prime powers.
4286

4287
%\todo{Put in log deriv of zeta.}
4288
%$$ {\frac{d{\zeta}}{ds}}(s)/\zeta(s)=-\sum_{p \ {\rm prime}}\log(p)p^{-s}.$$  For any particular $s$ this is an infinite sum which tallies the primes  $p$, each weighted by the quantity  $\log(p)p^{-s}$ and is absolutely convergent if the real part of $s$ is $>1$.   % note : this forgets the prime powers.
4289

4290
 \vskip20pt
4291

4292
\item {\bf You know lots about an analytic function if you know its zeroes and
4293
    poles.} For example for polynomials, or
4294
  even rational functions: if someone told you that a
4295
  certain rational function $f(s)$ vanishes to order $1$ at $0$ and at
4296
  $\infty$; and that it has a double pole at $s=2$ and at all other
4297
  points has finite nonzero values, then you can immediate say that
4298
  this mystery function is a nonzero constant times $s/(s-2)^2$.
4299

4300
  Knowing the zeroes and poles (in the complex plane) alone of the
4301
  Riemann zeta function\index{zeta function} doesn't entirely pin it down---you have to
4302
  know more about its behavior at infinity since---for example,
4303
  multiplying a function by $e^z$ doesn't change the structure of its
4304
  zeroes and poles in the finite plane.  But a complete understanding
4305
  of the zeroes and poles of $\zeta(s)$ will give all the information
4306
  you need to pin down the placement of primes among all numbers.
4307

4308
  So here is the score:
4309

4310
  \begin{itemize} \item As for poles, $\zeta(s)$ has only one pole. It
4311
    is at $s=1$ and is of order $1$ (a ``simple pole'').\item As for
4312
    zeroes, we have already mentioned the trivial zeroes (at negative
4313
    even integers), but $\zeta(s)$ also has infinitely many {\it
4314
      nontrivial} zeroes. These nontrivial zeroes\index{nontrivial zeroes} are known to lie in
4315
    the vertical strip $$0\ < \ {\rm real\ part\ of\ } s\ < \
4316
    1.$$\end{itemize}
4317

4318
 \end{enumerate}
4319

4320
 And here is yet another equivalent statement of Riemann's
4321
 Hypothesis---this being the formulation closest to the one given in
4322
 his 1859 memoir:
4323

4324
      \begin{center}
4325
       \shadowbox{ \begin{minipage}{0.9\textwidth}
4326
\mbox{}       \vspace{0.2ex}
4327
       \begin{center}{\bf\large The {\bf \RH{}} (fourth formulation)}\end{center}
4328
       \medskip
4329

4330
       All the nontrivial zeroes\index{nontrivial zeroes} of $\zeta(s)$ lie on the vertical
4331
       line in the complex plane consisting of the
4332
       complex numbers with real part equal to $1/2$.
4333
       These zeroes are none other than
4334
       ${\frac{1}{2}}\pm i\theta_1,{\frac{1}{2}}\pm i\theta_2,
4335
       {\frac{1}{2}}\pm i\theta_3,\dots,$ where $\theta_1, \theta_2,
4336
       \theta_3, \dots$ comprise the spectrum\index{spectrum} of primes we talked
4337
       about in the earlier chapters.
4338

4339
\vspace{1ex}
4340
\end{minipage}}
4341
      \end{center}
4342

4343
The ``${\frac{1}{2}}$" that appears in this formula is directly related to the fact---correspondingly conditional on RH---that $\pi(X)$ is ``square-root accurately"\index{square-root accurate} approximated by $\Li(X)$.  That is, the error term is bounded by $X^{{\bf{\frac{1}{2}}}+\epsilon}.$  It has been conjectured that {\it all} the zeroes of $\zeta(s)$ are simple zeroes.
4344

4345

4346
Here is how Riemann\index{Riemann, Bernhard} phrased RH:
4347

4348
 \begin{quote}
4349
  ``One now finds indeed approximately this number of real roots
4350
  within these limits, and it is very probable that all roots are
4351
  real. Certainly one would wish for a stricter proof here; I have
4352
  meanwhile temporarily put aside the search for this after some
4353
  futile attempts, as it appears unnecessary for the next objective of
4354
  my investigation.''
4355
\end{quote}
4356

4357
 In the above quotation, Riemann's roots are the $\theta_i$'s and the statement that they are ``real" is equivalent to RH.
4358

4359
      The zeta function,\index{zeta function} then, is the vise, that so elegantly clamps
4360
      together information about the placement of primes and their
4361
      spectrum\index{spectrum}!
4362

4363

4364
That a simple geometric property of these zeroes (lying on a line!) is
4365
directly equivalent to such profound (and more difficult to express)
4366
regularities among prime numbers\index{prime number} suggests that these zeroes and the
4367
parade of Riemann's corrections governed by them---when we truly
4368
comprehend their message---may have lots more to teach us, may
4369
eventually allow us a more powerful understanding of arithmetic.  This
4370
infinite collection of complex numbers, i.e., the nontrivial zeroes\index{nontrivial zeroes} of
4371
the Riemann zeta function,\index{zeta function} plays a role with respect to $\pi(X)$ rather
4372
like the role the {\em spectrum}\index{spectrum} of the Hydrogen atom plays in
4373
Fourier's theory.  Are the primes themselves no more than an
4374
epiphenomenon, behind which there lies, still veiled from us, a
4375
yet-to-be-discovered, yet-to-be-hypothesized, profound conceptual key
4376
to their perplexing orneriness?  Are the many innocently posed, yet
4377
unanswered, phenomenological questions about numbers---such as in the
4378
ones listed earlier---waiting for our discovery of this deeper level
4379
of arithmetic?  Or for layers deeper still?  Are we, in fact, just at
4380
the beginning?
4381

4382

4383

4384

4385
These are not completely idle thoughts, for a tantalizing analogy
4386
relates the number theory we have been discussing to an already
4387
established branch of mathematics---due, largely, to the work of
4388
Alexander Grothendieck,\index{Grothendieck, Alexandre} and Pierre Deligne\index{Deligne, Pierre}---where the corresponding
4389
analogue of Riemann's hypothesis has indeed been proved\ldots.
4390

4391

4392

4393
\chapter{Companions to the zeta function\index{zeta function}}\label{ch:companions}
4394

4395
Our book, so far, has been exclusively about Riemann's $\zeta(s)$ and
4396
its zeroes. We have been discussing how (the placement of) the zeroes
4397
of $\zeta(s)$ in the complex plane contains the information needed to
4398
understand (the placement of) the primes in the set of all whole
4399
numbers; and conversely.
4400

4401
It would be wrong---we think---if we don't even mention that
4402
$\zeta(s)$ fits into a broad family of similar functions that connect to other problems in number theory.
4403

4404

4405
% for every {\it quadratic number field} $K:={\bf Q}({\sqrt
4406
%  d})$, there is a corresponding ``$L$-function'' $L(K,s)$ with an
4407
%expansion of the form $$L(K,s)= \sum_{N=1}^{\infty} a_n(K)\cdot
4408
%n^{-s}$$ convergent for complex numbers $s$ with real part $> 1$---and
4409
%where the coefficients $a_n(K)$ are either $0$ or $\pm 1$.  The
4410
%product $\zeta(s)\cdot L(K,s)$ is called the {\it zeta function\index{zeta function} of
4411
%  $K$} and is sometimes denoted $\zeta_K(s)$ to distinguish it from
4412
%the Riemann zeta function.\index{zeta function}  The ``nontrivial zeroes''\index{nontrivial zeroes} of $\zeta_K(s)$
4413
%contains the information needed to understand the (placement of) the
4414
%norms of prime ideals of the ring of integers of $K$.
4415

4416
For example---instead of the ordinary integers---consider the {\it
4417
  Gaussian integers}.\index{Gaussian integers} This is the collection of numbers
4418
 $$ \{ a+bi\}$$
4419
 where $i = {\sqrt{-1}}$ and $a,b$ are ordinary integers. We can add
4420
 and multiply two such numbers and get another of the same form. The
4421
 only ``units'' among the Gaussian integers\index{Gaussian integers} (i.e., numbers whose
4422
 inverse is again a Gaussian integer) are the four numbers $\pm 1, \pm
4423
 i$ and if we multiply any Gaussian integer $a+bi$ by any of these
4424
 four units, we get the collection $\{a+bi, -a-bi, -b+ai, b -ai\}$.
4425
 We measure the {\it size} of a Gaussian integer by the square of its
4426
 distance to the origin, i.e., $$|a+bi|^2 = {a^2+b^2}.$$   This size function  is called the {\bf norm} of the Gaussian integer $a+bi$ and can also be thought of as  the product of $a+bi$ and its ``conjugate'' $a-bi$.   Note that
4427
 the norm is a nice multiplicative function on the set of Gaussian
4428
 integers, in that the norm of a product of two Gaussian integers\index{Gaussian integers} is
4429
 the product of the norms of each of them.
4430

4431
 We have a natural notion of {\bf prime Gaussian integer}, i.e., one
4432
 with $a>0$ and $b\geq 0$ that cannot be factored\index{factor} as the product of
4433
 two Gaussian integers\index{Gaussian integers} of smaller size.
4434
 Given that every nonzero Gaussian integer is uniquely expressible as a unit times a product of prime Gaussian integers, can you prove that if a Gaussian integer is a prime
4435
 Gaussian integer, then its size must either be an ordinary prime
4436
 number, or the square of an ordinary prime number?\index{prime number}
4437

4438
Figure~\ref{fig:gaussian-10} contains a plot of the first few Gaussian
4439
primes as they display themselves amongst complex numbers:
4440

4441
\ill{gaussian_primes-10}{.8}{Gaussian primes with coordinates up to 10\label{fig:gaussian-10}}
4442

4443
Figure~\ref{fig:gaussian-100} plots a much larger number of Gaussian primes:
4444

4445
\ill{gaussian_primes-100}{1}{Gaussian primes with coordinates up to 100\label{fig:gaussian-100}}
4446

4447
Figures~\ref{fig:gaussian-staircase-14}--\ref{fig:gaussian-staircase-10000} plot the number
4448
of Gaussian primes up to each norm:
4449

4450
\ill{gaussian_staircase_14}{.8}{Staircase of Gaussian primes of norm up to 14\label{fig:gaussian-staircase-14}}
4451
\ill{gaussian_staircase_100}{.8}{Staircase of Gaussian primes of norm up to 100}
4452
\ill{gaussian_staircase_1000}{.8}{Staircase of Gaussian primes of norm up to 1000}
4453
\ill{gaussian_staircase_10000}{.8}{Staircase of Gaussian primes of norm up to 10000\label{fig:gaussian-staircase-10000}}
4454

4455
The natural question to ask, then, is: how are the Gaussian prime numbers\index{prime number} distributed? Can one provide as close an estimate to their distribution\index{distribution} and structure, as one has for ordinary primes?  The answer, here is yes: there is a companion theory,  with an analogue to the Riemann zeta function\index{zeta function} playing  a role similar to the prototype $\zeta(s)$.  And, it seems as if its ``nontrivial zeroes''\index{nontrivial zeroes} behave similarly: as far as things have been computed, they  all have the property that
4456
their real part is equal to ${\frac{1}{2}}$.  That is, we
4457
have a companion to the \RH{}.
4458

4459
 This is just the beginning of a much larger story related to what has been come to be called  the ``Grand Riemann Hypotheses" and connects to analogous problems, some of them actually solved, that give some measure of evidence for the truth of these hypotheses.  For example, for any system
4460
of polynomials in a fixed number of variables (with integer
4461
coefficients, say) and for each prime number $p$ there are
4462
``zeta-type'' functions that contain all the information needed to
4463
count the number of simultaneous solutions in finite fields of
4464
characteristic $p$.  That such counts can be well-approximated with a
4465
neatly small error term is related to the placement of the zeroes of
4466
these ``zeta-type'' functions. There is then an analogous ``\RH{}''
4467
that prescribes precise conditions on the real parts of their
4468
zeroes---this prescription being called the ``\RH{} for
4469
function fields.'' Now the beauty of this analogous hypothesis is that
4470
it has, in fact, been proved!
4471

4472
Is this yet another reason to believe the Grand \RH{}?
4473

4474

4475
%
4476
%\chapter{Glossary}
4477
%
4478
%Here we give all the connections with the standard literature and
4479
%conventional terminology that we restrained
4480
%  ourselves from giving in the text itself.
4481
%
4482
%  For the moment the list of entries is the following but it will
4483
%  expand.
4484
%
4485
%  $\pi(X)= \pi_0(X)$, $Q(X)= \psi_0(X)$, $\log, \exp$, $\delta$,
4486
%  distributions,\index{distribution} RSA cryptography, Mersenne prime, $\Li(x)$, random
4487
%  walk, spectrum, harmonic, fundamental, frequency, phase, amplitude,
4488
%  band-pass, complex numbers, complex plane, Riemann Zeta function\index{zeta function},
4489
%  zeroes of zeta.
4490
%
4491
%
4492
%  Also mention Brian Conrey's Notices article on RH as ``among the
4493
%  best 12 pages of RH survey material that there is---at least for an
4494
%  audience of general mathematicians.''  And mention
4495
% mention Sarnak and Bombieri articles at the CMI website on RH.
4496
%
4497

4498
\backmatter
4499

4500

4501
%\eject
4502
%\addcontentsline{toc}{chapter}{Index}
4503
%\printindex
4504

4505
%\label{lastpage}
4506

4507
% no longer in previous chapter, so we increase the counter
4508
% so that figure and other labels aren't for the previous chapter.
4509
\stepcounter{chapter}
4510
\addcontentsline{toc}{chapter}{Endnotes}
4511

4512
\renewcommand\bibname{Endnotes}
4513

4514
\bibliography{biblio}
4515

4516

4517
\printindex
4518

4519

4520
\end{document}
4521

4522

4523
%sagemathcloud={"zoom_width":115,"latex_command":"killall pdflatex; makeindex rh; pdflatex -synctex=1 -interact=nonstopmode rh.tex;  bibtex rh"}
4524

4525
Product

Resources

Company