Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
| Download

William Stein -- Talk for Mathematics is a long conversation: a celebration of Barry Mazur

Views: 2342
1
\documentclass[openany]{book}
2
3
4
%\documentclass[11pt,draft]{article} % uncomment this and comment out the above line for *fast* typesetting (no images)
5
6
\usepackage{fix-cm}
7
8
\usepackage{soul}
9
10
\usepackage{float}
11
\usepackage{tikz}
12
13
\usepackage{wrapfig}
14
15
16
\usepackage{color}
17
\definecolor{dblackcolor}{rgb}{0.0,0.0,0.0}
18
\definecolor{dbluecolor}{rgb}{.01,.02,0.7}
19
\definecolor{dredcolor}{rgb}{0.8,0,0}
20
\definecolor{dgraycolor}{rgb}{0.30,0.3,0.30}
21
\usepackage{listings}
22
\lstdefinelanguage{Sage}[]{Python}
23
{morekeywords={True,False,sage,singular},
24
sensitive=true}
25
\lstset{frame=none,
26
showtabs=False,
27
showspaces=False,
28
showstringspaces=False,
29
commentstyle={\ttfamily\color{dredcolor}},
30
keywordstyle={\ttfamily\color{dbluecolor}\bfseries},
31
stringstyle ={\ttfamily\color{dgraycolor}\bfseries},
32
language = Sage,
33
basicstyle={\scriptsize \ttfamily},
34
aboveskip=.3em,
35
belowskip=.1em
36
}
37
38
39
\usepackage{fancybox}
40
\usepackage{graphicx}
41
\usepackage{amsmath}
42
\usepackage{amsfonts}
43
\usepackage{amssymb}
44
\usepackage{amsthm}
45
46
\usepackage{url}
47
48
\usepackage{makeidx}\makeindex
49
50
\DeclareMathOperator{\Gap}{Gap}
51
\DeclareMathOperator{\Li}{Li}
52
\DeclareMathOperator{\li}{li}
53
54
\DeclareMathOperator{\Sing}{Sing}
55
\DeclareMathOperator{\Triv}{Triv}
56
\DeclareMathOperator{\Osc}{Osc}
57
\DeclareMathOperator{\Elem}{Elem}
58
\DeclareMathOperator{\Double}{Double}
59
\DeclareMathOperator{\Smooth}{Smooth}
60
\DeclareMathOperator{\Ces}{Ces}
61
62
63
\DeclareGraphicsRule{.tif}{png}{.png}{`convert #1 `dirname #1`/`basename #1 .tif`.png}
64
65
\newcommand{\mycaption}[1]{\begin{quote}{\bf Figure: } \large #1\end{quote}}
66
67
\newcommand{\ill}[3]{%
68
\begin{figure}[H]%
69
\vspace{-2ex}
70
\centering%
71
\includegraphics[width=#2\textwidth]{illustrations/#1}%
72
\caption{#3}%
73
\vspace{-2ex}
74
\end{figure}}
75
76
\newcommand{\illtwo}[4]{%
77
\begin{figure}[H]\centering%
78
\includegraphics[width=#3\textwidth]{illustrations/#1}$\qquad$\includegraphics[width=#3\textwidth]{illustrations/#2}%
79
\caption{#4}%
80
\end{figure}}
81
82
\newcommand{\illthree}[5]{%
83
\begin{figure}[H]%
84
\centering%
85
\includegraphics[width=#4\textwidth]{illustrations/#1}$\qquad$\includegraphics[width=#4\textwidth]{illustrations/#2}$\qquad$\includegraphics[width=#4\textwidth]{illustrations/#3}%
86
\caption{#5}%
87
\end{figure}}
88
89
\newcommand{\illtwoh}[4]{%
90
\begin{figure}[H]\centering%
91
\includegraphics[height=#3\textheight]{illustrations/#1}$\qquad$\includegraphics[height=#3\textheight]{illustrations/#2}%
92
\caption{#4}%
93
\end{figure}}
94
95
\newcommand{\illthreeh}[5]{%
96
\begin{figure}[H]%
97
\centering%
98
\includegraphics[height=#4\textheight]{illustrations/#1}$\qquad$\includegraphics[height=#4\textheight]{illustrations/#2}$\qquad$\includegraphics[height=#4\textheight]{illustrations/#3}%
99
\caption{#5}%
100
\end{figure}}
101
102
103
%%%% Theoremstyles
104
\theoremstyle{plain}
105
\newtheorem{theorem}{Theorem}[chapter]
106
\newtheorem{proposition}[theorem]{Proposition}
107
\newtheorem{corollary}[theorem]{Corollary}
108
\newtheorem{claim}[theorem]{Claim}
109
\newtheorem{lemma}[theorem]{Lemma}
110
\newtheorem{hypothesis}[theorem]{Hypothesis}
111
\newtheorem{conjecture}[theorem]{Conjecture}
112
113
\theoremstyle{definition}
114
\newtheorem{definition}[theorem]{Definition}
115
\newtheorem{question}[theorem]{Question}
116
\newtheorem{problem}[theorem]{Problem}
117
\newtheorem{alg}[theorem]{Algorithm}
118
\newtheorem{openproblem}[theorem]{Open Problem}
119
120
%\theoremstyle{remark}
121
\newtheorem{goal}[theorem]{Goal}
122
\newtheorem{remark}[theorem]{Remark}
123
\newtheorem{remarks}[theorem]{Remarks}
124
\newtheorem{example}[theorem]{Example}
125
\newtheorem{exercise}[theorem]{Exercise}
126
127
128
129
%\hoffset=-0.05\textwidth
130
%\textwidth = 1.1\textwidth
131
%\voffset=-0.05\textheight
132
%\textheight = 1.1\textheight
133
%
134
% Set equal margins on book style
135
\setlength{\oddsidemargin}{53pt}
136
\setlength{\evensidemargin}{53pt}
137
\setlength{\marginparwidth}{57pt}
138
\setlength{\footskip}{30pt}
139
140
% Dutch style of paragraph formatting, i.e. no indents.
141
\setlength{\parskip}{1.3ex plus 0.2ex minus 0.2ex}
142
\setlength{\parindent}{0pt}
143
144
\setcounter{tocdepth}{0}
145
146
%\textheight = 9 in
147
%\oddsidemargin = 0.0 in
148
%\evensidemargin = 0.0 in
149
%\topmargin = 0.0 in
150
%\headheight = 0.0 in
151
%\headsep = 0.0 in
152
%\parskip = 0.2in
153
%\parindent = 0.0in
154
155
156
\def\GL{\mathrm{GL}}
157
\def\PGL{\mathrm{PGL}}
158
\def\PSL{\mathrm{PSL}}
159
\def\GSP{\mathrm{GSP}}
160
\def\Z{\mathrm{Z}}
161
\def\Q{\mathrm{Q}}
162
\def\Gal{\mathrm{Gal}}
163
\def\Hom{\mathrm{Hom}}
164
\def\Ind{\mathrm{Ind}}
165
\def\End{\mathrm{End}}
166
\def\Aut{\mathrm{Aut}}
167
\def\loc{\mathrm{loc}}
168
\def\glob{\mathrm{glob}}
169
\def\Kbar{{\bar K}}
170
\def\D{{\mathcal D}}
171
\def\L{{\mathcal L}}
172
\def\R{{\mathcal R}}
173
\def\G{{\mathcal G}}
174
\def\W{{\mathcal W}}
175
\def\H{{\mathcal H}}
176
\def\OH{{\mathcal OH}}
177
178
179
180
\newcommand{\RH}{Riemann Hypothesis\index{Riemann Hypothesis}}
181
182
\title{Prime Numbers and the Riemann Hypothesis}
183
\date{}
184
\author{\Large Barry Mazur \and \Large William Stein \and \vspace{20ex}\\
185
%\begin{center}
186
%A formula that counts the prime numbers ...
187
%\includegraphics[width=.65\textwidth]{illustrations/Rk_50_2_100}
188
%... built out of their spectrum:
189
% cover = use fractal trace with params 0.3 -0.1 0 0
190
\includegraphics[width=1.1\textwidth]{illustrations/cover}
191
%$$
192
%f(t) = -\sum_{\text{prime powers }p^n}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))
193
%$$
194
%\end{center}
195
}
196
197
\usepackage{notes2bib}
198
\bibliographystyle{amsplain}
199
200
\usepackage{hyperref}
201
202
203
\newcommand{\todo}[1]{\par\vspace{1em}{\small---------\\{{\bf To be done:} #1}\\-----------}\par\vspace{1em}}
204
205
206
% More space between footnotes -- see
207
% http://www.latex-community.org/forum/viewtopic.php?f=44&t=22437
208
\setlength{\footnotesep}{1.2\baselineskip}
209
210
211
\usepackage{pdfpages}
212
213
\begin{document}
214
215
%\maketitle
216
\includepdf{cover}
217
218
% Remove parskip for toc
219
\setlength{\parskip}{0ex plus 0.5ex minus 0.2ex}
220
221
\tableofcontents
222
223
224
% Dutch style of paragraph formatting, i.e. no indents.
225
\setlength{\parskip}{1.3ex plus 0.2ex minus 0.2ex}
226
227
\chapter*{\label{preface}Preface}
228
\addcontentsline{toc}{chapter}{Preface}
229
The \RH{} is one of the great unsolved problems of
230
mathematics, and the reward of \$1{,}000{,}000 of {\em Clay Mathematics
231
Institute} prize money awaits the person who solves it. But---with
232
or without money---its resolution is crucial for our understanding of
233
the nature of numbers.
234
235
236
There are several full-length books recently published, written
237
for a general audience, that have the \RH{} as their main
238
topic. A reader of these books will get a fairly rich picture of the
239
personalities engaged in the pursuit, and of related mathematical and
240
historical issues.\footnote{See, e.g., {\em The Music of the Primes} by Marcus du Sautoy (2003)
241
and {\em Prime Obsession: Bernhard Riemann and
242
the Greatest Unsolved Problem in Mathematics} by John Derbyshire (2003).}
243
244
This is {\em not} the mission of the book that you now hold in your
245
hands. We aim---instead---to explain, in as direct a manner as
246
possible and with the least mathematical background required, what
247
this problem is all about and why it is so important. For even before
248
anyone proves this {\em hypothesis} to be true (or false!), just
249
getting familiar with it, and with some of the ideas behind it, is
250
exciting. Moreover, this hypothesis is of crucial importance in a
251
wide range of mathematical fields; for example, it is a
252
confidence-booster for computational mathematics: even if the \RH{}
253
is never proved, assuming its truth (and that of closely
254
related hypotheses) gives us an excellent sense of
255
how long certain computer programs will take to run, which, in some
256
cases, gives us the assurance we need to initiate a computation that
257
might take weeks or even months to complete.
258
259
260
% I (William Stein) took this very picture in the Princeton common room in 2001!
261
\ill{sarnak}{0.4}{Peter Sarnak\index{Sarnak, Peter}}
262
263
264
265
Here is how the
266
Princeton mathematician Peter Sarnak describes the broad impact the
267
\RH{} has had\footnote{See page 222 of
268
{\em The Riemann hypothesis: the greatest unsolved problem in mathematics} by Karl Sabbagh (2002).}:
269
\begin{quote}
270
``The Riemann hypothesis is the central problem and it implies many,
271
many things. One thing that makes it rather unusual in mathematics
272
today is that there must be over five hundred papers---somebody should
273
go and count---which start `Assume the Riemann hypothesis\footnote{Technically,
274
a generalized version of the Riemann hypothesis (see
275
Chapter~\ref{ch:companions} below).},' and
276
the conclusion is fantastic. And those [conclusions] would then become
277
theorems ... With this one solution you would have proven five hundred
278
theorems or more at once.''
279
\end{quote}
280
% This is from page 106 of the Borwein book, which is just citing Sabbagh.
281
282
283
284
So, what {\it is} the \RH{}? Below is a {\it first
285
description} of what it is about. The task of our book is to develop
286
the following boxed paragraph into a fuller explanation and to
287
convince you of the importance and beauty of the mathematics it
288
represents. We will be offering, throughout our book, a number of
289
different---but equivalent---ways of precisely formulating this
290
hypothesis (we display these in boxes). When we say that two
291
mathematical statements are ``equivalent'' we mean that, given the
292
present state of mathematical knowledge, we can prove that if either
293
one of those statements is true, then the other is true. The endnotes
294
will guide the reader to the relevant mathematical literature.
295
296
\begin{center}
297
\shadowbox{
298
\begin{minipage}{.98\textwidth}
299
\hspace{.015\textwidth}
300
\begin{minipage}{0.95\textwidth}
301
\mbox{} \vspace{0.2ex}
302
\begin{center}{\bf\large What {\em sort} of Hypothesis is the \RH{}?}\end{center}
303
\medskip
304
305
Consider the seemingly innocuous series of questions:
306
307
\begin{quote}
308
\begin{itemize}
309
\item How many prime numbers (2, 3, 5, 7, 11, 13, 17, $\ldots$) are there less than 100?
310
\item How many less than 10{,}000?
311
\item How many less than 1{,}000{,}000?
312
\end{itemize}
313
314
More generally, how many primes are there less than any given number $X$?
315
\end{quote}
316
317
Riemann\index{Riemann, Bernhard} proposed, a century and half ago, a strikingly
318
simple-to-describe ``very good approximation'' to the number of
319
primes less than a given number $X$. We now
320
see that if we could prove this {\em Hypothesis of Riemann} we would have
321
the key to a wealth of powerful mathematics. Mathematicians are eager
322
to find that key.
323
324
325
\vspace{1ex}
326
\end{minipage}\end{minipage}}
327
\end{center}
328
329
% I got this from http://www.math.harvard.edu/history/bott/
330
% Barry says there is a good picture in the book
331
% ''the Index Theorem'' that Yau published.
332
\ill{raoulbott}{0.35}{Raoul Bott (1923--2005). Photograph by George M. Bergman, Courtesy of the Department of Mathematics, Harvard University.\label{fig:bott}}
333
334
335
The mathematician Raoul Bott\index{Bott, Raoul}---in giving advice to
336
a student---once said that whenever one
337
reads a mathematics book or article, or goes to a math lecture, one
338
should aim to come home with something very specific (it can be small,
339
but should be {\em specific}) that has application to a wider class of
340
mathematical problems than was the focus of the text or lecture. If we
341
were to suggest some possible {\em specific} items to come home with,
342
after reading our book, three key phrases---{\bf prime numbers}\index{prime number}, {\bf
343
square-root accurate}\index{square-root accurate}, and {\bf spectrum}\index{spectrum}---would head the
344
list. As for words of encouragement to think hard about the first of
345
these, i.e., prime numbers, we can do no better than to quote a
346
paragraph of Don Zagier's\index{Zagier, Don} classic 12-page exposition, {\em The First
347
50 Million Prime Numbers}:
348
349
% I took this myself when we were hiking in Oberwolfach once...
350
\ill{zagier}{.35}{Don Zagier}
351
352
353
\begin{quote}
354
``There are two facts about the distribution of prime numbers of
355
which I hope to convince you so overwhelmingly that they will be
356
permanently engraved in your hearts. The first is that, [they are]
357
the most arbitrary and ornery objects studied by mathematicians:
358
they grow like weeds among the natural numbers, seeming to obey no
359
other law than that of chance, and nobody can predict where the next
360
one will sprout. The second fact is even more astonishing, for it
361
states just the opposite: that the prime numbers exhibit stunning
362
regularity, that there are laws governing their behavior, and that
363
they obey these laws with almost military precision.''
364
\end{quote}
365
366
367
Mathematics is flourishing. Each year sees new exciting initiatives that extend and sharpen the applications of our subject, new directions for deep exploration---and finer understanding---of classical as well as very contemporary mathematical domains. We are aided in such explorations by the development of more and more powerful tools. We see resolutions of centrally important questions. And through all of this, we are treated to surprises and dramatic changes of viewpoint; in short: marvels.
368
369
And what an array of wonderful techniques allow mathematicians to do their work: framing {\it definitions;} producing {\it constructions;} formulating {\it analogies relating disparate concepts, and disparate mathematical fields}; posing {\it conjectures,} that cleanly shape a possible way forward; and, the keystone: providing unassailable {\it proofs} of what is asserted, the idea of doing such a thing being itself one of the great glories of mathematics.
370
371
Number theory has its share of this bounty. Along with all these modes of theoretical work, number theory also offers the pure joy of numerical experimentation, which---when it is going well---allows you to witness the intricacy of numbers and profound inter-relations that cry out for explanation. It is striking how little you actually have to know in order to appreciate the revelations offered by numerical exploration.
372
373
Our book is meant to be an introduction to these pleasures. We take an experimental view of the fundamental ideas of the subject buttressed by numerical computations, often displayed as graphs. As a result, our book is profusely illustrated, containing 131 figures,
374
diagrams, and pictures that accompany the text.\footnote{We created the
375
figures using the free SageMath
376
software (see \url{http://www.sagemath.org}).
377
Complete source code is available, which
378
can be used to recreate every diagram in this book
379
(see \url{http://wstein.org/rh}).
380
More adventurous readers can try
381
to experiment with the parameters for
382
the ranges of data illustrated, so as to get an even more
383
vivid sense of how the numbers ``behave.''
384
We hope that readers become inspired to carry out numerical experimentation,
385
which is becoming easier as mathematical software
386
advances.}
387
388
There are few
389
mathematical equations in Part~\ref{part1}. This first portion of our book is intended for readers who are generally interested in, or curious about, mathematical ideas, but who may not have studied any advanced topics. Part~\ref{part1} is devoted to conveying the essence of the Riemann Hypothesis and explaining why it is so intensely pursued. It requires a minimum of mathematical knowledge, and does not, for example, use calculus,\index{calculus} although it would be helpful to know---or to learn on the run---the meaning of the concept of {\it function}\index{function}. Given its mission, Part~\ref{part1} is meant to be complete, in that it has a beginning, middle, and end. We hope that our readers who only read Part~\ref{part1} will have enjoyed the excitement of this important piece of mathematics.
390
391
392
393
Part~\ref{part2} is for readers who have taken at least one class in calculus,\index{calculus} possibly a long time ago. It is meant as a general preparation for the type of Fourier analysis\index{Fourier analysis} that will occur in the later parts. The notion of spectrum\index{spectrum} is key.
394
395
Part~\ref{part3} is for readers who wish to see, more vividly, the link between the placement of prime numbers\index{prime number} and (what we call there) the {\it Riemann spectrum}.\index{Riemann spectrum}
396
397
Part~\ref{part4} requires some familiarity with complex analytic functions, and returns to Riemann's original viewpoint. In particular it relates the ``Riemann spectrum'' that we discuss in Part~\ref{part3} to the {\it nontrivial zeroes\index{nontrivial zeroes} of the Riemann zeta function}.\index{zeta function} We also provide a brief sketch of the more standard route taken by published expositions of the Riemann Hypothesis.
398
399
The end-notes are meant to link the text to references, but also to provide more technical commentary with an increasing dependence on mathematical background in the later chapters. References to the end notes will be in brackets.
400
401
We wrote our book over the past decade, but devoted only one week to it each year (a week in August). At the end of our work-week for the book, each year, we put our draft (mistakes and all) on line to get response from readers.{\footnote{See {\url{http://library.fora.tv/2014/04/25/Riemann_Hypothesis_The_Million_Dollar_Challenge}} which is a lecture---and Q \& A---about the composition of this book.}} We therefore accumulated much important feedback, corrections, and requests from readers.\footnote{Including
402
Dan Asimov, Bret Benesh, Keren Binyaminov, Harald B\"{o}geholz, Louis-Philippe Chiasson, Keith Conrad, Karl-Dieter Crisman, Nicola Dunn, Thomas Egense, Bill Gosper, Andrew Granville, Shaun Griffith, Michael J. Gruber, Robert Harron, William R. Hearst III, David Jao, Fredrik Johansson, Jim Markovitch, David Mumford, James Propp, Andrew Solomon, Dennis Stein, and Chris Swenson.}
403
We thank them infinitely.
404
405
406
\part{The \RH{}\label{part1}}
407
408
\numberwithin{equation}{chapter}
409
\numberwithin{figure}{chapter}
410
\numberwithin{table}{chapter}
411
412
\chapter[Thoughts about numbers]{Thoughts about numbers: ancient, medieval, and modern}
413
414
If we are to believe the ancient Greek philosopher Aristotle\index{Aristotle}, the early
415
Pythagoreans thought that the principles governing Number are ``the
416
principles of all things,'' the concept of Number being more basic than
417
{\em earth, air, fire, or water}, which were according to ancient tradition
418
the four building blocks of matter. To think about Number is to get
419
close to the architecture of ``what is.''
420
421
So, how far along are we in our thoughts about numbers?
422
423
424
425
% This is from http://en.wikipedia.org/wiki/File:Frans_Hals_-_Portret_van_Ren%C3%A9_Descartes.jpg
426
% There is a higher resolution scan available there
427
428
\ill{descartes}{.35}{Ren\'e Descartes (1596--1650)}
429
430
The French philosopher and mathematician Ren\'e Descartes,\index{Descartes, Ren\'{e}} almost four
431
centuries ago, expressed the hope that there soon would be ``almost
432
nothing more to discover in geometry.'' Contemporary physicists dream
433
of a final theory.\footnote{See Weinberg's book {\em Dreams of a
434
Final Theory: The Search for the Fundamental Laws of Nature}, by
435
Steven Weinberg (New York: Pantheon Books, 1992)} But despite its
436
venerability and its great power and beauty, the pure mathematics of
437
numbers may still be in the infancy of its development, with depths to
438
be explored as endless as the human soul, and {\it never} a final theory.
439
440
\ill{quixote}{.5}{Jean de Bosschere, ``Don Quixote and his Dulcinea del Toboso,'' from The History of Don Quixote De La Mancha, by Miguel De Cervantes. Trans. Thomas Shelton. George H. Doran Company, New York (1923).}
441
442
Numbers are obstreperous things. Don Quixote\index{Don Quixote} encountered this when he
443
requested that the ``bachelor'' compose a poem to his lady Dulcinea del
444
Toboso, the first letters of each line spelling out her name. The
445
``bachelor'' found\footnote{See Chapter IV of the Second Part of the {\em Ingenious Gentleman Don Quixote of La Mancha}.}
446
447
\begin{quote}
448
``a great difficulty in their composition because the number of
449
letters in her name was $17$, and if he made four Castilian stanzas
450
of four octosyllabic lines each, there would be one letter too many,
451
and if he made the stanzas of five octosyllabic lines each, the ones
452
called {\em d{\'e}cimas} or {\em redondillas,} there would be three
453
letters too few...''
454
\end{quote}
455
456
``It must fit in, however you do it,'' pleaded Quixote\index{Don Quixote}, not willing to
457
grant the imperviousness of the number $17$ to division.
458
459
% http://www.jus.uio.no/sisu/don_quixote.miguel_de_cervantes/60.html#2068
460
461
462
463
464
{\em Seventeen} is indeed a prime number:\index{prime number} there is no way of factoring\index{factor}
465
it as the product of smaller numbers, and this accounts---people tell
466
us---for its occurrence in some phenomena of nature, as when
467
the seventeen-year cicadas all emerged to celebrate a ``reunion'' of some
468
sort in our fields and valleys.
469
470
\ill{cicada}{.35}{Cicadas emerge every 17 years}
471
% from http://biology.clc.uc.edu/steincarter/cicadas.htm
472
473
474
475
476
Prime numbers\index{prime number}, despite their {\em primary} position in our modern
477
understanding of numbers, were not specifically doted over in the
478
ancient literature before Euclid,\index{Euclid} at least not in the literature that
479
has been preserved. Primes are mentioned as a class of numbers in the
480
writings of Philolaus (a predecessor of Plato\index{Plato}); they are not mentioned
481
specifically in the Platonic dialogues, which is surprising
482
given the intense interest Plato had in mathematical developments; and
483
they make an occasional appearance in the writings of Aristotle, which
484
is not surprising, given Aristotle's emphasis on the distinction
485
between the {\em composite} and the {\em incomposite}. ``The
486
incomposite is prior to the composite,'' writes Aristotle\index{Aristotle} in Book 13 of
487
the Metaphysics.
488
489
490
Prime numbers\index{prime number} do occur, in earnest, in Euclid's\index{Euclid} {\it Elements}!
491
492
493
% from http://en.wikipedia.org/wiki/File:Euklid-von-Alexandria_1.jpg
494
\ill{euclid}{.35}{Euclid\index{Euclid}}
495
496
497
There is an extraordinary wealth of established truths about whole
498
numbers; these truths provoke sheer awe for the beautiful complexity
499
of prime numbers\index{prime number}. But each of the important new discoveries we make
500
gives rise to a further richness of questions, educated guesses,
501
heuristics, expectations, and unsolved problems.
502
503
504
505
506
507
508
509
510
\chapter{What are prime numbers?}\label{ch:what_are_primes}\index{prime number}
511
512
\noindent {\em Primes as atoms. }\index{atom} To begin from the beginning, think
513
of the operation of multiplication as a bond that ties numbers
514
together: the equation $2\times 3= 6$ invites us to imagine the number
515
$6$ as (a molecule, if you wish) built out of its smaller constituents
516
$2$ and $3$. Reversing the procedure, if we start with a whole
517
number, say $6$ again, we may try to factor\index{factor} it (that is, express it as
518
a product of smaller whole numbers) and, of course, we would
519
eventually, if not immediately, come up with $6 = 2\times 3$ and
520
discover that $2$ and $3$ factor no further; the numbers $2$ and $3$,
521
then, are the indecomposable entities (atoms, if you wish) that
522
comprise our number.
523
524
\ill{factor_tree_6}{.3}{The number $6 = 2\times 3$}
525
526
527
By definition, a {\bf prime number}\index{prime number}
528
(colloquially, {\em a prime}) is a whole number, bigger than $1$, that
529
cannot be factored into a product of two smaller whole numbers. So,
530
$2$ and $3$ are the first two prime numbers. The next number along the
531
line, $4$, is not prime, for $4= 2\times 2$; the number after that,
532
$5$, is. Primes are, multiplicatively speaking, the building blocks
533
from which all numbers can be made. A fundamental theorem of
534
arithmetic tells us that any number (bigger than $1$) can be factored
535
as a product of primes, and the factorization is {\em unique} except
536
for rearranging the order of the primes.
537
538
539
540
For example, if you try to factor\index{factor} $12$ as a product of
541
two smaller numbers---ignoring the order of the factors---there are two
542
ways to begin to do this:
543
$$
544
12 = 2 \times 6 \qquad\text{ and }\qquad 12 = 3 \times 4
545
$$
546
But neither of these ways is a full factorization of $12$, for both
547
$6$ and $4$ are not prime, so can be themselves factored, and in each
548
case after changing the ordering of the factors we arrive at:
549
$$
550
12= 2 \times 2 \times 3.
551
$$
552
553
If you try to factor\index{factor} the number $300$, there are many
554
ways to begin:
555
$$
556
300= 30\times 10\qquad\text{or}\qquad 300 = 6 \times 50
557
$$
558
and there are various other starting possibilities. But if you
559
continue the factorization (``climbing down'' any one of the possible
560
``factoring trees'') to the bottom, where every factor is a prime
561
number as in Figure~\ref{fig:factor300}, you always end up with the
562
same collection of prime numbers\index{prime number}\footnote{See Section~1.1 of
563
Stein's {\em Elementary Number Theory: Primes, Congruences, and Secrets} (2008) at \url{http://wstein.org/ent/}
564
for a proof of the ``fundamental theorem of arithmetic,''
565
which asserts that every positive
566
whole number factors uniquely as a product of primes.}:
567
$$300 = 2^2\times 3\times 5^2.$$
568
569
\illtwo{factor_tree_300_a}{factor_tree_300_b}{.47}
570
{Factor trees that illustrate the factorization of 300 as a product of primes.\label{fig:factor300}}
571
572
\ill{factor_tree_big}{1}{Factorization tree for the product of the primes up to $29$.\label{factor.tree.big}}%NOTE: in our version of the book there is an annoying
573
%battle between the footnote and this figure. Maybe won't matter in
574
% CUP version.
575
576
577
The \RH{} probes the question: how intimately can we know
578
prime numbers\index{prime number}, those {\em atoms}\index{atom} of multiplication? Prime numbers are
579
an important part of our daily lives. For example, often when we visit a
580
website and purchase something online, prime numbers having hundreds of
581
decimal digits are used to keep our bank transactions private. This
582
ubiquitous use to which giant primes are put depends upon a very
583
simple principle: it is much easier to multiply numbers together than
584
to factor\index{factor} them. If you had to factor, say, the number $391$ you might
585
scratch your head for a few minutes before discovering that $391$ is
586
$17\times 23$. But if you had to multiply $17$ by $23$ you would do it
587
straightaway. Offer two primes, say, $P$ and $Q$ each with a few hundred
588
digits, to your computing machine and ask it to multiply them
589
together: you will get their product $N = P\times Q$ with its hundreds of digits
590
in about a microsecond. But present that number $N$ to any
591
current desktop computer, and ask it to factor $N$, and the computer
592
will (almost certainly) fail to do the task. See
593
\bibnote{{\bf How not to factor the numerator of a Bernoulli number:}
594
595
As mentioned in Chapter~\ref{ch:envision}, the coefficient $B_k$ of the linear
596
term of the polynomial
597
$$
598
S_k(n)= 1^k+2^k+3^k+\dots +(n-1)^k
599
$$
600
is (up to sign) the $k$-th {\bf Bernoulli number}. These numbers are
601
rational numbers and, putting them in lowest terms, their numerators
602
play a role in certain problems, and their denominators in
603
others. (This is an amazing story, which we can't go into here!)
604
605
One of us (Barry Mazur) in the recent article {\em How can we
606
construct abelian Galois extensions of basic number fields?} (see
607
\url{http://www.ams.org/journals/bull/2011-48-02/S0273-0979-2011-01326-X/})
608
found himself dealing (for various reasons) with the fraction
609
$-B_{200}/400$, where $B_{200}$ is the two-hundredth Bernoulli number.
610
The numerator of this fraction is quite large: it is---hold your
611
breath---\ $$389\cdot 691\cdot5370056528687 \ \ \ \ \text{times this
612
204-digit number:}
613
$$\vskip 10pt
614
$N:=3452690329392158031464109281736967404068448156842396721012$\newline
615
$\mbox{}\,\,\,\qquad 9920642145194459192569415445652760676623601087497272415557$\newline
616
$\mbox{}\,\,\,\qquad 0842527652727868776362959519620872735612200601036506871681$\newline
617
$\mbox{}\,\,\,\qquad 124610986596878180738901486527$
618
\vskip 10pt and he {\it incorrectly asserted} that it was prime. Happily, Bartosz Naskr\c{e}cki spotted this error: our $204$-digit $N$ is {\it not} prime.
619
% Note, according to http://perso.telecom-paristech.fr/~cappe/local/latex/accents.pdf
620
% the accent I get with \c{e} is ``NOTE: This is technically incorrect. The accent should be reversed in orientation,
621
% but there is currently no such accent in plain TEX or LATEX.''
622
623
How did he know this? By using the most basic test in the repertoire
624
of tests that we have available to check to see whether a number is
625
prime: we'll call it the ``{\bf Fermat $2$-test.}'' We'll first give a
626
general explanation of this type of test before we show how $N$ {\it
627
fails the Fermat $2$-test.}
628
629
630
The starting idea behind this test is the famous result known as {\em
631
Fermat's Little Theorem} where the ``little'' is meant to
632
alliteratively distinguish it from you-know-what.
633
\begin{theorem}[Fermat's Little Theorem]
634
If $p$ is a prime number, and $a$ is any number relatively prime to $p$ then $a^{p-1} - 1$ is divisible by $p$.
635
\end{theorem}
636
A good exercise is to try to prove this, and a one-word hint that might lead you to one of the many proofs of it is
637
{\em induction}.\footnote{Here's the proof:
638
$$(N+1)^p \equiv N^p +1 \equiv (N+1)\ \mod p,$$
639
where the first equality is the binomial theorem and the second
640
equality is induction.}
641
642
Now we are going to use this as a criterion, by---in effect---restating it in what logicians would call its {\it contrapositive}:
643
\begin{theorem}[The Fermat $a$-test]
644
If $M$ is a positive integer, and $a$ is any number relatively prime to $M$ such that $a^{M-1} - 1$ is {\it not} divisible by $M$, then $M$ is {\it not} a prime.
645
\end{theorem}
646
647
% sage: N =345269032939215803146410928173696740406844815684239672101299206421451944591925694154456527606766236010874972724155570842527652727868776362959519620872735612200601036506871681124610986596878180738901486527; z = Mod(2, N)^(N-1) - 1; z
648
Well, Naskr\c{e}cki computed $2^{N-1}-1$ (for the $204$-digit $N$ above) and saw that
649
it is {\it not} divisible\footnote{The number $2^{N-1}-1$ has a residue after division by $N$ of\\ $3334581100595953025153969739282790317394606677381970645616725285996925$\newline$
650
6610000568292727335792620957159782739813115005451450864072425835484898$\newline$
651
565112763692970799269335402819507605691622173717318335512037457$.} by $N$.
652
Ergo, our $N$ fails the Fermat $2$-test so is {\it not} prime.
653
654
But then, given that it is so ``easy'' to see that $N$ is not prime, a
655
natural question to ask is: what, in fact, is its prime factorization?
656
This---it turns out---isn't so easy to determine;
657
Naskr\c{e}cki devoted $24$ hours of computer time setting standard factorization
658
algorithms on the task, and that was not sufficient time to resolve
659
the issue. The factorization of the numerators of general Bernoulli
660
numbers is the subject of a very interesting website run by Samuel
661
Wagstaff (\url{http://homes.cerias.purdue.edu/~ssw/bernoulli}).
662
Linked to this web page one finds
663
(\url{http://homes.cerias.purdue.edu/~ssw/bernoulli/composite}) which gives
664
a list of composite numbers whose factorizations have resisted all
665
attempts to date. The two-hundredth Bernoulli number is 12th on the list.
666
667
The page
668
\url{http://en.wikipedia.org/wiki/Integer_factorization_records} lists
669
record challenge factorization, and one challenge that was completed
670
in 2009 involves a difficult-to-factor number with 232 digits; its
671
factorization was completed by a large team of researchers and took around
672
2000 years of CPU time. This convinced us that
673
with sufficient motivation it would be possible to factor $N$, and so we
674
asked some leaders in the field to try. They succeeded!
675
\begin{quote}\sf
676
Factorisation of B200
677
678
by Bill Hart on 4 Aug 05, 2012 at 07:24pm
679
680
We are happy to announce the factorization of the numerator of the 200th
681
Bernoulli number:
682
683
\begin{align*}
684
N &= 389 \cdot 691 \cdot 5370056528687 \cdot c_{204}\\
685
c_{204} &= p_{90} \cdot p_{115}\\
686
p_{90} &= 149474329044343594528784250333645983079497454292\\
687
&= 838248852612270757617561057674257880592603\\
688
p_{115} &= 230988849487852221315416645031371036732923661613\\
689
&= 619208811597595398791184043153272314198502348476\\
690
&= 2629703896050377709
691
\end{align*}
692
693
The factorization of the 204-digit composite was made possible with the help
694
of many people:
695
\begin{itemize}
696
\item William Stein and Barry Mazur challenged us to factor this number.
697
\item Sam Wagstaff maintains a table of factorizations of numerators of Bernoulli
698
numbers at \url{http://homes.cerias.purdue.edu/~ssw/bernoulli/bnum}. According
699
to this table, the 200th Bernoulli number is the 2nd smallest index with
700
unfactored numerator (the first being the 188th Bernoulli number).
701
\item Cyril Bouvier tried to factor the c204 by ECM up to 60-digit level, using
702
the TALC cluster at Inria Nancy - Grand Est.
703
\item {\sf yoyo@home} tried to factor the c204 by ECM up to 65-digit level, using the
704
help of many volunteers of the distributed computing platform
705
\url{http://www.rechenkraft.net/yoyo/}.
706
After ECM was unsuccessful, we decided to factor the c204 by GNFS.
707
\item Many people at the Mersenne forum helped for the polynomial selection. The best
708
polynomial was found by Shi Bai, using his implementation of Kleinjung's
709
algorithm in CADO-NFS:
710
\url{http://www.mersenneforum.org/showthread.php?p=298264#post298264}.
711
Sieving was performed by many volunteers using {\sf NFS@home}, thanks to Greg
712
Childers. See \url{http://escatter11.fullerton.edu/nfs} for more details
713
about {\sf NFS@home}.
714
This factorization showed that such a distributed effort might be feasible for a
715
new record GNFS factorization, in particular for the polynomial selection.
716
This was the largest GNFS factorization performed by {\sf NFS@home} to date,
717
the second largest being $2^{1040}+1$ at 183.7 digits.
718
\item Two independent runs of the filtering and linear algebra were done: one by Greg
719
Childers with msieve (\url{http://www.boo.net/~jasonp/qs.html}) using a 48-core
720
cluster made available by Bill Hart, and one by Emmanuel Thom\'{e} and Paul
721
Zimmermann with CADO-NFS (\url{http://cado-nfs.gforge.inria.fr/}), using the Grid
722
5000 platform.
723
\item The first linear algebra run to complete was the one with CADO-NFS, thus we
724
decided to stop the other run.
725
\end{itemize}
726
727
Bill Hart
728
\end{quote}
729
730
We verify the factorization above in SageMath as follows: \\
731
732
{\sf
733
sage: p90 = 1494743290443435945287842503336459830794974542928382\\
734
48852612270757617561057674257880592603\\
735
sage: p115 = 230988849487852221315416645031371036732923661613619\\
736
2088115975953987911840431532723141985023484762629703896050377709\\
737
sage: c204 = p90 * p115\\
738
sage: 389 * 691 * 5370056528687 * c204 == -numerator(bernoulli(200))\\
739
True\\
740
sage: is\_prime(p90), is\_prime(p115), is\_prime(c204)\\
741
(True, True, False)
742
}
743
} and \bibnote{\label{bibnote:factor}
744
Given an integer $n$, there are
745
many algorithms available for trying to write $n$ as a product of
746
prime numbers. First we can apply {\em trial division}, where we
747
simply divide $n$ by each prime $2, 3, 5, 7, 11, 13, \ldots$ in turn,
748
and see what small prime factors we find (up to a few digits). After
749
using this method to eliminate as many primes as we have patience to
750
eliminate, we typically next turn to a technique called {\em Lenstra's
751
elliptic curve method}, which allows us to check $n$ for
752
divisibility by bigger primes (e.g., around 10--15 digits). Once
753
we've exhausted our patience using the elliptic curve method, we would
754
next hit our number with something called the {\em quadratic sieve},
755
which works well for factoring numbers of the form $n=pq$, with $p$
756
and $q$ primes of roughly equal size, and $n$ having less than 100
757
digits (say, though the 100 depends greatly on the implementation).
758
All of the above algorithms---and then some---are implemented in SageMath,
759
and used by default when you type {\tt factor(n)} into SageMath. Try
760
typing {\tt factor(some number, verbose=8)} to see for yourself.
761
762
If the quadratic sieve fails, a final recourse is to run the {\em
763
number field sieve} algorithm, possibly on a supercomputer. To give
764
a sense of how powerful (or powerless, depending on perspective!) the
765
number field sieve is, a record-setting factorization of a general
766
number using this algorithm is the factorization of a 232 digit number
767
called RSA-768 (see \url{https://eprint.iacr.org/2010/006.pdf}):
768
769
$
770
n = 12301866845301177551304949583849627207728535695953347921973224521\\
771
517264005072636575187452021997864693899564749427740638459251925573263\\
772
034537315482685079170261221429134616704292143116022212404792747377940\\
773
80665351419597459856902143413
774
$
775
\par\noindent{}which factors as $pq$, where\par\noindent{}
776
$
777
p = 334780716989568987860441698482126908177047949837137685689124313889\\
778
82883793878002287614711652531743087737814467999489
779
$
780
\par\noindent{}and\par\noindent{}
781
$
782
q = 367460436667995904282446337996279526322791581643430876426760322838\\
783
15739666511279233373417143396810270092798736308917.
784
$
785
\par\noindent{}We encourage you to try to
786
factor $n$ in SageMath, and see that it fails.
787
SageMath does not yet include an implementation of the
788
number field sieve algorithm, though there are some free implementations
789
currently available (see \url{http://www.boo.net/~jasonp/qs.html}).
790
}.
791
792
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
793
794
795
796
797
%We illustrate this in Sage:
798
% \begin{lstlisting}[]
799
% sage: n = ZZ.random_element(10^500)
800
% sage: m = ZZ.random_element(10^500)
801
% sage: timeit('n*m')
802
% 1.51 microseconds per loop
803
% sage: timeit('n*m')
804
% 2.33 microseconds per loop
805
% sage: factor(n*m)
806
% [wait forever]
807
% \end{lstlisting}}.
808
The safety of much
809
encryption depends upon this ``guaranteed'' failure!\footnote{Nobody has ever
810
published a {\em proof} that there is no fast way to factor
811
integers. This is an article of ``faith'' among some
812
cryptographers.}
813
814
If we were latter-day number-phenomenologists we might revel in the
815
discovery and proof that
816
$$
817
p=2^{43,112,609}-1 = 3164702693\ldots\ldots\text{\small (millions of digits)}\ldots\ldots6697152511
818
$$
819
is a prime number\index{prime number}, this number having $12{,}978{,}189$ digits! This
820
prime, which was discovered on August 23, 2008 by the
821
GIMPS project,\footnote{The GIMPS project website is \url{http://www.mersenne.org/}.}
822
is the first prime ever found with more than ten million digits,
823
though it is not the largest prime currently known.
824
825
Now $2^{43,112,609}-1$ is quite a hefty number! Suppose someone came
826
up to you saying ``surely $p = 2^{43,112,609}-1$ is the largest prime
827
number!'' (which it is not). How might you convince that person that
828
he or she is wrong without explicitly exhibiting a larger prime?
829
\bibnote{We can use SageMath (at \url{http://sagemath.org}) to quickly compute the ``hefty
830
number'' $p = 2^{43,112,609}-1$. Simply type
831
{\tt p = 2\^{}43112609 - 1} to instantly compute $p$.
832
In what sense have we {\em computed} $p$? Internally, $p$ is now
833
stored in base $2$ in the computer's memory; given the special form of
834
$p$ it is not surprising that it took little time to compute. Much more
835
challenging is to compute all the base 10 digits of $p$, which takes
836
a few seconds: {\tt d = str(p)}.
837
Now type {\tt d[-50:]} to see the last 50 digits of $p$.
838
To compute the sum $58416637$ of the digits of $p$ type {\tt sum(p.digits())}.
839
}
840
841
842
Here is a neat---and, we hope, convincing---strategy to show there are
843
prime numbers\index{prime number} larger than $p = 2^{43,112,609} - 1$. Imagine
844
forming the following humongous number: let $M$ be the product of all
845
prime numbers\index{prime number} up to and including $p = 2^{43,11,2609} - 1$. Now go
846
one further than $M$ by taking the next number $N=M+1$.
847
848
849
OK, even though this number $N$ is wildly large, it is either a prime
850
number itself---which would mean that there would indeed be a prime
851
number larger than $p=2^{43,112,609} - 1$, namely $N$; or in any event it is
852
surely divisible by some prime number\index{prime number}, call it $P$.
853
854
Here, now, is a way of seeing that this $P$ is bigger than $p$: Since
855
every prime number smaller than or equal to $p$ divides $M$, these
856
prime numbers cannot divide $N= M+1$ (since they divide $M$ evenly, if
857
you tried to divide $N=M+1$ by any of them you would get a remainder
858
of $1$). So, since $P$ does divide $N$ it must not be any of the
859
smaller prime numbers: $P$ is therefore a prime number bigger than $p=
860
2^{43,112,609}-1$.
861
862
This strategy, by the way, is not very new: it is, in fact, well over
863
two thousand years old, since it already occurred in Euclid's\index{Euclid} {\em
864
Elements}\index{Euclid}. The Greeks did know that there are infinitely many prime
865
numbers and they showed it via the same method as we showed that our
866
$p = 2^{43,112,609} - 1$ is not the largest prime number.
867
868
Here is the argument again, given very succinctly:
869
Given primes $p_1, \ldots, p_m$, let $n=p_1 p_2 \cdots p_m +
870
1$. Then $n$ is divisible by some prime not equal to any $p_i$,
871
so there are more than $m$ primes.
872
873
You can think of this strategy as a simple game that you can
874
play. Start with the bag of prime numbers\index{prime number}
875
that contains just the two primes $2$ and $3$. Now each ``move'' of the game
876
consists of multiplying together all the primes you have in your bag
877
to get a number $M$, then adding $1$ to $M$ to get the larger
878
number $N=M+1$, then factoring $N$\index{factor} into prime number factors, and then
879
including all those new prime numbers\index{prime number} in your bag. Euclid's\index{Euclid} proof
880
gives us that we will---with each move of this game---be finding more
881
prime numbers: the contents of the bag will increase. After, say,
882
a million moves our bag will be guaranteed to contain more than
883
a million prime numbers.\index{prime number}
884
885
For example, starting the game with your bag containing
886
only one prime number $2$, here is how your bag grows with
887
successive moves of the game:
888
889
$\mbox{}\qquad\{2\}$
890
\newline
891
$\mbox{}\qquad\{2,3\}$
892
\newline
893
$\mbox{}\qquad\{2,3, 7\}$
894
\newline
895
$\mbox{}\qquad\{2,3, 7, 43\}$
896
\newline
897
$\mbox{}\qquad\{2,3, 7, 43, 13, 139\}$
898
\newline
899
$\mbox{}\qquad\{2,3, 7, 43, 13, 139, 3263443\}$
900
\newline
901
$\mbox{}\qquad\{2,3, 7, 43, 13, 139, 3263443, 547, 607, 1033, 31051\}$
902
\newline
903
$\mbox{}\qquad\{2,3, 7, 43, 13, 139, 3263443, 547, 607, 1033, 31051, 29881, 67003,\\
904
\mbox{}\qquad\qquad\qquad 9119521, 6212157481\}$
905
\newline
906
\mbox{}\qquad{}etc.\footnote{The sequence of prime numbers we find by this
907
procedure is discussed in more detail with references
908
in the Online Encyclopedia of Integer Sequences \url{http://oeis.org/A126263}.}
909
910
Though there are infinitely many primes, explicitly finding large primes is a
911
major challenge. In the 1990s, the Electronic Frontier Foundation\index{Electronic Frontier Foundation}
912
\url{http://www.eff.org/awards/coop} offered a \$100{,}000 cash reward
913
to the first group to find a prime with at least 10{,}000{,}000 decimal
914
digits (the group that found the record prime $p$ above won this prize\footnote{%
915
See \url{http://www.eff.org/press/archives/2009/10/14-0}.
916
Also the 46th Mersenne prime was declared by {\em Time Magazine} to be
917
one of the top 50 best ``inventions'' of 2008: \url{http://www.time.com/time/specials/packages/article/0,28804,1852747_1854195_1854157,00.html}.}), and offers
918
another \$150{,}000 cash prize to the first group to find a prime with
919
at least 100{,}000{,}000 decimal digits.
920
921
922
The number $p= 2^{43,112,609} - 1$ was for a time
923
the largest prime known, where by
924
``know'' we mean that we know it so explicitly that we can {\em
925
compute} things about it. For example, the last two digits of $p$
926
are both 1 and the sum of the digits of $p$ is 58,416,637.\label{sumdigits} Of course $p$ is not
927
the largest prime number\index{prime number} since there are infinitely many primes, e.g.,
928
the next prime $q$ after $p$ is a prime. But there is no known way
929
to efficiently compute anything interesting about $q$. For example,
930
what is the last digit of $q$ in its decimal expansion?
931
932
933
\chapter{``Named'' prime numbers}\label{ch:namedprimes}\index{prime number}
934
Prime numbers come in all sorts of shapes, some more convenient to
935
deal with than others. For example, the number we have been talking
936
about, $$p = 2^{43,112,609}-1,$$ is given to us, by its very notation,
937
in a striking form; i.e., {\it one less than a power of $2$}. It is no
938
accident that the largest ``currently known'' prime number\index{prime number} has such a
939
form. This is because there are special techniques we can draw on to
940
show primality of a number, if it is one less than a power of $2$
941
and---of course---if it also happens to be prime. The primes of that
942
form have a name, {\it Mersenne Primes},\index{Mersenne primes} as do the primes that are
943
{\it one more than a power of $2$}, those being called {\it Fermat
944
Primes}.
945
\bibnote{
946
In contrast to the situation with factorization, testing integers of
947
this size (e.g., the primes $p$ and $q$) for primality is relatively
948
easy. There are fast algorithms that can tell whether or not any
949
random thousand digit number is prime in a fraction of
950
second. Try for yourself using the SageMath command {\tt is\_prime}.
951
For example, if $p$ and $q$ are as in endnote~\ref{bibnote:factor}, then
952
{\tt is\_prime(p)} and {\tt is\_prime(q)} quickly output True
953
and {\tt is\_prime(p*q)} outputs False. However, if you type
954
{\tt factor(p*q, verbose=8)} you can watch as SageMath tries
955
forever and fails to factor $pq$.}
956
957
Here are two exercises that you might try to do, if this is your first
958
encounter with primes that differ from a power of $2$ by $1$:
959
960
\begin{enumerate}
961
\item Show that if a number of the form $M=2^n-1$ is prime, then the
962
exponent $n$ is also prime. [Hint: This is equivalent to proving
963
that if $n$ is composite, then $2^n-1$ is also composite.]
964
For example: $ 2^2-1= 3, 2^3-1= 7$ are
965
primes, but $2^4-1=15$ is not. So {\it Mersenne primes} are numbers
966
that are
967
\begin{itemize}
968
\item of the form $\displaystyle 2^\text{prime number} -1,$ and
969
\item are themselves prime numbers.
970
\end{itemize}
971
972
\item Show that if a number of the form $F=2^n+1$ is prime, then the
973
exponent $n$ is a power of two. For example: $ 2^2+1= 5$ is prime,
974
but $2^3+1= 9$ is not. So {\it Fermat primes} are numbers that
975
are
976
\begin{itemize}
977
\item of the form $\displaystyle 2^\text{power of two} + 1,$ and
978
\item are themselves prime numbers.
979
\end{itemize}
980
\end{enumerate}
981
982
983
Not all numbers of the form $2^{\rm prime\ number} -1$ or of the form
984
$2^{\rm power\ of\ two} +1$ are prime. We currently know only finitely
985
many primes of either of these forms. How we have come to know what we
986
know is an interesting tale. See, for example, \url{http://www.mersenne.org/}.
987
988
%\ill{sieve_boxes_100}{0.7}{Sieve of Eratosthenes\index{Eratosthenes}\label{fig:erat}}
989
990
\chapter{Sieves}\label{ch:sieves}
991
992
Eratosthenes,\index{Eratosthenes} the mathematician from Cyrene (and later, librarian at
993
Alexandria) explained how to {\em sift} the prime numbers\index{prime number} from the
994
series of all numbers: in the sequence of numbers,
995
$$2\ \ 3\ \ 4 \ \ 5\ \ 6\ \ 7\ \ 8\ \ 9\ \ 10\ \ 11
996
\ \ 12\ \ 13\ \ 14\ \ 15\ \ 16\ \ 17\ \ 18\ \ 19\ \ 20\ \ 21\ \ 22\ \ 23\ \ 24\ \ 25\ \ 26,$$
997
for example, start by circling the $2$ and crossing out all the other
998
multiples of $2$. Next, go back to the beginning of our sequence of
999
numbers and circle the first number that is neither circled nor
1000
crossed out (that would be, of course, the $3$), then cross out all
1001
the other multiples of $3$. This gives the pattern: go back again to
1002
the beginning of our sequence of numbers and circle the first number
1003
that is neither circled nor crossed out; then cross out all of its
1004
other multiples. Repeat this pattern until all the numbers in our
1005
sequence are either circled, or crossed out, the circled ones being
1006
the primes.
1007
1008
\includegraphics[width=\textwidth]{illustrations/circled_primes}
1009
1010
In Figures~\ref{fig:erat2}--\ref{fig:erat2357} we use the primes $2$,
1011
$3$, $5$, and finally $7$ to sieve out the primes up to 100, where instead of
1012
crossing out multiples we grey them out, and instead of circling
1013
primes we color their box red.
1014
1015
\ill{sieve100-2}{.8}{Using the prime 2 to sieve for primes up to 100\label{fig:erat2}}
1016
Since all the even numbers greater than two are
1017
eliminated as being composite numbers and not primes they appear as
1018
gray in Figure~\ref{fig:erat2}, but none of the odd numbers are eliminated so they
1019
still appear in white boxes.
1020
1021
\ill{sieve100-3}{.8}{Using the primes 2 and 3 to sieve for primes up to 100\label{fig:erat23}}
1022
\ill{sieve100-5}{.8}{Using the primes 2, 3, and 5 to sieve for primes up to 100\label{fig:erat235}}
1023
Looking at Figure~\ref{fig:erat235}, we see that for all but
1024
three numbers (49, 77, and 91) up to 100 we have
1025
(after sieving by 2,3, and 5) determined
1026
which are primes and which composite.
1027
1028
\ill{sieve100-7}{.8}{Using the primes 2, 3, 5, and 7 to sieve for primes up to 100\label{fig:erat2357}}
1029
1030
Finally, we see in Figure~\ref{fig:erat2357} that
1031
sieving by 2, 3, 5, and 7 determines all primes up to $100$.
1032
See \bibnote{In Sage, the
1033
function {\tt prime\_range} enumerates primes in a range. For example,
1034
{\tt prime\_range(50)}
1035
outputs the primes up to $50$
1036
and {\tt prime\_range(50,100)}
1037
outputs the primes between $50$ and $100$.
1038
Typing {\tt prime\_range(10\^{}8)}
1039
in SageMath enumerates the primes up to a hundred million in around a second.
1040
You can also enumerate primes up to a billion by typing {\tt v=prime\_range(10\^{}9)}, but this will use
1041
a large amount of memory, so be careful not to crash your computer
1042
if you try this. You can see that there are $\pi(10^9) = 50{,}847{,}534$ primes up to a billion
1043
by then typing {\tt len(v)}.
1044
You can also compute $\pi(10^9)$ directly, without enumerating all primes,
1045
using the command {\tt prime\_pi(10\^{}9)}.
1046
This is much faster since it uses some clever counting tricks to find the
1047
number of primes without actually listing them all.
1048
1049
In Chapter~\ref{sec:tinkering} we tinkered with the staircase of
1050
primes by first counting both primes and prime powers.
1051
There are comparatively few prime powers that are not prime.
1052
Up to $10^8$, only $1{,}405$ of the $5{,}762{,}860$ prime powers are not themselves primes.
1053
To see this, first enter
1054
{\tt a~=~prime\_pi(10\^{}8); pp~=~len(prime\_powers(10\^{}8))}.
1055
Typing {\tt (a,~pp,~pp-a)} then outputs the triple
1056
{\tt (5761455, 5762860, 1405)}.} for more about explicitly
1057
enumerating primes using a computer.
1058
1059
% GIVEN WHAT IS ABOVE, THIS IS SILLY.
1060
%Especially if you have had little experience with math, may I suggest
1061
%that you actually follow Eratosthenes'\index{Eratosthenes} lead, and perform the repeated
1062
%circling and crossing-out to watch the primes up to 100 emerge, intriguingly
1063
%staggered through our sequence of numbers,
1064
%$$2\ \ 3\ \ \bullet \ \ 5\ \ \bullet\ \ 7\ \ \bullet\ \ \bullet\ \ \bullet\ \ 11
1065
%\ \ \bullet\ \ 13\ \ \bullet\ \ \bullet\ \ \bullet\ \ 17\ \ \bullet\ \ 19\ \ \bullet\ \ \bullet\ \
1066
%\bullet\
1067
%\ 23\ \ \bullet\ \ \bullet\ \ \bullet\ \ \bullet\ \ \bullet\ 29,\dots $$
1068
%
1069
1070
%
1071
1072
1073
\chapter[Questions about primes]{Questions about primes that any person might ask}\label{ch:questions}
1074
1075
We become quickly stymied when we ask quite elementary questions about
1076
the spacing of the infinite series of prime numbers.\index{prime number}
1077
1078
For example, {\em are there infinitely many pairs of primes whose
1079
difference is $2$?} The sequence of primes seems to be rich in such
1080
pairs $$5-3 =2,\ \ \ 7-5=2,\ \ \ 13-11=2,\ \ \ 19-17 =2,$$ and we know
1081
that there are loads more such pairs\footnote{For example, according to
1082
\url{http://oeis.org/A007508} there are
1083
$10{,}304{,}185{,}697{,}298$ such pairs less than
1084
$10{,}000{,}000{,}000{,}000{,}000$.} but the answer to our question,
1085
{\em are there infinitely many?}, is not known. The conjecture that there are infinitely many such pairs of primes (``twin primes" as they are called) is known as the {\it Twin Primes Conjecture}.
1086
{\em Are there
1087
infinitely many pairs of primes whose difference is $4$, $6$?} Answer:
1088
equally unknown.
1089
Nevertheless there is very exciting recent work in this direction, specifically,
1090
Yitang Zhang\index{Zhang, Yitang} proved that there are infinitely many pairs of primes that differ
1091
by no more than $7\times 10^7$. For a brief account of
1092
Zhang's\index{Zhang, Yitang} work, see the Wikipedia entry \url{http://en.wikipedia.org/wiki/Yitang_Zhang}.
1093
Many exciting results have followed Zhang's\index{Zhang, Yitang} breakthrough;
1094
we know now, thanks to results\footnote{See \url{https://www.simonsfoundation.org/quanta/20131119-together-and-alone-closing-the-prime-gap/} and for further work
1095
\url{http://michaelnielsen.org/polymath1/index.php?title=Bounded_gaps_between_primes}.} of James Maynard and
1096
others, that there are infinitely many pairs of primes
1097
that differ by no more than $246$.
1098
1099
{\em Is every even number greater than $2$ a sum of
1100
two primes?} Answer: unknown. {\em Are there infinitely many primes
1101
which are $1$ more than a perfect square?} Answer: unknown.
1102
1103
\ill{zhang}{0.25}{Yitang\index{Zhang, Yitang} Zhang\label{fig:zhang}}
1104
1105
1106
1107
Remember the Mersenne prime $p= 2^{43,112,609}-1$ from
1108
Chapter~\ref{ch:namedprimes} and how we
1109
proved---by pure thought---that there must be
1110
a prime $P$ larger than $p$? Suppose, though, someone
1111
asked us whether there was a {\it Mersenne Prime} larger than this
1112
$p$: that is, {\em is there a prime number\index{prime number} of the form $$2^{\rm some\
1113
prime\ number}-1$$ bigger than $p= 2^{43,112,609}-1$?} Answer:
1114
For many years we did not know; however, in 2013 Curtis Cooper discovered
1115
the even bigger Mersenne prime $2^{57,885,161}-1$, with a whopping
1116
17,425,170 digits! Again we can ask if there is a Mersenne
1117
prime larger than Cooper's. Answer: we do not know.
1118
It is possible that there are infinitely many Mersenne primes
1119
but we're far from being able to answer such questions.
1120
1121
% from http://www.york.ac.uk/depts/maths/histstat/people/mersenne.gif
1122
\ill{mersenne}{.3}{Marin Mersenne (1588--1648)}
1123
1124
1125
1126
{\em Is there some neat formula giving the next prime?} More
1127
specifically, {\em if I give you a number $N$, say $N=$ one million,
1128
and ask you for the first number after $N$ that is prime, is there a
1129
method that answers that question without, in some form or other,
1130
running through each of the successive odd numbers after $N$ rejecting
1131
the nonprimes until the first prime is encountered?} Answer:
1132
unknown.
1133
1134
1135
%%%%%%%%%%%%%%%
1136
1137
One can think of many ways of ``getting at'' some understanding of the
1138
placement of prime numbers\index{prime number} among all numbers. Up to this point we have
1139
been mainly just counting them, trying to answer the question ``how
1140
many primes are there up to $X$?'' and we have begun to get some feel
1141
for the numbers behind this question, and especially for the current
1142
``best guesses'' about estimates.
1143
1144
1145
What is wonderful about this subject is that people attracted to it
1146
cannot resist asking questions that lead to interesting, and sometimes
1147
surprising numerical experiments. Moreover, given our current state of
1148
knowledge, many of the questions that come to mind are still
1149
unapproachable: we don't yet know enough about numbers to answer them.
1150
But {\it asking interesting questions} about the mathematics that you
1151
are studying is a high art, and is probably a necessary skill to
1152
acquire, in order to get the most enjoyment---and understanding---from
1153
mathematics. So, we offer this challenge to you:
1154
1155
Come up with your own question about primes that
1156
\begin{itemize}
1157
\item is interesting to you,
1158
\item is not a question whose answer is known to you,
1159
\item is not a question that you've seen before; or at least not exactly,
1160
\item is a question about which you can begin to make numerical investigations.
1161
\end{itemize}
1162
If you are having trouble coming up with a question, read on for more
1163
examples that provide further motivation.
1164
1165
\chapter{Further questions about primes\label{ch:further}}
1166
1167
In celebration of Yitang Zhang's\index{Zhang, Yitang} recent result, let us consider more of the numerics
1168
regarding {\em gaps} between one prime and the next, rather than the tally
1169
of all primes. Of course, it is no fun at all to try to guess how many
1170
pairs of primes $p, q$ there are with gap $q-p$ equal to a fixed odd
1171
number, since the difference of two odd numbers is even, as
1172
in Chapter~\ref{ch:questions}. The fun,
1173
though, begins in earnest if you ask for pairs of primes with
1174
difference equal to $2$ (these being called {\em twin primes}) for it
1175
has long been guessed that there are infinitely many such pairs of
1176
primes, but no one has been able to prove this yet.
1177
1178
%NOTE: I omitted the commas in the numbers in the displayed formula.
1179
As of 2014, the largest known twin primes are
1180
$$3756801695685\cdot 2^{666669} \pm 1.$$
1181
These enormous primes, which were found in 2011, have $200{,}700$ digits each.\footnote{See \url{http://primes.utm.edu/largest.html\#twin} for the top ten
1182
largest known twin primes.}
1183
1184
1185
Similarly, it is interesting to consider primes $p$ and $q$
1186
with difference $4$, or $8$, or---in fact---any even number
1187
$2k$. That is, people have guessed that there are infinitely many
1188
pairs of primes with difference $4$, with difference $6$, etc.\ but
1189
none of these guesses have yet been proved.
1190
1191
1192
1193
1194
1195
So, define
1196
$$
1197
\Gap_{k}(X)
1198
$$
1199
to be the number of pairs of {\em consecutive} primes $(p,q)$ with
1200
$q<X$ that have ``gap $k$'' (i.e., such that their difference $q-p$ is
1201
$k$). Here $p$ is a prime, $q>p$ is a prime, and there are no primes
1202
between $p$ and $q$. For example, $\Gap_2(10) = 2$, since the pairs
1203
$(3,5)$ and $(5,7)$ are the pairs less than $10$ with gap $2$,
1204
and $\Gap_{4}(10)=0$ because despite $3$ and $7$ being separated
1205
by $4$, they are not consecutive primes.
1206
See Table~\ref{tab:gap} for various values of $\Gap_{k}(X)$ and
1207
Figure~\ref{fig:primegapdist} for the distribution\index{distribution} of prime gaps for
1208
$X=10^7$.
1209
1210
1211
\begin{table}[H]\centering
1212
\caption{Values of $\Gap_{k}(X)$ \label{tab:gap}}
1213
\vspace{1em}
1214
1215
{\small
1216
\begin{tabular}{|l|c|c|c|c|c|c|}\hline
1217
$X$ & $\Gap_{2}(X)$ & $\Gap_{4}(X)$& $\Gap_{6}(X)$ & $\Gap_{8}(X)$ &
1218
$\Gap_{100}(X)$ & $\Gap_{246}(X)$\\\hline
1219
1220
$10$ & 2 & 0 & 0 & 0 & 0 & 0\\\hline
1221
$10^{2}$ & 8 & 7 & 7 & 1 & 0 & 0\\\hline
1222
$10^{3}$ & 35 & 40 & 44 & 15 & 0 & 0\\\hline
1223
$10^{4}$ & 205 & 202 & 299 & 101 & 0 & 0\\\hline
1224
$10^{5}$ & 1224 & 1215 & 1940 & 773 & 0 & 0\\\hline
1225
$10^{6}$ & 8169 & 8143 & 13549 & 5569 & 2 & 0\\\hline
1226
$10^{7}$ & 58980 & 58621 & 99987 & 42352 & 36 & 0\\\hline
1227
$10^{8}$ & 440312 & 440257 & 768752 & 334180 & 878 & 0\\\hline
1228
1229
\end{tabular}
1230
}
1231
\end{table}
1232
1233
The recent results of Zhang\index{Zhang, Yitang} as sharpened by Maynard (and others) we mentioned above tell us that for at least one even number $k$ among the even numbers $k \le 246$, $\Gap_{k}(X)$ goes to infinity as $X$ goes to infinity. One expects that this happens for {\it all} even numbers $k$. We expect this as well, of course, for $\Gap_{246}(X)$ despite what might be misconstrued as discouragement by the above data.
1234
% NOTE: the biggest gap up to $10^{8}$ is 220, as this 30s comp shows:
1235
% v = prime_range(10^8)
1236
% gaps = [v[i]-v[i-1] for i in range(1,len(v))]
1237
% max(gaps)
1238
1239
\ill{primegapdist}{1}{Frequency histogram showing the distribution\index{distribution} of
1240
prime gaps of size $\leq 50$ for all primes up to $10^7$. Six is
1241
the most popular gap in this data.
1242
\label{fig:primegapdist}}
1243
1244
1245
\ill{primegap_race}{1}{Plots of $\Gap_k(X)$ for $k=2,4,6,8$. Which wins?}
1246
1247
Here is yet another question that deals with the spacing of prime
1248
numbers that we do not know the answer to:
1249
1250
{\em Racing Gap $2$, Gap $4$, Gap $6$, and Gap $8$ against each other:}
1251
1252
\begin{quote}
1253
Challenge: As $X$ tends to infinity which of $\Gap_2(X)$,
1254
$\Gap_4(X)$,
1255
$\Gap_6(X),$ or $\Gap_8(X)$ do you think will grow faster? How
1256
much would you bet on the truth of your guess? \bibnote{%
1257
Hardy and Littlewood give a nice conjectural answer to such
1258
questions about gaps between primes. See Problem {\bf A8} of
1259
Guy's book {\em Unsolved Problems in Number Theory} (2004).
1260
Note that Guy's book discusses counting the number $P_k(X)$ of pairs of
1261
primes up to $X$ that differ by a fixed even number $k$; we have
1262
$P_k(X)\geq \Gap_k(X)$, since for $P_k(X)$ there is no requirement
1263
that the pairs of primes be consecutive.}
1264
\end{quote}
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
Here is a curious question that you can easily begin to check out for
1275
small numbers. We know, of course, that the {\em even} numbers and the
1276
{\em odd} numbers are nicely and simply distributed: after every odd
1277
number comes an even number, after every even, an odd. There are an
1278
equal number of positive odd numbers and positive
1279
even numbers less than any given odd
1280
number, and there may be nothing else of interest to say about the
1281
matter. Things change considerably, though, if we focus our
1282
concentration on {\em multiplicatively even} numbers and {\em
1283
multiplicatively odd} numbers.
1284
1285
1286
A {\bf multiplicatively even} number is one that can be expressed as a
1287
product of {\em an even number of} primes; and a {\bf multiplicatively
1288
odd} number is one that can be expressed as a product of {\em an odd
1289
number of} primes. So, any prime is multiplicatively odd, the
1290
number $4 = 2\cdot 2$ is multiplicatively even, and so is $6=2\cdot
1291
3$, $9=3\cdot 3$, and $10= 2\cdot 5$; but $12=2\cdot 2\cdot 3$ is
1292
multiplicatively odd. Below we list the numbers up to 25, and underline
1293
and bold the multiplicatively odd numbers.
1294
\begin{center}
1295
1\, {\bf \underline{2}}\, {\bf \underline{3}}\, 4\, {\bf
1296
\underline{5}}\, 6\, {\bf \underline{7}}\, {\bf \underline{8}}\, 9\,
1297
10\, {\bf \underline{11}}\, {\bf \underline{12}}\, {\bf
1298
\underline{13}}\, 14\, 15\, 16\, {\bf \underline{17}}\, {\bf
1299
\underline{18}}\, {\bf \underline{19}}\, {\bf \underline{20}}\, 21\,
1300
22\, {\bf \underline{23}}\, 24\, 25
1301
\end{center}
1302
1303
1304
Table~\ref{tab:evenodddata} gives some data:
1305
1306
\begin{table}[H] \caption{Count of multiplicatively odd and
1307
even positive numbers $\le X$\label{tab:evenodddata}}
1308
\vspace{1ex}
1309
\centering
1310
{\small
1311
\begin{tabular}{|l|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|r|}
1312
\hline
1313
$X$ & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12 & 13 & 14 & 15 & 16 \\ \hline\hline
1314
m. odd & 0 & 1 & 2 & 2 & 3 & 3 & 4 & 5 & 5 & 5 & 6 & 7 & 8 & 8 & 8 & 8 \\ \hline
1315
m. even & 1 & 1 & 1 & 2 & 2 & 3 & 3 & 3 & 4 & 5 & 5 & 5 & 5 & 6 & 7 & 8 \\ \hline
1316
\end{tabular}}
1317
\end{table}
1318
1319
1320
Now looking at this data, a natural, and simple, question to ask about the concept of multiplicative {\em oddness} and {\em evenness} is:
1321
1322
\noindent {\em Is there some $X\ge 2$ for which there are more
1323
multiplicatively even numbers less than or equal to $X$ than
1324
multiplicatively odd ones?}
1325
1326
Each plot in Figure~\ref{fig:liouville} gives the number of
1327
multiplicatively even numbers between $2$ and $X$ minus the number
1328
of multiplicatively odd numbers between $2$ and $X$, for $X$ equal
1329
to 10, 100, 1000, 10000, 100000, and 1000000. The above question
1330
asks whether these graphs would, for sufficiently large $X$, ever
1331
cross the $X$-axis.
1332
1333
\begin{figure}[H]
1334
\centering
1335
\includegraphics[width=.45\textwidth]{illustrations/liouville-17}
1336
\includegraphics[width=.45\textwidth]{illustrations/liouville-100}\\
1337
\includegraphics[width=.45\textwidth]{illustrations/liouville-1000}
1338
\includegraphics[width=.45\textwidth]{illustrations/liouville-10000}\\
1339
\includegraphics[width=.45\textwidth]{illustrations/liouville-100000}
1340
\includegraphics[width=.45\textwidth]{illustrations/liouville-1000000}\\
1341
\caption{Racing Multiplicatively Even and Odd Numbers.\label{fig:liouville}}
1342
\end{figure}
1343
1344
A {\em negative} response to this question---i.e., a proof that any
1345
plot as drawn in Figure~\ref{fig:liouville} never crosses
1346
the $X$-axis---would imply the Riemann Hypothesis! In contrast to the list of
1347
previous questions, the answer to this question is known\footnote{For more
1348
details, see P.~Borwein, ``Sign changes in sums of the Liouville
1349
Function'' and the nice short paper of Norbert Wiener ``Notes on
1350
Polya's and Turan's hypothesis concerning Liouville's factor''
1351
(page 765 of volume II of Wiener's Collected Works); see also:
1352
G. P\'{o}lya ``Verschiedene Bemerkungen zur Zahlentheorie,''
1353
{\em Jahresbericht der Deutschen Mathematiker-Vereinigung},
1354
{\bf 28} (1919)
1355
31--40.}: alas, there is such an $X$. In 1960, Lehman showed that
1356
for $X=906{,}400{,}000$ there are $708$ more multiplicatively even numbers
1357
up to $X$ than multiplicatively odd numbers (Tanaka found in 1980 that
1358
the smallest $X$ such that there are more multiplicative even than odd
1359
numbers is $X=906{,}150{,}257$).
1360
1361
%%%%%%%%%%%%%%%%%
1362
%\begin{wrapfigure}{r}{0.2\textwidth}
1363
% \includegraphics[width=0.2\textwidth]{illustrations/questions}
1364
%\end{wrapfigure}
1365
These are questions that have been asked about primes (and we could
1366
give bushels more\footnote{See, e.g., Richard Guy's book {\em Unsolved Problems in Number Theory} (2004).}),
1367
questions expressible in simple vocabulary, that
1368
we can't answer today. We have been studying numbers for over two
1369
millennia and yet we are indeed in the infancy of our understanding.
1370
1371
1372
We'll continue our discussion by returning to the simplest counting
1373
question about prime numbers.\index{prime number}
1374
1375
\chapter{How many primes are there?}
1376
1377
\ill{sieve200}{.8}{Sieving primes up to 200}
1378
1379
Slow as we are to understand primes, at the very least we can try to
1380
count them. You can see that there are $10$ primes less than $30$, so
1381
you might encapsulate this by saying that the chances that a number
1382
less than $30$ is prime is $1$ in $3$. This frequency does not
1383
persist, though; here is some more data: There are $25$ primes less
1384
than $100$ (so $1$ in $4$ numbers up to $100$ are prime), there are
1385
$168$ primes less than a thousand (so we might say that among the
1386
numbers less than a thousand the chances that one of them is prime is
1387
roughly $1$ in $6$).
1388
1389
\ill{proportion_primes_100}{1}{Graph of the proportion of primes up to $X$ for each integer $X\leq 100$}
1390
1391
There are 78,498 primes less than a million (so we might say that
1392
the chances that a random choice among the first million numbers is
1393
prime have dropped to roughly $1$ in $13$).
1394
1395
\ill{proportion_primes_1000}{1}{Proportion of primes for $X$ up to $1{,}000$}
1396
\ill{proportion_primes_10000}{1}{Proportion of primes for $X$ up to $10{,}000$}
1397
1398
There are 455,052,512 primes less than ten billion; i.e.,
1399
10{,}000{,}000{,}000 (so we might say that the chances are down to roughly
1400
$1$ in $22$).
1401
1402
Primes, then, seem to be thinning out. We return to the sifting process
1403
we carried out earlier, and take a look at a few graphs, to get a sense of why
1404
that might be so. There are a $100$ numbers less than or equal to
1405
$100$, a thousand numbers less than or equal to $1000$, etc.: the
1406
graph in Figure~\ref{fig:sieve_2_100} that looks like a regular staircase, each step the
1407
same length as each riser, climbing up at, so to speak, a 45 degree
1408
angle, counts all numbers up to and including~$X$.
1409
1410
Following Eratosthenes,\index{Eratosthenes} we have sifted those numbers, to pan for
1411
primes. Our first move was to throw out roughly half the numbers (the
1412
even ones!) after the number $2$. The graph that is labeled ``Sieve by 2" in Figure~\ref{fig:sieve_2_100} that is, with one hiccup, a regular staircase climbing at a
1413
smaller angle, each step twice the length of each riser, illustrates
1414
the numbers that are left after one pass through Eratosthenes'\index{Eratosthenes} sieve,
1415
which includes, of course, all the primes. So, the chances that a
1416
number bigger than $2$ is prime is {\em at most} $1$ in $2$. Our
1417
second move was to throw out a good bunch of numbers bigger than $3$.
1418
So, the chances that a number bigger than $3$ is prime is going to be
1419
even less. And so it goes: with each move in our
1420
sieving process, we are winnowing the field more extensively, reducing
1421
the chances that the later numbers are prime.
1422
1423
\ill{sieve_2_100}{.9}{Sieving by removing multiples of $2$ up to $100$\label{fig:sieve_2_100}}
1424
\ill{sieve1000}{.9}{Sieving for primes up to $1000$}
1425
1426
The red curve in these figures actually counts the primes: it is the
1427
beguilingly irregular {\em staircase of primes.} Its height above any
1428
number $X$ on the horizontal line records the number of primes less
1429
than or equal to $X$, the accumulation of primes up to $X$. Refer to
1430
this number as $\pi(X)$. So $\pi(2)=1$, $\pi(3) = 2$, $\pi(30) = 10$;
1431
of course, we could plot a few more values of $\pi(X)$, like
1432
$\pi(\text{ten billion}) = 455,052,512$.
1433
1434
1435
Let us accompany Eratosthenes\index{Eratosthenes} for a few further steps in his sieving
1436
process. Figure~\ref{fig:sieve3_100} contains a graph of all whole
1437
numbers up to 100 after we have removed the even numbers greater than
1438
$2$, and the multiples of $3$ greater than $3$ itself.
1439
1440
1441
\ill{sieves3_100}{.7}{Sieving out multiples of $2$ and $3$.\label{fig:sieve3_100}}
1442
1443
1444
From this graph you can see that if you go ``out a way'' the
1445
likelihood that a number is a prime is less than $1$ in $3
1446
$. Figure~\ref{fig:sieve7_100} contains a graph of what Eratosthenes\index{Eratosthenes}
1447
sieve looks like up to 100 after sifting $2,3,5,$ and $7$.
1448
1449
1450
1451
\ill{sieves7_100}{.7}{Sieving out multiples of $2$, $3$, $5$, and $7$.\label{fig:sieve7_100}}
1452
1453
1454
This data may begin to suggest to you that as you go further and
1455
further out on the number line the percentage of prime numbers\index{prime number} among
1456
all whole numbers tends towards $0\%$ (it does).
1457
1458
1459
To get a sense of how the primes accumulate, we will take a look at
1460
the staircase of primes for $X= 25$ and $X=100$ in Figures~\ref{fig:staircase25}
1461
and \ref{fig:staircase100a}.
1462
1463
\ill{prime_pi_25_aspect1}{.8}{Staircase of primes up to 25\label{fig:staircase25}}
1464
\ill{prime_pi_100_aspect1}{.8}{Staircase of primes up to 100\label{fig:staircase100a}}
1465
1466
1467
1468
\chapter{Prime numbers viewed from a distance}\index{prime number}
1469
The striking thing about these figures is that as the numbers get
1470
large enough, the jagged accumulation of primes, those
1471
quintessentially discrete entities, becomes smoother and smoother to
1472
the eye. How strange and wonderful to watch, as our viewpoint zooms
1473
out to larger ranges of numbers, the accumulation of primes taking on
1474
such a smooth and elegant shape.
1475
1476
\ill{prime_pi_1000}{.8}{Staircases of primes up to 1,000\label{fig:staircases2}}
1477
\ill{prime_pi_10000}{.8}{Staircases of primes up to 10{,}000\label{fig:staircases2b}}
1478
1479
\ill{prime_pi_100000}{.8}{Primes up to 100{,}000\label{fig:pn100000}.}
1480
1481
1482
1483
But don't be fooled by the seemingly smooth shape of the curve in the
1484
last figure above: it is just as faithful a reproduction of the
1485
staircase of primes as the typographer's art can render, for there are
1486
thousands of tiny steps and risers in this curve, all hidden by the
1487
thickness of the print of the drawn curve in the figure. It is
1488
already something of a miracle that we can approximately describe the
1489
build-up of primes, somehow, using a {\em smooth curve}. But {\em
1490
what} smooth curve?
1491
1492
1493
% NOTE about myriad below: After reading http://english.stackexchange.com/questions/20133/what-is-the-correct-usage-of-myriad, I'm not changing "myriad of smooth" to "myriad smooth". The of version, which we use, is correct when myriad is used as a noun, which is now a legal usage in english, according to the above page.
1494
That last question is {\em not} rhetorical. If you draw a curve with
1495
chalk on the blackboard, this can signify a myriad of smooth
1496
(mathematical) curves all encompassed within the thickness of the
1497
chalk-line, all---if you wish---reasonable approximations of one
1498
another. So, there are many smooth curves that fit the chalk-curve.
1499
With this warning, but very much fortified by the data of Figure~\ref{fig:pn100000},
1500
let us ask: {\em what is a smooth curve that is a reasonable
1501
approximation to the staircase of primes?}
1502
1503
\chapter{Pure and applied mathematics}\label{ch:pureapplied}
1504
1505
Mathematicians seems to agree that, loosely speaking, there are two
1506
types of mathematics: {\em pure} and {\em applied}. Usually---when we
1507
judge whether a piece of mathematics is pure or applied---this
1508
distinction turns on whether or not the math has application to the
1509
``outside world,'' i.e., that {\em world} where bridges are built,
1510
where economic models are fashioned, where computers churn away on the
1511
Internet (for only then do we unabashedly call it {\em applied math}),
1512
or whether the piece of mathematics will find an important place
1513
within the context of mathematical theory (and then we label it {\em
1514
pure}). Of course, there is a great overlap (as we will see later,
1515
Fourier analysis\index{Fourier analysis} plays a major role both in data compression and in
1516
pure mathematics).
1517
1518
Moreover, many questions in
1519
mathematics are ``hustlers'' in the sense that, at first view, what is
1520
being requested is that some simple task be done (e.g., the question
1521
raised in this book, {\em to find a smooth curve that is a reasonable
1522
approximation to the staircase of primes}). And only as things
1523
develop is it discovered that there are payoffs in many unexpected
1524
directions, some of these payoffs being genuinely applied (i.e., to
1525
the practical world), some of these payoffs being pure (allowing us
1526
to strike behind the mask of the mere appearance of the mathematical
1527
situation, and get at the hidden fundamentals that actually govern the
1528
phenomena), and some of these payoffs defying such simple
1529
classification, insofar as they provide powerful techniques in other
1530
branches of mathematics. The \RH{}---even in its current
1531
unsolved state---has already shown itself to have all three types of
1532
payoff.
1533
1534
The particular issue before us is, in our opinion, twofold, both
1535
applied, and pure: can we curve-fit the ``staircase of primes'' by a
1536
well approximating smooth curve given by a simple analytic formula?
1537
The story behind this alone is
1538
marvelous, has a cornucopia of applications, and we will be telling it
1539
below. But our curiosity here is driven by a question that is pure,
1540
and less amenable to precise formulation: are there mathematical
1541
concepts at the root of, and more basic than (and ``prior to,'' to
1542
borrow Aristotle's use of the phrase) {\em prime numbers}---concepts\index{prime number}
1543
that account for the apparent complexity of the nature of primes?
1544
1545
1546
\chapter{A probabilistic first guess\label{sec:firstguess} }
1547
1548
\ill{gauss}{.3}{Gauss\index{Gauss, Carl Friedrich}}
1549
1550
The search for such approximating curves began, in fact, two centuries
1551
ago when Carl Friedrich Gauss\index{Gauss, Carl Friedrich} defined a certain beautiful curve that,
1552
experimentally, seemed to be an exceptionally good fit for the
1553
staircase of primes.
1554
1555
\ill{pi_Li}{.8}{Plot of $\pi(X)$ and Gauss's smooth curve $G(X)$}
1556
1557
Let us denote Gauss's curve $G(X)$; it has an
1558
elegant simple formula comprehensible to anyone who has had a tiny bit
1559
of calculus.\index{calculus} If you make believe that the chances that a number $X$ is
1560
a prime is inversely proportional to the number of digits of $X$ you
1561
might well hit upon Gauss's curve.
1562
That is,
1563
\vskip10pt
1564
$$G(X)\hskip40pt {\rm is\ roughly\ proportional\ to} \hskip40pt {\frac{X}{{\rm the\ number\ of\ digits\ of\ } X}}.$$
1565
\vskip10pt
1566
But to describe Gauss's\index{Gauss, Carl Friedrich} guess precisely we need to discuss the {\it natural logarithm}\footnote{In advanced mathematics, ``common'' logarithms are sufficiently uncommon that ``log'' almost always denotes natural
1567
log and the notation $\ln(X)$ is not used.} ``$\log(X)$'' which is an elegant
1568
smooth function of a real number $X$ that is roughly proportional
1569
to the number of digits of the whole number part of $X$.
1570
1571
1572
1573
\ill{log}{.8}{Plot of the natural logarithm $\log(X)$}
1574
1575
%NB: the sequence below converges very, very slowly to e.
1576
Euler's\index{Euler, Leonhard} famous constant $e=2.71828182\ldots$, which is the limit
1577
of the sequence
1578
$$\left(1+{\frac{1}{2}}\right)^2,
1579
\left(1+{\frac{1}{3}}\right)^3,
1580
\left(1+{\frac{1}{4}}\right)^4, \dots,$$
1581
is used in the definition of $\log$:
1582
\begin{center}
1583
{\em $A = \log(X)$ is the number $A$ for which $e^A = X$.}
1584
\end{center}
1585
Before electronic calculators, logarithms were frequently used to
1586
speed up calculations, since logarithms translate difficult multiplication
1587
problems into easier addition problems which can be done mechanically.
1588
Such calculations use that the logarithm of a product is the sum of the logarithms
1589
of the factors\index{factor}; that is, $$\log(X Y) = \log(X) + \log(Y).$$
1590
%FROM -- YURI TSCHINKEL's: ''ABOUT THE COVER: ON THE DISTRIBUTION\index{distribution} OF PRIMESÑGAUSSÕ TABLES ''
1591
1592
% From: http://en.wikipedia.org/wiki/File:Slide_rule_scales_back.jpg
1593
\ill{slide_rule}{.8}{A slide rule computes $2X$ by using that $\log(2X)=\log(2)+\log(X)$\label{fig:slide_rule}}
1594
1595
In Figure~\ref{fig:slide_rule} the numbers printed (on each of the slidable pieces of the rule)
1596
are spaced according to their logarithms, so that when one slides the
1597
rule arranging it so that the printed number $X$ on one piece lines up
1598
with the printed number $1$ on the other, we get that for every number $Y$
1599
printed on the first piece, the printed number on the other piece that
1600
is aligned with it is the product $XY$; in effect the ``slide'' adds
1601
$\log(X)$ to $\log(Y)$ giving $\log(XY)$.
1602
1603
% I found this here: http://kr.blog.yahoo.com/kimxx168/498
1604
% It is referenced in http://www.ams.org/bull/2006-43-01/S0273-0979-05-01096-7/home.html
1605
% but that link is broken.
1606
\ill{gauss_tables_half}{.9}{A Letter of Gauss\label{fig:gauss_letter}}
1607
1608
In 1791, when Gauss\index{Gauss, Carl Friedrich} was 14 years old, he received a book that contained
1609
logarithms of numbers up to $7$ digits and a table of primes up to 10{,}009.
1610
Years later, in a letter
1611
written in 1849 (see Figure~\ref{fig:gauss_letter}), Gauss\index{Gauss, Carl Friedrich}
1612
claimed that as early as 1792 or 1793 he had already observed that the
1613
density of prime numbers\index{prime number} over intervals of numbers of a given rough
1614
magnitude $X$ seemed to average $1/\log(X)$.
1615
1616
Very {\em very} roughly speaking, this
1617
means that {\em the number of primes up to $X$ is approximately $X$ divided by
1618
twice the number of digits of $X$}. For example,
1619
the number of primes less than $99$ should be roughly
1620
$$
1621
\frac{99}{2\times 2} = 24.75 \approx 25,
1622
$$
1623
which is pretty amazing, since the correct number of
1624
primes up to $99$ is $25$. The number of primes up to $999$ should
1625
be roughly
1626
$$
1627
\frac{999}{2\times 3} = 166.5 \approx 167,
1628
$$
1629
which is again close, since there are 168 primes up to $1000$.
1630
The number of primes up to $999{,}999$ should be roughly
1631
$$
1632
\frac{999999}{2\times 6} = 83333.25 \approx 83{,}333,
1633
$$
1634
which is close to the correct count of $78{,}498$.
1635
1636
Gauss\index{Gauss, Carl Friedrich} guessed that the expected number of primes up to $X$
1637
is approximated by the area under the
1638
graph of $1/\log(X)$ from $2$ to $X$ (see Figure~\ref{fig:G}).
1639
The area under $1/\log(X)$ up to $X=999{,}999$ is $78{,}626.43\ldots$, which
1640
is remarkably close to the correct count $78{,}498$ of the primes
1641
up to $999{,}999$.
1642
1643
\illthree{area_under_log_graph_30}{area_under_log_graph_100}{area_under_log_graph_1000}{.27}{The
1644
expected tally of the number of primes $\leq X$ is approximated by the
1645
area underneath the graph of $1/\log(X)$ from $1$ to $X$.\label{fig:G}}
1646
1647
1648
Gauss\index{Gauss, Carl Friedrich} was an inveterate computer:
1649
he wrote in his 1849 letter that
1650
there are $216{,}745$ prime numbers\index{prime number} less than three million. This is
1651
wrong: the actual number of these primes is $216{,}816$. Gauss's curve $G(X)$
1652
predicted that there would be $216{,}970$ primes---a miss, Gauss\index{Gauss, Carl Friedrich}
1653
thought, by
1654
$$225 = 216970 - 216745.$$
1655
But actually he was closer than he thought: the
1656
prediction of the curve $G(X)$ missed by a mere $154 = 216970 - 216816$.
1657
%; not as close as the 2004 US
1658
%elections, but pretty close nevertheless).
1659
Gauss's computation brings up two queries: will this spectacular ``good
1660
fit'' continue for arbitrarily large numbers? and, the (evidently
1661
prior) question: what counts as a good fit?
1662
1663
1664
1665
\chapter{What is a ``good approximation''?}\label{sec:sqrterror}
1666
1667
If you are trying to estimate a number, say, around ten thousand, and
1668
you get it right to within a hundred, let us celebrate this kind of
1669
accuracy by saying that you have made an approximation with {\em
1670
square-root error}\index{square-root error} (${\sqrt{10{,}000}}=100$). Of course, we should
1671
really use the more clumsy phrase ``an approximation with at worst
1672
{\em square-root error.}''\index{square-root error} Sometimes we'll simply refer to such
1673
approximations as {\em good approximations.} If you are trying to
1674
estimate a number in the millions, and you get it right to within a
1675
thousand, let's agree that---again---you have made an approximation
1676
with {\em square-root error}\index{square-root error} (${\sqrt{1{,}000{,}000}}=1{,}000$).
1677
Again, for short, call this a {\em good approximation.} So, when Gauss\index{Gauss, Carl Friedrich}
1678
thought his curve missed by $226$ in estimating the number of primes
1679
less than three million, it was well within the margin we have given
1680
for a ``good approximation.''
1681
1682
1683
More generally, if you are trying to estimate a number that has $D$
1684
digits and you get it almost right, but with an error that has no more
1685
than, roughly, half that many digits, let us say, again, that you have
1686
made an approximation with {\em square-root error}\index{square-root error} or synonymously, a
1687
{\em good approximation}.
1688
1689
1690
This rough account almost suffices for what we will be discussing
1691
below, but to be more precise, the specific {\em gauge of accuracy}
1692
that will be important to us is not for a mere {\em single} estimate
1693
of a {\em single} error term,
1694
$$
1695
\text{ Error term } = \text{ Exact value } - \text{ Our ``good approximation" }
1696
$$
1697
but rather for {\em infinite sequences} of estimates of
1698
error terms. Generally, if you are interested in a numerical quantity
1699
$q(X)$ that depends on the real number parameter $X$ (e.g., $q(X)$
1700
could be $\pi(X)$, ``the number of primes $\leq{}X$'') and if you have an
1701
explicit candidate ``approximation,'' $q_{\rm approx}(X)$, to this
1702
quantity, let us say that $q_{\rm approx}(X)$ is {\bf essentially a
1703
square-root accurate\index{square-root accurate} approximation to $q(X)$} if for {\em any}
1704
given exponent greater than $0.5$ (you choose it: $0.501$, $0.5001$,
1705
$0.50001$, $\dots$ for example) and for large enough $X$---where the
1706
phrase ``large enough'' depends on your choice of exponent---the {\bf
1707
error term}---i.e., the difference between $q_{\rm approx}(X)$ and
1708
the {\em true} quantity, $q(X)$, is, in absolute value, less than
1709
$X$ raised to that exponent (e.g. $< X^{0.501}$, $<
1710
X^{0.5001}$, etc.). Readers who know calculus\index{calculus} and wish to have a
1711
technical formulation of this definition of {\em good approximation}
1712
might turn to the endnote \bibnote{If $f(x)$ and
1713
$g(x)$ are real-valued functions of a real variable $x$ such that
1714
for any
1715
$\epsilon >0$ both of them take their values between $ x^{1-\epsilon}$
1716
and $ x^{1+\epsilon}$ for
1717
$x$
1718
sufficiently large, then say that $f(x)$ and $g(x)$ are {\bf good
1719
approximations of one another} if,
1720
for any positive $\epsilon$ the absolute value of their difference
1721
is less than $ x^{{1\over
1722
2}+\epsilon}$ for
1723
$x$
1724
sufficiently large. The functions $\Li(X)$ and $R(X)$ are good approximations of
1725
one another.} for a precise statement.
1726
1727
1728
If you found the above confusing, don't worry: again, a
1729
square-root accurate\index{square-root accurate} approximation is one in which at least roughly
1730
half the digits are correct.
1731
1732
1733
\begin{remark}
1734
To get a feel for how basic the notion of {\em approximation to data
1735
being square root close to the true values of the data} is---and
1736
how it represents the ``gold standard'' of accuracy for
1737
approximations, consider this fable.
1738
1739
Imagine that the devil had the idea of saddling a large committee of
1740
people with the task of finding values of $\pi(X)$ for various large
1741
numbers $X$. This he did in the following manner, having already
1742
worked out which numbers are prime himself. Since the devil is, as
1743
everyone knows, {\em in the details,} he has made no mistakes: his
1744
work is entirely correct. He gives each committee member a copy of
1745
the list of all {\em prime numbers}\index{prime number} between $1$ and one of the large
1746
numbers $X$ in which he was interested. Now each committee member
1747
would count the number of primes by doing nothing more than
1748
considering each number, in turn, on their list and tallying them
1749
up, much like a canvasser counting votes; the committee members needn't even know that these numbers are prime, they just think of these numbers as items on their list. But since they are human,
1750
they will indeed be making mistakes, say $1\%$ of the time.
1751
Assume further that it is just as likely for them to make the
1752
mistake of undercounting or overcounting. If many people are
1753
engaged in such a pursuit, some of them might overcount $\pi(X)$;
1754
some of them might undercount it. The average error (overcounted
1755
or undercounted) would be proportional to~${\sqrt X}$.
1756
1757
In the next chapter we'll view these undercounts and overcounts as analogous to a random walk\index{random walk}.
1758
1759
1760
\end{remark}
1761
1762
1763
\chapter{Square root error and random walk\index{random walk}s}
1764
1765
To take a random walk\index{random walk} along a (straight) east--west path you would start at your home base, but every minute, say, take a step along the path, each step being of the same length, but randomly either east or west. After $X$ minutes, how far are you from your home base?
1766
1767
The answer to this {\it cannot} be a specific number, precisely because you're making a random decision that affects that number for each of the $X$ minutes of your journey. It is more reasonable to ask a statistical version of that question. Namely, if you took {\it many} random walks\index{random walk} $X$ minutes long, then---on the average---how far would you be from your home base? The answer, as is illustrated by the figures below, is that the average distance you will find yourself from home base after (sufficiently many of) these excursions is proportional to ${\sqrt X}$. (In fact, the average is equal to $\sqrt{\frac{2}{\pi}}\cdot {\sqrt X}$.)
1768
1769
To connect this with the committee members' histories of errors, described in the fable in Chapter~\ref{sec:sqrterror}, imagine every error (undercount or overcount by $1$) the committee member makes, as a ``step" East for undercount and West for overcount. Then if such errors were made, at a constant frequency over the duration of time spent counting, and if the over and undercounts were equally likely and random, then one can model the committee members' computational accuracy by a random walk.\index{random walk} It would be---in the terminology we have already discussed---no better than {\it square-root accurate}\index{square-root accurate}; it would be subject to {\it square-root error\index{square-root error}}.
1770
1771
To get a real sense of how constrained random walks\index{random walk} are by this ``square-root law," here are a few numerical experiments of random walks. The left-hand squiggly (blue) graphs in Figures~\ref{fig:random_walks_3}--\ref{fig:random_walks_1000} below are computer-obtained random walk trials (three, ten, a hundred, and a thousand random walks). The blue curve in the right-hand graphs of those four figures is the average distance from home-base of the corresponding (three, ten, a hundred, and a thousand) random walks.
1772
The red curve in each figure below is the graph of the quantity $\sqrt{\frac{2}{\pi}}\cdot {\sqrt X}$ over the $X$-axis.
1773
As the number of random walks\index{random walk} increases, the red curve
1774
better and better approximates the average distance.
1775
1776
\illtwo{random_walks-3}{random_walks-3-mean}{.45}{Three Random Walk\index{random walk}s\label{fig:random_walks_3}}
1777
1778
\illtwo{random_walks-10}{random_walks-10-mean}{.45}{Ten Random Walk\index{random walk}s\label{fig:random_walks_10}}
1779
1780
\illtwo{random_walks-100}{random_walks-100-mean}{.45}{One Hundred Random Walk\index{random walk}s\label{fig:random_walks_100}}
1781
1782
\illtwo{random_walks-1000}{random_walks-1000-mean}{.45}{One Thousand Random Walk\index{random walk}s\label{fig:random_walks_1000}}
1783
1784
1785
1786
1787
1788
\chapter[What is Riemann's Hypothesis?] { What is Riemann's Hypothesis? ({\em first formulation})\label{sec:rh1}}
1789
1790
1791
1792
Recall from Chapter~\ref{sec:firstguess} that a rough guess for an approximation to
1793
$\pi(X)$, the number of primes $\leq{} X$, is given by the function
1794
$X/\log(X)$. Recall, as well, that a refinement of that guess, offered by Gauss\index{Gauss, Carl Friedrich}, stems from this curious thought: the ``probability'' that a number $N$ is a prime is proportional to the reciprocal of its number of digits; more precisely, the probability is $1/\log(N)$. This would lead us to guess that the approximate value of $\pi(X)$ would be
1795
the area of the region from $2$ to $X$ under the graph of
1796
$1/\log(X)$, a quantity sometimes referred to as $\Li(X)$.
1797
``Li'' (pronounced L$\overline{\rm i}$, so the same as ``lie" in ``lie down")
1798
is short for {\em logarithmic integral},
1799
because the area of the region from $2$ to $X$ under $1/\log(X)$
1800
is (by definition) the {\em integral} $\int_2^X 1/\log(t) dt$.
1801
1802
1803
1804
1805
Figure~\ref{fig:threeplots} contains a graph of the three functions
1806
$\Li(X)$, $\pi(X)$, and $X/\log X$ for $X\leq 200$.
1807
But data, no matter how impressive, may be deceiving (as we learned in
1808
Chapter~\ref{ch:further}). If you think
1809
that the three graphs never cross for all large values of $X$, and
1810
that we have the simple relationship $X/\log(X) < \pi(X) < \Li(X)$ for
1811
large $X$, read \url{http://en.wikipedia.org/wiki/Skewes'_number}.
1812
1813
1814
\ill{three_plots}{.9}{Plots of $\Li(X)$ (top), $\pi(X)$ (in the middle), and $X/\log(X)$ (bottom).\label{fig:threeplots}}
1815
1816
It is a {\em major challenge} to
1817
evaluate $\pi(X)$ for large values of $X$.
1818
For example, let $X=10^{24}$.
1819
Then \label{pili_vals} (see \bibnote{This computation of $\pi(X)$ was done by David J. Platt in 2012,
1820
and is the largest value of $\pi(X)$ ever computed. See
1821
\url{http://arxiv.org/abs/1203.5712} for more details.}) we have:
1822
\begin{align*}
1823
\pi(X) &= {\color{dredcolor} 18{,}435{,}599{,}767{,}3}49{,}200{,}867{,}866\\
1824
\Li(X) &= {\color{dredcolor}18{,}435{,}599{,}767{,}3}66{,}347{,}775{,}143.10580\ldots \\
1825
X/(\log(X)-1) &=
1826
18{,}429{,}088{,}896{,}563{,}917{,}716{,}962.93869\ldots\\
1827
\Li(X) - \pi(X) &= \hspace{7em}
1828
17{,}146{,}907{,}277.105803\ldots\\
1829
\sqrt{X}\cdot \log(X) &= \hspace{5.2em}
1830
55{,}262{,}042{,}231{,}857.096416 \ldots
1831
\end{align*}
1832
Note that several of the left-most digits of $\pi(X)$ and $\Li(X)$ are the
1833
same (as indicated in red), a point we will return to on page~\pageref{page:leftmost}.
1834
1835
1836
More fancifully, we can think of the error in this approximation to $\pi(X)$, i.e., $|\Li(X)-\pi(X)|$, (the absolute value) of the difference between $\Li(X)$ and $\pi(X)$, as (approximately) the result of a walk having roughly $X$ steps where you move by the following rule: go east by a distance of $1/\log N$ feet if $N$ is not a prime and west by a distance of $1-\frac{1}{\log N}$ feet if $N$ is a prime. Your distance, then, from home base after $X$ steps is approximately $|\Li(X)-\pi(X)|$ feet.
1837
%% Computer code to double check the above claim without using
1838
%% your brain (will be useful for a SageMath version of the book):
1839
% def f(X):
1840
% t = 0
1841
% from math import log
1842
% for N in [2..floor(X)]:
1843
% if is_prime(N):
1844
% t -= 1-1/log(N)
1845
% else:
1846
% t += 1/log(N)
1847
% return t
1848
%
1849
% def g(X):
1850
% return N(Li(X) - prime_pi(X))
1851
1852
1853
We have no idea if this picture of things resembles a truly {\it random walk}\index{random walk} but at least it makes it reasonable to ask the question: is $\Li(X)$ {\it essentially a square root approximation} to
1854
$\pi(X)$? Our first formulation of Riemann's
1855
Hypothesis says yes:
1856
1857
\begin{center}
1858
\shadowbox{ \begin{minipage}{0.9\textwidth}
1859
\mbox{} \vspace{0.2ex}
1860
\begin{center}{\bf\large The {\bf \RH{}} (first formulation)}\end{center}
1861
\medskip
1862
1863
For any real number $X$ the number of prime numbers\index{prime number} less than $X$ is
1864
approximately $\Li(X)$ and this approximation is essentially square
1865
root accurate (see \bibnote{In fact, the Riemann Hypothesis is equivalent to $|\Li(X) - \pi(X)| \leq \sqrt{X}\log(X)$ for all $X\geq 2.01$. See Section 1.4.1 of Crandall and Pomerance's book {\em Prime Numbers: A Computational Perspective}.}).
1866
\label{rh:first}
1867
1868
\vspace{1ex}
1869
\end{minipage}}
1870
\end{center}
1871
1872
\chapter{The mystery moves to the error term}
1873
1874
Let's think of what happens when you have a {\it mysterious quantity} (say, a function of a real number $X$) you wish to understand. Suppose you manage to approximate that quantity by an easy to understand expression---which we'll call the ``dominant term"---that is also simple to compute, but only approximates your mysterious quantity. The approximation is not exact; it has a possible error, which happily is significantly smaller than the size of the dominant term. ``Dominant" here just means exactly that: it is of size significantly larger than the error of approximation.
1875
1876
$${\it Mysterious\ quantity}(X)\ \ = \ \ {\it Simple, \ but \ dominant\ quantity}(X) \ + \ {\it Error}(X).$$
1877
1878
If all you are after is a general estimate of {\it size} your job is done. You might declare victory and ignore ${\it Error}(X)$ as being---in size---negligible. But if you are interested in the deep structure of your {\it mysterious quantity} perhaps all you have done is to manage to transport most questions about it to ${\it Error}(X)$. In conclusion, ${\it Error}(X)$ is now your new mysterious quantity.
1879
1880
Returning to the issue of $\pi(X)$ (our {\it mysterious quantity}) and $\Li(X)$ (our {\it dominant term}) the first formulation of the Riemann Hypothesis (as in Chapter \ref{sec:rh1} above) puts the spotlight on the {\it Error\ term} $|\Li(X) - \pi(X)|$, which therefore deserves our scrutiny, since---after all---we're not interested in merely counting the primes: we want to understand as much as we can of their structure.
1881
1882
To get a feel for this error term, we shall smooth it out a bit, and look at a few of its graphs.
1883
1884
1885
\chapter{Ces\`aro smoothing\index{Ces\`aro Smoothing}}\label{ces}
1886
1887
Often when you hop in a car and reset the trip counter, the car
1888
will display information about your trip. For instance, it might show you the
1889
average speed up until now, a number that is
1890
``sticky'', changing much less erratically than your
1891
actual speed, and you might use it to make a rough estimate of
1892
how long until you will reach your destination.
1893
Your car is computing the {\em Ces\`aro smoothing\index{Ces\`aro Smoothing} }of your speed.
1894
We can use this same idea to better understand the behavior
1895
of other things, such as the sums appearing in
1896
the previous chapter.
1897
\ill{camaro}{.7}{The ``Average Vehicle Speed,'' as displayed on the dashboard of the 2013 Camaro SS car that one of us drove during the writing of this chapter.}
1898
%I took this picture of my rental car -- William Stein.
1899
1900
Suppose you are trying to say something intelligent about the behavior of a certain quantity that varies with time in what seems to be a somewhat erratic, volatile pattern. Call the quantity $f(t)$ and think of it as a function defined for positive real values of ``time'' $t$. The natural impulse might be to take some sort of ``average value''\footnote{For readers who know calculus: that average would be ${\frac{1}{T}}\int_0^Tf(t)dt$.} of $f(t)$ over a time interval, say from $0$ to $T$. This would indeed be an intelligent thing to do if this average stabilized and depended relatively little on the interval of time over which one is computing this average, e.g., if that interval were large enough. Sometimes, of course, these averages themselves are relatively sensitive to the times $T$ that are chosen, and don't stabilize. In such a case, it usually pays to consider {\it all} these averages as your chosen $T$ varies, as a function (of $T$) in its own right{\footnote{ For readers who know calculus: this is
1901
$$F(T):= {\frac{1}{T}}\int_0^Tf(t)dt.$$}.
1902
This new function $F(T)$, called the {\bf Ces\`aro Smoothing}\index{Ces\`aro Smoothing} of the function $f(t)$, is a good indicator of certain eventual trends of the original function $f(t)$ that is less visible directly from the function $f(t)$. The effect of passing from $f(t)$ to $F(T)$ is to ``smooth out" some of the volatile local behavior of the original function, as can be seen in
1903
Figure~\ref{fig:cesaro-smoothing}.
1904
1905
1906
\ill{cesaro}{.9}{The red plot is the Ces\`aro smoothing\index{Ces\`aro Smoothing} of the blue plot\label{fig:cesaro-smoothing}}
1907
\chapter{ A view of $|\Li(X) - \pi(X)|$}
1908
Returning to our mysterious error term, consider Figure \ref{fig:li-minus-pi-250000} where the volatile blue curve in the middle
1909
is $\Li(X) - \pi(X)$, its Ces\`aro smoothing\index{Ces\`aro Smoothing} is the red relatively smooth curve on the bottom, and the curve on the top
1910
is the graph of $\sqrt{\frac{2}{\pi}}\cdot \sqrt{X/\log(X)}$ over the range of $X\le 250{,}000$.
1911
1912
\ill{li-minus-pi-250000}{.9}{$\Li(X)-\pi(X)$ (blue middle), its Ces\`aro smoothing\index{Ces\`aro Smoothing} (red bottom), and
1913
$\sqrt{\frac{2}{\pi}}\cdot \sqrt{X/\log(X)}$ (top), all for $X\leq 250{,}000$\label{fig:li-minus-pi-250000}}
1914
1915
% TODO: this is from http://seriesdivergentes.wordpress.com/2011/07/21/john-e-littlewood\index{Littlewood, John Edensor }/
1916
Data such as this graph can be revealing and misleading at the same time. For example, the volatile blue graph (of $\Li(X)-\pi(X)$) {\it seems} to be roughly sandwiched between two rising functions, but this will not continue for all values of $X$. The Cambridge University mathematician, John Edensor Littlewood\index{Littlewood, John Edensor } (see
1917
Figure~\ref{fig:littlewood}),\index{Littlewood, John Edensor } proved in 1914 that there {\it exists} a real number $X$ for which the quantity $\Li(X)-\pi(X)$ vanishes, and then for slightly larger $X$ crosses over into negative values. This theorem attracted lots of attention at the time (and continues to do so) because of the inaccessibility of achieving good estimates (upper or lower bounds) for the first such number. That ``first" $X$ for which $\Li(X) =\pi(X)$ is called {\bf Skewes Number} in honor of the South African mathematician (a student of Littlewood) Stanley Skewes, who (in 1933) gave the first---fearfully large---upper bound for that number (conditional on the Riemann Hypothesis!). Despite a steady stream of subsequent improvements, we currently can only locate Skewes Number\index{Skewes Number} as being in the range:
1918
1919
$$
1920
10^{14}\ \ \ \le\ \ \ {\rm Skewes\ Number}\ \ \ <\ \ \ 10^{317},
1921
$$ and it is proven that at some values of $X$ fairly close to the upper bound $10^{317}$ $\pi(X)$ is greater than $\Li(X)$. So the trend suggested in Figure \ref{fig:li-minus-pi-250000} will not continue indefinitely.
1922
1923
%\begin{wrapfigure}{l}{0.3\textwidth}
1924
% \includegraphics[width=0.3\textwidth]{illustrations/littlewood\index{Littlewood, John Edensor }}\\
1925
% John Edensor Littlewood\index{Littlewood, John Edensor } (1885--1977)
1926
%\end{wrapfigure}
1927
1928
\ill{littlewood}{0.35}{John\index{Littlewood, John Edensor } Edensor Littlewood (1885--1977)\label{fig:littlewood}}
1929
1930
1931
1932
1933
\chapter{The Prime Number Theorem\label{sec:pnt}}\index{prime number}
1934
1935
Take a look at Figure~\ref{fig:threeplots} again.
1936
All three functions, $\Li(X)$, $\pi(X)$ and $X/\log(X)$
1937
are ``going to infinity with $X$'' (this means
1938
that for any real number $R$, for all sufficiently large $X$,
1939
the values of these functions at $X$ exceed $R$).
1940
1941
Are these functions ``going to infinity'' at {\it the same rate}?
1942
1943
To answer such a question, we have to know what we mean by {\it going
1944
to infinity at the same rate}. So, here's a definition. Two
1945
functions, $A(X)$ and $B(X)$, that each go to infinity will be said to
1946
{\bf go to infinity at the same rate} if their {\it ratio}
1947
$$A(X)/B(X)$$
1948
tends to $1$ as $X$ goes to infinity.
1949
1950
If for example two functions, $A(X)$ and $B(X)$ that take positive whole number values, have the same number of digits for large $X$ and if, for any
1951
number you give us, say: a million (or a billion, or a trillion) and if $X$ is large enough, then the
1952
``leftmost'' million (or billion, or trillion) digits of $A(X)$ and $B(X)$ are the same, then $A(X)$ and $B(X)$ {\it go to infinity at the same rate}. For example,
1953
$$
1954
\frac{A(X)}{B(X)} =
1955
\frac{{\color{dredcolor}2810675971}83743525105611755423}{{\color{dredcolor}2810675971}61361511527766294585}
1956
= 1.00000000007963213762060\ldots
1957
$$
1958
1959
1960
While we're defining things, let us say that two functions, $A(X)$
1961
and $B(X)$, that each go to infinity {\bf go to infinity at
1962
similar rates} if there are two positive constants $c$ and $C$
1963
such that for $X$ sufficiently large the {\it ratio}
1964
$$
1965
A(X)/B(X)
1966
$$
1967
is greater than $c$ and smaller than $C$.
1968
1969
\ill{similar_rates}{1.0}{The polynomials $A(X)=2 X^{2} + 3 X - 5$ (bottom)
1970
and $B(X)= 3 X^{2} - 2 X + 1$ (top) go to infinity at similar rates.\label{fig:simrates}}
1971
1972
1973
1974
For example, two polynomials in $X$ with positive leading coefficients
1975
{\it go to infinity at the same rate} if and only if they have the
1976
same degrees and the same leading coefficient; they {\it go to
1977
infinity at similar rates} if they have the same degree.
1978
See Figures~\ref{fig:simrates} and~\ref{fig:samerate}. %where we may take $c=1/2$ and $C=1$.
1979
1980
\ill{same_rates}{1.0}{The polynomials $A(X)=X^{2} + 3 X - 5$ (top)
1981
and $B(X)= X^{2} - 2 X + 1$ (bottom) go to infinity
1982
at the same rate.\label{fig:samerate}}
1983
1984
1985
1986
Now a theorem from elementary calculus\index{calculus} tells us that the ratio of
1987
$\Li(X)$ to $X/\log(X)$ tends to $1$ as $X$ gets larger and larger.
1988
That is---using the definition we've just introduced---$\Li(X)$ and
1989
$X/\log(X)$ go to infinity at the same rate (see
1990
\bibnote{For a proof of this here's a hint. Compute the difference
1991
between the derivatives of $\Li(x)$ and of $x/\log x$. The answer
1992
is $1/\log^2(x)$. So you must show that the ratio of
1993
$\int_2^X dx/\log^2(x)$ to $\Li(x)= \int_2^Xdx/\log(x)$
1994
tends to zero as~$x$ goes to infinity, and this is a good
1995
calculus exercise.}).
1996
1997
1998
Recall (on page~\pageref{pili_vals} above in Chapter~\ref{sec:rh1}) that
1999
if $X = 10^{24}$, the left-most\label{page:leftmost}
2000
twelve digits of $\pi(X)$ and $\Li(X)$ are the same:
2001
both numbers start
2002
$18{,}435{,}599{,}767{,}3\ldots$. Well, that's a good start. Can we guarantee that for $X$ large enough, the ``left-most'' million (or billion, or trillion) digits of $\pi(X)$ and $\Li(X)$ are the same, i.e., that these two functions go to infinity at the same rate?
2003
2004
The \RH{}, as we have just formulated it, would tell us
2005
that the {\it difference} between $\Li(X)$ and $\pi(X)$ is pretty small
2006
in comparison with the size of $X$. This information would imply (but
2007
would be {\it much} more precise information than) the statement that
2008
the {\it ratio} $\Li(X)/\pi(X)$ tends to $1$, i.e., that $\Li(X)$ and
2009
$\pi(X)$ go to infinity at the same rate.
2010
2011
This last statement gives, of course, a far less precise relationship
2012
between $\Li(X)$ and $\pi(X)$ than the \RH{} (once it is proved!)
2013
would give us. The advantage, though, of the less precise statement
2014
is that it is currently known to be true, and---in fact---has been
2015
known for over a century. It goes under the name of
2016
2017
2018
\noindent {\bf The Prime Number Theorem:\ \ } $\Li(X)$ and $\pi(X)$ go to infinity at the same rate.\index{prime number}
2019
2020
2021
Since $\Li(X)$ and $X/\log(X)$ go to infinity at the same rate, we could
2022
equally well have expressed the ``same'' theorem by saying:
2023
2024
2025
\noindent {\bf The Prime Number Theorem:\ \ } $X/\log(X)$ and $\pi(X)$ go to infinity at the same rate.\index{prime number}
2026
2027
2028
This fact is a very hard-won piece of mathematics! It was proved
2029
in 1896 independently by Jacques Hadamard\index{Hadamard, Jacques} and Charles de la Vall\'{e}e Poussin.\index{de la Vall\'{e}e Poussin, Charles}
2030
2031
2032
A milestone in the history leading up to the proof of the Prime Number\index{prime number}
2033
Theorem is the earlier work of Pafnuty Lvovich Chebyshev\index{Chebyshev, Pafnuty Lvovich} (see
2034
\url{http://en.wikipedia.org/wiki/Chebyshev_function})\index{Chebyshev, Pafnuty Lvovich} showing that
2035
(to use the terminology we introduced) $X/\log(X)$ and $\pi(X)$ go
2036
to infinity at similar rates.
2037
2038
2039
The elusive \RH{}, however, is much deeper than the Prime
2040
Number Theorem, and takes its origin from some awe-inspiring,
2041
difficult to interpret, lines in Bernhard Riemann's\index{Riemann, Bernhard} magnificent 8-page
2042
paper, ``On the number of primes less than a given magnitude,''
2043
published in 1859
2044
(see \bibnote{See \url{http://www.maths.tcd.ie/pub/HistMath/People/Riemann/Zeta/}
2045
for the original German version and an English translation.}).
2046
2047
2048
\ill{riemann_zoom}{1}{From a manuscript of Riemann's 1859 paper written by a contem- porary of Riemann (possibly the mathematician Alfred Clebsch). \label{fig:riemamn}}
2049
2050
Riemann's hypothesis, as it is currently interpreted, turns up as
2051
relevant, as a key, again and again in different parts of the subject:
2052
if you accept it as {\em hypothesis} you have an immensely powerful
2053
tool at your disposal: a mathematical magnifying glass that sharpens
2054
our focus on number theory. But it also has a wonderful protean
2055
quality---there are many ways of formulating it, any of these
2056
formulations being provably equivalent to any of the others.
2057
2058
\ill{riemann}{.3}{Bernhard Riemann (1826--1866)\index{Riemann, Bernhard}}
2059
2060
2061
The \RH{} remains unproved to this day, and therefore is ``only a
2062
hypothesis,'' as Osiander said of Copernicus's theory, but one for
2063
which we have overwhelming theoretical and numerical evidence in its
2064
support. It is the kind of conjecture that contemporary Dutch
2065
mathematician Frans Oort might label a {\em suffusing conjecture} in
2066
that it has unusually broad implications: many, many results are now
2067
known to follow, if the conjecture, familiarly known as RH, is true.
2068
A proof of RH would, therefore, fall into the {\em applied} category,
2069
given our discussion above in Chapter~\ref{ch:pureapplied}. But
2070
however you classify RH, it is a central concern in mathematics to
2071
find its proof (or, a counter-example!). RH is one of the weightiest
2072
statements in all of mathematics.
2073
2074
2075
\chapter[The staircase of primes]{The {\em information} contained in the staircase of primes\label{sec:information}}
2076
2077
2078
2079
We have borrowed the phrase ``staircase of primes'' from the popular
2080
book {\em The Music of the Primes} by Marcus du Sautoy, for we feel that
2081
it captures the sense that there is a deeply hidden architecture to
2082
the graphs that compile the number of primes (up to $N$) and also
2083
because---in a bit---we will be tinkering with this carpentry. Before
2084
we do so, though, let us review in Figure~\ref{fig:staircases}
2085
what this staircase looks like for different ranges.
2086
2087
\begin{figure}[H]
2088
\centering
2089
\includegraphics[width=.4\textwidth]{illustrations/PN_25}
2090
\includegraphics[width=.4\textwidth]{illustrations/PN_100}\\
2091
2092
\includegraphics[width=.4\textwidth]{illustrations/PN_1000}
2093
\includegraphics[width=.4\textwidth]{illustrations/PN_10000}\\
2094
2095
\includegraphics[width=.8\textwidth]{illustrations/PN_100000}
2096
2097
2098
\caption{The Staircase of Primes\label{fig:staircases}}
2099
\end{figure}
2100
2101
The mystery of this staircase is that the {\em information} contained
2102
within it is---in effect---the full story of where the primes are
2103
placed. This story seems to elude any simple description. Can we
2104
``tinker with'' this staircase without destroying this valuable
2105
information?
2106
2107
2108
2109
2110
\chapter[Tinkering with the staircase of primes]{Tinkering with the carpentry of the staircase of primes\label{sec:tinkering}}
2111
2112
2113
For starters, notice that all the (vertical) {\em risers} of this staircase (Figure~\ref{fig:staircases} above) have
2114
unit height. That is, they contain no numerical information except for
2115
their placement on the $x$-axis. So, we could distort our staircase by
2116
changing (in any way we please) the height of each riser; and as long
2117
as we haven't brought new risers into---or old risers out
2118
of---existence, and have not modified their position over the
2119
$x$-axis, we have retained all the information of our original
2120
staircase.
2121
2122
2123
A more drastic-sounding thing we could do is to judiciously add new
2124
steps to our staircase. At present, we have a step at each prime
2125
number $p$, and no step anywhere else. Suppose we built a staircase
2126
with a new step not only at $x=p$ for $p$ each prime number\index{prime number} but also at
2127
$x =1$ and $x=p^n$ where $p^n$ runs through all powers of prime numbers as
2128
well. Such a staircase would have, indeed, many more steps than our
2129
original staircase had, but, nevertheless, would retain much of the
2130
quality of the old staircase: namely it contains within it the full
2131
story of the placement of primes {\em and their powers}.
2132
2133
A final thing we can do is to perform a distortion of the $x$-axis
2134
(elongating or shortening it, as we wish) in any specific way, as long
2135
as we can perform the inverse process, and ``undistort'' it if we wish.
2136
Clearly such an operation may have mangled the staircase, but hasn't destroyed
2137
information irretrievably.
2138
2139
We shall perform all three of these kinds of operations eventually,
2140
and will see some great surprises as a result. But for now, we will
2141
perform distortions only of the first two types. We are about to
2142
build a new staircase that retains the precious information we need,
2143
but is constructed according to the following architectural plan.
2144
2145
\begin{itemize}
2146
2147
\item We first build a staircase that has a new step precisely at $x
2148
=1$, and $ x= p^n$ for every {\em prime power} $p^n$ with $n\geq
2149
1$. That is, there will be a new step at $x= 1,2,3,4,5,7,8,9,11,
2150
\dots$
2151
2152
\item Our staircase starts on the ground at $x=0$ and the height of the
2153
riser of the step at $x=1$ will be $\log(2\pi)$. The height of the
2154
riser of the step at $x=p^n$ will not be $1$
2155
(as was the height of all risers in the old staircase of primes)
2156
but rather: the step at $x=p^n$ will have the height of its riser
2157
equal to $\log p$. So for the first few steps listed in the
2158
previous item, the risers will be of height $\log(2\pi), \log
2159
2,\log 3,\log 2,\log 5,\log 7, \log 2,\log 3,\log 11, \dots$
2160
Since $\log(p)>1$,
2161
these vertical dimensions lead to a steeper ascent but no great loss
2162
of {\em information}.
2163
2164
Although we are not quite done with our architectural work, Figure~\ref{fig:psi} shows
2165
what our new staircase looks like, so far.
2166
2167
\illtwoh{psi_9}{psi_100}{.27}{The newly constructed staircase that counts prime powers\label{fig:psi}}
2168
2169
2170
\end{itemize}
2171
2172
Notice that this new staircase looks, from afar, as if it were
2173
nicely approximated by the $45$ degree straight line, i.e., by the
2174
simple function $X$. In fact, we have---by this new
2175
architecture---a second {\em equivalent} way of formulating
2176
Riemann's hypothesis. For this, let $\psi(X)$ denote the
2177
function
2178
of $X$ whose graph is
2179
depicted in Figure~\ref{fig:psi}
2180
(see \bibnote{We have $$\psi(X) =\sum_{p^n \le X}\log p$$ where the summation is over prime
2181
powers $p^n$ that are $\le X$.}).
2182
2183
\begin{center}
2184
\shadowbox{ \begin{minipage}{0.9\textwidth}
2185
\mbox{} \vspace{0.2ex}
2186
\begin{center}{\bf\large The {\bf \RH{}} (second formulation)}\end{center}
2187
\medskip
2188
2189
This new staircase is essentially square root close to the 45 degree
2190
straight line; i.e., the function $\psi(X)$ is essentially square root
2191
close to the function $f(X)=X$. See Figure~\ref{fig:psi_diag_1000}.
2192
\vspace{1ex}
2193
\end{minipage}}
2194
\end{center}
2195
2196
\ill{psi_diag_1000}{0.75}{The newly constructed staircase is close to the 45 degree line.\label{fig:psi_diag_1000}}
2197
2198
Do not worry if you do not understand why our first and second
2199
formulations of Riemann's Hypothesis are equivalent. Our aim, in
2200
offering the second formulation---a way of phrasing Riemann's\index{Riemann, Bernhard} guess
2201
that mathematicians know to be equivalent to the first one---is to
2202
celebrate the variety of equivalent ways we have to express Riemann's
2203
\index{Riemann, Bernhard} proposed answers to the question ``How many primes are there?'', and to
2204
point out that some formulations would reveal a startling
2205
simplicity---not immediately apparent---to the behavior of prime
2206
numbers, no matter how erratic primes initially appear to us to
2207
be. After all, what could be simpler than a 45 degree straight line?
2208
2209
2210
2211
2212
\chapter[Computer music files and prime numbers]{What do computer music files,
2213
data compression, and prime numbers have to do with each other?\label{sec:fourier1}}\index{prime number}
2214
2215
2216
2217
2218
Sounds of all sorts---and in particular the sounds of music---travel
2219
as vibrations of air molecules at roughly 768 miles per hour. These
2220
vibrations---fluctuations of pressure---are often represented, or
2221
``pictured,'' by a graph where the horizontal axis corresponds to
2222
time, and the vertical axis corresponds to pressure at that time. The
2223
very purest of sounds---a single sustained note---would look
2224
something like this (called a ``sine wave'') when pictured (see
2225
Figure~\ref{fig:sine}), so that if you fixed your position somewhere
2226
and measured air pressure due to this sound at that position, the
2227
peaks correspond to the times when the varying air pressure is maximal
2228
or minimal and the zeroes to the times when it is normal pressure.
2229
2230
\ill{sin}{.7}{Graph of a sine wave\label{fig:sine}}
2231
2232
You'll notice that there are two features to the graph in Figure~\ref{fig:sine}.
2233
2234
\begin{enumerate}
2235
\item {\em The height of the peaks of this sine wave:} This height is
2236
referred to as the {\bf amplitude} and corresponds to the {\em
2237
loudness} of the sound.
2238
\item {\em The number of peaks per second:} This number is referred to
2239
as the {\bf frequency} and corresponds to the {\em pitch} of the
2240
sound.
2241
\end{enumerate}
2242
2243
2244
Of course, music is rarely---perhaps never---just given by a single
2245
pure sustained note and nothing else. A next most simple example of a
2246
sound would be a simple chord (say a C and an E played together on
2247
some electronic instrument that could approximate pure notes). Its
2248
graph would be just the {\em sum} of the graphs of each of the pure
2249
notes (see Figures~\ref{fig:sinetwofreq} and \ref{fig:sinetwofreqsum}).
2250
2251
2252
\ill{sin-twofreq}{0.6}{Graph of two sine waves with different frequencies\label{fig:sinetwofreq}}
2253
2254
\ill{sin-twofreq-sum}{.6}{Graph of sum of the two sine waves with different frequencies\label{fig:sinetwofreqsum}}
2255
2256
So the picture of the changing frequencies of this chord would be
2257
already a pretty complicated configuration. What we have described in
2258
these graphs are two sine waves (our C and our E) when they are played
2259
{\em in phase} (meaning they start at the same time) but we could
2260
also ``delay'' the onset of the E note and play them with some
2261
different phase relationship, for example, as illustrated
2262
in Figures~\ref{fig:sin-twofreq-phase} and \ref{fig:sum-sin-phase}.
2263
2264
\ill{sin-twofreq-phase}{.6}{\label{fig:sin-twofreq-phase}Graph of two ``sine'' waves with different phases.}
2265
2266
\ill{sin-twofreq-phase-sum}{.6}{Graph of the sum of the two ``sine'' waves with different frequencies
2267
and phases.\label{fig:sum-sin-phase}}
2268
2269
2270
So, {\em all you need} to reconstruct the chord graphed above is to
2271
know five numbers:
2272
\begin{itemize}
2273
\item the two frequencies---the collection of frequencies that make
2274
up the sound is called the {\em spectrum}\index{spectrum} of the sound,
2275
\item the two {\em amplitudes} of each of these two frequencies,
2276
\item the {\em phase} between them.
2277
2278
\end{itemize}
2279
2280
Now suppose you came across such a sound as pictured in
2281
Figure~\ref{fig:sum-sin-phase} and wanted to ``record it.'' Well,
2282
one way would be to sample the amplitude of the sound at many
2283
different times, as for example in Figure~\ref{fig:sum-sin-phase-sample}.
2284
2285
\ill{sin-twofreq-phase-sum-points}{.6}{Graph of sampling of a sound wave\label{fig:sum-sin-phase-sample}}
2286
2287
Then, fill in the rest of the points to obtain Figure~\ref{fig:sum-sin-phase-sample-fill}.
2288
2289
\ill{sin-twofreq-phase-sum-fill}{0.6}{Graph obtained from Figure~\ref{fig:sum-sin-phase-sample} by
2290
filling in the rest of the points\label{fig:sum-sin-phase-sample-fill}}
2291
2292
But this sampling would take an enormous amount of storage space, at
2293
least compared to storing five numbers, as explained above!
2294
Current audio compact discs do their sampling 44,100 times a second to
2295
get a reasonable quality of sound.
2296
2297
Another way is to simply record the {\em five} numbers: the {\em
2298
spectrum\index{spectrum}, amplitudes,} and {\em phase}. Surprisingly, this seems to
2299
be roughly the way our ear processes such a sound when we hear it.\footnote{%
2300
We recommend downloading Dave Benson's marvelous book
2301
{\em Music: A Mathematical Offering} from
2302
\url{https://homepages.abdn.ac.uk/mth192/pages/html/maths-music.html}.
2303
This is free, and gives a beautiful account of the superb
2304
mechanism of hearing, and of the mathematics of music.}
2305
2306
Even in this simplest of examples (our pure chord: the pure note C
2307
played simultaneously with pure note E) the {\em efficiency of data
2308
compression} that is the immediate bonus of analyzing the picture
2309
of the chords as built {\em just} with the five numbers giving {\em
2310
spectrum\index{spectrum}, amplitudes,} and {\em phase} is staggering.
2311
2312
\ill{fourier}{0.3}{Jean Baptiste Joseph Fourier (1768--1830)}
2313
2314
This type of analysis, in general, is called {\em Fourier Analysis}
2315
and is one of the glorious chapters of mathematics. One way of
2316
picturing {\em spectrum}\index{spectrum} and {\em amplitudes} of a sound is by a bar
2317
graph which might be called the {\em spectral picture} of the sound,
2318
the horizontal axis depicting frequency and the vertical one depicting
2319
amplitude: the height of a bar at any frequency is proportional to the
2320
amplitude of that frequency ``in'' the sound.
2321
2322
So our CE chord would have the spectral picture in
2323
Figure~\ref{fig:ce-spectral}.
2324
2325
2326
\illtwo{sound-ce-general_sum}{sound-ce-general_sum-blips}{.45}
2327
{Spectral picture of a CE chord\label{fig:ce-spectral}}
2328
2329
2330
This spectral picture ignores the phase but is nevertheless a very
2331
good portrait of the sound. The spectral picture of a graph gets us
2332
to think of that graph as ``built up by the superposition of a bunch
2333
of pure waves,'' and if the graph is complicated enough we may very well
2334
need {\em infinitely} many pure waves to build it up! Fourier analysis\index{Fourier analysis} is a
2335
mathematical theory that allows us to start with any graph---we are
2336
thinking here of graphs that picture sounds, but any graph will
2337
do---and actually compute its spectral picture (and even keep track of
2338
phases).
2339
2340
2341
The operation that starts with a graph and goes to its spectral
2342
picture that records the frequencies, amplitudes, and phases of the
2343
pure sine waves that, together, compose the graph is called the {\em
2344
Fourier transform} and nowadays there are very fast procedures for
2345
getting accurate {\em Fourier transforms} (meaning accurate spectral
2346
pictures including information about phases) by
2347
computer \bibnote{See \url{http://en.wikipedia.org/wiki/Fast_Fourier_transform}}.
2348
2349
2350
%If you pass to the worksheet at this point, you'll be able to see the
2351
%Fourier transforms of many different sounds, and experiment for
2352
%yourself.
2353
The theory behind this operation (Fourier transform giving
2354
us a spectral analysis of a graph) is quite beautiful, but equally
2355
impressive is how---given the power of modern computation---you can
2356
immediately perform this operation for yourself to get a sense of how
2357
different wave-sounds can be constructed from the superposition of
2358
pure tones.
2359
2360
The {\em sawtooth} wave in Figure~\ref{fig:sawtooth} has a spectral picture, its Fourier transform, given in Figure~\ref{fig:sawtooth-spectrum}:
2361
2362
\ill{sawtooth}{.7}{Graph of sawtooth wave\label{fig:sawtooth}}
2363
\ill{sawtooth-spectrum}{.7}{The Spectrum\index{spectrum} of the sawtooth wave has a spike of height $1/k$ at
2364
each integer $k$\label{fig:sawtooth-spectrum}}
2365
2366
2367
2368
\ill{complicated-wave}{.7}{A Complicated sound wave\label{fig:complicated-wave}}
2369
2370
Suppose you have a complicated sound wave, say as in
2371
Figure~\ref{fig:complicated-wave}, and you want to record it.
2372
Standard audio CDs record their data by intensive sampling as we
2373
mentioned. In contrast, current MP3 audio compression technology uses
2374
Fourier transforms plus sophisticated algorithms based on
2375
knowledge of which frequencies the human ear can hear.
2376
With this, MP3 technology manages to
2377
get a compression factor of 8--12 with little {\em perceived} loss in
2378
quality, so that you can fit your entire music collection on your
2379
phone, instead of just a few of your favorite CDs.
2380
2381
\chapter{The word ``Spectrum"}
2382
2383
% TODO: this is from wikipedia -- requires attribution.
2384
\begin{wrapfigure}{r}{0.2\textwidth}
2385
\includegraphics[width=0.2\textwidth]{illustrations/rainbow}
2386
\end{wrapfigure}
2387
2388
It is worth noting how often this word appears in scientific literature, with an array of different uses and meanings. It comes from Latin where its meaning is ``image," or ``appearance," related to the verb meaning {\it to look} (the older form being {\it specere} and the later form {\it spectare}). In most of its meanings, nowadays, it has to do with some procedure, an analysis, allowing one to see clearly the constituent parts of something to be analyzed, these constituent parts often organized in some continuous scale, such as in the discussion of the previous chapter.
2389
2390
The Oxford Dictionary lists, as one of its many uses:
2391
2392
\begin{quote} Used to classify something, or suggest that it can be classified, in terms of its position on a scale between two extreme or opposite points.\end{quote}
2393
2394
This works well for the color spectrum\index{spectrum}, as initiated by Newton (as in the figure above, sunlight is separated by a prism into a rainbow continuum of colors): an analysis of white light into its components. Or in mass spectrometry, where
2395
beams of ions are separated (analyzed) according to their
2396
mass/charge ratio and the mass spectrum\index{spectrum} is recorded on a photographic plate or film. Or in the recording of the various component frequencies, with their corresponding intensities of some audible phenomenon.
2397
2398
In mathematics the word has found its use in many different fields, the most basic use occurring in Fourier analysis\index{Fourier analysis}, which has as its goal the aim of either {\it analyzing} a function $f(t)$ as being comprised of simpler (specifically: sine and cosine) functions, or {\it synthesizing} such a function by combining simpler functions to produce it. The understanding here and in the previous chapter, is that an analysis of $f(t)$ as built up of simpler functions is meant to provide a significantly clearer image of the constitution of $f(t)$. If, to take a very particular example, the simpler functions that are needed for the synthesis of $f(t)$ are of the form $a\cos(\theta t)$ (for $a$ some real number which is the {\it amplitude} or size of the peaks of this periodic function)---and if $f(t)$ is given as the limit of $$a_1\cos(\theta_1t) + a_2\cos(\theta_2t) + a_3cos(\theta_3t) \dots$$ for a sequence of real numbers $\theta_1, \theta_2,
2399
\theta_3,
2400
\dots$ (i.e., these simpler functions are functions of {\it periods} ${\frac{2\pi}{\theta_1}}, {\frac{2\pi}{\theta_2}}, {\frac{2\pi}{\theta_3}}, \dots$) it is natural to call these $\theta_i$ the {\bf spectrum}\index{spectrum} of $f(t)$.
2401
This will eventually show up in our discussion below of trigonometric sums and the {\it Riemann spectrum}.
2402
2403
2404
\chapter{Spectra and trigonometric sums \label{sec:trigsums}}
2405
2406
As we saw in Chapter~\ref{sec:fourier1}, a pure tone can be represented by a periodic {\it sine wave}---a function of time $f(t)$---the equation of which might be:
2407
$$f(t)\ \ = \ \ a\cdot \cos(b +\theta t).$$
2408
2409
\ill{pure_tone}{.7}{Plot of the periodic sine wave $f(t) = 2\cdot \cos(1+t/2)$}
2410
2411
The $\theta$ determines the {\it frequency} of the periodic wave, the
2412
larger $\theta$ is the higher the ``pitch.'' The coefficient $a$
2413
determines the envelope of size of the periodic wave, and we call it
2414
the {\it amplitude} of the periodic wave.
2415
2416
Sometimes we encounter functions $F(t)$ that are not pure tones, but
2417
that can be expressed as (or we might say ``decomposed into'') a finite
2418
sum of pure tones, for example three of them:
2419
2420
$$F(t) = a_1\cdot \cos(b_1 +\theta_1 t) + a_2\cdot \cos(b_2 +\theta_2 t) + a_3\cdot \cos(b_3 +\theta_3 t)$$
2421
2422
\ill{mixed_tone3}{.7}{Plot of the sum $5 \cos\left(-t - 2\right) + 2 \cos\left(t/2 + 1\right) + 3 \cos\left(2 t + 4\right)$\label{fig:mixed_tone3}}
2423
2424
We refer to such functions $F(t)$ as in Figure~\ref{fig:mixed_tone3}
2425
as {\it finite trigonometric
2426
sums}, because---well---they are. In this example, there are three
2427
frequencies involved---i.e., $\theta_1,\theta_2,\theta_3$---and we
2428
say that {\it the spectrum\index{spectrum} of $F(t)$} is the set of these frequencies,
2429
i.e.,
2430
2431
$$
2432
{\rm The\ spectrum \ of \ } F(t) \ \ = \ \ \{\theta_1,\theta_2,\theta_3\}.
2433
$$
2434
2435
More generally we might consider a sum of any finite number of pure
2436
cosine waves---or in a moment we'll also see some infinite ones as
2437
well. Again, for these more general trigonometric sums, their {\it
2438
spectrum}\index{spectrum} will denote the set of frequencies that compose them.
2439
2440
\chapter{The spectrum and the staircase of primes\label{sec:fourier_staircase}}
2441
2442
\ill{prime_pi_100_aspect1}{0.95}{The Staircase of primes\label{fig:staircase100}}
2443
2444
2445
In view of the amazing data-compression virtues of Fourier analysis\index{Fourier analysis},
2446
it isn't unnatural to ask these questions:
2447
2448
\begin{itemize}
2449
\item Is there a way of using Fourier analysis\index{Fourier analysis} to better understand
2450
the complicated picture of the staircase of primes?
2451
2452
\item Does this staircase of primes (or, perhaps, some tinkered
2453
version of the staircase that contains the same basic information)
2454
have a {\em spectrum}\index{spectrum}?
2455
2456
\item If such a {\em spectrum}\index{spectrum} exists, can we compute it conveniently,
2457
just as we have done for the saw-tooth wave above, or for the major
2458
third CE chord?
2459
2460
2461
\item Assuming the spectrum\index{spectrum} exists, and is computable, will our
2462
understanding of this spectrum\index{spectrum} allow us to reproduce all the
2463
pertinent information about the placement of primes among all whole
2464
numbers, elegantly and faithfully?
2465
2466
\item And here is a most important question: will that spectrum\index{spectrum} show
2467
us order and organization lurking within the staircase that we would
2468
otherwise be blind to?
2469
2470
\end{itemize}
2471
2472
2473
Strangely enough, it is towards questions like these that Riemann's
2474
Hypothesis takes us. We began with the simple question about primes:
2475
how to count them, and are led to ask for profound, and hidden,
2476
regularities in structure.
2477
2478
\chapter{To our readers of Part~\ref{part1}}
2479
The statement of the \RH{}---admittedly as elusive as
2480
before---has, at least, been expressed elegantly and more simply,
2481
given our new staircase that approximates (conjecturally with {\em
2482
essential square root accuracy}) a 45 degree straight line.
2483
2484
We have offered two equivalent formulations of the \RH{},
2485
both having to do with the manner in which the prime numbers\index{prime number} are
2486
situated among all whole numbers.
2487
2488
In doing this, we hope that we have convinced you that---in the words
2489
of Don Zagier---primes seem to obey no other law than that of chance
2490
and yet exhibit stunning regularity. This is the end of Part~\ref{part1} of our
2491
book, and is largely the end of our main mission, to explain---in
2492
elementary terms---{\em what is Riemann's Hypothesis?}
2493
2494
2495
For readers who have at some point studied differential calculus,\index{calculus} in
2496
Part~\ref{part2} we shall discuss Fourier analysis\index{Fourier analysis}, a fundamental tool that will be used in Part~\ref{part3}
2497
where we show how
2498
Riemann's hypothesis provides a key to some deeper structure of the
2499
prime numbers\index{prime number}, and to the nature of the laws that they obey. We will---if not explain---at least
2500
hint at how the above series of questions have been answered so far,
2501
and how the \RH{} offers a surprise for the last question in this
2502
series.
2503
2504
2505
2506
%\chapter{To our readers of Part~\ref{part1}}
2507
%The statement of the \RH{}---admittedly as elusive as
2508
%before---has, at least, been expressed elegantly and more simply,
2509
%given our new staircase that approximates (conjecturally with {\em
2510
% essential square root accuracy}) a 45 degree straight line.
2511
2512
%We have offered two equivalent formulations of the \RH{},
2513
%both having to do with the manner in which the prime numbers are
2514
%situated among all whole numbers.
2515
2516
%In doing this, we hope that we have convinced you that---in the words
2517
%of Don Zagier---primes seem to obey no other law than that of chance
2518
%and yet exhibit stunning regularity. This is the end of Part~\ref{part1} of our
2519
%book, and is largely the end of our main mission, to explain---in
2520
%elementary terms---{\em what is Riemann's Hypothesis?}
2521
2522
2523
%For readers who have at some point studied Differential calculus,\index{calculus} in
2524
%Part~\ref{part2} we shall
2525
%go further and explain how the pursuit of
2526
%Riemann's hypothesis may provide a key to some deeper structure of the
2527
%prime numbers, and to the nature of the laws that they obey.
2528
2529
2530
\part{Distribution\index{distribution}s\label{part2}}
2531
2532
2533
\chapter[Slopes of graphs that have no slopes]{How Calculus\index{calculus} manages to
2534
find the slopes of graphs that have no slopes}\label{ch:calculusmanages}
2535
2536
Differential calculus,\index{calculus} initially the creation of Newton and Leibniz
2537
in the 1680s, acquaints us with {\it slopes} of graphs of functions of
2538
a real variable. So, to discuss this we should say a word about what
2539
a {\it function} is, and what its {\it graph} is.
2540
2541
\illtwo{newton}{leibniz}{0.25}{Isaac Newton and Gottfried Leibniz created calculus\index{calculus}}
2542
2543
2544
A {\bf function} (let us refer to it in this discussion as $f$) is
2545
often described as a {\it kind of machine} that for any specific
2546
input numerical value $a$ will give, as output, a well-defined
2547
numerical value.
2548
2549
This ``output number'' is denoted $f(a)$ and is called {\it the
2550
``value'' of the function $f$ at $a$}. For example, the {\it
2551
machine that adds $1$ to any number} can be thought of as the
2552
function $f$ whose value at any $a$ is given by the equation $f(a) =
2553
a+1$. Often we choose a letter---say, $X$---to stand for a ``general
2554
number'' and we denote the function $f$ by the symbol $f(X)$ so that
2555
this symbolization allows to ``substitute for $X$ any specific number
2556
$a$'' to get its value $f(a)$.
2557
2558
The {\bf graph} of a function provides a vivid visual representation of the
2559
function in the Euclidean\index{Euclid} plane where over every point $a$ on the
2560
$x$-axis you plot a point above it of ``height'' equal to the value of
2561
the function at $a$, i.e., $f(a)$. In Cartesian coordinates, then,
2562
you are plotting points $(a, f(a))$ in the plane where $a$ runs
2563
through all real numbers.
2564
2565
\ill{graph_aplusone}{0.6}{Graph of the function $f(a)=a+1$\label{fig:graph_aplusone}}
2566
2567
In this book we will very often be talking about ``graphs'' when we
2568
are also specifically interested in the functions---of which they are
2569
the graphs. We will use these words almost synonymously since we like
2570
to adopt a very visual attitude towards the behavior of the functions
2571
that interest us.
2572
2573
2574
\ill{graph_slope_deriv}{1}{\label{fig:graph_slope_deriv}Calculus\index{calculus}}
2575
2576
2577
2578
Figure~\ref{fig:graph_slope_deriv} illustrates a function (blue), the
2579
slope at a point (green straight line), and the derivative (red) of
2580
the function; the red derivative is the function whose value at a point
2581
is the slope of the blue function at that point. Differential
2582
calculus\index{calculus} explains to us how to calculate slopes of graphs, and
2583
finally, shows us the power that we then have to answer problems we
2584
could not answer if we couldn't compute those slopes.
2585
2586
Usually, in elementary calculus\index{calculus} classes we are called upon to compute
2587
slopes only of smooth graphs, i.e., graphs that actually {\em have}
2588
slopes at each of their points, such as in the illustration just
2589
above. What could calculus\index{calculus} possibly do if confronted with a graph
2590
that has {\em jumps}, such as in Figure~\ref{fig:jump}:
2591
$$f(x) = \begin{cases}1 & x \leq 3\\ 2 & x > 3?\end{cases}$$
2592
(Note that for purely aesthetic reasons, we draw a vertical line at the point where the jump occurs, though technically that vertical line is not part of the graph of the function.)
2593
2594
\ill{jump}{0.5}{The graph of the function $f(x)$ above that jumps---it is $1$ up to $3$ and then $2$ after that point.\label{fig:jump}}
2595
2596
2597
The most comfortable way to deal with the graph of such a function is
2598
to just approximate it by a differentiable function as in
2599
Figure~\ref{fig:jumpsmooth}. % Note that the function in
2600
%Figure~\ref{fig:jumpsmooth} is not smooth, since the
2601
%derivative is continuous but not differentiable; in our
2602
%discussion all
2603
%we will need is that our approximating function is once
2604
%differentiable.
2605
2606
2607
\ill{jump-smooth}{0.5}{A picture of a smooth graph approximating the
2608
graph that is $1$ up to some point $x$ and then $2$ after that
2609
point, the smooth graph being flat mostly.\label{fig:jumpsmooth}}
2610
2611
2612
Then take the {\em derivative} of that smooth function. Of course,
2613
this is just an approximation, so we might try to make a better
2614
approximation, which we do in each successive graph starting
2615
with Figure~\ref{fig:djump1} below.
2616
2617
2618
\ill{jump-smooth-deriv-7}{0.6}{A picture of the derivative of
2619
a smooth approximation to a function that jumps.\label{fig:djump1}}
2620
2621
Note that---as you would expect---in the range where the initial
2622
function is constant, its derivative is zero. In the subsequent
2623
figures, our initial function will be {\it nonconstant} for smaller
2624
and smaller intervals about the origin. Note also that, in our series
2625
of pictures below, we will be successively rescaling the $y$-axis; all
2626
our initial functions have the value $1$ for ``large'' negative numbers
2627
and the value $2$ for large positive numbers.
2628
2629
\ill{jump-smooth-deriv-2}{0.5}{Second picture of the derivative of
2630
a smooth approximation to a function that jumps.\label{fig:djump2}}
2631
\ill{jump-smooth-deriv-05}{0.5}{Third picture of the derivative of
2632
a smooth approximation to a function that jumps.\label{fig:djump3}}
2633
\ill{jump-smooth-deriv-01}{0.5}{Fourth picture of the derivative of
2634
a smooth approximation to a function that jumps.\label{fig:djump4}}
2635
2636
2637
Notice what is happening: as the approximation gets better and
2638
better, the derivative will be zero mostly, with a blip at the point
2639
of discontinuity, and the blip will get higher and higher.
2640
In each of these pictures, for any interval of real numbers $[a,b]$ the total area under the red graph over that interval is equal to
2641
\begin{center}
2642
{\it the height of the blue graph at $x=b$}\\
2643
minus\\
2644
{\it the height of the blue graph at $x=a$}.
2645
\end{center}
2646
This is a manifestation of one of the fundamental facts of life of
2647
calculus\index{calculus} relating a function to its derivative:
2648
2649
\begin{quote} Given any real-valued function $F(x)$---that has a
2650
derivative---for any interval of real numbers $[a,b]$ the total
2651
signed\footnote{When $F(x)<0$ we count that area as negative.} area
2652
between the graph and the horizontal axis
2653
of the derivative of $F(x)$ over that interval is
2654
equal to $F(b)-F(a)$.
2655
\end{quote}
2656
What happens if we take the series of figures \ref{fig:djump1}--\ref{fig:djump4}, etc.,
2657
{\it to the limit}? This is quite curious:
2658
2659
\begin{itemize}
2660
\item {\bf the series of red graphs:} these are getting thinner and
2661
thinner and higher and higher: can we make any sense of what the
2662
red graph might mean in the limit (even though the only picture of
2663
it that we have at present makes it infinitely thin and infinitely
2664
high)?
2665
2666
\item {\bf the series of blue graphs:} these are happily looking
2667
more and more like the tame Figure~\ref{fig:jump}.
2668
\end{itemize}
2669
2670
Each of our red graphs is the derivative of the corresponding blue
2671
graph. It is tempting to think of the limit of the red
2672
graphs---whatever we might construe this to be---as standing for
2673
the derivative of the limit of the blue graphs, i.e., of the graph
2674
in Figure~\ref{fig:jump}.
2675
2676
Well, the temptation is so great that, in fact, mathematicians and
2677
physicists of the early twentieth century struggled to give a
2678
meaning to things like {\it the limit of the red graphs}---such
2679
things were initially called {\bf generalized functions}, which
2680
might be considered the derivative of {\it the limit of the blue
2681
graphs}, i.e., of the graph of Figure~\ref{fig:jump}.
2682
2683
2684
Of course, to achieve progress in mathematics, all the concepts
2685
that play a role in the theory have to be unambiguously defined,
2686
and it took a while before {\it generalized functions} such as the
2687
limit of our series of red graphs had been rigorously introduced.
2688
2689
But many of the great moments in the development of mathematics
2690
occur when mathematicians---requiring some concept not yet
2691
formalized---work with the concept tentatively, dismissing---if
2692
need be---mental tortures, in hopes that the experience they
2693
acquire by working with the concept will eventually help to put
2694
that concept on sure footing. For example, early mathematicians
2695
(Newton, Leibniz)|in replacing approximate speeds by instantaneous
2696
velocities by passing to limits|had to wait a while before later
2697
mathematicians (e.g., Weierstrass) gave a rigorous foundation for
2698
what they were doing.
2699
2700
2701
% From http://en.wikipedia.org/wiki/Karl_Weierstrass
2702
2703
% From http://en.wikipedia.org/wiki/Laurent_Schwartz
2704
\illtwo{weierstrass}{schwartz}{.3}{Karl Weierstrass (1815--1897) and Laurent Schwartz (1915--2002)\index{Schwartz, Laurent}}
2705
2706
Karl Weierstrass\index{Weierstrass, Karl}, who worked during the latter part of the nineteenth
2707
century, was known as the ``father of modern analysis.'' He oversaw
2708
one of the glorious moments of rigorization of concepts that were
2709
long in use, but never before systematically organized. He, and
2710
other analysts of the time, were interested in providing a rigorous
2711
language to talk about {\it functions}, and more specifically {\it
2712
continuous functions} and {\it smooth} (i.e., {\it differentiable})
2713
functions. They wished to have a firm understanding of limits (i.e.,
2714
of sequences of numbers, or of functions).
2715
2716
2717
For Weierstrass\index{Weierstrass, Karl} and his companions, even though the functions they
2718
worked with needn't be smooth, or continuous, at the very least, the
2719
functions they studied had {\it well-defined---and usually
2720
finite---values}. But our ``limit of red graphs'' is not so easily
2721
formalized as the concepts that occupied the efforts of
2722
Weierstrass.
2723
2724
Happily however, this general process of approximating
2725
discontinuous functions more and more exactly by smooth functions,
2726
and taking their derivatives to get the blip-functions as we have
2727
just seen in the red graphs above was eventually given a
2728
mathematically rigorous foundation; notably, by the French
2729
mathematician, Laurent Schwartz\index{Schwartz, Laurent} who provided a beautiful theory that
2730
we will not go into here, that made perfect sense of ``generalized
2731
functions'' such as our limit of the series of red graphs, and that
2732
allows mathematicians to work with these concepts with ease. These
2733
``generalized functions'' are called {\it distributions}\index{distribution} in
2734
Schwartz's\index{Schwartz, Laurent} theory \bibnote{
2735
\url{http://en.wikipedia.org/wiki/Distribution_\%28mathematics\%29}
2736
contains a more formal definition and treatment of distributions. Here is Schwartz's explanation for his choice of the word {\it distribution:} \begin{quote}``Why did we choose the name distribution?
2737
Because, if $\mu$ is a measure, i.e., a particular kind of
2738
distribution, it can be considered as a distribution of electric
2739
charges in the universe. Distributions give more general types of
2740
electric charges, for example dipoles and magnetic distributions.
2741
If we consider the dipole placed at the point $a$ having magnetic
2742
moment $M$, we easily see that it is defined by the distribution
2743
$-D_M \delta_{(a)}$. These objects occur in physics. Deny's
2744
thesis, which he defended shortly after, introduced electric
2745
distributions of finite energy, the only ones which really occur in
2746
practice; these objects really are distributions, and do not
2747
correspond to measures. Thus, distributions have two very
2748
different aspects: they are a generalization of the notion of
2749
function, and a generalization of the notion of distribution of
2750
electric charges in space. [...] Both these interpretations of
2751
distributions are currently used.''\end{quote}}.
2752
2753
2754
2755
2756
\chapter[Distributions]{Distributions:\index{distribution} sharpening our approximating functions even if
2757
we have to let them shoot out to infinity\label{sec:dist}}
2758
2759
2760
2761
The curious {\it limit of the red graphs} of the previous section,
2762
which you might be tempted to think of as a ``blip-function'' $f(t)$ that
2763
vanishes for $t$ nonzero and is somehow ``infinite'' (whatever that
2764
means) at $0$ is an example of a {\it generalized function} (in the
2765
sense of the earlier mathematicians) or a {\it distribution}\index{distribution} in the
2766
sense of Laurent Schwartz.\index{Schwartz, Laurent}
2767
2768
This particular {\it limit of the red graphs} also goes by another
2769
name (it is officially called a
2770
Dirac $\delta$-function\index{$\delta$-function} (see \bibnote{
2771
David Mumford suggested that we offer the following paragraph from\\
2772
\url{http://en.wikipedia.org/wiki/Dirac_delta_function}\\
2773
on the Dirac delta function:
2774
\begin{quote}
2775
An infinitesimal formula for an infinitely tall, unit impulse delta function (infinitesimal version of Cauchy distribution) explicitly appears in an 1827 text of Augustin Louis Cauchy. Sim\'{e}on Denis Poisson considered the issue in connection with the study of wave propagation as did Gustav Kirchhoff somewhat later. Kirchhoff and Hermann von Helmholtz also introduced the unit impulse as a limit of Gaussians, which also corresponded to Lord Kelvin's notion of a point heat source. At the end of the 19th century, Oliver Heaviside used formal Fourier series to manipulate the unit impulse. The Dirac delta function as such was introduced as a ``convenient notation'' by Paul Dirac in his influential 1930 book {\em Principles of Quantum Mechanics}. He called it the ``delta function'' since he used it as a continuous analogue of the discrete Kronecker delta.
2776
\end{quote}}),
2777
the adjective ``Dirac'' being in honor of the physicist who first worked with this
2778
concept, the ``$\delta$'' being the symbol he assigned to these
2779
objects). The noun ``function'' should be in quotation marks for,
2780
properly speaking, the Dirac $\delta$-function\index{$\delta$-function} is not---as we have
2781
explained above---a bona fide function but rather a distribution\index{distribution}.
2782
2783
\ill{dirac}{0.3}{Paul\index{Dirac, Paul} Adrien Maurice Dirac (1902--1984)}
2784
2785
2786
Now may be a good time to summarize what the major difference is
2787
between {\it honest functions} and {\it generalized functions}
2788
or {\it distribution\index{distribution}s}.
2789
2790
An honest (by which we mean {\it integrable}) function of a real
2791
variable $f(t)$ possesses two ``features.''
2792
2793
2794
\begin{itemize}
2795
\item {\bf It has values.} That is, at any real number $t$, e.g., $t
2796
=2$ or $t =0$ or $t =\pi$ etc., our function has a definite real
2797
number value ($f(2)$ or $f(0)$ or $f(\pi)$ etc.) {\it and if we know
2798
all those values we know the function.}
2799
2800
\item {\bf It has areas under its graph.} If we are given any interval
2801
of real numbers, say the interval between $a$ and $b$, we can talk
2802
unambiguously about the area ``under'' the graph of the function
2803
$f(t)$ over the interval between $a$ and $b$. That is, in the
2804
terminology of integral calculus,\index{calculus} we can talk of {\it the integral of $f(t)$
2805
from $a$ to $b$}. And in the notation of calculus,\index{calculus} this---thanks
2806
to Leibniz---is elegantly denoted
2807
$$\int_a^bf(t)dt.$$
2808
2809
\end{itemize}
2810
2811
\ill{oo_integral}{.8}{This figure illustrates
2812
$\int_{-\infty}^{\infty} f(x) dx$, which is the signed area
2813
between the graph of $f(x)$ and the $x$-axis, where area below
2814
the $x$-axis (yellow) counts negative, and area above (grey) is
2815
positive.}
2816
2817
2818
In contrast, a {\it generalized function} or {\it distribution\index{distribution}}~\begin{itemize}
2819
\item {\bf may not have ``definite values''} at all real numbers if it
2820
is not an honest function. Nevertheless,
2821
2822
\item {\bf It has well-defined areas under portions of its ``graph.''}
2823
If we are given any interval of real numbers, say the (open)
2824
interval between $a$ and $b$, we can still talk unambiguously about
2825
the {\it area ``under'' the graph of the generalized function $D(t)$
2826
over the interval between $a$ and~$b$} and we will denote
2827
this--extending what we do in ordinary calculus---by\index{calculus} the symbol
2828
$$\int_a^bD(t)dt.$$
2829
\end{itemize}
2830
2831
This description is important to bear in mind and it gives us a handy
2832
way of thinking about ``generalized functions'' (i.e., distribution\index{distribution}s)
2833
as opposed to functions: when we consider an (integrable) function of
2834
a real variable, $f(t)$, we may invoke its {\it value} at every real
2835
number and for every interval $[a,b]$ we may consider the quantity
2836
$\int_a^bf(t)dt$. BUT when we are given a generalized function $D(t)$
2837
we {\it only} have at our disposal the latter quantities. In fact, a
2838
generalized function of a real variable $D(t)$ is (formally) nothing
2839
more than a {\it rule} that assigns to any finite interval $(a,b]$ ($a
2840
\le b$) a quantity that we might denote $\int_a^bD(t)dt$ and that {\it
2841
behaves as if it were the integral of a function} and in
2842
particular---for three real numbers $a\le b\le c$ we have the
2843
additivity relation
2844
$$ \int_a^cD(t)dt \ = \ \int_a^bD(t)dt \ + \ \int_b^cD(t)dt.$$
2845
2846
SO, any honest function integrable over finite intervals clearly {\it
2847
is} a distribution\index{distribution} (forget about its values!) but $\dots$ there are
2848
many more generalized functions, and including them in our sights
2849
gives us a very important tool.
2850
2851
2852
It is natural to talk, as well, of Cauchy sequences, and limits, of
2853
distributions.\index{distribution} We'll say that such a sequence $D_1(t), D_2(t),
2854
D_3(t),\dots$ is a {\bf Cauchy sequence} if for every interval
2855
$[a,b]$ the quantities $$\int_a^bD_1(t)dt,\quad
2856
\int_a^bD_2(t)dt,\quad\int_a^bD_3(t)dt,\dots$$ form a Cauchy sequence
2857
of real numbers (so for any $\varepsilon>0$ eventually all terms
2858
in the sequence of real numbers
2859
are within $\varepsilon$ of each other).
2860
Now, any Cauchy sequence of distributions\index{distribution} {\it
2861
converges to a limiting distribution}\index{distribution} $D(t)$ which is defined by
2862
the rule that for every interval $[a,b]$,
2863
2864
$$ \int_a^bD(t)dt\ =\ \lim_{i \to \infty} \int_a^bD_i(t)dt.$$
2865
2866
2867
If, by the way, you have an infinite sequence---say---of honest,
2868
continuous, functions that converges uniformly to a limit (which
2869
will again be a continuous function) then that sequence certainly
2870
converges---in the above sense---to the same limit when these
2871
functions are viewed as generalized functions. BUT, there are many
2872
important occasions where your sequence of honest continuous
2873
functions doesn't have that convergence property and {\it yet} when
2874
these continuous functions are viewed as generalized functions they do converge to some
2875
generalized function as a limit. We will see this soon when we get
2876
back to the ``sequence of the red graphs.'' This sequence {\bf does}
2877
converge (in the above sense) to the Dirac $\delta$-function\index{$\delta$-function} when
2878
these red graphs are thought of as a sequence of generalized
2879
functions.
2880
2881
2882
2883
2884
The integral notation for distribution\index{distribution} is very useful, and allows us
2885
the flexibility to define, for nice enough---and honest---functions
2886
$c(t)$ useful expressions such as $$\int_a^bc(t)D(t).$$
2887
2888
\ill{dirac_delta}{0.5}{The Dirac $\delta$-``function'' (actually
2889
distribution),\index{distribution} where we draw a vertical arrow to illustrate the
2890
delta function with support at a given point.}
2891
2892
For example, the Dirac $\delta$-function\index{$\delta$-function} we have been discussing
2893
(i.e., the limit of the red graphs of Chapter~\ref{ch:calculusmanages})\index{calculus} {\it is}
2894
an honest function away from $t=3$ and---in fact---is the ``trivial
2895
function'' zero away from $3$. And at $3$, we may {\it say}
2896
that it has the ``value'' infinity, in honor of it being the limit of
2897
blip functions getting taller and taller at $3$. The feature that
2898
pins it down as a distribution\index{distribution} is given by its behavior relative to
2899
the second feature above, the area of its graph over the open
2900
interval $(a,b)$ between $a$ and $b$:
2901
2902
\begin{itemize}
2903
\item If $3$ is
2904
not in the open interval spanned by $a$ and $b$, then the ``area
2905
under the graph of our Dirac $\delta$-function''\index{$\delta$-function} over
2906
the interval $(a,b)$
2907
is $0$.
2908
\item If $3$ is in the open interval $(a,b)$, then the ``area under the
2909
graph of our Dirac $\delta$-function''\index{$\delta$-function} is $1$---in
2910
notation $$\int_a^b\delta = 1.$$
2911
\end{itemize}
2912
2913
2914
2915
2916
We sometimes summarize the fact that these areas vanish so long as $3$
2917
is not included in the interval we are considering by saying
2918
that the {\bf support} of this $\delta$-function\index{$\delta$-function} is ``at $3$.''
2919
2920
2921
Once you're happy with {\it this} Dirac $\delta$-function,\index{$\delta$-function} you'll also
2922
be happy with a Dirac $\delta$-function---call\index{$\delta$-function} it $\delta_x$---with
2923
support concentrated at any specific real number~$x$.
2924
This $\delta_x$ vanishes for $t \ne x$ and intuitively speaking, has an
2925
{\it infinite blip} at $t=x$.
2926
2927
So, the original delta-function we were discussing, i.e., $\delta(t)$
2928
would be denoted $\delta_3(t)$.
2929
2930
\noindent {\bf A question:} If you've never seen distribution\index{distribution}s
2931
before, but know the Riemann integral, can you guess at what the
2932
definition of $\int_a^bc(t)D(t)$ is, and can you formulate hypotheses
2933
on $c(t)$ that would allow you to endow this expression with a
2934
definite meaning?
2935
2936
2937
\noindent {\bf A second question:} If you have not seen distribution\index{distribution}s
2938
before, and have answered the first question above, let
2939
$c(t)$ be an honest function for which your definition
2940
of $$\int_a^bc(t)D(t)$$ applies. Now let $x$ be a real number.
2941
Can you use your definition to compute
2942
2943
$$\int_{-{\infty}}^{+{\infty}}c(t)\delta_x(t)?$$
2944
2945
The answer to this second question, by the way, is:
2946
$\int_{-{\infty}}^{+{\infty}}c(t)\delta_x(t)=c(x).$ This will be useful in the later sections!
2947
2948
2949
The theory of distributions\index{distribution} gives a partial answer to the following funny question:
2950
2951
2952
\begin{quote} How in the world can you ``take the derivative'' of a
2953
function $F(t)$ that doesn't have a derivative?
2954
\end{quote}
2955
2956
The short answer to this question is that {\it this derivative $F'(t)$
2957
which doesn't exist as a function may exist as a distribution\index{distribution}.}
2958
What then is the integral of that distribution?\index{distribution} Well, it is given by
2959
the original function!
2960
2961
$$\int_a^bF'(t)dt \ = \ F(b) -F(a).$$
2962
2963
Let us practice this with simple staircase functions. For example,
2964
what is the {\it derivative}---in the sense of the theory of
2965
distributions---of\index{distribution} the function in Figure~\ref{fig:simple_staircase}?
2966
{\bf Answer:} $\delta_0 + 2 \delta_1$.
2967
2968
2969
\ill{simple_staircase}{.6}{The staircase function that is $0$ for $t
2970
\le 0$, $1$ for $0 <t \le 1$ and $3$ for $1< t \le 2$ has derivative
2971
$\delta_0 + 2\delta_1$.\label{fig:simple_staircase}}
2972
2973
2974
We'll be dealing with much more complicated staircase functions in the
2975
next chapter, but the general principles discussed here will nicely
2976
apply there \bibnote{As discussed in\\ \url{http://en.wikipedia.org/wiki/Distribution_\%28mathematics\%29},
2977
``generalized functions'' were introduced by Sergei
2978
Sobolev in the 1930s, then later
2979
independently introduced in the late
2980
1940s by Laurent Schwartz, who developed a comprehensive theory of
2981
distributions.
2982
}.
2983
2984
2985
\chapter{Fourier transforms: second visit}
2986
2987
In Chapter~\ref{sec:fourier1} above we wrote:
2988
2989
\begin{quote} The operation that starts with a graph and goes to its
2990
spectral picture that records the frequencies, amplitudes, and
2991
phases of the pure sine waves that, together, compose the graph is
2992
called the {\bf Fourier transform}.
2993
\end{quote}
2994
2995
2996
Now let's take a closer look at this operation {\it Fourier transform}.
2997
2998
We will focus our discussion on an {\bf even} function $f(t)$ of a
2999
real variable $t$. ``{\bf Even}'' means that its graph is symmetric
3000
about the $y$-axis; that is, $f(-t)= f(t)$. See
3001
Figure~\ref{fig:even_function}.
3002
3003
\ill{even_function}{.8}{The graph of an even function is symmetrical
3004
about the $y$-axis.\label{fig:even_function}}
3005
3006
When we get to apply this discussion to the {\it staircase of
3007
primes} $\pi(t)$ or the {\it tinkered staircase of primes}
3008
$\psi(t)$, both of which being defined only for positive values of
3009
$t$, then we would ``lose little information'' in our quest to
3010
understand them if we simply ``symmetrized their graphs'' by
3011
defining their values on negative numbers $-t$ via the formulas
3012
$\pi(-t)=\pi(t)$ and $\psi(-t)=\psi(t)$ thereby turning each of them
3013
into {\it even functions}.
3014
3015
\ill{even_pi}{.8}{Even extension of the staircase of primes.}
3016
3017
The idea behind the Fourier transform is to express $f(t)$ as {\it
3018
made up out of sine and cosine wave functions}. Since we have
3019
agreed to consider only even functions, we can dispense with the sine
3020
waves---they won't appear in our Fourier analysis\index{Fourier analysis}---and ask how to
3021
reconstruct $f(t)$ as a {\it sum} (with coefficients) of cosine
3022
functions (if only finitely many frequencies occur in the spectrum\index{spectrum} of
3023
our function) or more generally, as an {\it integral} if the spectrum\index{spectrum}
3024
is more elaborate. For this work, we need a little machine that tells
3025
us, for each real number $\theta$, whether or not $\theta$ is in the
3026
spectrum\index{spectrum} of $f(t)$, and if so, what the amplitude is of the cosine
3027
function $\cos(\theta t)$ that occurs in the Fourier expansion of
3028
$f(t)$---this amplitude answers the awkwardly phrased question:
3029
3030
{\it
3031
How much $\cos(\theta t)$ ``occurs in'' $f(t)$?}
3032
3033
We will denote this
3034
amplitude by ${\hat f}(\theta)$, and refer to it as {\bf the Fourier
3035
transform} of $f(t)$. The {\bf spectrum},\index{spectrum} then, of $f(t)$ is the
3036
set of all frequencies $\theta$ where the amplitude is nonzero.
3037
3038
\ill{fourier_machine}{.6}{The Fourier transform machine, which transforms $f(t)$ into ${\hat f}(\theta)$\label{fig:fourier_machine}}
3039
3040
3041
Now in certain easy circumstances---specifically, if
3042
$\int_{-{\infty}}^{+{\infty}}|f(t)|dt$ (exists, and)
3043
is finite---integral calculus\index{calculus} provides us with an easy construction of that
3044
machine (see Figure~\ref{fig:fourier_machine}); namely:
3045
3046
$${\hat f}(\theta) = \int_{-{\infty}}^{+{\infty}}f(t)\cos(-\theta t)dt.$$
3047
3048
3049
This concise machine manages to ``pick out'' just the part of
3050
$f(t)$ that has frequency $\theta$! It provides for us the {\it
3051
analysis} part of the Fourier analysis\index{Fourier analysis} of our function
3052
$f(t)$.
3053
3054
But there is a {\it synthesis} part to our work as well,
3055
for we can reconstruct $f(t)$ from its Fourier transform, by a
3056
process intriguingly similar to the analysis part; namely: if
3057
$\int_{-{\infty}}^{+{\infty}}|{\hat f}(\theta)|d\theta$ (exists, and) is finite, we retrieve $f(t)$ by the integral
3058
3059
$$f(t) = {\frac{1}{2\pi}}\int_{-{\infty}}^{+{\infty}}{\hat f}(\theta)\cos(\theta t)d\theta.$$
3060
3061
We are not so lucky to have
3062
$\int_{-{\infty}}^{+{\infty}}|f(t)|dt$ finite when we try our
3063
hand at a Fourier analysis\index{Fourier analysis} of the staircase of primes, but we'll
3064
work around this!
3065
3066
3067
3068
\chapter[Fourier transform of delta]{What is the Fourier transform of a delta function?\label{sec:ftdelta}}
3069
3070
Consider the $\delta$-function\index{$\delta$-function} that we denoted $\delta(t)$ (or
3071
$\delta_0(t)$). This is also the ``generalized function'' that we
3072
thought of as the ``limit of the red graphs'' in Chapter~\ref{sec:dist}
3073
above. Even though $\delta(t)$ is a distribution\index{distribution} and {\it not} a bona
3074
fide function, it is symmetric about the origin, and
3075
also $$\int_{-{\infty}}^{+{\infty}}|\delta(t)|dt$$ exists, and is
3076
finite (its value is, in fact, $1$). All this means that,
3077
appropriately understood, the discussion of the previous section
3078
applies, and we can {\it feed} this delta-function into our Fourier
3079
Transform Machine (Figure~\ref{fig:fourier_machine}) to see what
3080
frequencies and amplitudes arise in our attempt to express---whatever
3081
this means!---the delta-function as a sum, or an integral, of cosine
3082
functions.
3083
3084
3085
So what is the Fourier transform, ${\hat \delta_0}(\theta)$, of the delta-function?
3086
3087
3088
Well, the general formula would give us:
3089
$$ {\hat \delta_0}(\theta) = \int_{-\infty}^{+\infty}\cos(-\theta t)\delta_0(t)dt.$$
3090
For any nice function $c(t)$ we
3091
have that the integral of the product of $c(t)$ by the distribution\index{distribution}
3092
$\delta_x(t)$ is given by the {\it value} of the function $c(t)$ at
3093
$t=x$. So:
3094
3095
$$ {\hat \delta_0}(\theta) = \int_{-\infty}^{+\infty}\cos(-\theta t)\delta_0(t)dt = \cos(0) = 1.$$
3096
3097
3098
In other words, the Fourier transform of $\delta_0(t)$ is the constant
3099
function $$ {\hat \delta_0}(\theta)=1.$$ One can think of this
3100
colloquially as saying that the delta-function is a perfect example of
3101
{\it white noise} in that {\it every} frequency occurs in its Fourier
3102
analysis and they all occur in equal amounts.
3103
3104
To generalize this computation, let us consider for any real number $x$
3105
the symmetrized delta-function with support at $x$ and $-x$, given
3106
by $$d_x(t) \ = \ (\delta_x(t) + \delta_{-x}(t))/2$$
3107
in Figure~\ref{fig:two_delta}.
3108
3109
\ill{two_delta}{0.4}{The sum $(\delta_x(t) + \delta_{-x}(t))/2$, where we draw vertical arrows to illustrate the Dirac delta functions.\label{fig:two_delta}}
3110
3111
What is the Fourier transform of this $d_x(t)$? The answer is
3112
given by making the same computation as we've just made:
3113
3114
\begin{align*}\label{dx}
3115
{\hat d_x}(\theta) &= {\frac{1}{2}}\left(\int_{-\infty}^{+\infty}\cos(-\theta t)\delta_x(t)dt + \int_{-\infty}^{+\infty}\cos(-\theta t)\delta_{-x}(t)dt\right)\\
3116
&= {\frac{1}{2}}\big(\cos(-\theta x)+ \cos(+\theta x)\big)\\
3117
&= \cos(x\theta)
3118
\end{align*}
3119
3120
3121
To summarize this in ridiculous (!) colloquial terms: {\it for any
3122
frequency $\theta$ the amount of $\cos(\theta t)$ you need to build
3123
up the generalized function $(\delta_x(t) + \delta_{-x}(t))/2$ is
3124
$\cos(x\theta).$ }
3125
3126
3127
So far, so good, but remember that the theory of the Fourier transform
3128
has---like much of mathematics---two parts: an {\it analysis part} and
3129
a {\it synthesis} part. We've just performed the {\it analysis} part
3130
of the theory for these symmetrized delta functions $(\delta_x(t) +
3131
\delta_{-x}(t))/2$.
3132
3133
Can we synthesize them---i.e., build them up again---from their Fourier transforms?
3134
3135
3136
We'll leave this, at least for now, as a question for you.
3137
3138
%\chapter{Supports, Spectra, Blips and Spikes}
3139
3140
%(to be written)
3141
3142
% Here is where we will
3143
% be really justifying our heavy discussion of distribution\index{distribution}s,
3144
% trigonometric sums, etc.
3145
3146
\chapter{Trigonometric series}\label{ch:trigseries}
3147
Given our interest in the ideas of Fourier, it is not surprising that
3148
we'll want to deal with things like $$F(\theta) = \sum_{k=1}^{\infty}
3149
a_k\cos(s_k\cdot \theta)$$ where the $s_k$ are real numbers tending
3150
(strictly monotonically) to infinity. These we'll just call {\bf
3151
trigonometric series} without asking whether they converge in any
3152
sense for all values of $\theta$, or even for {\it any} value of $\theta$. The
3153
$s_k$'s that occur in such a trigonometric series we will call the
3154
{\bf spectral values} or for short, the {\bf spectrum}\index{spectrum} of the series,
3155
and the $a_k$'s the (corresponding) {\bf amplitudes}. We repeat that
3156
we impose no convergence requirements at all. But we also think of
3157
these things as providing ``cutoff'' finite trigonometric sums, which
3158
we think of as functions of two variables, $\theta$ and $C$ (the
3159
``cutoff'') where $$F(\theta,C): = \sum_{s_k\le C} a_k\cos(s_k\cdot \theta).$$ These functions $F(\theta,C)$ are finite trigonometric series and therefore ``honest functions" having finite values everywhere.
3160
3161
3162
%\section{Trigonometric Series as Fourier Transforms}
3163
3164
3165
Recall, as in Chapter \ref{sec:ftdelta}, that for any real number $x$, we considered
3166
the symmetrized delta-function with support at $x$ and $-x$, given
3167
by
3168
$$
3169
d_x(t) \ = \ (\delta_x(t) + \delta_{-x}(t))/2,
3170
$$
3171
and noted that the Fourier transform of this $d_x(t)$ is
3172
$$
3173
\label{dx2}{\hat d_x}(\theta) = \cos(x\theta).
3174
$$
3175
\ill{two_delta}{0.4}{The sum $(\delta_x(t) + \delta_{-x}(t))/2$,
3176
where we draw vertical arrows to illustrate the Dirac delta functions.}
3177
3178
It follows, of course, that a cutoff finite trigonometric series,
3179
$F(\theta,C)$ associated to an infinite trigonometric series\index{trigonometric series}
3180
$$
3181
F(\theta) = \sum_{k=1}^{\infty} a_k\cos(s_k\cdot \theta)
3182
$$
3183
is the Fourier transform of the distribution\index{distribution}
3184
$$
3185
D(t,C):=\sum_{s_k \le C}a_kd_{s_k}(t).
3186
$$
3187
Given the discreteness of the set of spectral values $s_k$ ($k=1,2,\dots$) we can
3188
consider the infinite sum
3189
$$
3190
D(t):=\sum_{k=1}^{\infty}a_kd_{s_k}(t),
3191
$$ viewed as distribution\index{distribution} playing the role of
3192
the ``inverse Fourier transform" of our trigonometric
3193
series $F(t)$.
3194
3195
3196
%\section{\bf Spike values}
3197
3198
\begin{definition} Say
3199
that a trigonometric series $F(\theta)$ has a {\bf spike} at the real number $\theta=\tau \in {\bf R}$
3200
if the set of absolute values $|F(\tau,C)|$ as $C$ ranges through positive number cutoffs is
3201
unbounded. A real number $\tau \in {\bf R}$ is, in contrast, a {\bf
3202
non-spike} if those values admit a finite upper
3203
bound. \end{definition}
3204
3205
% One mission in this book will be furthered just by paying attention to the %spike values of certain (infinite) trigonometric series.
3206
3207
3208
% The trigonometric sums we study are
3209
% going to be particularly nice. Under the assumption of the Riemann
3210
% Hypothesis, they will have the
3211
% property that the set of their spike values are particularly interesting {\it %discrete}
3212
% subsets of the real line.
3213
3214
In the chapters that follow we will be exhibiting graphs of trigonometric functions, cutoff at various values of $C$, that (in our opinion) strongly hint that as $C$ goes to infinity, one gets convergence to certain (discrete) sequences of very interesting {\it spike values}. To be sure, no finite computation, exhibited by a graph, can {\it prove} that this is in fact the case. But on the one hand, the vividness of those spikes is in itself worth experiencing, and on the other hand, given RH, there is justification that the {\it strong hints} are not misleading; for some theoretical background see the endnotes.
3215
3216
3217
3218
% WARNING: I had to hard-code the III in the title, since
3219
% otherwise it would be ?? in some places!! (Maybe due to limitations
3220
% of latex.)
3221
%\chapter{A sneak preview of Part~\ref{part3}}\label{snpr}
3222
\chapter{A sneak preview of Part~III}\label{snpr}
3223
3224
In this chapter, as a striking illustration of the type of phenomena that will be studied in Part~\ref{part3}, we will consider two infinite trigonometric sums---that seem to be related one to the other in that the {\it frequencies} of the terms in the one trigonometric sum give the {\it spike values} of the other, and vice versa: the {\it frequencies} of the other give the {\it spike values} of the one: a kind of duality as in the theory of Fourier transforms. We show this duality by exhibiting the graphs of more and more accurate finite approximations (cutoffs) of these infinite sums. More specifically,
3225
3226
\begin{enumerate}\item The first infinite trigonometric sum $F(t)$ is a sum{\footnote{Here we
3227
make use of the Greek symbol $\sum$ as a shorthand way of
3228
expressing a sum of many terms. We are not requesting this sum to
3229
converge.}} of pure cosine waves with frequencies given by {\it logarithms of powers of primes} and with amplitudes given by the formula $$F(t):=
3230
-\sum_{p^n}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))
3231
$$ the summation being over all powers $p^n$ of all prime numbers\index{prime number} $p$.
3232
3233
The graphs of longer and longer finite truncations of these trigonometric sums, as you will see, have ``higher and higher peaks" concentrated more and more accurately at a {\it certain infinite discrete set of real numbers} that we will be referring to as {\bf the Riemann spectrum}, indicated in our pictures below (Figures~\ref{fig:pnsum5}--\ref{fig:pnsum500}) by the series of vertical red lines.
3234
3235
\item In contrast, the second infinite trigonometric sum $H(t)$ is a sum of pure cosine waves with frequencies given by what we have dubbed above {\it the Riemann spectrum} and with amplitudes all equal to $1$.
3236
3237
$$
3238
H(s):=1+\sum_{\theta}\cos(\theta \log(s)).
3239
$$ These graphs will have ``higher and higher peaks" concentrated more and more accurately at {\bf the logarithms of powers of primes} indicated in our pictures below (see Figure~\ref{fig:phihat}) by the series of vertical blue spikes.
3240
3241
\end{enumerate}
3242
3243
3244
3245
3246
That the series of {\it blue lines} (i.e., the logarithms of powers of
3247
primes) in our pictures below determines---via the trigonometric
3248
sums we describe---the series of {\it red lines} (i.e., what we are
3249
calling the spectrum)\index{spectrum} and conversely is a consequence of the Riemann
3250
Hypothesis.
3251
\begin{enumerate}
3252
3253
\item {\bf Viewing the Riemann spectrum as the spike values of a trigonometric series with
3254
frequencies equal to (logs of) powers of the primes:}
3255
3256
To get warmed up, let's plot the positive values of the following
3257
sum of (co-)sine waves:
3258
3259
\begin{align*}
3260
f(t) =& -{\frac{\log(2)}{2^{1/2}}}\cos(t\log(2))- {\frac{\log(3)}{3^{1/2}}}\cos(t\log(3))\\
3261
&\qquad -{\frac{\log(2)}{4^{1/2}}}\cos(t\log(4))-{\frac{\log(5)}{5^{1/2}}}\cos(t\log(5))
3262
\end{align*}
3263
3264
3265
3266
\ill{mini_phihat_even}{1}{Plot of $f(t)$\label{fig:mini_phihat_even}}
3267
3268
3269
Look at the peaks of Figure~\ref{fig:mini_phihat_even}. There is nothing very impressive
3270
about them, you might think; but wait, for $f(t)$ is just a very ``early''
3271
piece of the infinite trigonometric sum $F(t)$ described above.
3272
3273
3274
3275
Let us truncate the infinite sum $F(t)$ taking only finitely many terms, by
3276
choosing various ``cutoff values'' $C$ and forming the finite sums
3277
$$
3278
F_{\le C}(t):= -\sum_{p^n\leq C}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))
3279
$$
3280
and plotting their positive
3281
values. Figures~\ref{fig:pnsum5}--\ref{fig:pnsum500} show what we get
3282
for a few values of $C$.
3283
3284
3285
In each of the graphs, we have indicated by red vertical arrows the
3286
real numbers that give the values of the {\it Riemann spectrum}\index{Riemann spectrum} that we will
3287
be discussing. These numbers at the red
3288
vertical arrows in Figures~\ref{fig:pnsum5}--\ref{fig:pnsum500},
3289
$$
3290
\theta_1, \theta_2, \theta_3, \dots
3291
$$
3292
are {\it spike values}---as described in Chapter~\ref{ch:trigseries}---of the infinite trigonometric series\index{trigonometric series}
3293
$$
3294
-\sum_{p^n<C}{\frac{\log(p)}{p^{n/2}}} \cos(t \log(p^n)).
3295
$$
3296
They constitute what we are calling the Riemann spectrum\index{Riemann spectrum} and are key to the staircase of primes
3297
\bibnote{If the \RH{} holds
3298
they are precisely the {\it imaginary parts of the
3299
``nontrivial'' zeroes of the Riemann zeta function}.}.
3300
3301
\begin{itemize}
3302
\item {\bf The sum with $p^n \le C=5$}
3303
3304
In Figure~\ref{fig:pnsum5} we plot the function $f(t)$ displayed above; it consists of the sum of the first four terms of our infinite sum, and doesn't yet show very much ``structure'':
3305
3306
\ill{phihat_even-5}{1}{Plot of $-\sum_{p^n\leq 5}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with
3307
arrows pointing to the spectrum\index{spectrum} of the primes\label{fig:pnsum5}}
3308
3309
\vskip20pt
3310
\item {\bf The sum with $p^n \le C=20$}
3311
3312
Something,
3313
(don't you agree?) is already beginning to happen
3314
in the graph in Figure~\ref{phihat_even-20}:
3315
\ill{phihat_even-20}{1}{Plot of $-\sum_{p^n\leq 20}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with arrows pointing to the spectrum\index{spectrum} of the primes\label{phihat_even-20}}\vskip20pt
3316
\item {\bf The sum with $p^n \le C=50$}
3317
3318
Note that the high peaks in Figure~\ref{fig:phihat_even-50}
3319
seem to be lining up more accurately with
3320
the vertical red lines. Note also that the $y$-axis has been
3321
rescaled.
3322
3323
\ill{phihat_even-50}{1}{Plot of $-\sum_{p^n\leq 50}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with arrows pointing to the spectrum\index{spectrum} of the primes\label{fig:phihat_even-50}}\vskip20pt
3324
\item {\bf The sum with $p^n \le C=500$}
3325
3326
Here, the peaks are even sharper, and note that again they are
3327
higher; that is, we have rescaled the $y$-axis.
3328
\ill{phihat_even-500}{1}{Plot of $-\sum_{p^n\leq
3329
500}{\frac{\log(p)}{p^{n/2}}}\cos(t\log(p^n))$ with arrows
3330
pointing to the spectrum\index{spectrum} of the primes\label{fig:pnsum500}}
3331
\end{itemize}
3332
3333
We will pay attention to:
3334
3335
\begin{itemize}
3336
\item how the spikes ``play out'' as we take the sums of longer and longer
3337
pieces of the infinite sum of cosine waves above, given by larger
3338
and larger cutoffs $C$,
3339
\item how this spectrum\index{spectrum} of red lines more closely matches the high
3340
peaks of the graphs of the positive values of these finite sums,
3341
\item how these peaks are climbing higher and higher,
3342
\item what relationship these peaks have to the Fourier analysis\index{Fourier analysis} of the
3343
staircase of primes,
3344
\item and, equally importantly, what these mysterious red lines
3345
signify.
3346
\end{itemize}
3347
3348
\item{\bf Toward (logs of) powers of the primes, starting from the Riemann spectrum\index{Riemann spectrum}:}
3349
3350
Here we will be making use of the series of numbers $$\theta_1,
3351
\theta_2, \theta_3, \dots$$ comprising what we called the {\it
3352
spectrum}\index{spectrum}. We consider the infinite trigonometric series\index{trigonometric series}
3353
$$
3354
G(t):= 1 + \cos(\theta_1t)+\cos(\theta_2t)+\cos(\theta_3t)+\dots
3355
$$
3356
or, using the $\sum$ notation,
3357
$$
3358
G(t):= 1+\sum_{\theta}\cos(\theta t)
3359
$$
3360
where the summation is over the spectrum\index{spectrum}, $\theta=\theta_1, \theta_2,
3361
\theta_3, \dots$. Again we will consider finite cutoffs $C$ of this
3362
infinite trigonometric sum (on a logarithmic scale),
3363
$$
3364
H_{\le C}(s):= 1+\sum_{i \leq C}\cos(\log(s) \theta_i)
3365
$$
3366
and to see the spikes in $H_{\le 1000}(s)$
3367
consider Figure~\ref{fig:phihat}.
3368
3369
\ill{phi_cos_sum_2_30_1000}{.8}{Illustration of $-\sum_{i=1}^{1000}
3370
\cos(\log(s)\theta_i)$, where $\theta_1 \sim 14.13, \ldots$ are the
3371
first $1000$ contributions to the spectrum\index{spectrum}. The red dots
3372
are at the prime powers $p^n$, whose size is proportional to
3373
$\log(p)$.\label{fig:phihat}}
3374
3375
\end{enumerate}
3376
3377
This passage---thanks to the Riemann Hypothesis---from spectrum\index{spectrum} to
3378
prime powers and back again via consideration of the ``high peaks'' in
3379
the graphs of the appropriate trigonometric sums provides a kind of
3380
visual duality emphasizing, for us, that the information inherent in
3381
the wild spacing of prime powers, is somehow ``packaged'' in the Riemann spectrum\index{Riemann spectrum}, and reciprocally,
3382
the information given in that series of mysterious numbers is
3383
obtainable from the sequence of prime powers.
3384
3385
3386
3387
\part{The Riemann Spectrum of the Prime Numbers\label{part3}}\index{prime number}
3388
3389
3390
\chapter{On losing no information\label{sec:loseno}}
3391
3392
To manage to repackage the ``same'' data in various ways---where each
3393
way brings out some features that would be kept in the shadows if the
3394
data were packaged in some different way---is a high art, in
3395
mathematics. In a sense {\it every} mathematical equation does this,
3396
for the ``equal sign'' in the middle of the equation tells us that
3397
even though the two sides of the equation may seem different, or have
3398
different shapes, they are nonetheless ``the same data.'' For
3399
example, the equation
3400
3401
$$\log(XY) = \log(X) + \log(Y)$$
3402
3403
which we encountered earlier in Chapter~\ref{sec:firstguess}, is
3404
just two ways of looking at the same thing, yet it was the basis for
3405
much manual calculation for several centuries.
3406
3407
3408
3409
3410
Now, the problem we have been concentrating on, in this book, has
3411
been---in effect---to understand the pattern, if we can call it
3412
that, given by the placement of prime numbers\index{prime number} among the natural
3413
line-up of all whole numbers.
3414
\ill{primes_line}{1}{Prime numbers up to $37$}
3415
3416
There are, of course, many ways for us to present this basic
3417
pattern. Our initial strategy was to focus attention on the {\it
3418
staircase of primes} which gives us a vivid portrait, if you wish,
3419
of the order of appearance of primes among all numbers.
3420
\ill{PN_38}{.9}{Prime numbers up to $37$}
3421
3422
As we have already hinted in the previous sections, however, there
3423
are various ways open to us to tinker with---and significantly
3424
modify---our staircase {\it without losing the essential information
3425
it contains}. Of course, there is always the danger of modifying
3426
things in such a way that ``retrieval'' of the original data becomes
3427
difficult. Moreover, we had better remember every change we have
3428
made if we are to have any hope of retrieving the original data!
3429
3430
With this in mind, let us go back to Chapter~\ref{sec:information}
3431
(discussing the staircase of primes) and Chapter~\ref{sec:tinkering},
3432
where we tinkered with the original staircase of primes---alias: the
3433
graph of $\pi(X)$---to get $\psi(X)$ whose risers look---from
3434
afar---as if they approximated the 45 degree staircase.% (see Figure~\ref{fig:psi38} below).
3435
3436
3437
At this point we'll do some further carpentry on $\psi(X)$ {\it without destroying the valuable
3438
information it contains}. We will be replacing $\psi(X)$ by a generalized function, i.e., a
3439
distribution,\index{distribution} which we denote $\Phi(t)$ that has support at all positive integral multiples
3440
of logs of prime numbers,\index{prime number} and is zero on the complement of that discrete set. Recall that
3441
by definition, a discrete subset $S$ of real numbers is the {\bf support} of a function,
3442
or of a distribution,\index{distribution} if the function vanishes on the complement of $S$ and doesn't vanish
3443
on the complement of any proper subset of $S$.
3444
3445
Given the mission of our book, it may be less important for us to elaborate on the construction
3446
of $\Phi(t)$ than it is {\bf (a)} to note that $\Phi(t)$ contains all the valuable information
3447
that $\psi(X)$ has and {\bf (b)} to pay close attention to the spike values of the trigonometric
3448
series that is the Fourier transform of $\Phi(t)$.
3449
3450
For the definition of the distribution\index{distribution} $\Phi(t)$ see the end-note
3451
\bibnote{
3452
{\it The construction of $ \Phi(t)$ from $\psi(X)$:}
3453
3454
Succinctly, for positive real numbers $t$, $$\Phi(t): \ = \ e^{- t/2}\Psi'(t),$$ where $\Psi(t) = \psi(e^t)$
3455
(see Figure~\ref{fig:psi_200}), and $\Psi'$ is the derivative of $\Psi(t)$, viewed as a distribution. We extend this function to all real arguments $t$ by requiring $\Phi(t)$ to be an even function of $t$, i.e., $\Phi(-t)= \Phi(t)$.
3456
But, to review this at a more leisurely pace,
3457
\begin{enumerate}
3458
3459
\item Distort the $X$-axis of our staircase by replacing the
3460
variable $X$ by $e^t$ to get the function $$\Psi(t):=
3461
\psi(e^t).$$ No harm is done by this for we can retrieve our
3462
original $\psi(X)$ as $$\psi(X) = \Psi(\log(X)).$$ Our distorted
3463
staircase has risers at ($0$ and) all positive integral multiples
3464
of logs of prime numbers.
3465
3466
3467
% \illtwo{psi_38}{bigPsi_38}{.4}{The two staircases $\psi$ (left) and $\Psi$ (right). Notice that the plot of $\Psi$ on the right is just the plot of $\psi$ but with the $X$-axis plotted on a logarithmic scale.\label{fig:psi38}}
3468
3469
3470
\item Now we'll do something that might seem a bit more brutal: {\it
3471
take the derivative of this distorted staircase $\Psi(t)$.} This
3472
derivative $\Psi'(t)$ is a {\it generalized} function with support
3473
at all nonnegative integral multiples of logs of prime numbers.
3474
3475
\vskip20pt
3476
\ill{bigPsi_prime}{1}{$\Psi'(t)$ is a (weighted) sum of Dirac delta functions at the logarithms of prime powers $p^n$ weighted by $\log(p)$ (and by $\log(2\pi)$ at $0$). The taller the arrow, the larger the weight.}
3477
3478
\item Now---for normalization purposes---multiply $\Psi'(t)$ by the
3479
function $e^{-t/2}$ which has no effect whatsoever on the support.
3480
\end{enumerate}
3481
\vskip20pt\ill{psi_200}{.4}{Illustration of the staircase $\psi(X)$ constructed in Chapter~\ref{sec:tinkering} that counts weighted prime powers.\label{fig:psi_200}}
3482
3483
%So what happens when we take the derivative---in the sense of
3484
%distributions---of a complicated staircase? For example, see
3485
%%Figure~\ref{fig:psi_200}. Well, we would have blip-functions (alias:
3486
%%Dirac $\delta$-functions) at each point of discontinuity of $\Psi(X)$;
3487
%that is, at $X=$ any power of a prime.
3488
3489
3490
In summary: The generalized function that resulted from the above carpentry:
3491
$$\Phi(t) = e^{-t/2}\Psi'(t),$$ retains the information we want (the placement of primes) even if in a slightly different format.
3492
}.% end bibnote
3493
3494
3495
A distribution\index{distribution} that has a discrete set of real numbers as its
3496
support---as $\Phi(t)$ does---we sometimes like to call {\bf spike
3497
distributions}\index{distribution} since the pictures of functions approximating it
3498
tend to look like a series of spikes.
3499
3500
We have then before us a spike distribution\index{distribution} with support at integral
3501
multiples of logarithms of prime numbers\index{prime number}, and this generalized
3502
function retains the essential information about the placement of
3503
prime numbers among all whole numbers, and will be playing a major
3504
role in our story: knowledge of the placement of the ``blips'' constituting this distribution\index{distribution} (its
3505
support), being at integral multiples of logs of prime numbers, would allow us to reconstruct the position of the prime
3506
numbers among all numbers. Of course there are many other ways to package this vital information, so we
3507
must explain our motivation for subjecting our poor initial staircase
3508
to the particular series of brutal acts of distortion that we
3509
described, which ends up with the distribution\index{distribution} $\Phi(t)$.
3510
3511
3512
3513
3514
3515
3516
3517
\chapter[From primes to the Riemann spectrum]{Going from
3518
the primes to the Riemann spectrum}
3519
\label{ch:prime-to-spectrum}\index{Riemann spectrum}
3520
%\todo{This chapter has not been fully written, there are some
3521
% mathematical issues that we don't yet explain, and we will need to
3522
% work out in order to do a satisfactory job here.}
3523
3524
3525
3526
We discussed the nature of the Fourier transform of (symmetrized)
3527
$\delta$-functions\index{$\delta$-function} in Chapter~\ref{sec:ftdelta}. In particular, recall
3528
the ``spike function'' $$d_x(t) \ = \ (\delta_x(t) +
3529
\delta_{-x}(t))/2$$ that has support at the points $x$ and $-x$. We
3530
mentioned that its Fourier transform, ${\hat d}_x(\theta)$, is equal
3531
to $\cos(x\theta)$ (and gave some hints about why this may be true).
3532
3533
3534
Our next goal is to work with the much more interesting ``spike
3535
function'' $$\Phi(t) \ = \ e^{- t/2}\Psi'(t),$$ which was one of the
3536
generalized functions that we engineered in Chapter~\ref{sec:loseno},
3537
and that has support at all nonnegative integral multiples of
3538
logarithms of prime numbers\index{prime number}.
3539
3540
3541
3542
3543
As with any function---or generalized function---defined for
3544
non-negative values of $t$, we can ``symmetrize it'' (about the
3545
$t$-axis) which means that we can define it on negative real numbers
3546
by the equation
3547
$$\Phi(-t) = \Phi(t).$$
3548
Let us make that convention, thereby turning $\Phi(t)$ into an {\it
3549
even} generalized function, as illustrated in Figure~\ref{fig:bigPhi}. (An {\bf even} function on the real line is a function that takes the same value on any real number and its negative as in the formula above.)
3550
3551
%\vskip20pt{\bf William: sum of Dirac delta functions Figure goes here}\vskip20pt
3552
\ill{bigPhi}{1}{$\Phi(t)$ is a sum of Dirac delta functions at the logarithms of prime powers $p^n$ weighted by $p^{-n/2}\log(p)$ (and $\log(2\pi)$ at $0$).\label{fig:bigPhi}}
3553
3554
3555
We may want to think of $\Phi(t)$ as a limit of this sequence of distribution\index{distribution}s,
3556
$$\Phi(t)\ = \ \lim_{C \to {\infty}}\Phi_{\le C}(t)$$
3557
where $\Phi_{\le C}(t)$ is the following finite linear
3558
combination of (symmetrized) $\delta$-functions\index{$\delta$-function} $d_x(t)$:
3559
$$\Phi_{\le C}(t)\quad :=\quad 2\sum_{{\rm prime\ powers \ }p^n \le C} p^{-n/2}\log(p)d_{n\log(p)}(t).$$
3560
Since the Fourier transform of $d_x(t)$ is $\cos(x\theta)$, the Fourier transform of each $d_{n\log(p)}(t)$ is
3561
$\cos(n\log(p)\theta)$, so the Fourier transform of
3562
$\Phi_{\le C}(t)$ is
3563
3564
$${\hat \Phi}_{\leq C}(\theta):= 2\sum_{{\rm prime\ powers\ }p^n \leq{} C} p^{-n/2} \log(p)\cos(n\log(p)\theta).$$
3565
3566
So, following the discussion in Chapter~\ref{ch:trigseries} above, we are dealing with the cutoffs at finite points $C$ of the {\it trigonometric series}{\footnote{The trigonometric series in the text---whose spectral values are the logarithms of prime powers---may also be written as
3567
$$\sum_{m=2}^{\infty}\Lambda(m)m^{-s} + \sum_{m=2}^{\infty}\Lambda(m)m^{-{\bar s}}$$ for $s={\frac{1}{2}}+i\theta$, where $\Lambda(m)$ is the von-Mangoldt function.}}
3568
3569
$${\hat \Phi}(\theta):= 2\sum_{{\rm prime\ powers\ }p^n } p^{-n/2} \log(p)\cos(n\log(p)\theta).$$
3570
3571
3572
3573
3574
For example, when $C=3$, we have the rather severe cutoff of these trigonometric series: ${\hat \Phi}_{\leq 3}(\theta)$ takes account {\it only} of the primes $p=2$ and $p=3$:
3575
$$
3576
{\hat \Phi}_{\leq 3}(\theta) =
3577
\frac{2}{\sqrt{2}} \log(2)\cos(\log(2)\theta)
3578
+
3579
\frac{2}{\sqrt{3}} \log(3)\cos(\log(3)\theta),
3580
$$
3581
which we plot in Figure~\ref{fig:theta3}.
3582
3583
3584
\ill{theta_3_intro-1}{.85}{Plot of ${\hat \Phi}_{\leq 3}(\theta)$\label{fig:theta3}}
3585
3586
3587
We will be interested in the values of $\theta$ that correspond to higher and higher {\it peaks} of our trigonometric series ${\hat \Phi}_{\leq C}(\theta)$ as $C\to \infty$. For example, the value of $\theta$ that provides the first peak of $
3588
{\hat \Phi'}_{\leq 3}(\theta)$ such that $
3589
|{\hat \Phi'}_{\leq 3}(\theta)| > 2$ is $$\theta=14.135375354\ldots.$$
3590
3591
So in Figure~\ref{fig:theta3} we begin this exploration by plotting ${\hat \Phi}_{\leq 3}(\theta)$, together with its derivative, highlighting the zeroes of the derivative.
3592
3593
3594
\ill{theta_3_intro-2}{.85}{Plot of ${\hat \Phi}_{\leq 3}(\theta)$ in blue and its derivative in grey\label{fig:theta4}}
3595
3596
As we shall see in subsequent figures of this chapter, there seems to be an eventual convergence of the values of $\theta$ that correspond to higher and higher {\it peaks}, the red dots in the figure above, as $C$ tends to $\infty$. These ``limit" $\theta$-values we now insert as the endpoints of red vertical lines into Figure~\ref{fig:theta5} comparing them with the red dots for our humble cutoff $C=3$.
3597
3598
3599
3600
\ill{theta_C-3}{0.9}{${\hat \Phi}_{\leq 3}(\theta)$ \label{fig:theta5}}
3601
3602
3603
3604
We give a further sample of graphs for a few higher cutoff values $C$ (introducing a few more primes into the game!).
3605
3606
Figures~\ref{fig:theta_C-5-10}--\ref{fig:theta_C-200-500}
3607
contain graphs of various cutoffs of ${\hat \Phi}_{\leq C}(\theta)$.
3608
As $C$ increases a sequence of spikes down emerge
3609
which we indicate with red vertical arrows.
3610
3611
3612
3613
\illtwo{theta_C-5}{theta_C-10}{0.45}{Plot of ${\hat \Phi}_{\leq C}(\theta)$ for $C=5$ and $10$\label{fig:theta_C-5-10}}
3614
3615
Given the numerical-experimental approach we have been adopting in this book, it is a particularly fortunate (and to us: surprising) thing that the convergence to those vertical red lines can already be illustrated using {\it such small} cutoff values $C$. One might almost imagine making hand computations that exhibit this phenomenon! Following in this spirit, see David Mumford's blog post {\url{http://www.dam.brown.edu/people/mumford/blog/2014/RiemannZeta.html}}.
3616
3617
To continue:
3618
3619
\illtwo{theta_C-20}{theta_C-100}{0.45}{Plot of ${\hat \Phi}_{\leq C}(\theta)$ for $C=10$ and $100$\label{fig:theta_C-10-100}}
3620
3621
\illtwo{theta_C-200}{theta_C-500}{0.45}{Plot of ${\hat \Phi}_{\leq C}(\theta)$ for $C=200$ and $500$\label{fig:theta_C-200-500}}
3622
3623
3624
For a theoretical discussion of these spikes, see endnote~\bibnote{
3625
A version of the Riemann--von Mangoldt explicit formula gives some theoretical affirmation of the phenomena we are seeing here. We thank Andrew Granville for a discussion about this.
3626
3627
\ill{granville}{0.25}{Andrew Granville}
3628
3629
3630
Even though the present endnote is not the place to give anything like a full account, we can't resist setting down a few of Granville's comments that might be helpful to people who wish to go further. (This discussion can be modified to analyze what happens unconditionally, but we will be assuming the \RH{} below.) The function ${\hat \Phi}_{\le C}(\theta)$ that we are graphing in this chapter can be written as:
3631
$${\hat \Phi}_{\le C}(\theta)= \sum_{n\le C}\Lambda(n)n^{-w}$$ where $w = {\frac{1}{2}}+i\theta$. This function, in turn, may be written (by Perron's formula) as
3632
\begin{align*}
3633
&{\frac{1}{2\pi i }}\lim_{T \to \infty}\int_{s=\sigma_o-iT}^{s=\sigma_o+iT}\sum_{n}\Lambda(n)n^{-w}\left({\frac {C}{n}}\right)^{s}{\frac{ds}{s}}\\
3634
&= {\frac{1}{2\pi i }}\lim_{T \to \infty}\int_{s=\sigma_o-iT}^{s=\sigma_o+iT}\sum_{n}\Lambda(n)n^{-w-s}C^{s}{\frac{ds}{s}}\\
3635
&= -{\frac{1}{2\pi i }}\lim_{T \to \infty}\int_{s=\sigma_o-iT}^{s=\sigma_o+iT}\left({\frac{\zeta'}{\zeta}}\right)(w+s){\frac{C^{s}}{s}}ds. \end{align*}
3636
3637
Here, we assume that $\sigma_o$ is sufficiently large and $C$ is not a prime power.
3638
3639
One proceeds, as is standard in the derivation of Explicit Formulae, by moving the line of integration to the left, picking up residues along the way. Fix the value of $w= {\frac{1}{2}}+i\theta$ and consider
3640
$$K_w(s,C):=\ \ {\frac{1}{2\pi i }}\left(\frac{\zeta'}{\zeta}\right)(w+s){\frac{C^{s}}{s}},$$ which has poles at $$s= 0, \ \ \ 1-w,\ \ \ {\rm and }\ \ \ \rho-w, $$ for every zero $\rho$ of $\zeta(s)$.
3641
We distinguish five cases, giving descriptive names to each:
3642
3643
\begin{enumerate} \item {\it Singular pole:} $s= 1-w$.
3644
\item {\it Trivial poles:} $s= \rho-w$ with $\rho$ a trivial zero of $\zeta(s)$.
3645
\item {\it Oscillatory poles:} $s= \rho-w = i(\gamma-\theta) \ne 0$ with $\rho= 1/2 + i \gamma (\ne w)$ a nontrivial zero of $\zeta(s)$. (Recall that we are assuming the \RH{}, and our variable $w= {\frac{1}{2}}+i\theta$ runs through complex numbers of real part equal to ${\frac{1}{2}}$. So, in this case, $s$ is purely imaginary.)
3646
\item {\it Elementary pole:} $s=0$ when $ w$ is not a nontrivial zero of $\zeta(s)$---i.e., when $0=s\ne\rho-w$ for any nontrivial zero $\rho$.
3647
\item {\it Double pole:} $s=0$ when $ w$ is a nontrivial zero of $\zeta(s)$---i.e., when $0=s=\rho-w$ for some nontrivial zero $\rho$. This, when it occurs, is indeed a double pole, and the residue is given by $m\cdot \log C +\epsilon$. Here $m$ is the multiplicity of the zero $\rho$ (which we expect always---or at least usually---to be equal to $1$) and $\epsilon$ is a constant (depending on $\rho$, but not on $C$).
3648
3649
\end{enumerate}
3650
3651
The standard technique for the ``Explicit formula'' will provide us with a formula for our function of interest ${\hat \Phi}_{\le C}(\theta)$. The formula has terms resulting from the residues of each of the first three types of poles, and of the {\it Elementary} or the {\it Double} pole---whichever exists. Here is the shape of the formula, given with terminology that is meant to be evocative:
3652
3653
3654
$$ {\bf(1)}\ \ \ {\hat \Phi}_{\le C}(\theta) = {\Sing}_{\le C}(\theta) + {\Triv}_{\le C}(\theta) + {\Osc}_{\le C}(\theta) + {\Elem}_{\le C}(\theta)$$
3655
3656
Or:
3657
3658
$${\bf(2)}\ \ \ {\hat \Phi}_{\le C}(\theta) = {\Sing}_{\le C}(\theta) + {\Triv}_{\le C}(\theta) + {\Osc}_{\le C}(\theta) + {\Double}_{\le C}(\theta),$$
3659
3660
\noindent the first if $w$ is not a nontrivial zero of $\zeta(s)$ and the second if it is.
3661
3662
The good news is that the functions ${\Sing}_{\le C}(\theta), {\Triv}_{\le C}(\theta)$ (and also ${\Elem}_{\le C}(\theta)$ when it exists) are smooth (easily describable) functions of the two variables $C$ and $\theta$; for us, this means that they are
3663
not that related to the essential information-laden {\it discontinuous} structure of $ {\hat \Phi}_{\le C}(\theta)$. Let us bunch these three contributions together and call the sum ${\Smooth}(C,\theta)$, and rewrite the above two formulae as:
3664
$$ {\bf(1)}\ \ \ {\hat \Phi}_{\le C}(\theta) ={\Smooth}(C,\theta) + {\Osc}_{\le C}(\theta) $$
3665
3666
Or:
3667
3668
$${\bf(2)}\ \ \ {\hat \Phi}_{\le C}(\theta) = {\Smooth}(C,\theta)+ {\Osc}_{\le C}(\theta) + m\cdot \log C +\epsilon,$$
3669
3670
depending upon whether or not $w$ is a nontrivial zero of $\zeta(s)$.
3671
3672
3673
We now focus our attention on the Oscillatory term, ${\Osc}_{\le C}(\theta) $, approximating it by a truncation: $$\Osc_w(C,X):= 2\sum_{|\gamma| <
3674
X} {{\frac{e^{i\log C\cdot (\gamma-\theta)}}{i(\gamma-\theta)}}}.$$
3675
Here if a zero has multiplicity $m$, then we count it $m$ times in the sum.
3676
Also, in this formula we have relegated the ``$\theta$" to the status of a subscript
3677
(i.e., $w = {\frac{1}{2}} +i\theta$) since we are keeping it constant, and we view the
3678
two variables $X$ and $C$ as linked in that we want the cutoff ``$X$" to be sufficiently
3679
large, say $X \gg C^2$, so that the error term can be controlled.
3680
3681
At this point, we can perform a ``multiplicative version'' of a Ces\`aro summation---i.e., the operator $F(c) \mapsto ({\Ces} F)(C):= \int_1^CF(c) dc/c.$
3682
This has the effect of forcing the oscillatory term to be bounded as $C$ tends to infinity.
3683
3684
This implies that for any fixed $\theta$,
3685
\begin{itemize}
3686
\item ${\Ces}{\hat \Phi}_{\le C}(\theta)$ is bounded independent of $C$ if $\theta$ is not the imaginary part of a nontrivial zero of $\zeta(s)$ and
3687
\item ${\Ces}{\hat \Phi}_{\le C}(\theta)$ grows as ${\frac{m}{2}}\cdot (\log C)^2 +O(\log C)$ if $\theta$ is the imaginary part of a nontrivial zero of $\zeta(s)$ of multiplicity $m$,
3688
% WARNING: above item is wrong; grows as should be different: ``we replace "m log(C)+O(1)" by "m/2 log(C)^2 + m gamma log(C)"?''
3689
\end{itemize}
3690
giving a theoretical confirmation of the striking feature of the
3691
graphs of our Chapter~\ref{ch:prime-to-spectrum}.}.
3692
3693
3694
% \ill{phihat_even_all}{1}{Plots of $-\frac{1}{2}{\hat \Phi}_{\le C}(\theta)$ for $C=5$ (top), 20, 50, and 500 (bottom).\label{fig:phic}}
3695
3696
3697
3698
%\vskip20pt
3699
%{\bf William: Figure goes here}
3700
%\vskip20pt
3701
% \ill{phihat_even_all}{1}{Plots of $-\frac{1}{2}{\hat \Phi}_{\le C}(\theta)$ for $C=5$ (top), 20, 50, and 500 (bottom).\label{fig:phic}}
3702
3703
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3704
3705
The $\theta$-coordinates of these spikes {\it seem to be}
3706
vaguely clustered about a discrete set of positive real numbers.
3707
%In
3708
%fact, this is already a hint of what happens to the graphs of these
3709
%functions ${\frac{1}{C}}{\hat\Phi}_{\le C}(\theta)$ as we allow the cut-off $C$ to
3710
%tend to infinity.
3711
These ``spikes" are our first glimpse of a
3712
certain infinite set of positive real numbers
3713
$$\theta_1,\theta_2,\theta_3,\dots$$ which constitute the {\bf Riemann spectrum\index{Riemann spectrum} of
3714
the primes}. If the \RH{} holds, these numbers would
3715
be the key to the placement of primes on the number line.
3716
3717
3718
3719
3720
3721
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3722
3723
3724
3725
By tabulating these peaks we compute---at least very approximately---$\dots$
3726
\begin{eqnarray*}
3727
\theta_1 &=& 14.134725 \dots\\
3728
\theta_2 &=& 21.022039 \dots\\
3729
\theta_3 &=& 25.010857 \dots\\
3730
\theta_4 &=& 30.424876 \dots\\
3731
\theta_5 &=& 32.935061 \dots\\
3732
\theta_6 &=& 37.586178 \dots
3733
\end{eqnarray*}
3734
3735
Riemann\index{Riemann, Bernhard} defined this sequence of numbers in his 1859 article in a
3736
manner somewhat different from the treatment we have given. In that
3737
article these $\theta_i$ appear as ``imaginary parts of the
3738
nontrivial zeroes\index{nontrivial zeroes} of his zeta function;''\index{zeta function} we will discuss this
3739
briefly in Part \ref{part4}, Chapter~\ref{ch:envision} below.
3740
3741
3742
3743
3744
\chapter{How many $\theta_i$'s are there?}
3745
3746
The Riemann spectrum\index{Riemann spectrum}, $\theta_1, \theta_2, \theta_3, \dots$ clearly deserves to be well understood! What do we know about this sequence of positive real numbers?
3747
3748
Just as we did with the prime numbers\index{prime number} before, we can count
3749
these numbers $\theta_1=14.1347\ldots$,
3750
$\theta_2=21.0220\ldots$, etc., and form the staircase
3751
of Figure~\ref{fig:staircase-riemann-spectrum-30},
3752
with a step up at $\theta_1$, a step up at $\theta_2$, etc.
3753
3754
\ill{staircase-riemann-spectrum-30}{0.8}{The staircase of the Riemann spectrum\index{Riemann spectrum}\label{fig:staircase-riemann-spectrum-30}}
3755
3756
Again, just as with the staircase of primes, we might hope
3757
that as we plot this staircase from a distance as in
3758
Figures~\ref{fig:staircase-riemann-spectrum-50} and \ref{fig:staircase-riemann-spectrum-1000} that it will
3759
look like a beautiful smooth curve.
3760
3761
3762
In fact, we know, conditional on RH, the staircase of real numbers $\theta_1, \theta_2, \theta_3,\dots$ is very closely approximated by the
3763
curve $${\frac{T}{2\pi}}\log {\frac{T}{2\pi e}},$$ (the error term being bounded by a constant times $\log T$).
3764
3765
3766
3767
\illtwo{staircase-riemann-spectrum-50}{staircase-riemann-spectrum-100}{0.45}{The staircase of the Riemann spectrum\index{Riemann spectrum} and the curve ${\frac{T}{2\pi}}\log {\frac{T}{2\pi e}}$\label{fig:staircase-riemann-spectrum-50}}
3768
3769
\ill{staircase-riemann-spectrum-1000}{0.95}{The staircase of the Riemann spectrum\index{Riemann spectrum} looks like a smooth curve\label{fig:staircase-riemann-spectrum-1000}}
3770
3771
3772
3773
3774
3775
Nowadays, these mysterious numbers $\theta_i$, these spectral lines for the
3776
staircase of primes, are known in great abundance and to great accuracy. Here is the smallest
3777
one, $\theta_1$, given with over $1{,}000$ digits of its decimal
3778
expansion:
3779
3780
\vskip20pt
3781
{\small
3782
$14.134725141734693790457251983562470270784257115699243175685567460149\newline
3783
9634298092567649490103931715610127792029715487974367661426914698822545\newline
3784
8250536323944713778041338123720597054962195586586020055556672583601077\newline
3785
3700205410982661507542780517442591306254481978651072304938725629738321\newline
3786
5774203952157256748093321400349904680343462673144209203773854871413783\newline
3787
1735639699536542811307968053149168852906782082298049264338666734623320\newline
3788
0787587617920056048680543568014444246510655975686659032286865105448594\newline
3789
4432062407272703209427452221304874872092412385141835146054279015244783\newline
3790
3835425453344004487936806761697300819000731393854983736215013045167269\newline
3791
6838920039176285123212854220523969133425832275335164060169763527563758\newline
3792
9695376749203361272092599917304270756830879511844534891800863008264831\newline
3793
2516911271068291052375961797743181517071354531677549515382893784903647\newline
3794
4709727019948485532209253574357909226125247736595518016975233461213977\newline
3795
3160053541259267474557258778014726098308089786007125320875093959979666\newline
3796
60675378381214891908864977277554420656532052405$}
3797
3798
\vskip20pt
3799
3800
3801
3802
\noindent{}and if, by any chance, you wish to peruse the first
3803
$2{,}001{,}052$
3804
of these $\theta_i$'s calculated to an accuracy
3805
within $3\cdot 10^{-9}$, consult Andrew Odlyzko's tables:
3806
\begin{center}
3807
\url{http://www.dtc.umn.edu/~odlyzko/zeta_tables}
3808
\end{center}
3809
3810
3811
3812
\chapter{Further questions about the Riemann spectrum\index{Riemann spectrum}}
3813
3814
Since people have already computed\footnote{See \url{http://numbers.computation.free.fr/Constants/constants.html} for details.}
3815
the first $10$ trillion $\theta$'s
3816
and have never found one with
3817
multiplicity $>1$, it is generally expected that the multiplicity of all
3818
the $\theta$'s in the Riemann spectrum\index{Riemann spectrum} is $1$.
3819
3820
But, independent of
3821
that expectation, our convention in what follows will be that we {\it
3822
count} each of the elements in the Riemann spectrum\index{Riemann spectrum} repeated as many
3823
times as their multiplicity. So, if it so happens that $\theta_n$
3824
occurs with multiplicity two, we view the Riemann spectrum\index{Riemann spectrum} as being
3825
the series of numbers
3826
$$\theta_1, \theta_2, \dots, \theta_{n-1}, \theta_n, \theta_n, \theta_{n+1}, \dots$$
3827
3828
It has been conjectured that there are no infinite arithmetic progressions among these numbers. More broadly, one might expect that there is no visible correlation between the $\theta_i$'s and translation, i.e., that the distribution\index{distribution} of $\theta_i$'s modulo any positive number $T$ is random, as in Figure~\ref{fig:zero-spacing}.
3829
3830
3831
3832
\illtwo{zero-spacing-mod2pi}{zero-spacing-mod1}{0.4}{Frequency histogram of Odlyzko's computation of the Riemann spectrum\index{Riemann spectrum} modulo $2\pi$ (left) and modulo 1 (right)\label{fig:zero-spacing}}
3833
3834
In analogy with the discussion of prime gaps in Chapter~\ref{ch:further}, we might compute {\it pair correlation functions} and the statistics of gaps between successive $\theta$'s in the Riemann spectrum\index{Riemann spectrum} (see {\url{http://www.baruch.cuny.edu/math/Riemann_Hypothesis/zeta.zero.spacing.pdf}). This study was begun by H.\thinspace{}L. Montgomery and
3835
F.\thinspace{}J. Dyson.
3836
As Dyson noticed, the distributions\index{distribution} one gets from the Riemann spectrum\index{Riemann spectrum} bears a similarity to the distributions of eigenvalues of a random unitary matrix.{\footnote{Any connection of the Riemann spectrum to eigenvalues of matrices would indeed be exciting for our understanding of the \RH{}, in view of what is known as the {\it Hilbert-P{\'o}lya Conjecture} (see {\url{http://en.wikipedia.org/wiki/Hilbert\%E2\%80\%93P\%C3\%B3lya_conjecture}}).}} This has given rise to what is know as the {\it random matrix heuristics}, a powerful source of conjectures for number theory and other branches of mathematics.
3837
3838
Here is a histogram of the distribution\index{distribution} of differences $\theta_{i+1}-\theta_i$:
3839
\
3840
3841
3842
\ill{riemann_spectrum_gaps}{0.95}{Frequency histogram of the
3843
first 99{,}999 gaps in the Riemann spectrum\label{fig:riemann_spectrum_gaps}}
3844
3845
\chapter[From the Riemann spectrum to primes]{Going
3846
from the Riemann spectrum to the primes}\index{Riemann spectrum}
3847
3848
To justify the name {\it Riemann spectrum of primes} we will
3849
investigate graphically whether in an analogous manner we can use this
3850
spectrum to get information about the placement of prime numbers\index{prime number}. We
3851
might ask, for example, if there is a trigonometric function with
3852
frequencies given by this collection of real
3853
numbers, $$\theta_1,\theta_2,\theta_3,\dots$$ that somehow pinpoints
3854
the prime powers, just as our functions $${\hat \Phi}(\theta)_{\le C}=
3855
2\sum_{{\rm prime\ powers\ } p^n \le C}
3856
p^{-m/2}\log(p)\cos(n\log(p)\theta)$$ for large $C$ pinpoint the
3857
spectrum\index{spectrum} (as discussed in the previous two chapters).
3858
3859
To start the return game, consider this sequence of trigonometric functions that have ($zero$ and) the $\theta_i$ as spectrum\index{spectrum}
3860
3861
$$G_C(x):= 1+ \sum_{i < C}\cos(\theta_i\cdot x).$$
3862
3863
As we'll see presently it is best to view these functions on a logarithmic scale so we will make the substitution of variables $x = \log(s)$ and write
3864
3865
$$H_C(s):= G_C(\log(s))= 1+ \sum_{i < C}\cos(\theta_i\cdot \log(s)).$$
3866
3867
3868
The theoretical story behind the phenomena that we will
3869
see graphically in this chapter is a manifestation of
3870
Riemann's explicit formula. For modern text references that discuss this general subject, see endnote \bibnote{ A reference for this is:
3871
3872
\vskip10pt
3873
{\bf [I-K]: }H. Iwaniec; E. Kowalski, {\bf Analytic Number Theory}, {\em American Mathematical Society Colloquium Publications} {\bf 53} (2004).
3874
3875
(See also the bibliography there.)
3876
\vskip10pt
3877
Many
3878
ways of seeing the explicit relationship are given in Chapter 5
3879
of {\bf [I-K]}. For example, consider Exercise 5 on page 109.
3880
3881
$$\sum_{\rho}{\hat \phi}(\rho) = -\sum_{n \ge 1}\Lambda(n)\phi(n) + I(\phi),$$
3882
3883
where \begin{itemize} \item $\phi$ is any smooth complex-valued
3884
function on $[1,+\infty)$ with compact support,\item ${\hat \phi}$ is
3885
its Mellin transform $${\hat \phi}(s):=
3886
\int_0^{\infty}\phi(x)x^{s-1}dx,$$ \item the last term on the right,
3887
$I(\phi)$, is just
3888
$$I(\phi):= \int_1^{\infty}(1-{\frac{1}{x^3-x}})\phi(x)dx$$ (coming from the pole at $s=1$ and the ``trivial zeroes''). \item The more serious summation on the left hand side of the equation is over the nontrivial zeroes $\rho$, noting that if $\rho$ is a nontrivial zero so is~${\bar \rho}$. \end{itemize}
3889
3890
3891
3892
Of course, this ``explicit formulation'' is not {\it immediately}
3893
applicable to the graphs we are constructing since we cannot naively
3894
take ${\hat \phi}$ to be a function forcing the left hand side to be
3895
$G_C(x)$.
3896
3897
See also Exercise 7 on page 112, which discusses the sums
3898
$$
3899
x - \sum_{|\theta| \le C} {\frac{x^{{\frac{1}{2}} +i\theta}-1}{{\frac{1}{2}} +i\theta}}.
3900
$$}
3901
3902
3903
\ill{phi_cos_sum_2_30_1000}{.8}{Illustration of $-\sum_{i=1}^{1000}
3904
\cos(\log(s)\theta_i)$, where $\theta_1 \sim 14.13, \ldots$ are the
3905
first $1000$ contributions to the Riemann spectrum\index{Riemann spectrum}. The red dots
3906
are at the prime powers $p^n$, whose size is proportional to
3907
$\log(p)$.}
3908
3909
\ill{phi_cos_sum_26_34_1000}{.8}{Illustration of $-\sum_{i=1}^{1000}
3910
\cos(\log(s)\theta_i)$ in the neighborhood of a twin prime. Notice
3911
how the two primes $29$ and $31$ are separated out by the Fourier
3912
series, and how the prime powers $3^3$ and $2^5$ also appear.}
3913
3914
\ill{phi_cos_sum_1010_1026_15000}{.7}{Fourier series from $1,000$ to
3915
$1,030$ using 15,000 of the numbers $\theta_i$. Note the twin
3916
primes $1{,}019$ and $1{,}021$ and that $1{,}024=2^{10}$.}
3917
3918
3919
\part{Back to Riemann\label{part4}}
3920
3921
3922
\chapter[Building $\pi(X)$ from the spectrum]{How to build $\pi(X)$ from the spectrum (Riemann's way)}
3923
We have been dealing in Part~\ref{part3} of our book with $\Phi(t)$, a
3924
distribution\index{distribution} that---we said---contains all the essential information
3925
about the placement of primes among numbers. We have given a clean
3926
restatement of Riemann's hypothesis, the third restatement so far,
3927
in term of this $\Phi(t)$. But $\Phi(t)$ was the effect of a series
3928
of recalibrations and reconfigurings of the original untampered-with
3929
staircase of primes. A test of whether we have strayed from our
3930
original problem---to understand this staircase---would be whether
3931
we can return to the original staircase, and ``reconstruct it'' so to
3932
speak, solely from the information of $\Phi(t)$---or equivalently,
3933
assuming the \RH{} as formulated in Chapter~\ref{sec:tinkering}---can
3934
we construct the staircase of primes $\pi(X)$ solely
3935
from knowledge of the sequence of real numbers $\theta_1,
3936
\theta_2,\theta_3,\dots$?
3937
3938
3939
3940
3941
The answer to this is yes (given the \RH{}), and is discussed very
3942
beautifully by Bernhard Riemann\index{Riemann, Bernhard} himself in his famous 1859 article.
3943
3944
Bernhard Riemann used the spectrum\index{spectrum} of the prime numbers\index{prime number} to provide
3945
an exact analytic formula that analyzes and/or synthesizes the
3946
staircase of primes. This formula is motivated by Fourier's
3947
analysis of functions as constituted out of cosines.
3948
%Riemann\index{Riemann, Bernhard} started
3949
%with a specific smooth function, which we will refer to as $R(X)$, a
3950
% function that Riemann offered, just as Gauss\index{Gauss, Carl Friedrich} offered his $\Li(X)$,
3951
% as a candidate smooth function approximating the staircase of
3952
%primes.
3953
Recall from Chapter~\ref{sec:rh1} that Gauss's guess is
3954
$\Li(X)= \int_2^{X}dt/{\rm log}(t).$ To continue this discussion, we do need some familiarity with complex numbers, for the definition of Riemann's\index{Riemann, Bernhard} exact formula
3955
requires extending
3956
the definition of the function $\Li(X)$ to make sense for
3957
complex numbers $X=a+bi$. In fact, more naturally, one might work with the path integral $\li(X):= \int_0^{X}dt/{\rm log}(t).$
3958
3959
Riemann \index{Riemann, Bernhard} begins his discussion
3960
(see Figure~\ref{fig:riemann_RX}) by defining
3961
$$
3962
R(X) = \sum_{n=1}^{\infty}{\mu(n)\over n} \li(X^{1\over n})= \lim_{N\to \infty} R^{(N)}(X):= \lim_{N\to \infty} \sum_{n=1}^{N}{\mu(n)\over n} \li(X^{1\over n}),
3963
$$
3964
where $R^{(N)}(X)$ denotes the truncated sum, which one can compute as an approximation.
3965
\ill{riemann_RX}{1}{Riemann\index{Riemann, Bernhard} defining $R(X)$ in his manuscript\label{fig:riemann_RX}}
3966
In all the discussion of this section the order of summation is
3967
important. For such considerations and issues regarding actual
3968
computation we refer to Riesel-Gohl (see \url{http://wstein.org/rh/rg.pdf}).
3969
3970
Here $\mu(n)$ is the M\"{o}bius function\index{M\"{o}bius function}
3971
which is defined by
3972
$$
3973
\mu(n) = \begin{cases}
3974
1 &
3975
\mbox{\begin{minipage}{0.6\textwidth}if $n$ is a square-free
3976
positive integer with an even number of distinct prime
3977
factors,\end{minipage}}\vspace{1em}\\
3978
-1& \mbox{\begin{minipage}{0.6\textwidth}if $n$ is a square-free
3979
positive integer with an odd number of distinct
3980
prime factors,\end{minipage}}\vspace{1em}\\
3981
0 & \mbox{if $n$ is not square-free.}
3982
\end{cases}
3983
$$
3984
See Figure~\ref{fig:moebius} for a plot of the M\"{o}bius function.\index{M\"{o}bius function}
3985
3986
\ill{moebius}{1}{The blue dots plot the values of the M\"{o}bius function\index{M\"{o}bius function} $\mu(n)$, which is only defined at integers.\label{fig:moebius}}
3987
3988
3989
In Chapter~\ref{sec:pnt} we encountered the Prime Number Theorem,\index{prime number} which
3990
asserts that $X/\log(X)$ and $\Li(X)$ are both approximations for
3991
$\pi(X)$, in the sense that both go to infinity at the same rate. That is, the ratio of any two of these three functions tends to $1$ as $X$ goes to $\infty$.
3992
Our first formulation of the \RH{} (see page~\pageref{rh:first}) was
3993
that $\Li(X)$ is an essentially square root accurate approximation of
3994
$\pi(X)$. Figures~\ref{fig:guess100}--\ref{fig:guess10000} illustrate
3995
that Riemann's function $R(X)$ appears to be an even better
3996
approximation to $\pi(X)$ than anything we have seen before.
3997
3998
\illtwo{pi_riemann_gauss_100}{pi_riemann_gauss_1000}{0.47}{Comparisons of $\Li(X)$ (top), $\pi(X)$ (middle), and $R(X)$ (bottom, computed using 100 terms)\label{fig:guess100}}
3999
4000
4001
\ill{pi_riemann_gauss_10000-11000}{0.8}{Closeup comparison of $\Li(X)$ (top), $\pi(X)$ (middle), and $R(X)$ (bottom, computed using 100 terms)\label{fig:guess10000}}
4002
4003
4004
Think of Riemann's smooth curve $R(X)$ as the {\em fundamental}
4005
approximation to $\pi(X)$.
4006
Riemann\index{Riemann, Bernhard} offered much more than just a (conjecturally) better
4007
approximation to $\pi(X)$ in his wonderful 1859 article
4008
(see Figure~\ref{fig:riemann_Rk}).
4009
He found a way to construct what looks vaguely like a Fourier series,
4010
but with $\sin(X)$ replaced by $R(X)$ and its spectrum\index{spectrum} the $\theta_i$, which
4011
conjecturally equals $\pi(X)$ (with a slight correction if the number $X$ is itself a prime).
4012
\ill{riemann_Rk}{1}{Riemann's\index{Riemann, Bernhard} analytic formula for $\pi(X)$\label{fig:riemann_Rk}}
4013
In this manner, Riemann gave an infinite sequence of improved guesses, beginning with $R_0(X)$ (see equation (18) of Riesel-Gohl at \url{http://wstein.org/rh/rg.pdf}) a modification of $R(X)$ that takes account of the pole and the trivial zeroes of the Riemann zeta-function, and then considered a sequence:
4014
4015
4016
$$
4017
R_0(X),\quad R_1(X), \quad R_2(X), \quad R_3(X), \quad \ldots
4018
$$
4019
and he hypothesized that one and all of them were all
4020
essentially square root approximations to $\pi(X)$,
4021
and that the sequence of these better and better approximations converge to give an exact formula
4022
for $\pi(X)$.
4023
4024
Thus not only did Riemann\index{Riemann, Bernhard} provide a ``fundamental'' (that is, a smooth curve
4025
that is astoundingly close to $\pi(X)$) but he viewed this as just a
4026
starting point, for he gave the recipe for providing an infinite
4027
sequence of corrective terms---call them Riemann's {\em harmonics}\index{harmonics}; we
4028
will denote the first of these ``harmonics'' $C_1(X)$, the second
4029
$C_2(X)$, etc. Riemann\index{Riemann, Bernhard} gets his first corrected curve, $R_1(X)$, from
4030
$R_0(X)$ by adding this first harmonic to the fundamental, $$R_1(X) =
4031
R_0(X) + C_1(X),$$ he gets the second by correcting $R_1(X)$ by adding
4032
the second harmonic $$R_2(X) = R_1 (X) + C_2(X),$$ and so on $$R_3(X)
4033
= R_2 (X) + C_3(X),$$ and in the limit provides us with an exact fit.
4034
4035
4036
The \RH{}, if true, would tell us that these correction
4037
terms $C_1(X), C_2(X), C_3(X),\dots$ are all {\em square-root small}
4038
%,
4039
%and all the successively corrected smooth curves $$R_0(X), R_1(X),
4040
%R_2(X), R_3(X),\dots$$ are good approximations to $\pi(X)$.
4041
%Moreover,
4042
%$$
4043
%\pi(X) = R_0(X) + \sum_{k=1}^{\infty} C_k(X).
4044
%$$
4045
4046
The elegance of Riemann's\index{Riemann, Bernhard} treatment of this problem is that the
4047
corrective terms $C_k(X)$ are all {\em modeled on} the fundamental
4048
$R(X)$ and are completely described if you know the sequence of real
4049
numbers $\theta_1, \theta_2, \theta_3,\dots$ of the last section.
4050
4051
4052
Assuming the \RH{}, the Riemann\index{Riemann, Bernhard} correction
4053
terms $C_k(X)$ are defined to be
4054
$$
4055
C_k(X)= -R(X^{\frac{1}{2} + i\theta_k}) -R(X^{\frac{1}{2} - i\theta_k}),
4056
$$
4057
where $\theta_1 = 14.134725\dots, \theta_2 = 21.022039\dots$, etc.,
4058
is the spectrum\index{spectrum} of the prime numbers\index{prime number} \bibnote{You may well ask how we propose to order these correction terms if RH
4059
is false. Order them in terms of
4060
(the absolute value of) their imaginary part, and in the unlikely
4061
situation that there is more than one zero with the same imaginary
4062
part, order zeroes of the same imaginary part by their real parts,
4063
going from right to left.}.
4064
4065
In sum, Riemann\index{Riemann, Bernhard} provided an extraordinary recipe that allows us to work
4066
out the harmonics\index{harmonics}, $$C_1(X),\quad C_2(X),\quad C_3(X),\quad \dots$$ without our having
4067
to consult, or compute with, the actual staircase of primes. As with
4068
Fourier's modus operandi where both {\em fundamental} and all {\em
4069
harmonics}\index{harmonics} are modeled on the sine wave, but appropriately
4070
calibrated, Riemann\index{Riemann, Bernhard} fashioned his higher harmonics\index{harmonics}, modeling them all
4071
on a single function, namely $R(X)$.
4072
4073
The convergence of $R_k(X)$ to $\pi(X)$ is strikingly illustrated
4074
in the plots in Figures~\ref{fig:rkfirst}--\ref{fig:rklast} of $R_k$ for various values of $k$.
4075
4076
4077
\ill{Rk_1_2_100}{.9}{The function $R_{1}$ approximating the staircase of primes up to $100$\label{fig:rkfirst}}
4078
4079
\ill{Rk_10_2_100}{.9}{The function $R_{10}$ approximating the staircase of primes up to $100$}
4080
4081
\ill{Rk_25_2_100}{.9}{The function $R_{25}$ approximating the staircase of primes up to $100$}
4082
4083
\ill{Rk_50_2_100}{.9}{The function $R_{50}$ approximating the staircase of primes up to $100$}
4084
4085
4086
\ill{Rk_50_2_500}{.9}{The function $R_{50}$ approximating the staircase of primes up to $500$}
4087
4088
\ill{Rk_50_350_400}{.9}{The function $\Li(X)$ (top, green), the function $R_{50}(X)$ (in blue), and the staircase of primes on the interval from 350 to 400.\label{fig:rklast}}
4089
4090
4091
\chapter[As Riemann envisioned it]{As Riemann envisioned it, the zeta function relates the staircase of primes to its Riemann\index{Riemann, Bernhard} spectrum\index{Riemann spectrum}\label{ch:envision}}
4092
In the previous chapters we have described---using Riemann's
4093
Hypothesis---how to obtain the {\it
4094
spectrum}\index{spectrum} $$\theta_1,\theta_2,\theta_3,\dots$$ from the staircase of
4095
primes, and hinted at how to go back. Roughly speaking, we were
4096
performing ``Fourier transformations'' to make this transit. But
4097
Riemann\index{Riemann, Bernhard}, on the very first page of his 1859 memoir, construes the
4098
relationship we have been discussing, between spectrum\index{spectrum} and staircase,
4099
in an even more profound way.
4100
4101
To talk about this extraordinary insight of Riemann\index{Riemann, Bernhard}, one would need to
4102
do two things that might seem remote from our topic, given our
4103
discussion so far.
4104
4105
\begin{itemize} \item We will discuss a key idea that Leonhard Euler\index{Euler, Leonhard}
4106
had (circa 1740).
4107
\item To follow the evolution of this idea in the hands of Riemann\index{Riemann, Bernhard}, we would have to assume familiarity with basic complex analysis.
4108
\end{itemize}
4109
4110
We will say only a few words here about this, in hopes of giving at
4111
least a shred of a hint of how marvelous Riemann's idea is. We will be drawing, at this point, on some further mathematical background. For
4112
readers who wish to pursue the themes we discuss, here is a list of sources,
4113
that are our favorites among those meant to be read by a somewhat
4114
broader audience than people very advanced in the subject. We list
4115
them in order of ``required background.''
4116
4117
\begin{enumerate}
4118
\item John Derbyshire's {\it Prime Obsession: Bernhard Riemann and
4119
the Greatest Unsolved Problem in Mathematics} (2003). We have already
4120
mentioned this book in our foreword, but feel that it is so
4121
good, that it is worth a second mention here.
4122
\item The Wikipedia entry for Riemann's Zeta Function\index{zeta function}
4123
(\url{http://en.wikipedia.org/wiki/Riemann\_zeta\_function}). It is
4124
difficult to summarize who wrote this, but we feel that it is a
4125
gift to the community in its clarity. Thanks authors!
4126
\item Enrico Bombieri's article \bibnote{Bombieri, {\em The Riemann
4127
Hypothesis}, available at
4128
\url{http://www.claymath.org/sites/default/files/official_problem_description.pdf}}.
4129
To comprehend all ten pages of this excellent and fairly thorough
4130
account may require significant background, but try your hand at
4131
it; no matter where you stop, you will have seen good things in
4132
what you have read.
4133
\end{enumerate}
4134
\vskip20pt
4135
{\bf Leonhard Euler's\index{Euler, Leonhard} idea ($\simeq$1740):}
4136
As readers of Jacob Bernoulli's {\it Ars Conjectandi} (or of the works
4137
of John Wallis) know, there was in the early 18th century
4138
already a rich mathematical theory of
4139
finite sums of (non-negative $k$-th powers) of consecutive integers.
4140
This sum, $$S_k(N) = 1^k+2^k+3^k+\cdots +(N-1)^k,$$ is a polynomial
4141
in $N$ of degree $k+1$ with no constant term, a leading term equal to
4142
${1\over k+1}N^{k+1}$, and a famous linear term. The coefficient of
4143
the linear term of the polynomial $S_k(N)$ is the {\it Bernoulli
4144
number} $B_{k}$:
4145
\begin{align*}
4146
S_1(N) &= 1+2+3+ \dots + (N-1) \ = \ {N(N-1)\over 2} \ = \ {N^2\over 2}\ -\ {1\over 2}\cdot N,\\
4147
S_2(N) &= 1^2+2^2+3^2+ \dots + (N-1)^2 \ = \ {N^3\over 3} +\cdots -\ {1\over 6}\cdot N,\\
4148
S_3(N) &= 1^3+2^3+3^3+ \dots + (N-1)^3 \ = \ {N^4\over 4} +\cdots -\ 0\cdot N,\\
4149
S_4(N) &= 1^4+2^4+3^4+ \dots + (N-1)^4 \ = \ {N^5\over 5} +\cdots -\ {1\over 30}\cdot N,
4150
\end{align*}
4151
etc.
4152
For odd integers $k
4153
> 1$ this linear term vanishes. For even integers $2k$ the Bernoulli number
4154
$B_{2k}$ is the rational number given by the coefficient
4155
of $\frac{x^{2k}}{(2k)!}$ in the power series expansion
4156
\ \newline \bigskip \ \newline
4157
$${x\over e^x-1} \ = \ 1 - {x\over 2} + \sum_{k=1}^{\infty}
4158
(-1)^{k+1}B_{2k}{x^{2k}\over{(2k)}!}.$$
4159
\ \newline \bigskip \ \newline
4160
So
4161
$$ B_2 = {1\over 6},\ \ \ \ \ \ \ \ B_4 = {1\over 30},\ \ \ \ \ \ \ \ B_6 = {1\over 42},\ \ \ \ \ \ \
4162
\ B_8 = {1\over 30},$$ and to convince you that the numerator of these numbers is not always $1$, here are
4163
a few more:
4164
$$B_{10} = {5\over 66},\ \ \ \ \ \ \ \ B_{12}= {691\over 2730},\ \ \ \ \ \ \ \ B_{14} = {7\over 6}.$$
4165
\ \newline \bigskip \ \newline
4166
If you turn attention to sums of negative $k$-th powers of consecutive
4167
integers, then when $k= -1$, $$S_{-1}(N)= {\frac{1}{1}}+
4168
{\frac{1}{2}}+ {\frac{1}{3}}+ \cdots+ {\frac{1}{N}}$$ tends to infinity
4169
like $\log(N)$, but for $k < -1$ we are facing the sum of reciprocals
4170
of powers (of exponent $>1$) of consecutive whole numbers, and
4171
$S_k(N)$ converges. This is the first appearance of the zeta function\index{zeta function}
4172
$\zeta(s)$ for arguments $s=2,3,4,\dots$ So let us denote these limits
4173
by notation that has been standard, after Riemann\index{Riemann, Bernhard}:
4174
$$\zeta(s):= {\frac{1}{1^s}}+ {\frac{1}{2^s}}+ {\frac{1}{3^s}}+ \cdots$$
4175
The striking reformulation that Euler\index{Euler, Leonhard} discovered was the expression of
4176
this infinite sum as an infinite product of factors associated to the
4177
prime numbers:\index{prime number}
4178
$$
4179
\zeta(s)=\sum_n{\frac{1}{n^s}} =
4180
\prod_{p \ {\rm prime}}{\frac{1}{1-p^{-s}}},
4181
$$
4182
where the infinite sum on the left and the infinite product on the
4183
right both converge (and are equal) if $s > 1$. He also evaluated these
4184
sums at even positive integers, where---surprise---the Bernoulli
4185
numbers come in again; and they and $\pi$ combine to yield the values of the zeta function\index{zeta function} at even positive integers:
4186
4187
\begin{align*}
4188
\zeta(2) &= {\frac{1}{1^2}}+{\frac{1}{2^2}}+\cdots = \pi^2/6 \simeq 1.65\dots\\
4189
\zeta(4) &= {\frac{1}{1^4}}+{\frac{1}{2^4}}+\cdots = \pi^4/90\simeq 1.0823\dots
4190
\end{align*}
4191
and, in general,
4192
$$\zeta(2n) = {\frac{1}{1^{2n}}}+{\frac{1}{2^{2n}}}+\cdots \ \ \ = \ \ \ (-1)^{n+1}B_{2n}\pi^{2n}\cdot {\frac{2^{2n-1}}{(2n)!}}.$$
4193
4194
A side note to Euler's\index{Euler, Leonhard} formulas comes from the fact (only known much
4195
later) that no power of $\pi$ is rational: do you see how to use
4196
this to give a proof that there are infinitely many primes,
4197
combining Euler's\index{Euler, Leonhard} infinite product expansion displayed above with
4198
the formula for $\zeta(2)$, or with the formula for $\zeta(4)$, or,
4199
in fact, for the formulas for $\zeta(2n)$ for {\it any} $n$ you
4200
choose?
4201
4202
{\bf Pafnuty Lvovich Chebyshev's\index{Chebyshev, Pafnuty Lvovich} idea ($\simeq$1845):} The second moment
4203
in the history of evolution of this function $\zeta(s)$ is when
4204
Chebyshev\index{Chebyshev, Pafnuty Lvovich} used the {\it same formula} as above in the extended range
4205
where $s$ is allowed now to be a real variable---not just an
4206
integer---greater than $1$. Making use of this extension of the
4207
range of definition of Euler's\index{Euler, Leonhard} sum of reciprocals of powers of
4208
consecutive whole numbers, Chebyshev\index{Chebyshev, Pafnuty Lvovich} could prove that for large $x$
4209
the ratio of $\pi(x)$ and $x/\log(x)$ is bounded above and below by
4210
two explicitly given constants. He also proved that there exists a
4211
prime number\index{prime number} in the interval bounded by $n$ and $2n$ for any positive
4212
integer $n$ (this was called {\it Bertrand's postulate}; see \url{http://en.wikipedia.org/wiki/Proof_of_Bertrand%27s_postulate}).
4213
\vskip20pt
4214
4215
{\bf Riemann's idea (1859):} It is in the third step of the
4216
evolution of $\zeta(s)$ that something quite surprising
4217
happens. Riemann\index{Riemann, Bernhard} extended the range of Chebyshev's\index{Chebyshev, Pafnuty Lvovich} sum of
4218
reciprocals of positive real powers of consecutive whole numbers
4219
allowing the argument $s$ to range over the entire complex plane $s$
4220
(avoiding $s=1$). Now this is a more mysterious extension of
4221
Euler's\index{Euler, Leonhard} function, and it is deeper in two ways:
4222
4223
\begin{itemize}
4224
\item The formula $$\zeta(s):= {\frac{1}{1^s}}+ {\frac{1}{2^s}}+
4225
{\frac{1}{3^s}}+ \cdots$$ does converge when the real part of the
4226
exponent $s$ is greater than $1$ (i.e., this allows us to use the
4227
same formula, as Chebyshev\index{Chebyshev, Pafnuty Lvovich} had done, for the right half plane
4228
in the complex plane determined by the condition $s=x+iy$ with $x>1$
4229
but not beyond this). You can't simply use the same formula for the
4230
extension.\item So you must face the fact that if you wish to
4231
``extend'' a function beyond the natural range in which its defining
4232
formula makes sense, there may be many ways of doing it.
4233
\end{itemize}
4234
4235
To appreciate the second point, the theory of a complex variable is
4236
essential. The {\it uniqueness} (but not yet the {\it existence}) of
4237
Riemann's extension of $\zeta(s)$ to the entire complex plane is
4238
guaranteed by the phenomenon referred to as {\it analytic
4239
continuation}. If you are given a function on any infinite subset
4240
$X$ of the complex plane that contains a limit point, and if you are
4241
looking for a function on the entire complex plane{\footnote{ or to
4242
any connected open subset that contains $X$}} that is
4243
differentiable in the sense of complex analysis, there may be no
4244
functions at all that have that property, but if there is one, that
4245
function is {\it unique}. But Riemann\index{Riemann, Bernhard} succeeded: he was indeed able
4246
to extend Euler's\index{Euler, Leonhard} function to the entire complex plane except for the
4247
point $s=1$, thereby defining what we now call {\it Riemann's zeta
4248
function}. \vskip20pt
4249
4250
Those ubiquitous Bernoulli numbers reappear yet again as
4251
values of this {\it extended} zeta function\index{zeta function} at negative integers:
4252
$$\zeta(-n) = -B_{n+1}/(n+1)$$
4253
so since the Bernoulli numbers indexed by odd integers $>1$ all
4254
vanish, the extended zeta function\index{zeta function} $\zeta(s)$ actually vanishes at
4255
{\it all} even negative integers.
4256
4257
The even integers $-2, -4, -6, \dots$ are often called the {\bf
4258
trivial zeroes} of the Riemann zeta function.\index{zeta function} There are indeed
4259
other zeroes of the zeta function,\index{zeta function} and those other zeroes could---in no way---be
4260
dubbed ``trivial,'' as we shall shortly see.
4261
\vskip20pt
4262
It is time to consider these facts:
4263
4264
\begin{enumerate}\item {\bf Riemann's zeta function\index{zeta function} codes the
4265
placement of prime powers among all numbers.} The key here is to
4266
take the logarithm and then the derivative of $\zeta(s)$ (this boils
4267
down to forming ${\frac{d{\zeta}}{ds}}(s)/\zeta(s)$). Assuming
4268
that the real part of $s$ is $>1$, taking the logarithm of
4269
$\zeta(s)$---using Euler's\index{Euler, Leonhard} infinite product formulation---gives
4270
us $$\log\zeta(s)= \sum_{p \ {\rm
4271
prime}}-\log{\big(1-p^{-s}\big)},$$ and we can do this
4272
term-by-term, since the real part of $s$ is $>1$. Then taking the
4273
derivative gives us:
4274
4275
\vskip20pt
4276
$${\frac{d{\zeta}}{ds}}(s)/\zeta(s)= -\sum_{n=1}^{\infty}\Lambda(n)n^{-s}$$
4277
where
4278
$$
4279
\Lambda(n) :=
4280
\begin{cases}
4281
\log(p) & \text{when $n= p^k$ for $p$ a prime number and $k >0$, and}\\
4282
0 & \text{if $n$ is not a power of a prime number.}
4283
\end{cases}
4284
$$
4285
In particular, $\Lambda(n)$ ``records" the placement of prime powers.
4286
4287
%\todo{Put in log deriv of zeta.}
4288
%$$ {\frac{d{\zeta}}{ds}}(s)/\zeta(s)=-\sum_{p \ {\rm prime}}\log(p)p^{-s}.$$ For any particular $s$ this is an infinite sum which tallies the primes $p$, each weighted by the quantity $\log(p)p^{-s}$ and is absolutely convergent if the real part of $s$ is $>1$. % note : this forgets the prime powers.
4289
4290
\vskip20pt
4291
4292
\item {\bf You know lots about an analytic function if you know its zeroes and
4293
poles.} For example for polynomials, or
4294
even rational functions: if someone told you that a
4295
certain rational function $f(s)$ vanishes to order $1$ at $0$ and at
4296
$\infty$; and that it has a double pole at $s=2$ and at all other
4297
points has finite nonzero values, then you can immediate say that
4298
this mystery function is a nonzero constant times $s/(s-2)^2$.
4299
4300
Knowing the zeroes and poles (in the complex plane) alone of the
4301
Riemann zeta function\index{zeta function} doesn't entirely pin it down---you have to
4302
know more about its behavior at infinity since---for example,
4303
multiplying a function by $e^z$ doesn't change the structure of its
4304
zeroes and poles in the finite plane. But a complete understanding
4305
of the zeroes and poles of $\zeta(s)$ will give all the information
4306
you need to pin down the placement of primes among all numbers.
4307
4308
So here is the score:
4309
4310
\begin{itemize} \item As for poles, $\zeta(s)$ has only one pole. It
4311
is at $s=1$ and is of order $1$ (a ``simple pole'').\item As for
4312
zeroes, we have already mentioned the trivial zeroes (at negative
4313
even integers), but $\zeta(s)$ also has infinitely many {\it
4314
nontrivial} zeroes. These nontrivial zeroes\index{nontrivial zeroes} are known to lie in
4315
the vertical strip $$0\ < \ {\rm real\ part\ of\ } s\ < \
4316
1.$$\end{itemize}
4317
4318
\end{enumerate}
4319
4320
And here is yet another equivalent statement of Riemann's
4321
Hypothesis---this being the formulation closest to the one given in
4322
his 1859 memoir:
4323
4324
\begin{center}
4325
\shadowbox{ \begin{minipage}{0.9\textwidth}
4326
\mbox{} \vspace{0.2ex}
4327
\begin{center}{\bf\large The {\bf \RH{}} (fourth formulation)}\end{center}
4328
\medskip
4329
4330
All the nontrivial zeroes\index{nontrivial zeroes} of $\zeta(s)$ lie on the vertical
4331
line in the complex plane consisting of the
4332
complex numbers with real part equal to $1/2$.
4333
These zeroes are none other than
4334
${\frac{1}{2}}\pm i\theta_1,{\frac{1}{2}}\pm i\theta_2,
4335
{\frac{1}{2}}\pm i\theta_3,\dots,$ where $\theta_1, \theta_2,
4336
\theta_3, \dots$ comprise the spectrum\index{spectrum} of primes we talked
4337
about in the earlier chapters.
4338
4339
\vspace{1ex}
4340
\end{minipage}}
4341
\end{center}
4342
4343
The ``${\frac{1}{2}}$" that appears in this formula is directly related to the fact---correspondingly conditional on RH---that $\pi(X)$ is ``square-root accurately"\index{square-root accurate} approximated by $\Li(X)$. That is, the error term is bounded by $X^{{\bf{\frac{1}{2}}}+\epsilon}.$ It has been conjectured that {\it all} the zeroes of $\zeta(s)$ are simple zeroes.
4344
4345
4346
Here is how Riemann\index{Riemann, Bernhard} phrased RH:
4347
4348
\begin{quote}
4349
``One now finds indeed approximately this number of real roots
4350
within these limits, and it is very probable that all roots are
4351
real. Certainly one would wish for a stricter proof here; I have
4352
meanwhile temporarily put aside the search for this after some
4353
futile attempts, as it appears unnecessary for the next objective of
4354
my investigation.''
4355
\end{quote}
4356
4357
In the above quotation, Riemann's roots are the $\theta_i$'s and the statement that they are ``real" is equivalent to RH.
4358
4359
The zeta function,\index{zeta function} then, is the vise, that so elegantly clamps
4360
together information about the placement of primes and their
4361
spectrum\index{spectrum}!
4362
4363
4364
That a simple geometric property of these zeroes (lying on a line!) is
4365
directly equivalent to such profound (and more difficult to express)
4366
regularities among prime numbers\index{prime number} suggests that these zeroes and the
4367
parade of Riemann's corrections governed by them---when we truly
4368
comprehend their message---may have lots more to teach us, may
4369
eventually allow us a more powerful understanding of arithmetic. This
4370
infinite collection of complex numbers, i.e., the nontrivial zeroes\index{nontrivial zeroes} of
4371
the Riemann zeta function,\index{zeta function} plays a role with respect to $\pi(X)$ rather
4372
like the role the {\em spectrum}\index{spectrum} of the Hydrogen atom plays in
4373
Fourier's theory. Are the primes themselves no more than an
4374
epiphenomenon, behind which there lies, still veiled from us, a
4375
yet-to-be-discovered, yet-to-be-hypothesized, profound conceptual key
4376
to their perplexing orneriness? Are the many innocently posed, yet
4377
unanswered, phenomenological questions about numbers---such as in the
4378
ones listed earlier---waiting for our discovery of this deeper level
4379
of arithmetic? Or for layers deeper still? Are we, in fact, just at
4380
the beginning?
4381
4382
4383
4384
4385
These are not completely idle thoughts, for a tantalizing analogy
4386
relates the number theory we have been discussing to an already
4387
established branch of mathematics---due, largely, to the work of
4388
Alexander Grothendieck,\index{Grothendieck, Alexandre} and Pierre Deligne\index{Deligne, Pierre}---where the corresponding
4389
analogue of Riemann's hypothesis has indeed been proved\ldots.
4390
4391
4392
4393
\chapter{Companions to the zeta function\index{zeta function}}\label{ch:companions}
4394
4395
Our book, so far, has been exclusively about Riemann's $\zeta(s)$ and
4396
its zeroes. We have been discussing how (the placement of) the zeroes
4397
of $\zeta(s)$ in the complex plane contains the information needed to
4398
understand (the placement of) the primes in the set of all whole
4399
numbers; and conversely.
4400
4401
It would be wrong---we think---if we don't even mention that
4402
$\zeta(s)$ fits into a broad family of similar functions that connect to other problems in number theory.
4403
4404
4405
% for every {\it quadratic number field} $K:={\bf Q}({\sqrt
4406
% d})$, there is a corresponding ``$L$-function'' $L(K,s)$ with an
4407
%expansion of the form $$L(K,s)= \sum_{N=1}^{\infty} a_n(K)\cdot
4408
%n^{-s}$$ convergent for complex numbers $s$ with real part $> 1$---and
4409
%where the coefficients $a_n(K)$ are either $0$ or $\pm 1$. The
4410
%product $\zeta(s)\cdot L(K,s)$ is called the {\it zeta function\index{zeta function} of
4411
% $K$} and is sometimes denoted $\zeta_K(s)$ to distinguish it from
4412
%the Riemann zeta function.\index{zeta function} The ``nontrivial zeroes''\index{nontrivial zeroes} of $\zeta_K(s)$
4413
%contains the information needed to understand the (placement of) the
4414
%norms of prime ideals of the ring of integers of $K$.
4415
4416
For example---instead of the ordinary integers---consider the {\it
4417
Gaussian integers}.\index{Gaussian integers} This is the collection of numbers
4418
$$ \{ a+bi\}$$
4419
where $i = {\sqrt{-1}}$ and $a,b$ are ordinary integers. We can add
4420
and multiply two such numbers and get another of the same form. The
4421
only ``units'' among the Gaussian integers\index{Gaussian integers} (i.e., numbers whose
4422
inverse is again a Gaussian integer) are the four numbers $\pm 1, \pm
4423
i$ and if we multiply any Gaussian integer $a+bi$ by any of these
4424
four units, we get the collection $\{a+bi, -a-bi, -b+ai, b -ai\}$.
4425
We measure the {\it size} of a Gaussian integer by the square of its
4426
distance to the origin, i.e., $$|a+bi|^2 = {a^2+b^2}.$$ This size function is called the {\bf norm} of the Gaussian integer $a+bi$ and can also be thought of as the product of $a+bi$ and its ``conjugate'' $a-bi$. Note that
4427
the norm is a nice multiplicative function on the set of Gaussian
4428
integers, in that the norm of a product of two Gaussian integers\index{Gaussian integers} is
4429
the product of the norms of each of them.
4430
4431
We have a natural notion of {\bf prime Gaussian integer}, i.e., one
4432
with $a>0$ and $b\geq 0$ that cannot be factored\index{factor} as the product of
4433
two Gaussian integers\index{Gaussian integers} of smaller size.
4434
Given that every nonzero Gaussian integer is uniquely expressible as a unit times a product of prime Gaussian integers, can you prove that if a Gaussian integer is a prime
4435
Gaussian integer, then its size must either be an ordinary prime
4436
number, or the square of an ordinary prime number?\index{prime number}
4437
4438
Figure~\ref{fig:gaussian-10} contains a plot of the first few Gaussian
4439
primes as they display themselves amongst complex numbers:
4440
4441
\ill{gaussian_primes-10}{.8}{Gaussian primes with coordinates up to 10\label{fig:gaussian-10}}
4442
4443
Figure~\ref{fig:gaussian-100} plots a much larger number of Gaussian primes:
4444
4445
\ill{gaussian_primes-100}{1}{Gaussian primes with coordinates up to 100\label{fig:gaussian-100}}
4446
4447
Figures~\ref{fig:gaussian-staircase-14}--\ref{fig:gaussian-staircase-10000} plot the number
4448
of Gaussian primes up to each norm:
4449
4450
\ill{gaussian_staircase_14}{.8}{Staircase of Gaussian primes of norm up to 14\label{fig:gaussian-staircase-14}}
4451
\ill{gaussian_staircase_100}{.8}{Staircase of Gaussian primes of norm up to 100}
4452
\ill{gaussian_staircase_1000}{.8}{Staircase of Gaussian primes of norm up to 1000}
4453
\ill{gaussian_staircase_10000}{.8}{Staircase of Gaussian primes of norm up to 10000\label{fig:gaussian-staircase-10000}}
4454
4455
The natural question to ask, then, is: how are the Gaussian prime numbers\index{prime number} distributed? Can one provide as close an estimate to their distribution\index{distribution} and structure, as one has for ordinary primes? The answer, here is yes: there is a companion theory, with an analogue to the Riemann zeta function\index{zeta function} playing a role similar to the prototype $\zeta(s)$. And, it seems as if its ``nontrivial zeroes''\index{nontrivial zeroes} behave similarly: as far as things have been computed, they all have the property that
4456
their real part is equal to ${\frac{1}{2}}$. That is, we
4457
have a companion to the \RH{}.
4458
4459
This is just the beginning of a much larger story related to what has been come to be called the ``Grand Riemann Hypotheses" and connects to analogous problems, some of them actually solved, that give some measure of evidence for the truth of these hypotheses. For example, for any system
4460
of polynomials in a fixed number of variables (with integer
4461
coefficients, say) and for each prime number $p$ there are
4462
``zeta-type'' functions that contain all the information needed to
4463
count the number of simultaneous solutions in finite fields of
4464
characteristic $p$. That such counts can be well-approximated with a
4465
neatly small error term is related to the placement of the zeroes of
4466
these ``zeta-type'' functions. There is then an analogous ``\RH{}''
4467
that prescribes precise conditions on the real parts of their
4468
zeroes---this prescription being called the ``\RH{} for
4469
function fields.'' Now the beauty of this analogous hypothesis is that
4470
it has, in fact, been proved!
4471
4472
Is this yet another reason to believe the Grand \RH{}?
4473
4474
4475
%
4476
%\chapter{Glossary}
4477
%
4478
%Here we give all the connections with the standard literature and
4479
%conventional terminology that we restrained
4480
% ourselves from giving in the text itself.
4481
%
4482
% For the moment the list of entries is the following but it will
4483
% expand.
4484
%
4485
% $\pi(X)= \pi_0(X)$, $Q(X)= \psi_0(X)$, $\log, \exp$, $\delta$,
4486
% distributions,\index{distribution} RSA cryptography, Mersenne prime, $\Li(x)$, random
4487
% walk, spectrum, harmonic, fundamental, frequency, phase, amplitude,
4488
% band-pass, complex numbers, complex plane, Riemann Zeta function\index{zeta function},
4489
% zeroes of zeta.
4490
%
4491
%
4492
% Also mention Brian Conrey's Notices article on RH as ``among the
4493
% best 12 pages of RH survey material that there is---at least for an
4494
% audience of general mathematicians.'' And mention
4495
% mention Sarnak and Bombieri articles at the CMI website on RH.
4496
%
4497
4498
\backmatter
4499
4500
4501
%\eject
4502
%\addcontentsline{toc}{chapter}{Index}
4503
%\printindex
4504
4505
%\label{lastpage}
4506
4507
% no longer in previous chapter, so we increase the counter
4508
% so that figure and other labels aren't for the previous chapter.
4509
\stepcounter{chapter}
4510
\addcontentsline{toc}{chapter}{Endnotes}
4511
4512
\renewcommand\bibname{Endnotes}
4513
4514
\bibliography{biblio}
4515
4516
4517
\printindex
4518
4519
4520
\end{document}
4521
4522
4523
%sagemathcloud={"zoom_width":115,"latex_command":"killall pdflatex; makeindex rh; pdflatex -synctex=1 -interact=nonstopmode rh.tex; bibtex rh"}
4524
4525