
Jupyter notebook exam_ml.ipynb

Kernel: Python 2 (SageMath)

Final Exam

Machine Learning 2015-2

After solving all the questions in the exam, save your notebook as username.ipynb and submit it to: https://www.dropbox.com/request/KN8GwdAIi0Hl2jk2mg2E


The following code implements a simple one-neuron neural network:

import numpy as np
import pylab as pl
%matplotlib inline

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def predict(w, x):
    x = np.append(np.array([1]), x)
    return sigmoid(np.dot(w, x))
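For a two-dimensional input $x=(x_{1},x_{2})$ this network computes $f(w,x)=\sigma(w_{0}+w_{1}x_{1}+w_{2}x_{2})$, where $\sigma$ is the logistic sigmoid and $w_{0}$ is the bias weight multiplying the constant input that predict prepends.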

1. (1.0)

Find a weight vector such that the neural network calculates the NOR function:

$f(x,y)=\neg(x\vee y)$

Use the following function to test your answer:

def test_prediction(X, Y, w):
    epsilon = 0.001
    for i, x in enumerate(X):
        if np.abs(predict(w, x) - Y[i]) > epsilon:
            raise Exception("Prediction error")
    return True

X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [1, 0, 0, 0]
w = np.array([10, -20, -20])
test_prediction(X, Y, w)
True
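With $w=(10,-20,-20)$ the pre-activations for the four inputs are $10$, $-10$, $-10$ and $-30$, and since $\sigma(10)\approx0.99995$ and $\sigma(-10)\approx4.5\times10^{-5}$, every prediction is within the $0.001$ tolerance of the corresponding NOR value.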

2. (1.0)

The following function calculates the loss function of the neural network:

def loss(w, x, y): return (predict(w, x) - y) ** 2 / 2

Write a function that calculates the gradient of the loss with respect to the weights:

$\frac{\partial E}{\partial w}$
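Since $E=\frac{1}{2}(f(w,x)-y)^{2}$ and $f(w,x)=\sigma(w\cdot\tilde{x})$ with $\tilde{x}=(1,x)$ the bias-augmented input, the chain rule gives

$$\frac{\partial E}{\partial w}=(f(w,x)-y)\,\sigma'(w\cdot\tilde{x})\,\tilde{x}=(f(w,x)-y)\,f(w,x)\,(1-f(w,x))\,\tilde{x}.$$

Note that dsig below takes the sigmoid's output (not its input) as its argument.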
def dsig(x):
    # derivative of the sigmoid expressed in terms of its output: sigma'(z) = sigma(z) * (1 - sigma(z))
    return x * (1 - x)

def de_dw(w, x, y):
    # error signal: (prediction - target) * sigma'(pre-activation)
    val = (predict(w, x) - y) * dsig(predict(w, x))
    x = np.append(np.array([1]), x)  # bias-augmented input
    return val * x

Use the following functions to test your code:

def num_de_dw(w, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (loss(w + deltas[i, :], x, y) - loss(w - deltas[i, :], x, y)) / (2 * epsilon)
    return de

def test_de_dw():
    num_tests = 100
    epsilon = 0.0001
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        print(de_dw(tw, tx, ty))
        print(num_de_dw(tw, tx, ty, epsilon))
        if np.linalg.norm(de_dw(tw, tx, ty) - num_de_dw(tw, tx, ty, epsilon)) > epsilon:
            raise Exception("de_dw test failed!")

test_de_dw()
[ 0.00361838 0.00278242 -0.0038118 ]
[ 0.00361838 0.00278242 -0.0038118 ]
[-0.04035732 -0.05140127 0.04998526]
[-0.04035732 -0.05140127 0.04998526]
[ 0.02188831 0.01112613 0.02292367]
[ 0.02188831 0.01112613 0.02292367]
[ 0.03404092 -0.00240233 -0.05807317]
[ 0.03404092 -0.00240233 -0.05807317]
... (output truncated: the remaining 96 analytical/numerical gradient pairs match in the same way)
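The test above compares the analytical gradient with a central-difference estimate, $\frac{\partial E}{\partial w_{i}}\approx\frac{E(w+\epsilon e_{i},x,y)-E(w-\epsilon e_{i},x,y)}{2\epsilon}$; as the printed pairs show, the two agree to within the tolerance $\epsilon$.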

Now, we can use the gradient function to train the neural network using gradient descent.
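Each epoch accumulates the gradient over the whole training set and takes a step against it,

$$w \leftarrow w-\eta\sum_{(x_{i},y_{i})\in D}\frac{\partial E(w,x_{i},y_{i})}{\partial w},$$

where $\eta$ is the learning rate (eta in the code).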

def evaluate(w, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += loss(w, x, Y[i])
    return result

def train(X, Y, epochs, eta, w_ini):
    losses = []
    w = w_ini
    for epoch in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += de_dw(w, x, Y[i])
        w = w - eta * delta
        losses.append(evaluate(w, X, Y))
    return w, losses
X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [0, 0, 1, 0]
w, losses = train(X, Y, 50, 10, [0, 0, 0])
pl.plot(losses)
print w
print predict(w, [1, 0])
print predict(w, [0, 1])
[-2.75411492 5.35754647 -5.59430763]
0.93108210251
0.000236713671518
[Plot: training loss per epoch]

3. (1.0)

Now we will modify the loss function to include a regularization term:

$$E(w,D)=\frac{1}{2}\sum_{(x_{i},y_{i})\in D}\left(f(w,x_{i})-y_{i}\right)^{2}+\frac{1}{2}\beta\left\Vert w\right\Vert _{2}^{2}$$

where $f(w,x_{i})$ is the prediction calculated by the neural network.
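Since $\frac{\partial}{\partial w}\left(\frac{1}{2}\beta\left\Vert w\right\Vert _{2}^{2}\right)=\beta w$, the gradient of the regularized loss is simply the unregularized gradient plus $\beta w$.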

To accomplish this you must modify the following functions:

def reg_loss(w, beta, x, y):
    # squared-error loss plus the (1/2) * beta * ||w||^2 regularization term
    return ((predict(w, x) - y) ** 2) / 2 + 0.5 * beta * np.dot(w, w)

def reg_de_dw(w, beta, x, y):
    # gradient of reg_loss: the unregularized gradient plus beta * w
    return de_dw(w, x, y) + beta * w

You can use the following functions to test your code:

def reg_num_de_dw(w, beta, x, y, epsilon):
    deltas = np.identity(len(w)) * epsilon
    de = np.zeros(len(w))
    for i in range(len(w)):
        de[i] = (reg_loss(w + deltas[i, :], beta, x, y) - reg_loss(w - deltas[i, :], beta, x, y)) / (2 * epsilon)
    return de

def reg_test_de_dw():
    num_tests = 100
    epsilon = 0.0001
    beta = 1
    for i in range(num_tests):
        tw = np.random.randn(3)
        tx = np.random.randn(2)
        ty = np.random.randn(1)
        if np.linalg.norm(reg_de_dw(tw, beta, tx, ty) - reg_num_de_dw(tw, beta, tx, ty, epsilon)) > epsilon:
            raise Exception("reg_de_dw test failed!")

reg_test_de_dw()

4. (1.0)

Now train the neural network using regularization:

def reg_evaluate(w, beta, X, Y):
    result = 0
    for i, x in enumerate(X):
        result += reg_loss(w, beta, x, Y[i])
    return result

def reg_train(X, Y, epochs, eta, w_ini, beta):
    losses = []
    w = np.array(w_ini)
    for epoch in range(epochs):
        delta = np.zeros(len(w))
        for i, x in enumerate(X):
            delta += reg_de_dw(w, beta, x, Y[i])
        w = w - eta * delta
        losses.append(reg_evaluate(w, beta, X, Y))
    return w, losses
X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [0, 0, 1, 0]
wr, losses = reg_train(X, Y, 50, 2, [0, 0, 0], 0.01)
pl.plot(losses)
print wr
print predict(wr, [1, 0])
print predict(wr, [0, 1])
[-1.73530979 3.28436986 -3.55201031]
0.824777934667
0.00502986236723
[Plot: training loss per epoch with regularization]

What is the effect of regularization? Discuss.
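As a quick, rough probe for the discussion (a minimal sketch, assuming the cells above have been run so that the unregularized weights w and the regularized weights wr are still defined), one could compare the two solutions directly:

# rough comparison of the unregularized (w) and regularized (wr) solutions from the runs above
print np.linalg.norm(w), np.linalg.norm(wr)    # magnitude of each weight vector
print predict(w, [1, 0]), predict(wr, [1, 0])  # confidence on the positive example

Regularization penalizes large weights, so the regularized run typically ends with a smaller weight vector and less saturated predictions, as the two outputs above already suggest.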

5. (1.0)

Here, we will build a kernel version of the previous neural network, i.e., a neural network able to work in a feature space induced by a kernel. To do this we will express the weight vector as a linear combination of vectors in a set $X$:

$$w=\sum_{x_{i}\in X}\alpha_{i}\phi(x_{i})$$
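By linearity of the inner product, $w\cdot\phi(x)=\sum_{x_{i}\in X}\alpha_{i}\,\phi(x_{i})\cdot\phi(x)=\sum_{x_{i}\in X}\alpha_{i}\,k(x_{i},x)$, so in principle the prediction and the norm of $w$ can be computed from kernel evaluations alone, without ever forming $\phi$ explicitly.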

Now, implement this modifying the following functions:

def k_predict(alpha, X, kernel, x):
    weight = np.dot(alpha, kernel(X, x))
    x = np.append(np.array([1]), x)
    result = np.dot(weight, x)
    return sigmoid(result)

def k_loss(alpha, X, beta, kernel, x, y):
    w = np.dot(alpha, kernel(X, x))
    return ((k_predict(alpha, X, kernel, x) - y) ** 2) / 2 + (1/4 * beta * np.dot(w, w))

Test your functions with the following code:

alpha = [1, 0.5, -0.3, -0.4]
Xs = [[0.1, -0.5], [0.5, 1.0], [-1.0, 0.5], [1.0, 1.0]]

def k1(x, y):
    # linear kernel
    return np.dot(x, y)

def k2(x, y):
    # inhomogeneous polynomial kernel of degree 2
    return (np.dot(x, y) + 1) ** 2

X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [0, 0, 1, 0]

for i, x in enumerate(X):
    print k_predict(alpha, Xs, k1, x), k_loss(alpha, Xs, 1, k1, x, Y[i])
print "--------"
for i, x in enumerate(X):
    print k_predict(alpha, Xs, k2, x), k_loss(alpha, Xs, 1, k2, x, Y[i])
[ 0.5 0.5 0.5] [ 0.125 0.125 0.125]
[ 0.36586441 0.5 0.36586441] [ 0.06692838 0.125 0.06692838]
[ 0.5621765 0.5621765 0.5 ] [ 0.09584471 0.09584471 0.125 ]
[ 0.42555748 0.42555748 0.42555748] [ 0.09054959 0.09054959 0.09054959]
--------
[ 0.68997448 0.5 0.5 ] [ 0.23803239 0.125 0.125 ]
[ 0.49375033 0.5 0.49375033] [ 0.12189469 0.125 0.12189469]
[ 0.67590153 0.67590153 0.5 ] [ 0.05251991 0.05251991 0.125 ]
[ 0.45264238 0.45264238 0.45264238] [ 0.10244256 0.10244256 0.10244256]

6. (optional, extra credit)

Train the kernel neural network using gradient descent.

def k_de_dw(alpha, X, beta, kernel, x, y):
    w = np.dot(alpha, kernel(X, x))
    val = (k_predict(alpha, X, kernel, x) - y) * dsig(k_predict(alpha, X, kernel, x))
    x = np.append(np.array([1]), x)
    delta = val * x
    delta = delta + (1/2 * beta * w)
    return delta
def k_evaluate(alpha, X, beta, kernel, X_, Y):
    result = 0
    for i, x in enumerate(X_):
        result += k_loss(alpha, X, beta, kernel, x, Y[i])
    return result

def k_train(X, Y, epochs, eta, w_ini, beta, alpha, X_, kernel):
    losses = []
    w = np.array(w_ini)
    for epoch in range(epochs):
        delta = 0
        for i, x in enumerate(X_):
            delta += k_de_dw(alpha, X, beta, kernel, x, Y[i])
        w = w - eta * delta
        losses.append(k_evaluate(alpha, X, beta, kernel, X_, Y))
    return w, losses
alpha = [1, 0.5, -0.3, -0.4]
Xs = [[0.1, -0.5], [0.5, 1.0], [-1.0, 0.5], [1.0, 1.0]]

def k1(x, y):
    return np.dot(x, y)

def k2(x, y):
    return (np.dot(x, y) + 1) ** 2

X = [[0, 0], [0, 1], [1, 0], [1, 1]]
Y = [0, 0, 1, 0]
wr, losses = k_train(Xs, Y, 50, 2, [0, 0, 0], 0.01, alpha, X, k2)
pl.plot(losses)
print wr
print predict(wr, [1, 0])
print predict(wr, [0, 1])
[-31.2159388 -4.11488599 -23.55637307]
4.52916096464e-16
1.63187614121e-24
[Plot: training loss per epoch for the kernel network]