#!/usr/bin/python1# Import libraries needed2import sys3import numpy as np4#from sklearn.svm import SVR5#from sklearn.neighbors import KNeighborsRegressor6from sklearn.kernel_ridge import KernelRidge7from sklearn.linear_model import Ridge8from sklearn.model_selection import GridSearchCV9from sklearn.metrics import classification_report10from sklearn.svm import SVC11import argparse12import matplotlib13matplotlib.use('Agg')14import matplotlib.pyplot as plt15from sklearn.preprocessing import PolynomialFeatures16from sklearn.pipeline import make_pipeline17from sklearn import linear_model18from __future__ import print_function19from sklearn import datasets20from sklearn.model_selection import train_test_split2122#Create the argument parser23parser = argparse.ArgumentParser (description = "ploting a polynomial fit of x and y data values")24parser.add_argument("input")25parser.add_argument("degree", type = int)26#parser.add_argument("C", type = int)27arguments = parser.parse_args()282930# Load the traing and validation sets31validate_data = arguments.input + ".validate.xy"32training_data = arguments.input + ".training.xy"33plot_name = arguments.input + ".simple_polynomial_fit.png"3435#Create an empty list of x and y values and load in the text file and interpret it as an array (2 coloumns, x and y, with 100 rows)36tx_y_data = np.loadtxt(training_data, delimiter = '\t')37# This x_y_data array is two columns, just as the text file was. Split the input array into an array for x and a separate one for y38xtdata = tx_y_data[:,0] #all rows in the 0th column39ytdata = tx_y_data[:,1] #all4041# X and y data needs to be numpy array [n_samples,n_features]. For simple x-y data, this is [100,1] (if we have 100 data points) [rows,columns]42#print the shape of the x data. As of now it is a list with 100 elements. As read from disk these 1-D arrays are size [100, ]. *Convert a simple list to a [1,100] array, and then transpose to make it [100,1]*43#This is done because the previous code is just a list with no dimensiality. Turns into an array with a long row of 100 elements. The transform makes it 100 rows with 1 column, for both x and y.44xtdata = np.array([xtdata])45xtdata = xtdata.T46#Print the shape of the x data4748#repeat for y data49#ytdata = np.array([ytdata])50#ytdata = ytdata.T51#print the shape of y data5253#Repeat the process above for validate file54vx_y_data = np.loadtxt(validate_data, delimiter = '\t')55# This x_y_data array is two columns, just as the text file was. Split the input array into an array for x and a separate one for y56x_vdata = vx_y_data[:,0] #all rows in the 0th column57y_vdata = vx_y_data[:,1] #all5859# X and y data needs to be numpy array [n_samples,n_features]. For simple x-y data, this is [100,1] (if we have 100 data points) [rows,columns]60#print the shape of the x data. As of now it is a list with 100 elements. As read from disk these 1-D arrays are size [100, ]. *Convert a simple list to a [1,100] array, and then transpose to make it [100,1]*61x_vdata = np.array([x_vdata])62x_vdata = x_vdata.T63#Print the shape of the x data6465#repeat for y data66#y_vdata = np.array([y_vdata])67#y_vdata = y_vdata.T686970fit_stdev = list()71# Define the model that we will be using to do the fit72#regr = linear_model.LinearRegression(fit_intercept=True) # just a simple LinearRegression fit73#regr = make_pipeline( PolynomialFeatures(degree=arguments.degree), linear_model.LinearRegression(fit_intercept=True) )74#regr = SVR(degree=arguments.degree, gamma='auto', coef0=0.0, tol=10, C=arguments.C, epsilon=35, shrinking=True, cache_size=200, verbose=False, max_iter=-1, kernel='poly')75#regr = NearestNeighbors(n_neighbors=2, algorithm='auto').fit(x_vdata)76#distances, indices = nbrs.kneighbors(X)77#regr = KNeighborsRegressor(n_neighbors=10, weights='distance', algorithm='auto', leaf_size=30, p=1, metric='minkowski', metric_params=None, n_jobs=1)78kr = KernelRidge(alpha=1.0, coef0=1, kernel='polynomial', gamma=None, degree = arguments.degree, kernel_params=None)79kr.fit (xtdata, ytdata)80fit_stdev.append( np.sqrt( np.mean((kr.predict(x_vdata) - y_vdata) ** 2) ) )81print "The standard deviation of the fit is:", fit_stdev8283print(__doc__)8485# Loading the Digits dataset86digits = datasets.load_digits()8788# To apply an classifier on this data, we need to flatten the image, to89# turn the data in a (samples, feature) matrix:90n_samples = len(digits.images)91X = digits.images.reshape((n_samples, -1))92y = digits.target9394# Split the dataset in two equal parts95#X_train, X_test, y_train, y_test = train_test_split(96# X, y, test_size=0.5, random_state=0)9798# Set the parameters by cross-validation99tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],100'C': [1, 10, 100, 1000]},101{'kernel': ['polynomial'], 'C': [1, 10, 100, 1000]}]102scores = ['precision', 'recall']103for score in scores:104print("# Tuning hyper-parameters for %s" % score)105print()106107clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=5,108scoring='%s_macro' % score)109clf.fit(xtdata, ytdata)110111print("Best parameters set found on development set:")112print()113print(clf.best_params_)114print()115print("Grid scores on development set:")116print()117means = clf.cv_results_['mean_test_score']118stds = clf.cv_results_['std_test_score']119for mean, std, params in zip(means, stds, clf.cv_results_['params']):120print("%0.3f (+/-%0.03f) for %r"121% (mean, std * 2, params))122print()123124print("Detailed classification report:")125print()126print("The model is trained on the full development set.")127print("The scores are computed on the full evaluation set.")128print()129y_true, y_pred = y_test, clf.predict(X_test)130print(classification_report(y_true, y_pred))131print()132133R = Ridge(alpha=1.0,fit_intercept=False)134kr.fit(xtdata, ytdata)135R.fit(xtdata, ytdata)136print np.dot(xtdata.transpose(),kr.dual_coef_)137print R.coef_138139#fit_coefficients = regr.coef_ to pull out the linear regression140#fit_coefficients = regr.named_steps['linearregression'].coef_141#print 'The coefficients of the regression are' +str(fit_coefficients)142#fit_intercept = regr.intercept_143#fit_intercept = regr.named_steps['linearregression'].intercept_144#print "The intercept of the line is %d" % (fit_intercept)145146# Evaluate how well the model did, usually using the validation data147# Now evaluate how the model performs on the validation data. Calculate the mean squared error, predict and fit will take in data, calculate a y and x and compare. In prediction, use those parameters with given x data to spit out some y data. Take a difference between that and the true y value to get the errors148mean_sq_error = np.mean((kr.predict(x_vdata) - y_vdata) ** 2)149sqrt_mean_sq_error = (mean_sq_error) ** .5150# The variance score, determined by the ML fitting: 1 is perfect prediction. Score uses the fit results we have to determine a normalized mean squared erro\]\AASASDF" wwq\`"AS151variance_score = kr.score(x_vdata, y_vdata)152print "The varience score is %d" % (variance_score)153print "The mean sqaured error of the fit is %d" % (mean_sq_error)154print "The square root of the mean squared error is %d" % (sqrt_mean_sq_error)155#print 'regr is:\n', regr156157# Plot training data158plt.plot (xtdata, ytdata, 'bo', label="Training Data")159# Plot validation data160plt.plot (x_vdata, y_vdata, 'ro', label="Validation Data")161# Plot fit line162plt.plot (x_vdata, kr.predict(x_vdata), label="Fit Function with degree %d." % (arguments.degree))163#Create a legend164plt.legend(shadow=True, loc="upper left")165166#Lable the plot167plt.xlabel('Time (sec)')168plt.ylabel('Signal Strength')169plt.title('Signal Strength vs. Time for Relative Activation Energies')170plt.savefig("kernel_ridge_poly_fit.png")171172173174175176