— tags: “机器学习” —
一:返回一个5阶单位矩阵
1 2 3 4 5 6 7
import numpy as np


def warmupExercise():
    """Print and return the 5x5 identity matrix.

    Returns:
        The 5x5 array produced by ``np.eye(5)`` (ones on the main diagonal,
        zeros elsewhere).
    """
    E5 = np.eye(5)
    print('这是一个五阶单位矩阵')
    print(E5)
    # Hand the matrix back as well as printing it, so callers can reuse it.
    return E5


warmupExercise()
|
二:线性回归
1.含有一个变量,大意是:假如你是一个饭店老板,要在其他城市拓展业务,现有数据在ex1data1.txt中,第一列是人口,第二列是收益
导包
1 2 3
| import numpy as np import matplotlib.pyplot as plt import pandas as pd
|
将数据读取,进行展示
1 2 3 4
# Read the single-variable dataset; `names` supplies the two column labels.
column_names = ['Population', 'Profit']
data = pd.read_csv('ex1data1.txt', names=column_names)

# Summary statistics for both columns.
data.describe()

# Scatter plot of profit against population.
data.plot(kind='scatter', x='Population', y='Profit')
plt.show()
|
定义损失函数:
1 2 3 4 5 6
def computeCost(X, y, theta):
    """Return the squared-error cost sum((X @ theta.T - y) ** 2) / (2 * m).

    Args:
        X: Design matrix of shape (m, n), one row per sample.
        y: Column of targets, shape (m, 1).
        theta: Row of parameters, shape (1, n).

    Returns:
        The scalar cost, where m is ``len(X)``.
    """
    # `@` is matrix multiplication for np.matrix and 2-D ndarray alike;
    # `*` is element-wise for ndarray, so it only worked for np.matrix.
    residual = (X @ theta.T) - y
    return np.sum(np.power(residual, 2)) / (2 * len(X))
|
为了能够直接进行矩阵相乘,在数据最前面增加一列全为1的列
1 2
# Prepend a constant column of ones so the intercept term takes part in the
# matrix product like any other feature.
data.insert(loc=0, column='Ones', value=1)
|
将数据分割出来,0-1列是变量x,2列是y
1 2 3 4
# The last column is the target; every column before it is a feature.
cols = data.shape[1]
print(cols)
last = cols - 1
X = data.iloc[:, :last]
y = data.iloc[:, last:cols]
|
3
转化成matrix类型
1 2 3
# Convert the DataFrame slices to np.matrix and start theta as a 1x2 row of zeros.
X, y = np.matrix(X.values), np.matrix(y.values)
theta = np.matrix([0, 0])
|
1
# Check the dimensions of the design matrix, targets and parameters.
tuple(mat.shape for mat in (X, y, theta))
|
计算代价函数
设置梯度下降
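公式: $\theta_j := \theta_j - \alpha \frac{1}{m} \sum_{i=1}^{m} \left(h_\theta(x^{(i)}) - y^{(i)}\right) x_j^{(i)}$,其中 $h_\theta(x) = \theta^T x$,对所有 $j$ 同时更新。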
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
def gradientDescent(X, y, theta, alpha, epoch):
    """Run batch gradient descent and return ``(theta, cost)``.

    Args:
        X: Design matrix of shape (m, n), one row per sample.
        y: Column of targets, shape (m, 1).
        theta: Initial parameters as a row, shape (1, n).
        alpha: Learning rate.
        epoch: Number of update steps to perform.

    Returns:
        A tuple ``(theta, cost)``: the parameters after the last update and
        an array of length ``epoch`` where ``cost[i]`` is the value of
        ``computeCost`` after update ``i``.
    """
    cost = np.zeros(epoch)
    m = X.shape[0]

    for i in range(epoch):
        # All parameters are updated at once from the same residual vector.
        gradient_step = ((alpha / m) * (X @ theta.T - y).T) @ X
        theta = theta - gradient_step
        cost[i] = computeCost(X, y, theta)

    return theta, cost
|
设置学习率和迭代次数
1 2
# Learning rate and number of gradient-descent iterations.
alpha, epoch = 0.01, 1000
|
1
# Fit the single-variable model; `cost` records the cost after every step.
final_theta, cost = gradientDescent(
    X=X, y=y, theta=theta, alpha=alpha, epoch=epoch
)
|
计算最后的损失
1
# Cost of the fitted parameters on the training data.
computeCost(X=X, y=y, theta=final_theta)
|
绘制线性模型以及数据,直观地看出它的拟合。
np.linspace()在指定的间隔内返回均匀间隔的数字。
1 2 3 4 5 6 7 8 9 10
# Evaluate the fitted line at 100 evenly spaced points across the observed
# population range.
x = np.linspace(data.Population.min(), data.Population.max(), 100)
intercept, slope = final_theta[0, 0], final_theta[0, 1]
f = intercept + slope * x

# Draw the prediction line over the training points.
fig, ax = plt.subplots(figsize=(6, 4))
ax.set_title('Predicted Profit vs. Population Size')
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data['Population'], data.Profit, label='Traing Data')
ax.legend(loc=2)
plt.show()
|
将cost绘制出来
1 2 3 4 5 6
# Plot the recorded cost against the iteration index.
iterations = np.arange(epoch)
fig, ax = plt.subplots(figsize=(8, 4))
ax.set_title('Error vs. Training Epoch')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.plot(iterations, cost, 'r')
plt.show()
|
2.多个变量:ex1data2.txt第一列是房子大小,第二列是房子卧室数量,第三列是房子价格。目标是预测房价。
1 2 3
# Read the multi-variable dataset; `names` supplies the three column labels.
path = 'ex1data2.txt'
data2 = pd.read_csv(path, names=['Size', 'Bedrooms', 'Price'])

# Preview the first rows.
data2.head()
|
预处理步骤 - 特征归一化
1 2
# Standardise every column (features and target alike): subtract the column
# mean and divide by the column standard deviation.
column_mean = data2.mean()
column_std = data2.std()
data2 = (data2 - column_mean) / column_std

# Preview the first rows after scaling.
data2.head()
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# Prepend the constant column used for the intercept term.
data2.insert(loc=0, column='Ones', value=1)

# Last column is the target, everything before it is a feature.
cols = data2.shape[1]
X2 = np.matrix(data2.iloc[:, :cols - 1].values)
y2 = np.matrix(data2.iloc[:, cols - 1:cols].values)

# One parameter per column of X2 (intercept plus two features).
theta2 = np.matrix(np.array([0, 0, 0]))

# Fit with the same learning rate and iteration count as the first model.
g2, cost2 = gradientDescent(X2, y2, theta2, alpha, epoch)

# Final cost together with the fitted parameters.
(computeCost(X2, y2, g2), g2)
|
绘制代价函数
1 2 3 4 5 6
# Plot the multi-variable model's recorded cost against the iteration index.
iterations = np.arange(epoch)
fig, ax = plt.subplots(figsize=(12, 8))
ax.set_title('Error vs. Training Epoch')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.plot(iterations, cost2, 'r')
plt.show()
|
利用sklearn自带的线性回归
1 2 3
from sklearn import linear_model

model = linear_model.LinearRegression()
# X and y were built as np.matrix earlier in this file; recent scikit-learn
# releases reject np.matrix input, so pass plain ndarrays instead.
model.fit(np.asarray(X), np.asarray(y))
|
1 2 3 4 5 6 7 8 9 10 11
# Work on a plain ndarray: recent scikit-learn releases reject np.matrix
# input to predict().
X_arr = np.asarray(X)
x = X_arr[:, 1]  # column 1 of X is plotted on the horizontal axis
f = model.predict(X_arr).flatten()

# Draw the scikit-learn prediction over the training points.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data.Population, data.Profit, label='Traning Data')
ax.legend(loc=2)
ax.set_xlabel('Population')
ax.set_ylabel('Profit')
ax.set_title('Predicted Profit vs. Population Size')
plt.show()
|
直接求解的方法
1 2 3 4
def normalEpn(X, y):
    """Return the least-squares parameters for ``X @ theta ~= y``.

    Args:
        X: Design matrix of shape (m, n), one row per sample.
        y: Targets, shape (m, 1) (or (m,) for a 1-D result).

    Returns:
        theta with shape (n, 1) when ``y`` is (m, 1), i.e. a column, in the
        same layout the expression ``inv(X.T @ X) @ X.T @ y`` produces.
    """
    # lstsq minimises the same squared error as the normal equation
    # inv(X.T @ X) @ X.T @ y, but never forms the inverse, so it still
    # returns a solution when X.T @ X is singular (e.g. duplicated features).
    theta, _residuals, _rank, _singular_values = np.linalg.lstsq(X, y, rcond=None)
    return theta
|
1 2
final_theta2 = normalEpn(X, y)
# Show the normal-equation solution just computed (not `final_theta`, which
# holds the gradient-descent result).
final_theta2
|