data1.insert(0, 'ones', 1)
X = data1.values[:, :-1]
y = data1.values[:, -1]
theta = np.zeros(X.shape[1])
def sigmoid(x):
    # Logistic (sigmoid) function
    return 1 / (1 + np.exp(-x))
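As a quick sanity check (a small addition, not part of the original exercise), sigmoid(0) should be exactly 0.5 and large positive/negative inputs should saturate toward 1 and 0:

sigmoid(0)                        # 0.5
sigmoid(np.array([-10, 0, 10]))   # roughly [0.00005, 0.5, 0.99995]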
def costFunction(theta, X, y):
    h = sigmoid(np.dot(X, theta))
    return np.mean(-y * np.log(h) - (1 - y) * np.log(1 - h))
costFunction(theta, X, y)
0.6931471805599453
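This is the cross-entropy cost implemented above; with the initial theta of all zeros, every hypothesis output is 0.5, so the cost is ln 2 ≈ 0.6931:

J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\Big[y^{(i)}\log h_\theta(x^{(i)}) + (1 - y^{(i)})\log\big(1 - h_\theta(x^{(i)})\big)\Big], \qquad h_\theta(x) = \sigma(\theta^{T} x)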
Define the gradient function
def gradient(theta, X, y):
    return (1 / X.shape[0]) * np.dot(X.T, sigmoid(np.dot(X, theta)) - y)
gradient(theta, X, y)
array([ -0.1       , -12.00921659, -11.26284221])
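The values above are the gradient evaluated at the initial theta. The vectorized form computed by the function is:

\nabla J(\theta) = \frac{1}{m}\,X^{T}\big(\sigma(X\theta) - y\big), \qquad \frac{\partial J}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\big(h_\theta(x^{(i)}) - y^{(i)}\big)\,x_j^{(i)}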
Solve with an optimizer
import scipy.optimize as opt

res = opt.minimize(fun=costFunction, x0=theta, args=(X, y), jac=gradient, method='Newton-CG')
res
     fun: 0.20349771251305832
     jac: array([1.68639010e-05, 9.03344162e-04, 8.76022414e-04])
 message: 'Optimization terminated successfully.'
    nfev: 71
    nhev: 0
     nit: 28
    njev: 240
  status: 0
 success: True
       x: array([-25.1527642 ,   0.20616308,   0.20140236])
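For comparison, a minimal hand-rolled gradient-descent loop is sketched below (not part of the original solution; the learning rate alpha and iteration count are illustrative assumptions, and because the exam scores are unscaled, plain gradient descent converges far more slowly than Newton-CG here):

# Illustrative gradient-descent sketch; alpha and n_iters are assumed values
def gradient_descent(X, y, alpha=0.001, n_iters=200000):
    theta = np.zeros(X.shape[1])
    for _ in range(n_iters):
        theta = theta - alpha * gradient(theta, X, y)   # step opposite the gradient
    return theta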
Verification
theta_result = res.x
sigmoid(np.dot(np.array([1, 45, 85]), theta_result))
0.776220348464748
That is, a student scoring 45 and 85 on the two exams is predicted to be admitted with probability of roughly 0.78.
def predict(X, theta):
    return (sigmoid(np.dot(X, theta)) >= 0.5).astype(int)

y_pred = predict(X, theta_result)

from sklearn.metrics import classification_report
print(classification_report(y, y_pred))
             precision    recall  f1-score   support

        0.0       0.87      0.85      0.86        40
        1.0       0.90      0.92      0.91        60

avg / total       0.89      0.89      0.89       100
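A quick overall accuracy check (a small addition, not in the original write-up) should agree with the report above, about 0.89:

accuracy = (y_pred == y).mean()   # fraction of training examples classified correctly
print(accuracy)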
import matplotlib.pyplot as plt

x = [X[:, 1].min(), X[:, 1].max()]
y_boundary = [-(theta_result[0] + theta_result[1] * x[0]) / theta_result[2],
              -(theta_result[0] + theta_result[1] * x[1]) / theta_result[2]]

_, ax = plt.subplots(figsize=(10, 6))
data1[data1['Admission'] == 0].plot(x='Exam 1 score', y='Exam 2 score', kind='scatter',
                                    c='red', marker='o', ax=ax, label='Not admitted')
data1[data1['Admission'] == 1].plot(x='Exam 1 score', y='Exam 2 score', kind='scatter',
                                    c='blue', marker='x', ax=ax, label='Admitted')
ax.plot(x, y_boundary)
plt.show()
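The straight line plotted above is the decision boundary, obtained by setting the sigmoid argument to zero and solving for the second exam score:

\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0 \;\Longrightarrow\; x_2 = -\frac{\theta_0 + \theta_1 x_1}{\theta_2}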