本文共 8602 字,大约阅读时间需要 28 分钟。
logit function
import numpy as np


def logit(p):
    """Return the log-odds ``log(p / (1 - p))`` of *p*.

    Parameters
    ----------
    p : float or array-like
        Value(s) to transform; the expression is evaluated element-wise.
        Plain Python sequences are accepted and converted to a NumPy array.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The log-odds, with the same shape as *p*.
    """
    p = np.asarray(p, dtype=float)
    return np.log(p / (1 - p))


def main():
    """Plot ``logit(p)`` for p = 0.001, 0.002, ..., 0.999."""
    # Imported lazily so that importing this module only requires NumPy.
    import matplotlib.pyplot as plt

    p = np.arange(0.001, 1, 0.001)
    lp = logit(p)

    plt.plot(p, lp)
    plt.axhline(0, color='k')
    plt.xlim(-0.1, 1.1)
    plt.ylim(-7, 7)
    plt.xlabel('p')
    plt.ylabel('logit(p)')
    plt.xticks([0.0, 0.5, 1.0])
    ax = plt.gca()
    ax.xaxis.grid(True)
    plt.show()


if __name__ == "__main__":
    main()
logistic function
即 sigmoid 函数:$\phi(z) = \dfrac{1}{1 + e^{-z}}$,其中 $z = \theta^{T} x$;它是 logit 函数的反函数。
在 scikit-learn 中,训练好的模型的 clf.coef_(各特征的权重)和 clf.intercept_(截距项)合起来就是参数 θ。
import numpy as np


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of *z*.

    Parameters
    ----------
    z : float or array-like
        Input value(s); evaluated element-wise. Plain Python sequences are
        accepted and converted to a NumPy array.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in the interval [0, 1], with the same shape as *z*.
    """
    z = np.asarray(z, dtype=float)
    return 1.0 / (1.0 + np.exp(-z))


def main():
    """Plot the sigmoid curve for z = -7.0, -6.9, ..., 6.9."""
    # Imported lazily so that importing this module only requires NumPy.
    import matplotlib.pyplot as plt

    z = np.arange(-7, 7, 0.1)
    phi_z = sigmoid(z)

    plt.plot(z, phi_z)
    plt.axvline(0.0, color='k')
    plt.ylim(-0.1, 1.1)
    plt.xlabel('z')
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    plt.ylabel(r'$\phi (z)$')
    plt.yticks([0.0, 0.5, 1.0])
    ax = plt.gca()
    ax.yaxis.grid(True)
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
plot cost function J
import numpy as np


def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` of *z* (element-wise)."""
    z = np.asarray(z, dtype=float)
    return 1.0 / (1.0 + np.exp(-z))


def cost_1(z):
    """Return the cost ``-log(sigmoid(z))`` of a sample whose label is 1.

    Evaluated as ``log(1 + exp(-z))`` with ``np.logaddexp``. This is the same
    quantity, but it stays finite where ``sigmoid(z)`` rounds to exactly 0.
    """
    return np.logaddexp(0.0, -np.asarray(z, dtype=float))


def cost_0(z):
    """Return the cost ``-log(1 - sigmoid(z))`` of a sample whose label is 0.

    Evaluated as ``log(1 + exp(z))`` with ``np.logaddexp``. This is the same
    quantity, but it stays finite where ``sigmoid(z)`` rounds to exactly 1.
    """
    return np.logaddexp(0.0, np.asarray(z, dtype=float))


def main():
    """Plot both per-sample costs against the sigmoid output phi(z)."""
    # Imported lazily so that importing this module only requires NumPy.
    import matplotlib.pyplot as plt

    z = np.arange(-10, 10, 0.1)
    phi_z = sigmoid(z)

    # The cost functions are vectorised, so no per-element loop is needed.
    c1 = cost_1(z)
    plt.plot(phi_z, c1, label='J(w) if y=1')

    c0 = cost_0(z)
    plt.plot(phi_z, c0, linestyle='--', label='J(w) if y=0')

    plt.ylim(0.0, 5.1)
    plt.xlim([0, 1])
    # Raw string: "\p" is not a valid escape sequence in a normal literal.
    plt.xlabel(r'$\phi$(z)')
    plt.ylabel('J(w)')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
Logistic Regression 手写
import numpy as np


class LogisticRegressionGD:
    """Binary logistic-regression classifier trained by batch gradient descent.

    Parameters
    ----------
    eta : float
        Learning rate applied to every weight update.
    n_iter : int
        Number of passes over the training set.
    random_state : int
        Seed of the generator used to draw the initial weights.

    Attributes
    ----------
    w_ : numpy.ndarray
        Weights after fitting; ``w_[0]`` is the bias, ``w_[1:]`` the
        per-feature weights.
    cost_ : list
        Cross-entropy cost recorded once per pass.
    """

    def __init__(self, eta=0.05, n_iter=100, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the weights to the training data and return ``self``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples.
        y : array-like, shape (n_samples,)
            Target labels. The model is binary, so labels must be 0 or 1.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []

        for _ in range(self.n_iter):
            net_input = self.net_input(X)
            output = self.activation(net_input)
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()

            # Cross-entropy written in terms of z:
            #   -y*log(s(z)) - (1-y)*log(1-s(z)) == log(1 + exp(z)) - y*z
            # This avoids 0 * log(0) -> nan once the sigmoid output rounds
            # to exactly 0 or 1.
            cost = np.sum(np.logaddexp(0.0, net_input) - y * net_input)
            self.cost_.append(cost)
        return self

    def net_input(self, X):
        """Return the linear combination ``X . w_[1:] + w_[0]``."""
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        """Return the sigmoid of *z*; z is clipped to [-250, 250] first."""
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        """Return class label 1 where the net input is >= 0, else 0."""
        return np.where(self.net_input(X) >= 0.0, 1, 0)
        # equivalent to:
        # return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Draw the decision regions of a classifier over two features.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, 2)
        Samples to scatter; columns 0 and 1 become the plot axes.
    y : numpy.ndarray, shape (n_samples,)
        Class label of every sample (at most five distinct values, one per
        available marker/colour).
    classifier : object
        Anything with a ``predict`` method accepting an (n, 2) array.
    test_idx : sequence of int, optional
        Row indices of *X* to highlight as the test set.
    resolution : float
        Step of the grid on which the classifier is evaluated.
    """
    # Imported lazily so that the classifier above can be imported without
    # matplotlib being installed.
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap

    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Evaluate the classifier on a grid extending one unit past the data.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')

    # `if test_idx:` would raise for a multi-element NumPy array, so test
    # for None / emptiness explicitly.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[np.asarray(test_idx), :]
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    c='none',
                    edgecolor='black',
                    alpha=1.0,
                    linewidth=1,
                    marker='o',
                    s=100,
                    label='test set')


def main():
    """Train LogisticRegressionGD on iris classes 0 and 1 and plot the result."""
    import matplotlib.pyplot as plt
    from sklearn import datasets
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    iris = datasets.load_iris()
    X = iris.data[:, [2, 3]]
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1, stratify=y)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    # LogisticRegressionGD is a binary model, so train and evaluate only on
    # the samples labelled 0 or 1. Use the standardized features, matching
    # the "[standardized]" axis labels below.
    train_01 = (y_train == 0) | (y_train == 1)
    test_01 = (y_test == 0) | (y_test == 1)
    X_train_01_subset = X_train_std[train_01]
    y_train_01_subset = y_train[train_01]
    X_test_01_subset = X_test_std[test_01]
    y_test_01_subset = y_test[test_01]

    lrgd = LogisticRegressionGD(eta=0.05, n_iter=1000, random_state=1)
    lrgd.fit(X_train_01_subset, y_train_01_subset)

    y_pred = lrgd.predict(X_train_01_subset)
    print('Misclassified training samples:',
          (y_train_01_subset != y_pred).sum())

    y_pred = lrgd.predict(X_test_01_subset)
    print('Misclassified samples:', (y_test_01_subset != y_pred).sum())
    print('Accuracy: %.3f' % accuracy_score(y_test_01_subset, y_pred))

    plot_decision_regions(X=X_train_01_subset,
                          y=y_train_01_subset,
                          classifier=lrgd)
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

    # Show which training rows were selected for the binary subset.
    print(train_01)


if __name__ == "__main__":
    main()
Training a logistic regression model with scikit-learn
import numpy as np


def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Draw the decision regions of a classifier over two features.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, 2)
        Samples to scatter; columns 0 and 1 become the plot axes.
    y : numpy.ndarray, shape (n_samples,)
        Class label of every sample (at most five distinct values, one per
        available marker/colour).
    classifier : object
        Anything with a ``predict`` method accepting an (n, 2) array.
    test_idx : sequence of int, optional
        Row indices of *X* to highlight as the test set.
    resolution : float
        Step of the grid on which the classifier is evaluated.
    """
    # Imported lazily so that importing this module only requires NumPy.
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap

    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Evaluate the classifier on a grid extending one unit past the data.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(classes):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor='black')

    # `if test_idx:` would raise for a multi-element NumPy array, so test
    # for None / emptiness explicitly.
    if test_idx is not None and len(test_idx) > 0:
        X_test = X[np.asarray(test_idx), :]
        plt.scatter(X_test[:, 0],
                    X_test[:, 1],
                    c='none',
                    edgecolor='black',
                    alpha=1.0,
                    linewidth=1,
                    marker='o',
                    s=100,
                    label='test set')


def _load_standardized_iris():
    """Return (X_train_std, X_test_std, y_train, y_test) for iris columns 2 and 3."""
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler

    iris = datasets.load_iris()
    # `iris` exposes the attributes ['DESCR', 'data', 'feature_names',
    # 'filename', 'target', 'target_names'].
    X = iris.data[:, [2, 3]]
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1, stratify=y)

    # The scaler is fitted on the training split only and then applied to both.
    sc = StandardScaler()
    sc.fit(X_train)
    return sc.transform(X_train), sc.transform(X_test), y_train, y_test


def _fit_logistic_regression(X_train_std, y_train):
    """Return a LogisticRegression fitted on the given training data."""
    from sklearn.linear_model import LogisticRegression

    # NOTE(review): `multi_class` appears to be deprecated in recent
    # scikit-learn releases -- confirm against the installed version.
    lr = LogisticRegression(C=100.0, solver='liblinear', multi_class='ovr')
    lr.fit(X_train_std, y_train)
    return lr


def main():
    """Train scikit-learn's LogisticRegression on iris, report and plot it."""
    import matplotlib.pyplot as plt
    from sklearn.metrics import accuracy_score

    X_train_std, X_test_std, y_train, y_test = _load_standardized_iris()
    lr = _fit_logistic_regression(X_train_std, y_train)

    y_pred = lr.predict(X_train_std)  # y_pred is a NumPy array
    print('Misclassified training samples:', (y_train != y_pred).sum())

    y_pred = lr.predict(X_test_std)
    print('Misclassified samples:', (y_test != y_pred).sum())
    print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

    # Visualization starts here.
    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    # The test rows are stacked after the training rows, so their indices
    # follow from the split sizes instead of a hard-coded range(105, 150).
    plot_decision_regions(X_combined_std, y_combined, classifier=lr,
                          test_idx=range(len(y_train), len(y_combined)))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

    # Condensed recap of the evaluation. The original re-ran the identical
    # load/split/scale/fit pipeline here; the fitted model is reused instead.
    y_pred = lr.predict(X_train_std)  # y_pred is a NumPy array
    print('Misclassified training samples: ', (y_pred != y_train).sum())
    y_pred = lr.predict(X_test_std)
    print('Misclassified testing samples: ', (y_pred != y_test).sum())
    # accuracy_score expects (y_true, y_pred).
    print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))


if __name__ == "__main__":
    main()
Sklearn库中Logistic Regression函数各个参数总结
coef_ 和 intercept_
转载地址:http://amygf.baihongyu.com/