COMP7404 Machine Learning: Logistic Regression
Published: 2019-04-30


logit function
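The logit is the log-odds: it maps a probability p in (0, 1) onto the whole real line,

$$\mathrm{logit}(p) = \ln\frac{p}{1-p}$$

which is what the plot below shows: the curve diverges as p approaches 0 or 1.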

import matplotlib.pyplot as plt
import numpy as np

def logit(p):
    return np.log(p / (1 - p))

p = np.arange(0.001, 1, 0.001)
lp = logit(p)

plt.plot(p, lp)
plt.axhline(0, color='k')
plt.xlim(-0.1, 1.1)
plt.ylim(-7, 7)
plt.xlabel('p')
plt.ylabel('logit(p)')
plt.xticks([0.0, 0.5, 1.0])
ax = plt.gca()
ax.xaxis.grid(True)
plt.show()

 

logistic function
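The logistic (sigmoid) function is the inverse of the logit: it squashes any real-valued net input z into (0, 1), so its output can be read as the probability of class 1:

$$\phi(z) = \frac{1}{1+e^{-z}}, \qquad z = \theta^{T}x$$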

In scikit-learn, a fitted classifier's clf.coef_ and clf.intercept_ together are the parameter vector θ: intercept_ is the bias term w0 and coef_ holds the feature weights.

import matplotlib.pyplot as plt
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

z = np.arange(-7, 7, 0.1)
phi_z = sigmoid(z)

plt.plot(z, phi_z)
plt.axvline(0.0, color='k')
plt.ylim(-0.1, 1.1)
plt.xlabel('z')
plt.ylabel(r'$\phi(z)$')
plt.yticks([0.0, 0.5, 1.0])
ax = plt.gca()
ax.yaxis.grid(True)
plt.tight_layout()
plt.show()

 

plot cost function J
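Each training sample contributes one of two cost terms, so a confident wrong prediction is penalized heavily:

$$J(w) = \begin{cases} -\ln\phi(z) & \text{if } y = 1 \\ -\ln\bigl(1-\phi(z)\bigr) & \text{if } y = 0 \end{cases}$$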

import matplotlib.pyplot as plt
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def cost_1(z):
    return -np.log(sigmoid(z))

def cost_0(z):
    return -np.log(1 - sigmoid(z))

z = np.arange(-10, 10, 0.1)
phi_z = sigmoid(z)

c1 = [cost_1(x) for x in z]
plt.plot(phi_z, c1, label='J(w) if y=1')
c0 = [cost_0(x) for x in z]
plt.plot(phi_z, c0, linestyle='--', label='J(w) if y=0')
plt.ylim(0.0, 5.1)
plt.xlim([0, 1])
plt.xlabel(r'$\phi(z)$')
plt.ylabel('J(w)')
plt.legend(loc='best')
plt.tight_layout()
plt.show()

 

Logistic Regression from scratch

import numpy as np

class LogisticRegressionGD(object):
    """Binary logistic regression via full-batch gradient descent."""

    def __init__(self, eta=0.05, n_iter=100, random_state=1):
        self.eta = eta                    # learning rate
        self.n_iter = n_iter              # passes over the training set
        self.random_state = random_state  # seed for weight initialization

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for i in range(self.n_iter):
            net_input = self.net_input(X)
            output = self.activation(net_input)
            errors = (y - output)
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # logistic cost (negative log-likelihood)
            cost = -y.dot(np.log(output)) - ((1 - y).dot(np.log(1 - output)))
            self.cost_.append(cost)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, z):
        # clip z to avoid overflow in exp
        return 1. / (1. + np.exp(-np.clip(z, -250, 250)))

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, 0)
        # equivalent to:
        # return np.where(self.activation(self.net_input(X)) >= 0.5, 1, 0)

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# LogisticRegressionGD only handles binary labels, so restrict the data to classes 0 and 1
train_01 = (y_train == 0) | (y_train == 1)
test_01 = (y_test == 0) | (y_test == 1)
X_train_01_subset = X_train_std[train_01]
y_train_01_subset = y_train[train_01]

lrgd = LogisticRegressionGD(eta=0.05, n_iter=1000, random_state=1)
lrgd.fit(X_train_01_subset, y_train_01_subset)

y_pred = lrgd.predict(X_train_01_subset)
print('Misclassified training samples:', (y_train_01_subset != y_pred).sum())
y_pred = lrgd.predict(X_test_std[test_01])
print('Misclassified samples:', (y_test[test_01] != y_pred).sum())

from sklearn.metrics import accuracy_score
print('Accuracy: %.3f' % accuracy_score(y_test[test_01], y_pred))

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # grid over the feature space
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    c=colors[idx], marker=markers[idx], label=cl, edgecolor='black')
    if test_idx:
        X_test, y_test = X[test_idx, :], y[test_idx]
        plt.scatter(X_test[:, 0], X_test[:, 1], c='none', edgecolor='black',
                    alpha=1.0, linewidth=1, marker='o', s=100, label='test set')

plot_decision_regions(X=X_train_01_subset, y=y_train_01_subset, classifier=lrgd)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()
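The two self.w_ updates in fit implement full-batch gradient ascent on the log-likelihood (equivalently, gradient descent on J): with learning rate η,

$$w_{1:} := w_{1:} + \eta\,X^{T}\bigl(y - \phi(z)\bigr), \qquad w_{0} := w_{0} + \eta\sum_{i}\bigl(y_{i} - \phi(z_{i})\bigr)$$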

 

Training a logistic regression model with scikit-learn

import numpy as np
from sklearn import datasets

iris = datasets.load_iris()
# iris is a Bunch object with the attributes
# ['DESCR', 'data', 'feature_names', 'filename', 'target', 'target_names']
X = iris.data[:, [2, 3]]
y = iris.target

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(C=100.0, solver='liblinear', multi_class='ovr')
lr.fit(X_train_std, y_train)

y_pred = lr.predict(X_train_std)  # y_pred is a numpy array
print('Misclassified training samples:', (y_train != y_pred).sum())
y_pred = lr.predict(X_test_std)
print('Misclassified samples:', (y_test != y_pred).sum())

from sklearn.metrics import accuracy_score
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))

# Visualization starts here
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    c=colors[idx], marker=markers[idx], label=cl, edgecolor='black')
    if test_idx:
        X_test, y_test = X[test_idx, :], y[test_idx]
        plt.scatter(X_test[:, 0], X_test[:, 1], c='none', edgecolor='black',
                    alpha=1.0, linewidth=1, marker='o', s=100, label='test set')

# lr is already fitted above; the test samples are rows 105-149 of the combined set
plot_decision_regions(X_combined_std, y_combined, classifier=lr, test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

The same pipeline again, condensed with all imports gathered at the top:

import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
# iris is a Bunch object with the attributes
# ['DESCR', 'data', 'feature_names', 'filename', 'target', 'target_names']
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1, stratify=y)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

lr = LogisticRegression(C=100.0, solver='liblinear', multi_class='ovr')
lr.fit(X_train_std, y_train)

y_pred = lr.predict(X_train_std)  # y_pred is a numpy array
print('Misclassified training samples:', (y_pred != y_train).sum())
y_pred = lr.predict(X_test_std)
print('Misclassified testing samples:', (y_pred != y_test).sum())
print('Accuracy: %.3f' % accuracy_score(y_test, y_pred))
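Beyond hard labels, a fitted LogisticRegression also exposes class-membership probabilities. A minimal sketch, reusing lr and X_test_std from above:

# Probabilities for the first three test samples;
# columns follow lr.classes_, each row sums to 1
print(lr.predict_proba(X_test_std[:3, :]))
# The argmax per row recovers the hard labels returned by lr.predict
print(lr.predict_proba(X_test_std[:3, :]).argmax(axis=1))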

 

Summary of the parameters of sklearn's LogisticRegression
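As a quick reference, here is a commented constructor call covering the parameters this post uses plus the other commonly tuned ones; a sketch only, so check the docs of your scikit-learn version for exact defaults:

from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(
    penalty='l2',        # regularization type; liblinear also supports 'l1'
    C=100.0,             # inverse regularization strength: smaller C means stronger regularization
    solver='liblinear',  # optimizer; alternatives include 'lbfgs', 'newton-cg', 'sag', 'saga'
    multi_class='ovr',   # one-vs-rest: fit one binary classifier per class
    max_iter=100,        # cap on solver iterations
    random_state=1,      # seed for solvers that shuffle the data
)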

 

coef_ and intercept_
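For the one-vs-rest model above (3 classes, 2 features), the fitted θ is split across these two attributes. A sketch, assuming lr is the model fitted in the previous section:

print(lr.coef_.shape)       # (3, 2): one row of feature weights per class
print(lr.intercept_.shape)  # (3,): one bias term per class
# Net input for class k on standardized inputs:
# z_k = X_std @ lr.coef_[k] + lr.intercept_[k]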

 

Reprinted from: http://amygf.baihongyu.com/
