zoukankan      html  css  js  c++  java
  • LogisticRegression

    import numpy as np
    from dev.metrics import accuracy_score

    class LogisticRegression:
        """Binary logistic-regression classifier trained by batch gradient descent.

        Attributes (all ``None`` until ``fit`` has been called):
            coef_: weights of the features, i.e. ``_theta[1:]``.
            intercept_: bias term, i.e. ``_theta[0]``.
            _theta: full parameter vector, intercept first, then the weights.
        """

        def __init__(self):
            """Initialise an unfitted Logistic Regression model."""
            self.coef_ = None
            self.intercept_ = None
            self._theta = None

        def _sigmoid(self, t):
            """Return the logistic function ``1 / (1 + exp(-t))`` element-wise."""
            # For very negative t, exp(-t) overflows to inf and the quotient
            # becomes 0.0, which is the correct limit; silence only that warning.
            with np.errstate(over="ignore"):
                return 1. / (1. + np.exp(-t))

        def fit(self, X_train, y_train, eta=0.01, n_iters=1e4):
            """Fit the model to ``X_train`` / ``y_train`` with gradient descent.

            Args:
                X_train: 2-D array of training samples (one row per sample).
                y_train: 1-D array of labels; the loss below treats them as
                    0/1 values.
                eta: learning rate (step size of each gradient update).
                n_iters: maximum number of gradient-descent iterations.

            Returns:
                self, so that calls can be chained.

            Raises:
                AssertionError: if X_train and y_train have different lengths.
            """
            assert X_train.shape[0] == y_train.shape[0], \
                "the size of X_train must be equal to the size of y_train"

            # Probabilities are clipped into [tiny, 1 - tiny] before taking
            # logs.  A saturated sigmoid (exactly 0.0 or 1.0) would otherwise
            # give 0 * log(0) = nan, and a nan cost can never satisfy the
            # convergence test below.
            tiny = 1e-15

            def J(theta, X_b, y):
                """Mean cross-entropy loss of ``theta`` on ``(X_b, y)``."""
                y_hat = np.clip(self._sigmoid(X_b.dot(theta)), tiny, 1. - tiny)
                return -np.sum(y * np.log(y_hat)
                               + (1 - y) * np.log(1 - y_hat)) / len(y)

            def dJ(theta, X_b, y):
                """Gradient of the mean cross-entropy loss w.r.t. ``theta``."""
                return X_b.T.dot(self._sigmoid(X_b.dot(theta)) - y) / len(X_b)

            def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4,
                                 epsilon=1e-8):
                """Descend from ``initial_theta`` until the loss stops changing.

                Stops after ``n_iters`` updates, or as soon as one update
                changes the loss by less than ``epsilon``.
                """
                theta = initial_theta
                # Cache the loss of the current theta so that every iteration
                # evaluates J once instead of twice.
                cost = J(theta, X_b, y)
                cur_iter = 0

                while cur_iter < n_iters:
                    theta = theta - eta * dJ(theta, X_b, y)
                    new_cost = J(theta, X_b, y)
                    if abs(new_cost - cost) < epsilon:
                        break
                    cost = new_cost
                    cur_iter += 1

                return theta

            # Prepend a column of ones so theta[0] acts as the intercept.
            X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
            initial_theta = np.zeros(X_b.shape[1])
            self._theta = gradient_descent(X_b, y_train, initial_theta, eta,
                                           n_iters)

            self.intercept_ = self._theta[0]
            self.coef_ = self._theta[1:]

            return self

        def predict_proba(self, X_predict):
            """Return the sigmoid output of the fitted model per row of ``X_predict``.

            Raises:
                AssertionError: if the model is not fitted, or if the number
                    of columns of X_predict differs from ``len(self.coef_)``.
            """
            assert self.intercept_ is not None and self.coef_ is not None, \
                "must fit before predict!"
            assert X_predict.shape[1] == len(self.coef_), \
                "the feature number of X_predict must be equal to X_train"

            X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
            return self._sigmoid(X_b.dot(self._theta))

        def predict(self, X_predict):
            """Return the predicted 0/1 label for every row of ``X_predict``.

            A sample is labelled 1 when its predicted probability is >= 0.5.

            Raises:
                AssertionError: if the model is not fitted, or if the number
                    of columns of X_predict differs from ``len(self.coef_)``.
            """
            # predict_proba performs the fitted/shape validation, so it is
            # not repeated here.
            proba = self.predict_proba(X_predict)
            return np.array(proba >= 0.5, dtype=int)

        def score(self, X_test, y_test):
            """Return ``accuracy_score(y_test, self.predict(X_test))``."""
            y_predict = self.predict(X_test)
            return accuracy_score(y_test, y_predict)

        def __repr__(self):
            return "LogisticRegression()"
  • 相关阅读:
    二叉搜索树
    自己课题的任务
    查找
    排序
    python快速教程
    塌下心来,慢慢积累~
    生命只有一次,你可以用它来做一点伟大的事情
    Python 编程规范-----转载
    Some regret....
    Discussing the scenery in the program of 863 with Doctor Zhang!
  • 原文地址:https://www.cnblogs.com/heguoxiu/p/10135594.html
Copyright © 2011-2022 走看看