zoukankan      html  css  js  c++  java
  • 疾病风险评分--mark

     查找有关疾病风险评分的代码时,找到了一个用于银行信贷评分的 R 包(creditR),不确定能否直接套用到疾病风险评分上,日后再细看。

    # Risk-scoring walkthrough using the creditR package on its bundled
    # `germancredit` data set: data prep -> WoE binning -> variable selection ->
    # logistic model -> calibration / master scale -> validation statistics.
    # Bare expressions (e.g. `master_scale`) are kept so results auto-print when
    # the script is run interactively.

    # Setup ----
    # Installing creditR requires installing a large number of packages, so only
    # hit GitHub when the package is not already available locally.
    library(devtools)
    if (!requireNamespace("creditR", quietly = TRUE)) {
      devtools::install_github("ayhandis/creditR")
    }
    library(creditR)
    ls("package:creditR")

    # Data ----
    data("germancredit")
    str(germancredit)
    head(germancredit)
    sample_data <- germancredit[, c(
      "duration.in.month",
      "credit.amount",
      "installment.rate.in.percentage.of.disposable.income",
      "age.in.years",
      "creditability"
    )]
    # Recode the target to numeric: 1 where creditability is "bad", 0 otherwise.
    sample_data$creditability <- ifelse(sample_data$creditability == "bad", 1, 0)
    missing_ratio(sample_data)

    # Train / test split ----
    # NOTE(review): positional args are presumably (data, seed, train ratio) --
    # confirm against the creditR documentation.
    traintest <- train_test_split(sample_data, 123, 0.70)
    train <- traintest$train
    test <- traintest$test

    # WoE transformation ----
    # Binning rules are learned on the training set only and then deployed to
    # both train and test.
    woerules <- woe.binning(
      df = train,
      target.var = "creditability",
      pred.var = train,
      event.class = 1
    )
    train_woe <- woe.binning.deploy(train, woerules, add.woe.or.dum.var = "woe")
    train_woe <- woe.get.clear.data(
      train_woe,
      default_flag = "creditability",
      prefix = "woe"
    )
    test_woe <- woe.binning.deploy(test, woerules, add.woe.or.dum.var = "woe")
    test_woe <- woe.get.clear.data(
      test_woe,
      default_flag = "creditability",
      prefix = "woe"
    )

    # Variable screening ----
    IV.calc.data(train_woe, "creditability")
    Gini.univariate.data(train_woe, "creditability")
    eliminated_data <- Gini_elimination(train_woe, "creditability", 0.10)
    str(eliminated_data)

    # Variable clustering ----
    clustering_data <- variable.clustering(eliminated_data, "creditability", 2)
    clustering_data
    selected_data <- variable.clustering.gini(eliminated_data, "creditability", 2)
    correlation.cluster(
      eliminated_data,
      clustering_data,
      variables = "variable",
      clusters = "Group"
    )

    # Logistic regression ----
    model <- glm(
      formula = creditability ~ .,
      family = binomial(link = "logit"),
      data = eliminated_data
    )
    summary(model)
    woe.glm.feature.importance(eliminated_data, model, "creditability")

    # Attach predicted probabilities ----
    # Name the new column "PD" directly in cbind() instead of overwriting every
    # column name with a hard-coded vector: the hard-coded list silently
    # mislabels columns (or fails) whenever Gini_elimination() keeps a different
    # set or order of variables.
    ms_train_data <- cbind(eliminated_data, PD = model$fitted.values)
    ms_test_data <- cbind(
      test_woe[, colnames(eliminated_data), drop = FALSE],
      PD = predict(model, type = "response", newdata = test_woe)
    )

    # Regression calibration ----
    regression_calibration <- regression.calibration(
      model,
      test_woe,
      "creditability"
    )
    regression_calibration$calibration_data
    regression_calibration$calibration_model
    regression_calibration$calibration_formula

    # Master scale and Bayesian calibration ----
    master_scale <- master.scale(ms_train_data, "creditability", "PD")
    master_scale
    # Score is the log-odds (logit) of the predicted PD.
    ms_train_data$Score <- log(ms_train_data$PD / (1 - ms_train_data$PD))
    ms_test_data$Score <- log(ms_test_data$PD / (1 - ms_test_data$PD))
    bayesian_method <- bayesian.calibration(
      data = master_scale,
      average_score = "Score",
      total_observations = "Total.Observations",
      PD = "PD",
      central_tendency = 0.05,
      calibration_data = ms_train_data,
      calibration_data_score = "Score"
    )
    bayesian_method$Calibration.model
    bayesian_method$Calibration.formula
    scaled.score(bayesian_method$calibration_data, "calibrated_pd", 3000, 15)

    # Model validation ----
    vif.calc(model)
    Gini(model$fitted.values, ms_train_data$creditability)
    k.fold.cross.validation.glm(ms_train_data, "creditability", 5, 1)
    Kolmogorov.Smirnov(ms_train_data, "creditability", "PD")
    Kolmogorov.Smirnov(ms_test_data, "creditability", "PD")
    SSI.calc.data(train_woe, test_woe, "creditability")
    Herfindahl.Hirschman.Index(master_scale, "Total.Observations")
    Anchor.point(master_scale, "PD", "Total.Observations", 0.30)
    chisquare.test(master_scale, "PD", "Bad.Count", "Total.Observations", 0.90)
    # Observed default rate per master-scale grade, compared against PD below.
    master_scale$DR <- master_scale$Bad.Count / master_scale$Total.Observations
    Binomial.test(master_scale, "Total.Observations", "PD", "DR", 0.90, "one")
    

    https://www.mediecogroup.com/news_detail/514/1/

    https://www.mediecogroup.com/method_topic_article_detail/281/?ty=methods

    https://www.mediecogroup.com/method_topic_article_detail/296/?ty=methods

    https://blog.csdn.net/tMb8Z9Vdm66wH68VX1/article/details/89369428

    https://www.analyticsvidhya.com/blog/2019/03/introduction-creditr-r-package-enhance-credit-risk-scoring-validation-r-codes/

    Valar morghulis
  • 相关阅读:
    高性能分布式计算与存储系统设计概要
    .NET核心代码保护策略
    Web 通信 之 长连接、长轮询(long polling)
    C++数据结构之二叉查找树(BST)
    T4:T4 笔记 + Trait 示例
    腾讯2014软件开发
    CSS选择器从右向左的匹配规则
    Js面向对象编程
    Js杂谈-正则的测试与回溯次数
    Microsoft Message Analyzer (微软消息分析器,“网络抓包工具
  • 原文地址:https://www.cnblogs.com/super-yb/p/11377867.html
Copyright © 2011-2022 走看看