# Scorecard R

#install.packages("scorecard")
# Traditional Credit Scoring Using Logistic Regression
library(scorecard)


# Example I: bin two predictors, then map them to WOE values / bin labels
data(germancredit)
dt <- germancredit[, c("creditability", "credit.amount", "purpose")]

# derive the bins for the columns of dt against the target column
bins <- woebin(dt, y = "creditability")

# replace the raw values by WOE values
dt_woe <- woebin_ply(dt, bins = bins)
str(dt_woe)

# replace the raw values by bin labels
dt_bin <- woebin_ply(dt, bins = bins, to = "bin")
str(dt_bin)

# data preparation ------
# load the germancredit data
data("germancredit")
# drop variables based on missing rate, iv and identical value rate
dt_f <- var_filter(germancredit, y = "creditability")
# 60/40 split into train and test with a fixed seed
dt_list <- split_df(
  dt_f,
  y = "creditability",
  ratio = c(0.6, 0.4),
  seed = 30
)
# target column of each split
label_list <- lapply(dt_list, function(part) part$creditability)

# woe binning ------
bins <- woebin(dt_f, y = "creditability")
# the bins are used as-is, without adjustment
bins_adj <- bins
# woebin_plot(bins)

# train and test converted into woe values
dt_woe_list <- lapply(
  dt_list,
  function(part) woebin_ply(part, bins = bins_adj)
)

# train and test converted into bin labels
dt_bin_list <- lapply(
  dt_list,
  function(part) woebin_ply(part, bins = bins_adj, to = "bin")
)

  
  # glm / selecting variables ------
  m1 = glm( creditability ~ ., family = binomial(), data = dt_woe_list$train)
  # vif(m1, merge_coef = TRUE) # summary(m1)
  # Select a formula-based model by AIC (or by LASSO for large dataset)
  m_step = step(m1, direction="both", trace = FALSE)
  m2 = eval(m_step$call)
  # vif(m2, merge_coef = TRUE) # summary(m2)
  
  # performance ks & roc ------
  ## predicted proability
  pred_list = lapply(dt_woe_list, function(x) predict(m2, x, type='response'))
  ## Adjusting for oversampling (support.sas.com/kb/22/601.html)
  # card_prob_adj = scorecard2(bins_adj, dt=dt_list$train, y='creditability', 
  #                x=sub('_woe$','',names(coef(m2))[-1]), badprob_pop=0.03, return_prob=TRUE)
  
  ## performance
  perf = perf_eva(pred = pred_list, label = label_list)
  # perf_adj = perf_eva(pred = card_prob_adj$prob, label = label_list$train)
  
  # score ------
  ## scorecard
  card = scorecard(bins_adj, m2)
  ## credit score
  score_list = lapply(dt_list, function(x) scorecard_ply(x, card))
  
  score_list2 = lapply(dt_list, function(x) scorecard_ply(x, card, only_total_score=FALSE))
  
    ctrain = cbind(dt_list$train,data.frame(dt_bin_list[1]),data.frame(dt_woe_list[1]),data.frame(score_list2[1]))
    
  write.csv(ctrain,"/home/star/balack_namelist/scorecard/CreditScoreModel-master/train.csv",row.names = F)

write.csv(score_list2[1],"/home/star/balack_namelist/scorecard/CreditScoreModel-master/train.csv",row.names = F)

## psi computed from the scores and labels of the train and test splits
perf_psi(
  score = score_list,
  label = label_list
)

# You may also like
#
# Reposted from blog.csdn.net/starzhou/article/details/106289997
# R
# R: