1、向量的元素可以带有名字(例如quantile()的返回值就是带名字的向量),可以使用names()查看,或者使用unname()去除。
2、构建空的向量或者空的数据框来接收循环结果时,必须把构建步骤放在循环之外;否则每次循环都会重新生成一个空的数据框并覆盖之前的结果,最终得到的数据框中会有很多零值。
# Work inside the analysis directory so the relative file names below resolve.
# NOTE(review): this path is hard-coded and machine-specific; adjust it before
# running the script anywhere else.
setwd("E:/天睿TERADATA/data_analysis")
rawdata <- read.csv("test_data.csv")

#' Keep the numeric columns of a data frame and winsorize each of them.
#'
#' Every numeric column is clamped to its own 1st and 99th percentiles:
#' values below p_1 become p_1 and values above p_99 become p_99.
#' Missing values are ignored when computing the percentiles and are left
#' as `NA` in the output.
#'
#' @param dta A data frame; non-numeric columns are dropped.
#' @return A data frame holding only the numeric columns of `dta`, winsorized.
data_prepare <- function(dta) {
  is_numeric_column <- vapply(dta, is.numeric, logical(1))
  # drop = FALSE keeps a data frame even when a single column qualifies.
  numeric_data <- dta[, is_numeric_column, drop = FALSE]

  for (i in seq_len(ncol(numeric_data))) {
    column <- numeric_data[[i]]
    # quantile() returns a named vector; the names are not needed here.
    bounds <- unname(quantile(column, probs = c(0.01, 0.99), na.rm = TRUE))
    numeric_data[[i]] <- pmin(pmax(column, bounds[1]), bounds[2])
  }

  numeric_data
}

# Export the cleaned table.
newdata <- data_prepare(rawdata)
write.csv(newdata, "newdata.csv")

#' Build a table of descriptive statistics, one row per numeric column.
#'
#' For each numeric column the result holds: `n` (total length), `nmiss`
#' (number of `NA`), `nobs` (`n - nmiss`), `min`, `max`, `mean`, `sd`, and the
#' percentiles p_1..p_5, p_10..p_90 in steps of 5, and p_91..p_100.
#'
#' @param dta A data frame; non-numeric columns are skipped.
#' @return A data frame whose row names are the numeric column names of `dta`
#'   and whose 39 columns are the statistics listed above.
statistic_framework <- function(dta) {
  # Percentile points in percent; they drive both the probabilities passed to
  # quantile() and the column names, so the two cannot drift apart.
  percent_points <- c(1:5, seq(10, 90, by = 5), 91:100)
  stat_names <- c(
    "n", "nmiss", "nobs", "min", "max", "mean", "sd",
    paste0("p_", percent_points)
  )

  # Statistics for a single numeric vector, in the order of `stat_names`.
  # With na.omit = FALSE and missing values present, every statistic except
  # the three counts is NA.
  data_statistic <- function(variable, na.omit = TRUE) {
    # Count before dropping NAs, otherwise nmiss would always be zero.
    n <- length(variable)
    nmiss <- sum(is.na(variable))
    nobs <- n - nmiss

    values <- if (na.omit) variable[!is.na(variable)] else variable

    if (length(values) == 0L || anyNA(values)) {
      summary_stats <- rep(NA_real_, 4L + length(percent_points))
    } else {
      summary_stats <- c(
        min(values),
        max(values),
        mean(values),
        sd(values),
        unname(quantile(values, probs = percent_points / 100))
      )
    }

    c(n, nmiss, nobs, summary_stats)
  }

  numeric_data <- dta[, vapply(dta, is.numeric, logical(1)), drop = FALSE]

  # vapply() yields one column of statistics per variable; transpose so that
  # each variable becomes a row.
  stats_by_variable <- vapply(
    numeric_data, data_statistic, numeric(length(stat_names))
  )
  result <- as.data.frame(t(stats_by_variable))
  colnames(result) <- stat_names
  rownames(result) <- colnames(numeric_data)

  result
}

# Inspect the statistics of the test data.
statistic_result <- statistic_framework(rawdata)
write.csv(statistic_result, "statistic_result.csv")