1、导入数据
#2018年物料数据
library(xlsx)
x=read.xlsx("material_sales_order.xlsx",sheetIndex=1,encoding = "UTF-8")
x=read.xlsx("material_sales_order_动态.xlsx",sheetIndex=1,encoding = "UTF-8")
x=x[x$order_nums>5,] #考虑数据的实际情况,将数据值为…(此处注释在原文中被截断)
x=x[x$km.cluster<=2,] #选择聚类的1和2类
2、观测数据分布
#物料订单额数据
boxplot(x$order_amount)
qqnorm(x$order_amount)
#物料订单数数据
boxplot(x$order_nums)
#数据的正态分布检验
qqnorm(x$order_nums)
3、对原始数据进行标准化处理
X=cbind(x,scale(x$order_amount),scale(x$order_nums)) #对数据做标准化
write.xlsx(X,"material_sales_ordery_标准化.xlsx")
4、进行聚类分析
##k-means聚类确定聚类个数
d=data.frame(x$order_nums)
mydata <- d
wss <- (nrow(mydata)-1)*sum(apply(mydata,2,var))
for (i in 2:15) wss[i] <- sum(kmeans(mydata,centers=i)$withinss)
###这里的wss(within-cluster sum of squares)是组内平方和
plot(1:15, wss, type="b", xlab="聚类个数",ylab="误差(平方和)")
##动态聚类
km=kmeans(x$order_nums,6)
plot(x$order_nums, col = km$cluster,pch=1,xlab="序号",ylab="订单数量")
X=cbind(x,km$cluster) #读取聚类结果
write.xlsx(X,"material_sales_order_动态.xlsx")
#自动计算的订单数散布最小的聚类个数
library(ykmeans)
a=data.frame(x$order_nums)
km=ykmeans(a,"x.order_nums","x.order_nums",3:6)
table(km$cluster)
考虑最近一次订单发生的时间
是否可以考虑使用因子分析对其进行评分