数组
由函数array()建立,一般格式为:
array(data,dim,dimnames) #dim为数组的维数向量
#dimnames为由各维的名称向量构成的列表(list),缺省为空(NULL)
> A<-array(1:6,c(2:3))
> A
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> dimnames(A)<-list(c("a1","a2"),c("b1","b2","b3"))
> A
   b1 b2 b3
a1  1  3  5
a2  2  4  6
> colnames(A) #列名
[1] "b1" "b2" "b3"
> rownames(A) #行名
[1] "a1" "a2"
因子
因子是用于对数据进行分类并将其存储为级别的数据对象,可以存储字符串和整数。使用factor()函数通过将向量作为输入创建因子。
链接:R语言-因子
> data <- c("East","West","East","North","North","East","West","West","West","East","North")
> print(data)
 [1] "East"  "West"  "East"  "North" "North" "East"  "West"  "West"  "West"  "East"  "North"
> print(is.factor(data))
[1] FALSE
> factor_data <- factor(data)
> print(factor_data)
 [1] East  West  East  North North East  West  West  West  East  North
Levels: East North West
> print(is.factor(factor_data))
[1] TRUE
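在 R 4.0.0 之前,data.frame() 的 stringsAsFactors 参数默认为 TRUE,创建带有文本列的数据框时,R语言会将文本列视为分类数据并在其上创建因子;自 R 4.0.0 起该默认值改为 FALSE,文本列保持为字符型,需显式指定 stringsAsFactors = TRUE(或对该列使用 factor())才能得到下面的结果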
> height <- c(132,151,162,139,166,147,122)
> weight <- c(48,49,66,53,67,52,40)
> gender <- c("male","male","female","female","male","female","male")
> input_data <- data.frame(height,weight,gender)
> print(input_data)
  height weight gender
1    132     48   male
2    151     49   male
3    162     66 female
4    139     53 female
5    166     67   male
6    147     52 female
7    122     40   male
> print(is.factor(input_data$gender))
[1] TRUE
> print(input_data$gender)
[1] male   male   female female male   female male
Levels: female male
生成因子级别-gl()函数
gl(n, k, labels) #n是给出级数的整数,k是给出复制数目的整数,labels是所得因子水平的标签向量
> v <- gl(3, 4, labels = c("Tampa", "Seattle","Boston"))
> print(v)
 [1] Tampa   Tampa   Tampa   Tampa   Seattle Seattle Seattle Seattle Boston
[10] Boston  Boston  Boston
Levels: Tampa Seattle Boston
矩阵
> X<-matrix(c(1:6),byrow=TRUE,nr=2,nc=3) #byrow=TRUE按行填充数据,默认以列优先
> X
     [,1] [,2] [,3]
[1,]    1    2    3
[2,]    4    5    6
> diag(5) #单位矩阵
     [,1] [,2] [,3] [,4] [,5]
[1,]    1    0    0    0    0
[2,]    0    1    0    0    0
[3,]    0    0    1    0    0
[4,]    0    0    0    1    0
[5,]    0    0    0    0    1
> diag(c(1:3)) #对角矩阵
     [,1] [,2] [,3]
[1,]    1    0    0
[2,]    0    2    0
[3,]    0    0    3
> m<-matrix(1,nr=2,nc=2)
> m
     [,1] [,2]
[1,]    1    1
[2,]    1    1
> n<-matrix(2,nr=2,nc=2)
> n
     [,1] [,2]
[1,]    2    2
[2,]    2    2
> rbind(m,n) #按行合并
     [,1] [,2]
[1,]    1    1
[2,]    1    1
[3,]    2    2
[4,]    2    2
> cbind(m,n) #按列合并
     [,1] [,2] [,3] [,4]
[1,]    1    1    2    2
[2,]    1    1    2    2
> m*n #逐元乘积
     [,1] [,2]
[1,]    2    2
[2,]    2    2
> n*n
     [,1] [,2]
[1,]    4    4
[2,]    4    4
> rbind(m,n)%*%cbind(m,n) #代数乘积
     [,1] [,2] [,3] [,4]
[1,]    2    2    4    4
[2,]    2    2    4    4
[3,]    4    4    8    8
[4,]    4    4    8    8
> cbind(m,n)%*%rbind(m,n)
     [,1] [,2]
[1,]   10   10
[2,]   10   10
dim(x) #行和列的维数
nrow(x) #行数
ncol(x) #列数
as.matrix(x) #转换成矩阵
is.matrix(x) #判断是否矩阵
diag() #方阵对角线元素或者生成对角矩阵
eigen() #求特征值和特征向量
solve() #求逆矩阵
chol() #Choleski分解
svd() #奇异值分解
qr() #QR分解
det() #求行列式(不同行不同列的n个元素的乘积的代数和)
dim() #给出行列数
t() #矩阵转置
scale()函数
#中心化(每列的每个数值减去该列的均值)#标准化(将每个数都减去该列的平均值后再除以该列的标准差)
scale(x, center = TRUE, scale = TRUE) #对数据矩阵做中心化和标准化变换
scale(x,scale=FALSE) #只减去均值,不除以标准差
scale(x,center=FALSE) #不减去均值,直接除以均方根,即sqrt(sum(x^2)/(n-1))
apply()函数
apply(data, dim, function) #dim取1表示对行运用函数,取2表示对列运用函数
apply(x, 2, sum) #列和,等同于colSums(x)
apply(x, 1, var) #行方差
apply(x, 2, max) #每列最大值
apply(x, 2, rev) #每列的数反排列
#function也可取"+","-","*","/"等运算符;二元运算符的另一个操作数需通过后续参数传入,如apply(x, 2, "*", 2)
sweep()函数
sweep(x, MARGIN, STATS, FUN="-", ...) #MARGIN取1表示对行运用函数,取2表示对列运用函数
#STATS是运算的参数
#FUN为运算函数,默认是减法
> m<-matrix(c(1:9),byrow=TRUE,nrow=3)
> m
     [,1] [,2] [,3]
[1,]    1    2    3
[2,]    4    5    6
[3,]    7    8    9
> sweep(m, 1, c(1,4,7), "+") #第一行都加1,第二行都加4,第三行都加7
     [,1] [,2] [,3]
[1,]    2    3    4
[2,]    8    9   10
[3,]   14   15   16