R2-03 第十次作业综合案例

微思微丝 2018-01-08 23:18:17 阅读: 1436

ggplot2之综合案例

任务1：

###定义summarySE的功能
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  library(plyr)
  
  # New version of length which can handle NA's: if na.rm==T, don't count them
  #构建一个新版的length函数，它能更好地处理缺失NA值，Z只需判断na.rm==T,而不用对缺失值进行繁琐的计数.
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }
  
  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- ddply(data, groupvars, .drop=.drop,
                 .fun = function(xx, col) {
                   c(N    = length2(xx[[col]], na.rm=na.rm),
                     mean = mean   (xx[[col]], na.rm=na.rm),
                     sd   = sd     (xx[[col]], na.rm=na.rm)
                   )
                 },
                 measurevar
  )
  
  # Rename the "mean" column  
  #重命名相关列
  datac <- rename(datac, c("mean" = measurevar))
  
  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean
  
  # Confidence interval multiplier for standard error
  #通过计算标准差计算置信区间算子
  # Calculate t-statistic for confidence interval: 
  #由置信区间算子得到对应的t统计量
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  #例如，如果置信水平为95%，qt函数中的第一个参数为0.975，自由度则为N-1.
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult
  
  return(datac)
}
tg <- ToothGrowth
head(tg)
library(ggplot2)
tg1<-summarySE(tg, measurevar="len", groupvars=c("supp","dose"))
pd <- position_dodge(0.1)
ggplot(tg1, aes(x=dose, y=len, colour=supp,shape=supp))+ 
  geom_errorbar(aes(ymin=len-sd, ymax=len+sd), width=.1,colour="blue",lwd=1,position=pd) +
  geom_line(aes(linetype=supp), position=pd,size=1.3)+
  geom_point(size=3, position=pd)+
  theme_bw(base_size = 15,base_family = "Times")+
  ggtitle("R2-03")+
  theme(axis.line = element_line(),panel.border = element_blank())+
  theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+
  theme(legend.justification=c(1,0), legend.position=c(1,0.1))

任务2：

options(scipen = 999)
library(ggplot2)
theme_set(theme_bw())
data(midwest,package="ggplot2")
gg<-ggplot(midwest,aes(x=area,y=poptotal))+
  geom_point(aes(col=state,size=poptotal))+
  scale_colour_manual(values=rainbow(5))+##自己定义为彩虹色
  geom_smooth(method="loess",se=F)+
  xlim(c(0,0.1))+
  ylim(c(0,500000))+
  labs(subtitle="Area Vs Population",
       y="Population",x="Area",
       title="R2-03",caption="Source:midwest")+
  scale_size_continuous(guide=FALSE)+
  theme(legend.position=c(1,1),legend.justification = c(1,1))+
  theme(legend.background = element_blank())+
  theme(axis.text.y = element_text(angle=30,face = "italic",colour = "darkred",size=10))+
  theme(axis.text.x = element_text(face = "italic",colour = "darkred",size=10))
library(Cairo)
ggsave("E:/PNG/R2/R2-10-综合案例/R2-03-24-8.png",width=4,height=4)

任务3：1)

library(ggplot2)
library(reshape2)
require(scales)
require(plyr)
data<-read.csv("E:/PNG/R2/R2-10-综合案例/task3.csv")
data$Name <- with(data, reorder(Name, PTS))
head(data)
##融合数据
data.m <- melt(data) 
head(data.m)
##Scale之后列名命名为rescale放入原数据data.m
data.m <- ddply(data.m, .(variable), transform,rescale = scale(value))
head(data.m)
p <- ggplot(data.m, aes(variable, Name)) + 
  geom_tile(aes(fill = rescale),colour = "white") + 
  scale_fill_gradient(low = "white",high = "steelblue")+
  labs(x = "", y = "")+
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) + 
  theme(legend.position = "none")+
  theme(axis.ticks = element_blank())+
  theme(axis.text.x = element_text(angle =40, hjust=0.5,vjust = 0.5, colour = "grey50"))
p

2)

library(ggplot2)
data<-read.csv("E:/PNG/R2/R2-10-综合案例/task3.csv")
head(data)
a=as.matrix(data)
b=a[,-1]
y=apply(b,2,as.numeric)
head(y)
df <- as.matrix((scale(y)))
col <- colorRampPalette(c("white", "blue"))(256)
heatmap(df, scale = "none", col=col,Rowv=NA, Colv=NA,margins=c(5,10))

3)

library(ggplot2)
nba<-read.csv("E:/PNG/R2/R2-10-综合案例/task3.csv")
nba <- nba[order(nba$PTS),]
is.numeric(nba)
row.names(nba) <- nba$Name     ##该句是上图不能出名字的关键
head(nba)
nba <- nba[,2:20]# or nba <- nba[,-1]
head(nba)
nba_matrix <- data.matrix(nba)
col <- colorRampPalette(c("white", "blue"))(256)
heatmap(nba_matrix, Rowv=NA, Colv=NA, col=col,
        revC=FALSE, scale='column', margins=c(5,10))