您现在的位置:主页 > 数据可视化 > GGplot2画图教程.实战

GGplot2画图教程.实战

2017-06-19 15:47

ggplot2越来越受人欢迎,其画图语法的优雅性和美观让人难忘,这篇文章很好地介绍了ggplot2画图的实战,小编看了一遍就学得清清楚楚了,在这里分享出来。

FROM:《R Graphics Cookbook-By Winston Chang》

导入需要的软件包,ggplot2用于作图,gcookbook存储有案例数据。

library(ggplot2)
library(gcookbook)

1.基础的条形图

# Basic bar graph: one bar per group, with the bar height taken from weight.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 


# Same kind of graph for BOD, with Time mapped to x without any conversion.
ggplot(BOD, aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 


# Wrap Time in factor() so that x is treated as a discrete (categorical)
# variable.
ggplot(BOD, aes(x = factor(Time), y = demand)) +
  geom_bar(stat = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 


# Set a constant fill colour and a black outline for every bar.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity", fill = "lightblue", colour = "black")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

2.涉及分组变量的条形图

# Grouped bars: Cultivar is mapped to fill and the bars are dodged side by
# side. stat = "identity" is required because Weight is mapped to y; leaving
# it out only raised a deprecation warning in ggplot2 0.9.x and is an error in
# ggplot2 2.0.0 and later.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 


# To add a black outline, use colour = "black" inside geom_bar().
# stat = "identity" is required because Weight is mapped to y; leaving it out
# only raised a deprecation warning in ggplot2 0.9.x and is an error in
# ggplot2 2.0.0 and later.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# What happens when a combination of the categorical variables is missing:
# with position = "dodge" the bar for that combination is absent.
ce <- cabbage_exp[1:5, ]  # Copy the data without last row
# stat = "identity" is required because Weight is mapped to y; leaving it out
# only raised a deprecation warning in ggplot2 0.9.x and is an error in
# ggplot2 2.0.0 and later.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

3.计数数据条形图

# Counting cases along a continuous x: the values have to be binned first.
# Up to ggplot2 1.x geom_bar() defaulted to stat = "bin" and did this itself;
# since ggplot2 2.0.0 its default stat is "count", which makes one bar per
# distinct carat value. geom_histogram() bins the values (30 bins by default)
# on every ggplot2 version; pass binwidth = to choose the bin size explicitly.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram()

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

4.条形图颜色


# Keep only the rows of uspopchange whose Change ranks above 40, then colour
# the bars by Region.
upc <- subset(uspopchange, rank(Change) > 40)
ggplot(upc, aes(x = Abb, y = Change, fill = Region)) +
  geom_bar(stat = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Colours are chosen with scale_fill_brewer() or scale_fill_manual().
# Mapping a variable (fill = Region) happens inside aes(); setting a constant
# (colour = "black") happens outside of it. The bars are sorted by Change.
ggplot(upc, aes(x = reorder(Abb, Change), y = Change, fill = Region)) +
  geom_bar(stat = "identity", colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

5.对正负数用不同的颜色作图

# Take the Berkeley rows from 1900 onwards and flag each one as non-negative
# (TRUE) or negative (FALSE) so the sign can drive the fill colour.
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
csub$pos <- csub$Anomaly10y >= 0
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Change the colours with scale_fill_manual() and remove the legend with
# guide = "none". (guide = FALSE is deprecated since ggplot2 3.3.4; "none" is
# accepted by older versions as well.)
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity", position = "identity",
    colour = "black", size = 0.25
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

6.调整条形宽度和空间

# Bars are made narrower or wider with the width argument of geom_bar().
# This first graph uses the default width for comparison.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Narrower bars: width = 0.5.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 0.5)

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Wider bars: width = 1.
ggplot(pg_mean, aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 1)

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Grouped bar graph with narrow bars.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", width = 0.5, position = "dodge")

 

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

 

# Same graph with some space between the bars of a group: the dodge width
# (0.7) is larger than the bar width (0.5).
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(0.7))

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

7.堆叠条形图

# Stacked bars: use geom_bar() and map a variable to fill.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Reverse the order of the legend entries with guides().
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Reverse the stacking order of the bars.
# The order aesthetic (order = desc(Cultivar)) was removed in ggplot2 2.0.0
# and is ignored by later versions, so the stack is reversed with
# position_stack(reverse = TRUE) instead (needs ggplot2 >= 2.2.0).
library(plyr)  # No longer needed for this graph; kept so plyr stays attached
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = position_stack(reverse = TRUE))

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

# Use scale_fill_brewer() for a different colour palette and colour = "black"
# for a black outline; the legend order is reversed as well.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

8.比例堆栈图形

# Scale every stack to 100%: ddply() from plyr splits the data on "Date" and
# transform() adds each row's share of that date's total Weight.
library(plyr)
ce <- ddply(
  cabbage_exp, "Date", transform,
  percent_weight = Weight / sum(Weight) * 100
)
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# The percentages are computed within each Date group by ddply(); printing the
# result shows the added percent_weight column.
ddply(
  cabbage_exp, "Date", transform,
  percent_weight = Weight / sum(Weight) * 100
)
##   Cultivar Date Weight     sd  n      se percent_weight
## 1      c39  d16   3.18 0.9566 10 0.30251          58.46
## 2      c52  d16   2.26 0.4452 10 0.14079          41.54
## 3      c39  d20   2.80 0.2789 10 0.08819          47.38
## 4      c52  d20   3.11 0.7909 10 0.25009          52.62
## 5      c39  d21   2.74 0.9834 10 0.31098          65.08
## 6      c52  d21   1.47 0.2111 10 0.06675          34.92
# Proportional stack with a black outline, a reversed legend and the Pastel1
# palette.
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

9.添加标签

# Add labels with geom_text(). vjust = 1.5 places the text below the top of
# each bar; it is drawn in white.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# A negative vjust places the labels above the top of each bar.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2)

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Raise the upper y limit to 105% of the largest Weight so there is room for
# the labels above the bars.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Map the label's y position to slightly above the bar top (Weight + 0.1);
# the y range of the plot then adjusts automatically.
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = Weight + 0.1, label = Weight))

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Labels on dodged bars: the text is dodged by 0.9 as well, and a smaller
# font (size = 3) makes the labels fit inside the bars.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5, colour = "white",
    position = position_dodge(0.9), size = 3
  )
## ymax not defined: adjusting position using y instead

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Putting labels on stacked bars requires the cumulative sum within each
# stack; arrange(), desc() and ddply() come from plyr.
library(plyr)
# Sort by Date, then by Cultivar in descending order. Since ggplot2 2.2.0 the
# first factor level is stacked on top, so the running total has to start
# with the last level for every label to land on its own segment.
ce <- arrange(cabbage_exp, Date, desc(Cultivar))
# Get the cumulative sum of Weight within each Date
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# To put the labels in the middle of each segment, subtract half of the
# segment's Weight from the cumulative sum; the vjust offset in geom_text()
# is then no longer needed.
# Sort by Date, then by Cultivar in descending order: since ggplot2 2.2.0 the
# first factor level is stacked on top, so the running total has to start
# with the last level for every label to land on its own segment.
ce <- arrange(cabbage_exp, Date, desc(Cultivar))
# Calculate y position, placing it in the middle
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Polished version: reversed legend, Pastel1 colours with a black outline and
# labels at label_y in a smaller font. format(nsmall = 2) always shows two
# digits after the decimal point and paste() appends "kg".
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_text(
    aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

10.Cleveland点阵图

# The simplest way to create a dot plot is to use geom_point().
# Take the first 25 rows of the tophitters2001 data set.
tophit <- tophitters2001[1:25, ]
ggplot(tophit, aes(x = avg, y = name)) +
  geom_point()

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Print the name, lg and avg columns of tophit.
tophit[, c("name", "lg", "avg")]
##                 name lg    avg
## 1       Larry Walker NL 0.3501
## 2      Ichiro Suzuki AL 0.3497
## 3       Jason Giambi AL 0.3423
## 4     Roberto Alomar AL 0.3357
## 5        Todd Helton NL 0.3356
## 6        Moises Alou NL 0.3314
## 7      Lance Berkman NL 0.3310
## 8         Bret Boone AL 0.3307
## 9  Frank Catalanotto AL 0.3305
## 10     Chipper Jones NL 0.3304
## 11     Albert Pujols NL 0.3288
## 12       Barry Bonds NL 0.3277
## 13        Sammy Sosa NL 0.3276
## 14       Juan Pierre NL 0.3274
## 15     Juan Gonzalez AL 0.3252
## 16     Luis Gonzalez NL 0.3251
## 17      Rich Aurilia NL 0.3239
## 18      Paul Lo Duca NL 0.3196
## 19        Jose Vidro NL 0.3189
## 20    Alex Rodriguez AL 0.3180
## 21       Cliff Floyd NL 0.3171
## 22   Shannon Stewart AL 0.3156
## 23      Jeff Cirillo NL 0.3125
## 24       Jeff Conine AL 0.3111
## 25       Derek Jeter AL 0.3111
# reorder(name, avg) takes the name column, turns it into a factor and sorts
# the factor levels by avg. The vertical grid lines are removed and the
# horizontal ones are drawn dashed.
ggplot(tophit, aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) + # Use a larger dot
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

# Swap the axes (names on x, avg on y) and rotate the x axis text labels by
# 60 degrees. The horizontal grid lines are removed and the vertical ones are
# drawn dashed.
ggplot(tophit, aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) + # Use a larger dot
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )

 

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

 

# reorder() can only order factor levels by one other variable; ordering by
# two variables has to be done manually.
# Get the names, sorted first by lg, then by avg
nameorder <- tophit$name[order(tophit$lg, tophit$avg)]
# Turn name into a factor, with levels in the order of nameorder
tophit$name <- factor(tophit$name, levels = nameorder)
# geom_segment() needs values for x, y, xend and yend: x and y are inherited
# from ggplot(), yend is mapped to name and xend is fixed at 0.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(), # No horizontal grid lines
    legend.position = c(1, 0.55), # Put legend inside plot area
    legend.justification = c(1, 0.5)
  )

 

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

 

# Another way to separate the two groups is to use facets, one panel per lg.
# The colour legend is removed with guide = "none". (guide = FALSE is
# deprecated since ggplot2 3.3.4; "none" is accepted by older versions too.)
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(
    palette = "Set1", limits = c("NL", "AL"), guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")

条形图-ggplot2 - yugao1986@126 - R语言学习博客--yugao1986

 

 


精选的每一篇文章均来源于公开网络,仅供学习使用,不会用于任何商业用途,文章版权归原作者所有,如果侵犯到原作者的权益,请您与我们联系删除或者授权事宜。转载网站文章请注明原文章作者,否则产生的任何版权纠纷与本站无关。