# ggplot2画图教程:实战

2017-06-19 15:47

ggplot2越来越受欢迎,其画图语法的优雅和图形的美观令人难忘。这篇文章很好地介绍了ggplot2画图的实战,小编看了一遍就学得清清楚楚了,在这里分享出来。

FROM:《R Graphics Cookbook-By Winston Chang》

``````library(ggplot2)
library(gcookbook)
``````

1.基础的条形图

``````# Basic bar graph: bar heights are taken directly from the weight column
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")
`````` ``````
# Same kind of graph with Time used as-is (not yet converted to a factor)
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")
`````` ``````
# Wrap Time in factor() so it is treated as a discrete (categorical) variable
ggplot(data = BOD, mapping = aes(x = factor(Time), y = demand)) +
  geom_bar(stat = "identity")
`````` ``````
# fill and colour are set (not mapped): light blue bars with a black outline
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(
    stat = "identity",
    colour = "black",
    fill = "lightblue"
  )
`````` 2.涉及分组变量的条形图

``````# y is mapped to Weight, so the bars must use stat = "identity". Leaving it
# out makes geom_bar() try to count cases per group instead, which is what
# produces the message printed after this snippet.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge")
``````
``````## Mapping a variable to y and also using stat="bin".
##   With stat="bin", it will attempt to set the y value to the count of cases in each group.
##   This can result in unexpected behavior and will not be allowed in a future version of ggplot2.
##   If you want y to represent counts of cases, use stat="bin" and don't map a variable to y.
##   If you want y to represent values in the data, use stat="identity".
##   See ?geom_bar for examples. (Deprecated; last used in version 0.9.2)
`````` ``````
# To add a black outline, use colour = "black" inside geom_bar().
# stat = "identity" is required because y is mapped to Weight; without it
# geom_bar() tries to count cases and emits the message printed after this
# snippet.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")
``````
``````## Mapping a variable to y and also using stat="bin".
##   With stat="bin", it will attempt to set the y value to the count of cases in each group.
##   This can result in unexpected behavior and will not be allowed in a future version of ggplot2.
##   If you want y to represent counts of cases, use stat="bin" and don't map a variable to y.
##   If you want y to represent values in the data, use stat="identity".
##   See ?geom_bar for examples. (Deprecated; last used in version 0.9.2)
`````` ``````# What the dodged graph looks like when a combination of the categorical
# variables is missing from the data
ce <- cabbage_exp[1:5, ]  # Copy the data without last row
# stat = "identity" is required because y is mapped to Weight; without it
# geom_bar() tries to count cases and emits the message printed after this
# snippet.
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")
``````
``````## Mapping a variable to y and also using stat="bin".
##   With stat="bin", it will attempt to set the y value to the count of cases in each group.
##   This can result in unexpected behavior and will not be allowed in a future version of ggplot2.
##   If you want y to represent counts of cases, use stat="bin" and don't map a variable to y.
##   If you want y to represent values in the data, use stat="identity".
##   See ?geom_bar for examples. (Deprecated; last used in version 0.9.2)
`````` 3.计数数据条形图

``````# No stat is given, so geom_bar() falls back to its default (stat = "bin"
# in the ggplot2 version used here) and plots the number of cases per group.
# NOTE(review): newer ggplot2 releases may use a different default stat for
# geom_bar() - confirm against the installed version.
ggplot(data = diamonds, mapping = aes(x = carat)) +
  geom_bar()
``````
``````## stat_bin: binwidth defaulted to range/30. Use "binwidth = x" to adjust this.
`````` 4.条形图颜色

``````
# Keep only the rows whose rank of Change is above 40
upc <- subset(uspopchange, rank(Change) > 40)
# Map Region to the fill colour of the bars
ggplot(data = upc, mapping = aes(x = Abb, y = Change, fill = Region)) +
  geom_bar(stat = "identity")
`````` ``````# Colours can be chosen with scale_fill_brewer() or scale_fill_manual().
# Setting a value (colour = "black") happens outside aes(); mapping a
# variable (fill = Region) happens inside aes().
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_bar(colour = "black", stat = "identity") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")
`````` 5.对正负数用不同的颜色作图

``````csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
# Flag non-negative anomalies so the two signs can be filled differently
csub$pos <- csub$Anomaly10y >= 0
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")
`````` ``````# Change the colours with scale_fill_manual() and remove the legend with
# guide = "none" (the older guide = FALSE spelling is deprecated)
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity",
    position = "identity",
    colour = "black",
    size = 0.25
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")
`````` 6.调整条形宽度和空间

``````# Bar width is controlled by the width argument of geom_bar().
# First, the graph with no width given:
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")
`````` ``````ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(width = 0.5, stat = "identity")
`````` ``````ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(width = 1, stat = "identity")
`````` ``````# Grouped bar graph with narrow bars
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5)
`````` ``````# Same bars, dodged by 0.7 to leave some space between them
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = position_dodge(0.7), width = 0.5)
`````` 7.堆叠条形图

``````# Stacked bars: use geom_bar() and map a variable to fill
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity")
`````` ``````# guides() with guide_legend(reverse = TRUE) reverses the legend order
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))
`````` ``````# Specify order = desc() in the aesthetic mapping.
# NOTE(review): the order aesthetic may be ignored by newer ggplot2
# releases - confirm against the installed version.
library(plyr)  # Needed for desc()``````
``````ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar, order = desc(Cultivar))
) +
  geom_bar(stat = "identity")``````

` `

``````# A different colour palette via scale_fill_brewer(), a black outline via
# colour = "black", and a reversed legend
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(colour = "black", stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
`````` 8.比例堆栈图形

``````# Scale the data to 100% within each stack, using ddply() from the plyr
# package together with transform().
library(plyr)
# Group-wise transform(), splitting on "Date": each Weight becomes a
# percentage of the total Weight for its Date
ce <- ddply(
  cabbage_exp,
  "Date",
  transform,
  percent_weight = Weight / sum(Weight) * 100
)
ggplot(
  data = ce,
  mapping = aes(x = Date, y = percent_weight, fill = Cultivar)
) +
  geom_bar(stat = "identity")
`````` ``````# The percentages within each Date group come from this ddply() call;
# running it on its own prints the transformed data
ddply(
  cabbage_exp,
  "Date",
  transform,
  percent_weight = Weight / sum(Weight) * 100
)
``````
``````##   Cultivar Date Weight     sd  n      se percent_weight
## 1      c39  d16   3.18 0.9566 10 0.30251          58.46
## 2      c52  d16   2.26 0.4452 10 0.14079          41.54
## 3      c39  d20   2.80 0.2789 10 0.08819          47.38
## 4      c52  d20   3.11 0.7909 10 0.25009          52.62
## 5      c39  d21   2.74 0.9834 10 0.31098          65.08
## 6      c52  d21   1.47 0.2111 10 0.06675          34.92
``````
``````# Proportional stacks with a black outline, reversed legend and the
# "Pastel1" palette
ggplot(
  data = ce,
  mapping = aes(x = Date, y = percent_weight, fill = Cultivar)
) +
  geom_bar(colour = "black", stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
`````` 9.添加标签

``````# Add geom_text() to the graph; vjust = 1.5 puts the label below the top
# of each bar
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")
`````` ``````# Above the top
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2)
`````` ``````# Adjust y limits to be a little higher (5% above the largest Weight)
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)
`````` ``````# Map y positions slightly above bar top - y range of plot will auto-adjust
ggplot(cabbage_exp, aes(x = interaction(Date, Cultivar), y = Weight)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = Weight + 0.1, label = Weight))
`````` ``````# Use size to specify a smaller font to make the labels fit; the labels
# are dodged as well so they line up with the dodged bars
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5,
    colour = "white",
    position = position_dodge(0.9),
    size = 3
  )
``````
``````## ymax not defined: adjusting position using y instead
`````` ``````# Labels on stacked bars need the cumulative sum within each stack.
# arrange() and ddply() both come from plyr.
library(plyr)
# Sort by the Date and Cultivar columns
ce <- arrange(cabbage_exp, Date, Cultivar)
# Cumulative Weight within each Date gives the label position
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight))
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(
    mapping = aes(y = label_y, label = Weight),
    colour = "white",
    vjust = 1.5
  )
`````` ``````# To centre the labels in each bar, subtract half of the bar's Weight from
# the cumulative sum and drop the vjust offset used above
ce <- arrange(cabbage_exp, Date, Cultivar)
# Label position: middle of each stacked segment
ce <- ddply(ce, "Date", transform, label_y = cumsum(Weight) - 0.5 * Weight)
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(mapping = aes(y = label_y, label = Weight), colour = "white")
`````` ``````# Reversed legend, different colours, centred labels in a smaller font
# (size), a "kg" suffix added with paste(), and format(nsmall = 2) so there
# are always two digits after the decimal point
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(colour = "black", stat = "identity") +
  geom_text(
    mapping = aes(
      y = label_y,
      label = paste(format(Weight, nsmall = 2), "kg")
    ),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
`````` 10.Cleveland点阵图

``````# The simplest way to create a dot plot is geom_point().
# Work with the first 25 rows of the tophitters2001 data set
tophit <- head(tophitters2001, 25)
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()
`````` ``````tophit[, c("name", "lg", "avg")]
``````
``````##                 name lg    avg
## 1       Larry Walker NL 0.3501
## 2      Ichiro Suzuki AL 0.3497
## 3       Jason Giambi AL 0.3423
## 4     Roberto Alomar AL 0.3357
## 5        Todd Helton NL 0.3356
## 6        Moises Alou NL 0.3314
## 7      Lance Berkman NL 0.3310
## 8         Bret Boone AL 0.3307
## 9  Frank Catalanotto AL 0.3305
## 10     Chipper Jones NL 0.3304
## 11     Albert Pujols NL 0.3288
## 12       Barry Bonds NL 0.3277
## 13        Sammy Sosa NL 0.3276
## 14       Juan Pierre NL 0.3274
## 15     Juan Gonzalez AL 0.3252
## 16     Luis Gonzalez NL 0.3251
## 17      Rich Aurilia NL 0.3239
## 18      Paul Lo Duca NL 0.3196
## 19        Jose Vidro NL 0.3189
## 20    Alex Rodriguez AL 0.3180
## 21       Cliff Floyd NL 0.3171
## 22   Shannon Stewart AL 0.3156
## 23      Jeff Cirillo NL 0.3125
## 24       Jeff Conine AL 0.3111
## 25       Derek Jeter AL 0.3111
``````
``````# reorder(name, avg) takes the name column, turns it into a factor, and
# sorts the factor levels by avg
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +  # Use a larger dot
  theme_bw() +
  theme(
    # Drop the vertical grid lines, keep dashed horizontal ones
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
`````` ``````# Swap the axes and rotate the text labels by 60 degrees
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +  # Use a larger dot
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    # Grid lines swapped as well: no horizontal ones, dashed vertical ones
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )
`````` ``````# reorder() only orders factor levels by one other variable; to order
# the levels by two variables we must do it manually.
# Get the names, sorted first by lg, then by avg
nameorder <- tophit$name[order(tophit$lg, tophit$avg)]
# Turn name into a factor, with levels in the order of nameorder
tophit$name <- factor(tophit$name, levels = nameorder)
# Note that geom_segment() needs values for x, y, xend, and yend
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(),  # No horizontal grid lines
    legend.position = c(1, 0.55),          # Put legend inside plot area
    legend.justification = c(1, 0.5)
  )
`````` ``````# Another way to separate the two groups is to use facets; the colour
# legend is switched off with guide = "none" (the older guide = FALSE
# spelling is deprecated)
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(
    palette = "Set1",
    limits = c("NL", "AL"),
    guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")
`````` 