ggplot2越来越受人欢迎,其画图语法的优雅性和美观让人难忘。这篇文章很好地介绍了ggplot2画图的实战,小编看了一遍就学得清清楚楚了,在这里分享出来。
FROM:《R Graphics Cookbook-By Winston Chang》
导入需要的软件包,ggplot2用于作图,gcookbook存储有案例数据。
library(ggplot2)
library(gcookbook)
1.基础的条形图
# Bar heights are taken directly from the weight column (stat = "identity").
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

# Time is used as-is on the x axis.
ggplot(data = BOD, mapping = aes(x = Time, y = demand)) +
  geom_bar(stat = "identity")

# Wrapping Time in factor() turns it into a discrete (categorical) variable.
ggplot(data = BOD, mapping = aes(x = factor(Time), y = demand)) +
  geom_bar(stat = "identity")

# fill and colour are set (not mapped): light blue bars with a black outline.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(
    stat = "identity",
    fill = "lightblue",
    colour = "black"
  )
2.涉及分组变量的条形图
# Grouped bar graph: map the grouping variable to fill and dodge the bars.
# y is mapped to a data column, so stat = "identity" must be given explicitly.
# Without it geom_bar() tries to count the cases in each group; old ggplot2
# releases answered that with a deprecation warning ("If you want y to
# represent values in the data, use stat=\"identity\"") and announced that the
# combination would not be allowed in future versions.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge")

# To add a black outline, use colour = "black" inside geom_bar().
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")

# Case where a combination of the categorical variables is missing from the
# data: no bar is drawn for the missing combination.
ce <- cabbage_exp[1:5, ] # Copy the data without last row
ggplot(ce, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")
3.计数数据条形图
# carat is continuous, so the cases have to be binned before they can be
# counted. geom_histogram() always uses the binning stat, whereas a bare
# geom_bar() only binned under the old stat = "bin" default that produced the
# message captured below.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram()
## stat_bin: binwidth defaulted to range/30. Use "binwidth = x" to adjust this.
4.条形图颜色
# Keep the rows of uspopchange whose Change ranks above 40.
upc <- subset(uspopchange, rank(Change) > 40)

# Mapping Region to fill (inside aes()) colours the bars by region.
ggplot(data = upc, mapping = aes(x = Abb, y = Change, fill = Region)) +
  geom_bar(stat = "identity")

# Colours are chosen with scale_fill_brewer() or scale_fill_manual().
# Setting a constant (colour = "black") happens outside aes(); mapping a
# variable (fill = Region) happens inside aes().
# reorder(Abb, Change) orders the bars along the x axis by Change.
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_bar(stat = "identity", colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")
5.对正负数用不同的颜色作图
# Keep the Berkeley series from 1900 onwards.
csub <- subset(climate, Source == "Berkeley" & Year >= 1900)
# Logical flag: TRUE when the 10-year anomaly is non-negative. It is mapped
# to fill below so positive and negative bars get different colours.
csub$pos <- csub$Anomaly10y >= 0

ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(stat = "identity", position = "identity")

# Change the colours with scale_fill_manual() and remove the legend.
# guide = "none" is used instead of guide = FALSE, which newer ggplot2
# releases deprecate.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_bar(
    stat = "identity",
    position = "identity",
    colour = "black",
    size = 0.25
  ) +
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")
6.调整条形宽度和空间
# Bar width is controlled with the width argument of geom_bar().
# No width given:
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

# Narrower bars:
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 0.5)

# Wider bars:
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity", width = 1)

# Grouped bar graph with narrow bars.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", width = 0.5, position = "dodge")

# Same graph with some space between the bars of a group: the dodge width
# (0.7) is larger than the bar width (0.5).
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", width = 0.5, position = position_dodge(0.7))
7.堆叠条形图
# Stacked bar graph: use geom_bar() and map a variable to fill.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity")

# Reverse the order of the items in the legend with guides().
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity") +
  guides(fill = guide_legend(reverse = TRUE))

# Specify order = desc() in the aesthetic mapping.
# NOTE(review): the order aesthetic appears to be ignored by ggplot2 2.0.0
# and later - confirm against the installed version.
library(plyr) # Needed for desc()
ggplot(
  data = cabbage_exp,
  mapping = aes(
    x = Date,
    y = Weight,
    fill = Cultivar,
    order = desc(Cultivar)
  )
) +
  geom_bar(stat = "identity")

# Use scale_fill_brewer() for a different colour palette and
# colour = "black" for a black outline.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
8.比例堆栈图形
# Proportional stacked bars: scale the data to 100% within each stack.
# ddply() from the plyr package applies transform() to each group.
library(plyr)

# Group-wise transform(), splitting on "Date": percent_weight is each row's
# share of the total Weight for its Date.
ce <- ddply(
  cabbage_exp,
  "Date",
  transform,
  percent_weight = Weight / sum(Weight) * 100
)

ggplot(data = ce, mapping = aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity")

# The same ddply() call, printed to show the computed percentages.
ddply(
  cabbage_exp,
  "Date",
  transform,
  percent_weight = Weight / sum(Weight) * 100
)
## Cultivar Date Weight sd n se percent_weight
## 1 c39 d16 3.18 0.9566 10 0.30251 58.46
## 2 c52 d16 2.26 0.4452 10 0.14079 41.54
## 3 c39 d20 2.80 0.2789 10 0.08819 47.38
## 4 c52 d20 3.11 0.7909 10 0.25009 52.62
## 5 c39 d21 2.74 0.9834 10 0.31098 65.08
## 6 c52 d21 1.47 0.2111 10 0.06675 34.92

# Black outline, reversed legend order and a pastel palette.
ggplot(data = ce, mapping = aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
9.添加标签
# Labels are added with geom_text().
# Below the top of each bar (vjust = 1.5), in white.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = 1.5, colour = "white")

# Above the top of each bar (negative vjust).
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2)

# Raise the upper y limit to 5% above the largest Weight.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Weight), vjust = -0.2) +
  ylim(0, max(cabbage_exp$Weight) * 1.05)

# Map the label's y position slightly above the bar top instead; the y range
# of the plot then adjusts automatically.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = Weight + 0.1, label = Weight))

# Grouped bars: dodge the labels as well, and use size to pick a smaller
# font so that the labels fit.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(
    aes(label = Weight),
    vjust = 1.5,
    colour = "white",
    position = position_dodge(0.9),
    size = 3
  )
## ymax not defined: adjusting position using y instead

# Labels on stacked bars need the cumulative sum within each stack.
# arrange() and ddply() come from the plyr package.
# NOTE(review): label_y assumes the bars are stacked in the same order as the
# rows are sorted here - confirm on the installed ggplot2 version.
library(plyr)
# Sort by the Date and Cultivar columns, then take the cumulative sum of
# Weight within each Date.
ce <- ddply(
  arrange(cabbage_exp, Date, Cultivar),
  "Date",
  transform,
  label_y = cumsum(Weight)
)
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), vjust = 1.5, colour = "white")

# To put the labels in the middle of each bar, compute the midpoint as the
# y position; the vjust offset in geom_text() is then no longer needed.
ce <- ddply(
  arrange(cabbage_exp, Date, Cultivar),
  "Date",
  transform,
  label_y = cumsum(Weight) - 0.5 * Weight
)
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity") +
  geom_text(aes(y = label_y, label = Weight), colour = "white")

# Change the legend order and colours, put the labels in the middle with a
# smaller font (size), append "kg" with paste(), and use format() so there
# are always two digits after the decimal point.
ggplot(data = ce, mapping = aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_bar(stat = "identity", colour = "black") +
  geom_text(
    aes(y = label_y, label = paste(format(Weight, nsmall = 2), "kg")),
    size = 4
  ) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_brewer(palette = "Pastel1")
10.Cleveland点阵图
# The simplest way to create a dot plot is geom_point().
# Work on the first 25 rows of the tophitters2001 data set.
tophit <- tophitters2001[1:25, ]
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

# The three columns used in these plots.
tophit[, c("name", "lg", "avg")]
## name lg avg
## 1 Larry Walker NL 0.3501
## 2 Ichiro Suzuki AL 0.3497
## 3 Jason Giambi AL 0.3423
## 4 Roberto Alomar AL 0.3357
## 5 Todd Helton NL 0.3356
## 6 Moises Alou NL 0.3314
## 7 Lance Berkman NL 0.3310
## 8 Bret Boone AL 0.3307
## 9 Frank Catalanotto AL 0.3305
## 10 Chipper Jones NL 0.3304
## 11 Albert Pujols NL 0.3288
## 12 Barry Bonds NL 0.3277
## 13 Sammy Sosa NL 0.3276
## 14 Juan Pierre NL 0.3274
## 15 Juan Gonzalez AL 0.3252
## 16 Luis Gonzalez NL 0.3251
## 17 Rich Aurilia NL 0.3239
## 18 Paul Lo Duca NL 0.3196
## 19 Jose Vidro NL 0.3189
## 20 Alex Rodriguez AL 0.3180
## 21 Cliff Floyd NL 0.3171
## 22 Shannon Stewart AL 0.3156
## 23 Jeff Cirillo NL 0.3125
## 24 Jeff Conine AL 0.3111
## 25 Derek Jeter AL 0.3111

# reorder(name, avg) takes the name column, turns it into a factor and sorts
# the factor levels by avg, so the points appear in order of avg.
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) + # Use a larger dot
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

# Swap the axes and rotate the x axis text labels by 60 degrees.
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) + # Use a larger dot
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(colour = "grey60", linetype = "dashed")
  )
# reorder() only orders factor levels by one other variable; to order the
# levels by two variables the ordering has to be done manually.
# Get the names, sorted first by lg, then by avg.
nameorder <- tophit$name[order(tophit$lg, tophit$avg)]
# Turn name into a factor, with levels in the order of nameorder.
tophit$name <- factor(tophit$name, levels = nameorder)

# geom_segment() needs values for x, y, xend and yend: x and y come from the
# plot-level mapping, yend is mapped to name and xend is fixed at 0.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw() +
  theme(
    panel.grid.major.y = element_blank(), # No horizontal grid lines
    legend.position = c(1, 0.55), # Put legend inside plot area
    legend.justification = c(1, 0.5)
  )

# Another way to separate the two groups is to use facets. The legend is
# dropped because each facet corresponds to a single lg value.
# guide = "none" is used instead of guide = FALSE, which newer ggplot2
# releases deprecate.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  scale_colour_brewer(
    palette = "Set1",
    limits = c("NL", "AL"),
    guide = "none"
  ) +
  theme_bw() +
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")
精选的每一篇文章均来源于公开网络,仅供学习使用,不会用于任何商业用途,文章版权归原作者所有,如果侵犯到原作者的权益,请您与我们联系删除或者授权事宜。转载网站文章请注明原文章作者,否则产生的任何版权纠纷与本站无关。