Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 38 additions & 39 deletions exercise-1/exercise.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,108 +2,107 @@

# Install and load `ggplot2`
# You will also want to load `dplyr`
# Install ggplot2 only when it is missing, so re-running this script does not
# re-download the package (or fail when offline) on every execution.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library("ggplot2")
library("dplyr")

# For this exercise you'll be working with the `diamonds` data set included in the ggplot2 library
# Use `?diamonds` to get more information about this data set (including the column descriptions)
# Also check the _column names_ and the _number of rows_ in the data set
?diamonds
colnames(diamonds)
nrow(diamonds)

# This data set has a lot of rows. To make things a bit more readable,

# This data set has a lot of rows. To make things a bit more readable,
# use dplyr's `sample_n()` function to get a random 1000 rows from the data set
# Store this sample in a variable `diamonds.sample`
diamonds.sample <- sample_n(diamonds, 1000)
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = price, color = clarity))

# Start by making a new `ggplot` with the `diamonds.sample` as the data (no geometry yet)
# What do you see?
ggplot(data = diamonds.sample)

# Draw a scatter plot (with point geometry) for the `diamonds.sample` set,

# Draw a scatter plot (with point geometry) for the `diamonds.sample` set,
# with the `carat` mapped to the x-position and `price` mapped to the y-position.
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = price))


# Draw the same plot as above, but color each of the points based on their clarity.
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = price, color=clarity))


# Draw the same plot as above, but for the entire `diamonds` data set. Note this may take
# a few seconds to generate.
# Price by carat for the full `diamonds` data set, colored by clarity.
# A layer is only drawn when it is joined to a plot with `+`, so the whole
# plot is written as a single expression.
ggplot(data = diamonds) +
  geom_point(mapping = aes(x = carat, y = price, color = clarity))


# Draw another scatter plot for `diamonds.sample` of price (y) by carat (x),
# but with all of the dots colored "blue".
# Hint: you'll need to set the color channel, not map a value to it!
# `color` sits outside `aes()`, so it is set to a constant instead of being
# mapped to a data column. Uses `diamonds.sample`, as the exercise text asks.
ggplot(data = diamonds.sample) +
  geom_point(mapping = aes(x = carat, y = price), color = "blue")

# Draw a scatter plot for `diamonds.sample` of `price` by `carat`, where each
# point has an aesthetic _shape_ based on the diamond's `cut`.
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = price, shape=cut))
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = price, shape = cut))

# Draw a scatter plot for `diamonds.sample` of *`cut`* by `carat`, where each
# point has an aesthetic _size_ based on the diamond's *`price`*
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = cut, size=price))
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = cut, size = price, color = price))


# Try coloring the above plot based on the diamond's price!
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = cut, size=price, color=price))


# Draw a line plot (with line geometry) for `diamonds.sample`. The x-position should be mapped to
# carat, y-position to price, and color to carat.
ggplot(data = diamonds.sample) +
geom_line(mapping = aes(x = carat, y = price, color=cut))
ggplot(data = diamonds.sample) +
geom_line(mapping = aes(x = carat, y = price, color = carat))


# That's kind of messy. Try using `smooth` geometry instead.
ggplot(data = diamonds.sample) +
geom_smooth(mapping = aes(x = carat, y = price, color=cut))
ggplot(data = diamonds.sample) +
geom_smooth(mapping = aes(x = carat, y = price, color = carat))


# Draw a plot with bar geometry (a bar chart), mapping the diamond's `cut` to the x-axis
# Bar chart of the number of diamonds per cut. A plot starts with exactly one
# `ggplot()` call; everything added after it with `+` must be a layer.
ggplot(data = diamonds.sample) +
  geom_bar(mapping = aes(x = cut))


# Add an aesthetic property that will _fill_ each bar geometry based on the `clarity` of the diamonds
# What kind of chart do you get?
ggplot(data = diamonds.sample) +
geom_bar(mapping = aes(x = cut, fill=clarity))
ggplot(data = diamonds.sample) +
geom_bar(mapping = aes(x = cut, fill = clarity))

# Draw a histogram of diamond prices.
# Try mapping each bar based on clarity as well!
ggplot(data = diamonds.sample) +
geom_histogram(aes(x=price, fill=clarity))
ggplot(data = diamonds.sample) +
geom_histogram(mapping = aes(x = price, fill = clarity))


# (For a more traditional "bell-curve", make a histogram of diamond `depths`)

# Draw a plot of the `diamonds.sample` data (price by carat), with both points for each
# diamond AND smoothed lines for each cut (hint: in a separate color)
# Making the points have some `alpha` transparency will make the plot look nicer
# Points plus one smoothed line per cut on the same plot.
# `alpha` is a fixed value here, so it is set outside `aes()`; placed inside
# `aes()` it would be treated as data, rescaled, and given its own legend.
ggplot(data = diamonds.sample) +
  geom_point(mapping = aes(x = carat, y = price), alpha = 0.8) +
  geom_smooth(mapping = aes(x = carat, y = price, color = cut), alpha = 0)


# multiple geoms (point & smooth)
ggplot(data = diamonds.sample) +
geom_point(mapping = aes(x = carat, y = price, color=cut), alpha=0.1) +
geom_smooth(mapping = aes(x = carat, y = price, color=cut), se=FALSE)


## Bonus
# Draw a bar chart of average diamond prices by clarity, and include "error bars" marking
# the standard error of each measurement.
#
# You can calculate standard error as the _standard deviation_ divided by the square root
# You can calculate standard error as the _standard deviation_ divided by the square root
# of the number of measurements (prices)

# Start by creating a data frame `diamond.summary` that includes summarized data for each clarity group.
# Your summary data should include the mean price and the standard error of the price.
# Per-clarity price summary: mean, standard deviation, and standard error
# (standard deviation divided by the square root of the number of prices).
# `se` reuses the `sd` column computed just before it in the same call.
diamond.summary <- diamonds %>%
  group_by(clarity) %>%
  summarize(
    mean = mean(price),
    sd = sd(price),
    se = sd / sqrt(length(price))
  )


# Then draw the plot. The error bars should stretch from the mean-error to the mean+error.
# Mean price per clarity as bars, with error bars from mean - se to mean + se.
# Bar heights are taken directly from the `mean` column; the error-bar layer
# inherits the plot's data and x mapping.
ggplot(data = diamond.summary, mapping = aes(x = clarity, y = mean)) +
  geom_col(mapping = aes(fill = clarity)) +
  geom_errorbar(mapping = aes(ymin = mean - se, ymax = mean + se))
62 changes: 21 additions & 41 deletions exercise-2/exercise.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,26 @@ library("ggplot2")

# For this exercise you will again be working with the `diamonds` data set.
# Use `?diamonds` to review details about this data set
?diamonds


## Statistical Transformations

# Draw a bar chart of the diamonds data, organized by cut
# The height of each bar is based on the "count" (number) of diamonds with that cut
ggplot(data = diamonds) +
geom_bar(mapping = aes(x=cut))
ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut))

# Use the `stat_count` to apply the statistical transformation "count" to the diamonds
# by cut. You do not need a separate geometry layer!
ggplot(data = diamonds) +
stat_count(mapping = aes(x=cut))
ggplot(data = diamonds) +
stat_count(mapping = aes(x = cut))

# Use the `stat_summary` function to draw a chart with a summary layer.
# Map the x-position to diamond `cut`, and the y-position to diamond `depth`
# Bonus: use `min` as the function ymin, `max` as the function ymax, and `median` as the function y
# Summary layer of depth per cut: `median` gives the central value, `min` and
# `max` give the lower and upper limits.
# `fun`, `fun.min` and `fun.max` replace the `fun.y`, `fun.ymin` and
# `fun.ymax` arguments, which are deprecated as of ggplot2 3.3.0.
ggplot(data = diamonds) +
  stat_summary(
    mapping = aes(x = cut, y = depth),
    fun = median, fun.min = min, fun.max = max
  )



## Position Adjustments
Expand All @@ -40,87 +39,68 @@ ggplot(data = diamonds) +
ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut, fill = clarity), position = "fill")

# Draw the same chart again, but with each element positioned to "dodge" each other
# Draw the same chart again, but with each element positioned to "dodge" each other (side by side instead of stacked)
# Bars for each clarity placed next to each other within every cut.
# The layer must be joined to the plot with `+`; a bare `geom_bar()` call on
# its own line is not part of any plot.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge")

# Draw a plot with point geometry with the x-position mapped to `cut` and the y-position mapped to `clarity`
# This creates a "grid" grouping the points
ggplot(data = diamonds) +
geom_point(mapping = aes(x = cut, y = clarity))


# Use the "jitter" position adjustment to keep the points from all overlapping!
# (This works a little better with a sample of diamond data, such as from the previous exercise).
ggplot(data = diamonds) +
geom_point(mapping = aes(x = cut, y = clarity), position = "jitter")



## Scales

# Draw a "boxplot" (with `geom_boxplot()`) for the diamond's price (y) by color (x)
ggplot(data = diamonds) +
geom_boxplot(mapping = aes(x=color, y=price))


# This has a lot of outliers, making it harder to read. To fix this, draw the same plot but
# with a _logarithmic_ scale for the y axis.
ggplot(data = diamonds) +
geom_boxplot(mapping = aes(x=color, y=price)) +
scale_y_log10()


# For another version, draw the same plot but with `violin` geometry instead of `boxplot` geometry!
# How does the logarithmic scale change the data presentation?
ggplot(data = diamonds) +
geom_violin(mapping = aes(x=color, y=price)) +
scale_y_log10()


# Another interesting plot: draw a plot of the diamonds price (y) by carat (x), using a heatmap of 2d bins
# (geom_bin2d)
# What happens when you make the x and y channels scale logarithmically?
ggplot(data = diamonds) +
geom_bin2d(mapping = aes(x=carat, y=price)) +
scale_x_log10() +
scale_y_log10()


# Draw a scatter plot for the diamonds price (y) by carat (x). Color each point by the clarity
# (Remember, this will take a while. Use a sample of the diamonds for faster results)
ggplot(data = diamonds) +
geom_point(mapping = aes(x = carat, y = price, color = clarity))


# Change the color of the previous plot using a ColorBrewer scale of your choice. What looks nice?
ggplot(data = diamonds) +
geom_point(mapping = aes(x = carat, y = price, color = clarity)) +
scale_color_brewer(palette = "Spectral")



## Coordinate Systems

# Draw a bar chart with x-position and fill color BOTH mapped to cut
# For best results, SET the `width` of the geometry to be 1 (fill plot, no space between)
# You can save this to a variable for easier modifications
bar <- ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut, fill = cut), width=1)
bar


# Draw the same chart, but with the coordinate system flipped
bar + coord_flip()


# Draw the same chart, but in a polar coordinate system. Now you have a Coxcomb chart!
bar + coord_polar()



## Facets

# Take the scatter plot of price by carat data (colored by clarity) and add _facets_ based on
# the diamond's `color`
ggplot(data = diamonds) +
geom_point(mapping = aes(x = carat, y = price, color = clarity)) +
scale_color_brewer(palette = "Spectral") +
facet_wrap(~color)



## Saving Plots

# Use the `ggsave()` function to save one of your plots (the most recent one generated) to disk.
# Name the output file "my-plot.png".
# Make sure you've set the working directory!!
ggsave("my-plot.png")
Binary file removed exercise-2/my-plot.png
Binary file not shown.