diff --git a/exercise-1/exercise.R b/exercise-1/exercise.R index 7167c6f..a71f712 100644 --- a/exercise-1/exercise.R +++ b/exercise-1/exercise.R @@ -2,108 +2,107 @@ # Install and load `ggplot2` # You will also want to load `dplyr` -install.packages("ggplot2") library("ggplot2") library("dplyr") # For this exercise you'll be working with the `diamonds` data set included in the ggplot2 library # Use `?diamonds` to get more information about this data set (including the column descriptions # Also check the _column names_ and the _number of rows_ in the data set -?diamonds -colnames(diamonds) -nrow(diamonds) -# This data set has a lot of rows. To make things a bit more readable, + +# This data set has a lot of rows. To make things a bit more readable, # use dplyr's `sample_n()` function to get a random 1000 rows from the data set # Store this sample in a variable `diamonds.sample` diamonds.sample <- sample_n(diamonds, 1000) +ggplot(data = diamonds.sample) + + geom_point(mapping = aes(x = carat, y = price, color = clarity)) # Start by making a new `ggplot` with the `diamonds.sample` as the data (no geometry yet) # What do you see? -ggplot(data = diamonds.sample) -# Draw a scatter plot (with point geometry) with for the `diamonds.sample` set, + +# Draw a scatter plot (with point geometry) with for the `diamonds.sample` set, # with the `carat` mapped to the x-position and `price` mapped to the y-position. -ggplot(data = diamonds.sample) + - geom_point(mapping = aes(x = carat, y = price)) + # Draw the same plot as above, but color each of the points based on their clarity. -ggplot(data = diamonds.sample) + - geom_point(mapping = aes(x = carat, y = price, color=clarity)) + # Draw the same plot as above, but for the entire `diamonds` data set. Note this may take # a few seconds to generate. ggplot(data = diamonds) + - geom_point(mapping = aes(x = carat, y = price, color=clarity)) + geom_point(mapping = aes(x = carat, y = price, color = clarity)) + # Draw another scatter plot for `diamonds.sample` of price (y) by carat (x), # but with all of the dots colored "blue". # Hint: you'll need to set the color channel, not map a value to it! -ggplot(data = diamonds) + - geom_point(mapping = aes(x = carat, y = price), color="blue") +ggplot(data = diamonds.sample) + + geom_point(mapping = aes(x = carat, y = price), color = "blue") # Draw a scatter plot for `diamonds.sample` of `price` by `carat`, where each # point has an aesthetic _shape_ based on the diamond's `cut`. -ggplot(data = diamonds.sample) + - geom_point(mapping = aes(x = carat, y = price, shape=cut)) +ggplot(data = diamonds.sample) + + geom_point(mapping = aes(x = carat, y = price, shape = cut)) # Draw a scatter plot for `diamonds.sample` of *`cut`* by `carat`, where each # point has an aesthetic _size_ based on the diamond's *`price`* -ggplot(data = diamonds.sample) + - geom_point(mapping = aes(x = carat, y = cut, size=price)) +ggplot(data = diamonds.sample) + + geom_point(mapping = aes(x = carat, y = cut, size = price, color = price)) + # Try coloring the above plot based on the diamond's price! -ggplot(data = diamonds.sample) + - geom_point(mapping = aes(x = carat, y = cut, size=price, color=price)) + # Draw a line plot (with line geometry) for `diamonds.sample`. The x-position should be mapped to # carat, y-position to price, and color to carat. -ggplot(data = diamonds.sample) + - geom_line(mapping = aes(x = carat, y = price, color=cut)) +ggplot(data = diamonds.sample) + + geom_line(mapping = aes(x = carat, y = price, color = carat)) + # That's kind of messy. Try using `smooth` geometry instead. -ggplot(data = diamonds.sample) + - geom_smooth(mapping = aes(x = carat, y = price, color=cut)) +ggplot(data = diamonds.sample) + + geom_smooth(mapping = aes(x = carat, y = price, color = carat)) + # Draw a plot with bar geometry (a bar chart), mapping the diamond's `cut` to the x-axis -ggplot(data = diamonds) + # full data set example +ggplot(data = diamonds.sample) + geom_bar(mapping = aes(x = cut)) + # Add an aesthetic property that will _fill_ each bar geometry based on the `clarity` of the diamonds # What kind of chart do you get? -ggplot(data = diamonds.sample) + - geom_bar(mapping = aes(x = cut, fill=clarity)) +ggplot(data = diamonds.sample) + + geom_bar(mapping = aes(x = cut, fill = clarity)) # Draw a histogram of diamond prices. # Try mapping each bar based on clarity as well! -ggplot(data = diamonds.sample) + - geom_histogram(aes(x=price, fill=clarity)) +ggplot(data = diamonds.sample) + + geom_histogram(mapping = aes(x = price, fill = clarity)) + # (For a more traditional "bell-curve", make a histogram of diamond `depths`) # Draw a plot of the `diamonds.sample` data (price by carat), with both points for each # diamond AND smoothed lines for each cut (hint: in a separate color) # Making the points have some `alpha` transparency will make the plot look nicer +ggplot(data = diamonds.sample) + + geom_point(mapping = aes(x = carat, y = price, alpha = 0.8)) + + geom_smooth(mapping = aes(x = carat, y = price, alpha = 0, color = cut)) + # multiple geoms (point & smooth) -ggplot(data = diamonds.sample) + - geom_point(mapping = aes(x = carat, y = price, color=cut), alpha=0.1) + - geom_smooth(mapping = aes(x = carat, y = price, color=cut), se=FALSE) + ## Bonus # Draw a bar chart of average diamond prices by clarity, and include "error bars" marking # the standard error of each measurement. # -# You can calculate standard error as the _standard deviation_ divided by the square root +# You can calculate standard error as the _standard deviation_ divided by the square root # of the number of measurements (prices) # Start by creating a data frame `diamond.summary` that includes summarized data for each clarity group. # Your summary data shuld include the mean price and the standard error of the price. -diamond.summary <- diamonds %>% - group_by(clarity) %>% - summarize(mean = mean(price), sd = sd(price), se = sd/sqrt(length(price))) + # Then draw the plot. The error bars should stretch from the mean-error to the mean+error. -ggplot(data = diamond.summary, mapping = aes(x=clarity, y=mean)) + - geom_bar(aes(fill=clarity), stat="identity") + - geom_errorbar(data = diamond.summary, aes(ymin=(mean-se), ymax=(mean+se))) diff --git a/exercise-2/exercise.R b/exercise-2/exercise.R index aa6ebad..cb9e12d 100644 --- a/exercise-2/exercise.R +++ b/exercise-2/exercise.R @@ -6,27 +6,26 @@ library("ggplot2") # For this exercise you will again be working with the `diamonds` data set. # Use `?diamonds` to review details about this data set -?diamonds ## Statistical Transformations # Draw a bar chart of the diamonds data, organized by cut # The height of each bar is based on the "count" (number) of diamonds with that cut -ggplot(data = diamonds) + - geom_bar(mapping = aes(x=cut)) +ggplot(data = diamonds) + + geom_bar(mapping = aes(x = cut)) # Use the `stat_count` to apply the statistical transformation "count" to the diamonds # by cut. You do not need a separate geometry layer! -ggplot(data = diamonds) + - stat_count(mapping = aes(x=cut)) +ggplot(data = diamonds) + + stat_count(mapping = aes(x = cut)) # Use the `stat_summary` function to draw a chart with a summary layer. # Map the x-position to diamond `cut`, and the y-position to diamond `depth` # Bonus: use `min` as the function ymin, `max` as the function ymax, and `median` as the function y -ggplot(data = diamonds) + - stat_summary(mapping = aes(x = cut, y = depth), - fun.ymin = min, fun.ymax = max, fun.y = median) +ggplot(data = diamonds) + + stat_summary(mapping = aes(x = cut, y = depth), fun.ymin = min, fun.ymax = max, fun.y = median) + ## Position Adjustments @@ -40,56 +39,43 @@ ggplot(data = diamonds) + ggplot(data = diamonds) + geom_bar(mapping = aes(x = cut, fill = clarity), position = "fill") -# Draw the same chart again, but with each element positioned to "dodge" each other +# Draw the same chart again, but with each element positioned to "dodge" each other" the y axis ggplot(data = diamonds) + - geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge") +geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge") # Draw a plot with point geometry with the x-position mapped to `cut` and the y-position mapped to `clarity` # This creates a "grid" grouping the points -ggplot(data = diamonds) + - geom_point(mapping = aes(x = cut, y = clarity)) + # Use the "jitter" position adjustment to keep the points from all overlapping! # (This works a little better with a sample of diamond data, such as from the previous exercise). -ggplot(data = diamonds) + - geom_point(mapping = aes(x = cut, y = clarity), position = "jitter") + ## Scales # Draw a "boxplot" (with `geom_boxplot()`) for the diamond's price (y) by color (x) -ggplot(data = diamonds) + - geom_boxplot(mapping = aes(x=color, y=price)) + # This has a lot of outliers, making it harder to read. To fix this, draw the same plot but # with a _logarithmic_ scale for the y axis. -ggplot(data = diamonds) + - geom_boxplot(mapping = aes(x=color, y=price)) + - scale_y_log10() + # For another version, draw the same plot but with `violin` geometry instead of `boxplot` geometry! # How does the logarithmic scale change the data presentation? -ggplot(data = diamonds) + - geom_violin(mapping = aes(x=color, y=price)) + - scale_y_log10() + # Another interesting plot: draw a plot of the diamonds price (y) by carat (x), using a heatmap of 2d bins # (geom_bin2d) # What happens when you make the x and y channels scale logarithmically? -ggplot(data = diamonds) + - geom_bin2d(mapping = aes(x=carat, y=price)) + - scale_x_log10() + - scale_y_log10() + # Draw a scatter plot for the diamonds price (y) by carat (x). Color each point by the clarity # (Remember, this will take a while. Use a sample of the diamonds for faster results) -ggplot(data = diamonds) + - geom_point(mapping = aes(x = carat, y = price, color = clarity)) + # Change the color of the previous plot using a ColorBrewer scale of your choice. What looks nice? -ggplot(data = diamonds) + - geom_point(mapping = aes(x = carat, y = price, color = clarity)) + - scale_color_brewer(palette = "Spectral") + ## Coordinate Systems @@ -97,25 +83,20 @@ ggplot(data = diamonds) + # Draw a bar chart with x-position and fill color BOTH mapped to cut # For best results, SET the `width` of the geometry to be 1 (fill plot, no space between) # You can save this to a variable for easier modifications -bar <- ggplot(data = diamonds) + - geom_bar(mapping = aes(x = cut, fill = cut), width=1) -bar + # Draw the same chart, but with the coordinate system flipped -bar + coord_flip() + # Draw the same chart, but in a polar coordinate system. Now you have a Coxcomb chart! -bar + coord_polar() + ## Facets # Take the scatter plot of price by carat data (colored by clarity) and add _facets_ based on # the diamond's `color` -ggplot(data = diamonds) + - geom_point(mapping = aes(x = carat, y = price, color = clarity)) + - scale_color_brewer(palette = "Spectral") + - facet_wrap(~color) + ## Saving Plots @@ -123,4 +104,3 @@ ggplot(data = diamonds) + # Use the `ggsave()` function to save one of your plots (the most recent one generated) to disk. # Name the output file "my-plot.png". # Make sure you've set the working directory!! -ggsave("my-plot.png") diff --git a/exercise-2/my-plot.png b/exercise-2/my-plot.png deleted file mode 100644 index 3a71b43..0000000 Binary files a/exercise-2/my-plot.png and /dev/null differ