diff --git a/README.md b/README.md
index 391ec8f6..56936ed5 100644
--- a/README.md
+++ b/README.md
@@ -2,4 +2,7 @@
 
 This repository contains _all_ programming exercises for the [_Programming Skills for Data Science_](https://programming-for-data-science.github.io/) book. 
  
-Solutions can be found in the `solution` branch.
\ No newline at end of file
+Solutions can be found in the `solution` branch.
+
+### "Cidade de Deus" is a great film
+
diff --git a/chapter-05-exercises/exercise-1/.RData b/chapter-05-exercises/exercise-1/.RData
new file mode 100644
index 00000000..eae5b658
Binary files /dev/null and b/chapter-05-exercises/exercise-1/.RData differ
diff --git a/chapter-05-exercises/exercise-1/.Rhistory b/chapter-05-exercises/exercise-1/.Rhistory
new file mode 100644
index 00000000..d2857143
--- /dev/null
+++ b/chapter-05-exercises/exercise-1/.Rhistory
@@ -0,0 +1,2 @@
+too_expensive = FALES
+too_expensive = FALSE
diff --git a/chapter-05-exercises/exercise-1/exercise.R b/chapter-05-exercises/exercise-1/exercise.R
index 71eb3f2a..f88a199c 100644
--- a/chapter-05-exercises/exercise-1/exercise.R
+++ b/chapter-05-exercises/exercise-1/exercise.R
@@ -1,26 +1,30 @@
 # Exercise 1: practice with basic R syntax
 
 # Create a variable `hometown` that stores the city in which you were born
-
+hometown <- "Toledo"
 
 # Assign your name to the variable `my_name`
-
+my_name <- "Alejandro"
 
 # Assign your height (in inches) to a variable `my_height`
-
+my_height <- 1.74 / 0.0254 # inches; assumes 1.74 was metres (1 in = 0.0254 m)
 
 # Create a variable `puppies` equal to the number of puppies you'd like to have
-
+puppies <- 0
 
 # Create a variable `puppy_price`, which is how much you think a puppy costs
-
+puppy_price <- 20
 
 # Create a variable `total_cost` that has the total cost of all of your puppies
-
+total_cost <- puppies * puppy_price # derived, so it tracks both inputs
 
 # Create a boolean variable `too_expensive`, set to TRUE if the cost is greater 
 # than $1,000
-
+# The comparison itself yields the logical value, so no `if`/`else` is needed
+# (R's `if` would also require a parenthesised condition and braces, unlike
+# Python's `if cond:` form).
+too_expensive <- total_cost > 1000
+
 
 # Create a variable `max_puppies`, which is the number of puppies you can 
 # afford for $1,000
diff --git a/chapter-06-exercises/exercise-1/.Rhistory b/chapter-06-exercises/exercise-1/.Rhistory
new file mode 100644
index 00000000..e69de29b
diff --git a/chapter-08-exercises/exercise-1/.RData b/chapter-08-exercises/exercise-1/.RData
new file mode 100644
index 00000000..4fd3e210
Binary files /dev/null and b/chapter-08-exercises/exercise-1/.RData differ
diff --git a/chapter-08-exercises/exercise-1/.Rhistory b/chapter-08-exercises/exercise-1/.Rhistory
new file mode 100644
index 00000000..974c4346
--- /dev/null
+++ b/chapter-08-exercises/exercise-1/.Rhistory
@@ -0,0 +1,80 @@
+meals
+# Create a vector `my_breakfast` of everything you ate for breakfast
+my_breakfast <- c("leche", "galletas")
+# Create a vector `my_lunch` of everything you ate (or will eat) for lunch
+my_lunch <- c("tomate", "lechuga", "coliflor")
+# Create a list `meals` that has contains your breakfast and lunch
+meals <- my_lunch + my_breakfast
+# Create a list `meals` that has contains your breakfast and lunch
+meals <- my_lunch, my_breakfast
+# Create a list `meals` that has contains your breakfast and lunch
+meals <- c(my_lunch, my_breakfast)
+meals
+# Add a "dinner" element to your `meals` list that has what you plan to eat
+# for dinner
+dinner <- c("hamburguesa", "patatas fritas")
+meals <- c(meals, dinner)
+meals
+# Use dollar notation to extract your `dinner` element from your list
+# and save it in a vector called 'dinner'
+dinner2 <- meals[2]
+dinner2
+# Use dollar notation to extract your `dinner` element from your list
+# and save it in a vector called 'dinner'
+dinner2 <- meals[-1]
+dinner2
+# Use dollar notation to extract your `dinner` element from your list
+# and save it in a vector called 'dinner'
+dinner2 <- meals[length(meals)]
+dinner2
+meals
+meals <- c(c(meals), c(dinner)
+meals <- c(c(meals), c(dinner))
+# Use dollar notation to extract your `dinner` element from your list
+# and save it in a vector called 'dinner'
+dinner2 <- meals[length(meals)]
+dinner2
+# Use double-bracket notation to extract your `lunch` element from your list
+# and save it in your list as the element at index 5 (no reason beyond practice)
+meals[1:4]
+# Create a list that has the number of items you ate for each meal
+# Hint: use the `lappy()` function to apply the `length()` function to each item
+l <- list(breakfast : c("leche","cafe"),
+dinner : c("hamburguesa", "patatas"))
+# Create a list that has the number of items you ate for each meal
+# Hint: use the `lappy()` function to apply the `length()` function to each item
+l <- list(breakfast = c("leche","cafe"),
+dinner = c("hamburguesa", "patatas"))
+l
+lapply(l, length())
+l[1]
+lapply(l[1], length())
+length(l[1])
+length(l[2])
+lapply(l, length)
+lapply(l, length)
+return "pizza"
+# Write a function `add_pizza` that adds pizza to a given meal vector, and
+# returns the pizza-fied vector
+add_pizza <- function(){
+return "pizza"
+}
+return p
+return p;
+return
+return p
+return p
+return("pizza")
+# Write a function `add_pizza` that adds pizza to a given meal vector, and
+# returns the pizza-fied vector
+add_pizza <- function(){
+return("pizza")
+}
+lapply(l, add_pizza)
+# Write a function `add_pizza` that adds pizza to a given meal vector, and
+# returns the pizza-fied vector
+add_pizza <- function(ventrada, vsalida){
+vsalida <- c(ventrada, "pizza")
+return(vsalida)
+}
+lapply(l, add_pizza)
diff --git a/chapter-08-exercises/exercise-1/exercise.R b/chapter-08-exercises/exercise-1/exercise.R
index 75f89c33..77b4476b 100644
--- a/chapter-08-exercises/exercise-1/exercise.R
+++ b/chapter-08-exercises/exercise-1/exercise.R
@@ -1,25 +1,29 @@
 # Exercise 1: creating and accessing lists
 
 # Create a vector `my_breakfast` of everything you ate for breakfast
-
+my_breakfast <- c("leche", "galletas")
 
 # Create a vector `my_lunch` of everything you ate (or will eat) for lunch
-
+my_lunch <- c("tomate", "lechuga", "coliflor")
 
 # Create a list `meals` that has contains your breakfast and lunch
-
+meals <- list(breakfast = my_breakfast, lunch = my_lunch)
+meals
 
 # Add a "dinner" element to your `meals` list that has what you plan to eat 
 # for dinner
-
+# Assigning through `$` appends a new named element to the list.
+meals$dinner <- c("hamburguesa", "patatas fritas")
+meals
 
 # Use dollar notation to extract your `dinner` element from your list
 # and save it in a vector called 'dinner'
-
+dinner <- meals$dinner
+dinner
 
 # Use double-bracket notation to extract your `lunch` element from your list
 # and save it in your list as the element at index 5 (no reason beyond practice)
-
+meals[[5]] <- meals[["lunch"]] # `[[` yields the vector itself, not a sub-list
 
 # Use single-bracket notation to extract your breakfast and lunch from your list
 # and save them to a list called `early_meals`
@@ -30,11 +34,17 @@
 
 # Create a list that has the number of items you ate for each meal
 # Hint: use the `lappy()` function to apply the `length()` function to each item
-
+# Two named meals, then the item count of each one (returned as a list).
+l <- list(breakfast = c("leche", "cafe"), dinner = c("hamburguesa", "patatas"))
+lapply(X = l, FUN = length)
 
 # Write a function `add_pizza` that adds pizza to a given meal vector, and
 # returns the pizza-fied vector
-
+# Return `ventrada` with "pizza" appended; `vsalida` is accepted but never read.
+add_pizza <- function(ventrada, vsalida) {
+  c(ventrada, "pizza")
+}
+lapply(l, add_pizza)
 
 # Create a vector `better_meals` that is all your meals, but with pizza!
 
diff --git a/chapter-17-exercises/exercise-1/.RData b/chapter-17-exercises/exercise-1/.RData
new file mode 100644
index 00000000..16521446
Binary files /dev/null and b/chapter-17-exercises/exercise-1/.RData differ
diff --git a/chapter-17-exercises/exercise-1/.Rhistory b/chapter-17-exercises/exercise-1/.Rhistory
new file mode 100644
index 00000000..bf15c678
--- /dev/null
+++ b/chapter-17-exercises/exercise-1/.Rhistory
@@ -0,0 +1,512 @@
+#origin y dest
+#¿average speed? ¿distance / air_time ?
+city <- flights %>%
+group_by(dest) %>%
+summarize(h_speed = mean((distance/air_time), na.rm = TRUE))
+city
+# Which city was flown to with the highest average speed?
+#origin y dest
+#¿average speed? ¿distance / air_time ?
+city <- flights %>%
+group_by(dest) %>%
+summarize(h_speed = mean((distance/air_time), na.rm = TRUE)) %>%
+head(1)
+city
+# Which city was flown to with the highest average speed?
+#origin y dest
+#¿average speed? ¿distance / air_time ?
+city <- flights %>%
+group_by(origin) %>%
+summarize(h_speed = mean((distance/air_time), na.rm = TRUE)) %>%
+head(1)
+city
+dep_delay_by_month
+# What was the average departure delay in each month?
+# Save this as a data frame `dep_delay_by_month`
+# Hint: you'll have to perform a grouping operation then summarizing your data
+#group by month y luego avg columa dep_delay
+dep_delay_by_month <- flights %>%
+group_by(month) %>%
+summarise(avg_delay = mean(dep_delay, na.rm = TRUE))
+dep_delay_by_month
+dep_delay_by_month
+# Which month had the greatest average departure delay?
+max_avg <- dep_delay_by_month %>%
+filter(avg_delay == max(avg_delay)) %>%
+select(month)
+max_avg
+# Which month had the greatest average departure delay?
+max_avg <- dep_delay_by_month %>%
+filter(avg_delay == max(avg_delay))
+#            select(month)
+max_avg
+# To which destinations were the average arrival delays the highest?
+# Hint: you'll have to perform a grouping operation then summarize your data
+# You can use the `head()` function to view just the first few rows
+h_dest_delay <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+head(1)
+h_dest_delay
+by_dest %>%
+rename(faa = dest)%>%
+left_join(airports, by=c("dest" = "faa"))
+by_dest %>%
+rename(faa = dest)%>%
+left_join(airports, by="faa")
+by_dest %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+rename(faa = dest)%>%
+left_join(airports, by="faa")
+by_dest <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+rename(faa = dest)%>%
+left_join(airports, by="faa")
+by_dest
+# Which city was flown to with the highest average speed?
+#origin y dest
+#¿average speed? ¿distance / air_time ?
+city <- flights %>%
+mutate(speed = (distance/air_time)) %>%speed
+group_by(dest) %>%
+summarize(h_speed = mean(speed, na.rm = TRUE)) %>%
+left_join(airports, by="faa")
+# Which city was flown to with the highest average speed?
+#origin y dest
+#¿average speed? ¿distance / air_time ?
+city <- flights %>%
+mutate(speed = (distance/air_time)) %>%
+group_by(dest) %>%
+summarize(h_speed = mean(speed, na.rm = TRUE)) %>%
+left_join(airports, by="faa")
+by_dest <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+rename(faa = dest)%>%
+left_join(airports, by="faa")
+by_dest
+# To which destinations were the average arrival delays the highest?
+# Hint: you'll have to perform a grouping operation then summarize your data
+# You can use the `head()` function to view just the first few rows
+h_dest_delay <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+head(1)
+h_dest_delay
+by_dest <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+rename(faa = dest)%>%
+left_join(airports, by="faa")
+by_dest
+city
+# You can look up these airports in the `airports` data frame!
+View(airports)
+library("nycflights13")
+library("dplyr")
+# What was the average departure delay in each month?
+# Save this as a data frame `dep_delay_by_month`
+# Hint: you'll have to perform a grouping operation then summarizing your data
+#group by month y luego avg columa dep_delay
+dep_delay_by_month <- flights %>%
+group_by(month) %>%
+summarise(avg_delay = mean(dep_delay, na.rm = TRUE))
+dep_delay_by_month
+# Which month had the greatest average departure delay?
+max_avg <- dep_delay_by_month %>%
+filter(avg_delay == max(avg_delay))
+#            select(month)
+max_avg
+# If your above data frame contains just two columns (e.g., "month", and "delay"
+# in that order), you can create a scatterplot by passing that data frame to the
+# `plot()` function
+plot(dep_delay_by_month)
+# To which destinations were the average arrival delays the highest?
+# Hint: you'll have to perform a grouping operation then summarize your data
+# You can use the `head()` function to view just the first few rows
+h_dest_delay <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+head(1)
+h_dest_delay
+by_dest <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+rename(faa = dest)%>%
+left_join(airports, by="faa")
+by_dest
+city <- flights %>%
+mutate(speed = (distance/air_time)) %>%
+group_by(dest) %>%
+summarize(h_speed = mean(speed, na.rm = TRUE)) %>%
+left_join(airports, by="faa")
+by_dest <- flights %>%
+group_by(dest) %>%
+summarise(avg_delay = mean(arr_delay, na.rm = TRUE)) %>%
+rename(faa = dest)%>%
+left_join(airports, by="faa")
+by_dest
+library(midwest)
+library("midwest")
+# Change the color of each point based on the state it is in
+ggplot(data = midwest) +
+geom_point(
+mapping = aes(x = percollege, y = percadultpoverty, color = state)
+) +
+scale_color_brewer(palette = "Set3") # use the "Set3" color palette
+install.library(midwest)
+install.packages(midwest)
+install.packages("midwest")
+# Load packages
+library("dplyr")
+library("ggmap")
+library("ggplot2")
+# Load data from GitHub
+permit_data <- read.csv(
+"https://raw.githubusercontent.com/programming-for-data-science/in-action/master/interactive-vis/data/Building_Permits.csv",
+stringsAsFactors = F)
+# Formatting and filtering data
+samp <- permit_data %>%
+filter(!is.na(Longitude), !is.na(Latitude), Longitude <0, Latitude < 100) %>%
+filter(PermitTypeDesc == "New") %>%
+mutate(year = as.numeric(substr(IssuedDate, 1, 4))) %>%
+filter(year > 2010)
+# Chart option 1: years as categories
+qmplot(
+data = samp,
+x = Longitude,
+y = Latitude,
+color = as.character(year)
+) + scale_color_discrete() +
+labs(color = "Year")
+# Chart option 2: years as continuous
+qmplot(
+data = samp,
+x = Longitude,
+y = Latitude,
+color = year
+) + scale_color_continuous() +
+labs(color = "Year")
+install.packages("ggmap")
+library("ggmap")
+# Chart option 1: years as categories
+qmplot(
+data = samp,
+x = Longitude,
+y = Latitude,
+color = as.character(year)
+) + scale_color_discrete() +
+labs(color = "Year")
+df <- data.frame(
+label = c("A", "B", "C", "D"),
+value = 1:4,
+stringsAsFactors = F
+)
+ggplot(df) +
+geom_col(mapping = aes(x = label, y = value, fill = "blue"))
+ggplot(df) +
+geom_col(mapping = aes(x = label, y = value, fill = blue))
+ggplot(df) +
+geom_col(mapping = aes(x = label, y = value, fill = blue))
+# Install and load the `ggplot2` package
+# You will also want to load `dplyr`
+library(ggplot2)
+?diamonds
+# For this exercise you'll be working with the `diamonds` data set included in
+# the ggplot2 library
+# Use `?diamonds` to get more information about this data set (including the
+# column descriptions. Also check the _column names_ and the _number of rows_
+# in the data set
+nrows(diamonds)
+# For this exercise you'll be working with the `diamonds` data set included in
+# the ggplot2 library
+# Use `?diamonds` to get more information about this data set (including the
+# column descriptions. Also check the _column names_ and the _number of rows_
+# in the data set
+nrows("diamonds")
+diamonds
+nrow(diamonds)
+ncol(diamonds)
+colnames(diamonds)
+# This data set has A LOT of rows. To make things a bit more readable,
+# use dplyr's `sample_n()` function to get a random 1000 rows from the data set
+# Store this sample in a variable `diamonds_sample`
+diamonds_sample <- sample_n(1000)
+library(dplyr)
+# This data set has A LOT of rows. To make things a bit more readable,
+# use dplyr's `sample_n()` function to get a random 1000 rows from the data set
+# Store this sample in a variable `diamonds_sample`
+diamonds_sample <- sample_n(tbl = diamonds, size=1000)
+nrow(diamonds_sample)
+View(diamonds_sample)
+# Start by making a new `ggplot` with the `diamonds_sample` as the data (no
+# geometry yet)
+# What do you see? (What did you expect?)
+ggplot(data = diamonds_sample)
+# Draw a scatter plot (with point geometry) with for the `diamonds_sample` set,
+# with the `carat` mapped to the x-position and `price` mapped to the y-position.
+scatter(x = "carat", y = "price" )
+# Draw a scatter plot (with point geometry) with for the `diamonds_sample` set,
+# with the `carat` mapped to the x-position and `price` mapped to the y-position.
+ggplot(data = diamonds_sample) +
+geom_point(x = "carat", y = "price")
+# Draw a scatter plot (with point geometry) with for the `diamonds_sample` set,
+# with the `carat` mapped to the x-position and `price` mapped to the y-position.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = "carat", y = "price"))
+# Draw the same plot as above, but color each of the points based on their
+# clarity.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = "carat", y = "price", colour = "clarity"))
+# Draw a scatter plot (with point geometry) with for the `diamonds_sample` set,
+# with the `carat` mapped to the x-position and `price` mapped to the y-position.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y =price))
+# Draw the same plot as above, but color each of the points based on their
+# clarity.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, colour = clarity))
+# Draw the same plot as above, but for the entire `diamonds` data set. Note this
+# may take a few seconds to generate.
+ggplot(data = diamonds) +
+geom_point(mapping = aes(x = carat, y = price, colour = clarity))
+# Draw another scatter plot for `diamonds_sample` of price (y) by carat (x),
+# but with all of the dots colored "blue".
+# Hint: you'll need to set the color channel, not map a value to it!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, colour = )) +
+scale_color_brewer(palette = "Set3")
+# Draw another scatter plot for `diamonds_sample` of price (y) by carat (x),
+# but with all of the dots colored "blue".
+# Hint: you'll need to set the color channel, not map a value to it!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price)) +
+scale_color_brewer(palette = "Set3")
+# Draw another scatter plot for `diamonds_sample` of price (y) by carat (x),
+# but with all of the dots colored "blue".
+# Hint: you'll need to set the color channel, not map a value to it!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price)) +
+scale_color_brewer(palette = "Set1")
+# Draw another scatter plot for `diamonds_sample` of price (y) by carat (x),
+# but with all of the dots colored "blue".
+# Hint: you'll need to set the color channel, not map a value to it!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, colour = clarity)) +
+scale_color_brewer(palette = "Set1")
+# Draw another scatter plot for `diamonds_sample` of price (y) by carat (x),
+# but with all of the dots colored "blue".
+# Hint: you'll need to set the color channel, not map a value to it!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, colour = clarity)) +
+scale_color_brewer(palette = "Set3")
+# Draw another scatter plot for `diamonds_sample` of price (y) by carat (x),
+# but with all of the dots colored "blue".
+# Hint: you'll need to set the color channel, not map a value to it!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, colour = clarity)) +
+scale_color_brewer(palette = "Blue")
+# Draw another scatter plot for `diamonds_sample` of price (y) by carat (x),
+# but with all of the dots colored "blue".
+# Hint: you'll need to set the color channel, not map a value to it!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, colour = clarity)) +
+scale_color_brewer(palette = "PuBu")
+# Draw a scatter plot for `diamonds_sample` of `price` by `carat`, where each
+# point has an aesthetic _shape_ based on the diamond's `cut`.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, shape=4))
+# Draw a scatter plot for `diamonds_sample` of `price` by `carat`, where each
+# point has an aesthetic _shape_ based on the diamond's `cut`.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, shape = cut ))
+# Draw a scatter plot for `diamonds_sample` of *`cut`* by `carat`, where each
+# point has an aesthetic _size_ based on the diamond's *`price`*
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, shape = price ))
+# Draw a scatter plot for `diamonds_sample` of `price` by `carat`, where each
+# point has an aesthetic _shape_ based on the diamond's `cut`.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, shape = cut ))
+# Draw a scatter plot for `diamonds_sample` of *`cut`* by `carat`, where each
+# point has an aesthetic _size_ based on the diamond's *`price`*
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price ))
+# Try coloring the above plot based on the diamond's price!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price, colour = price ))
+# Try coloring the above plot based on the diamond's price!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price, colour = price )) +
+scale_color_brewer(palette = "3")
+# Try coloring the above plot based on the diamond's price!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price, colour = price )) +
+scale_color_brewer(palette = "3-class")
+# Try coloring the above plot based on the diamond's price!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price, colour = price )) +
+scale_color_brewer(palette = "Set1")
+# Try coloring the above plot based on the diamond's price!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price)) +
+scale_color_brewer(palette = "Set1")
+# Try coloring the above plot based on the diamond's price!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price)) +
+scale_color_brewer(palette = "Set3")
+# Try coloring the above plot based on the diamond's price!
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = cut, size = price)) +
+scale_color_brewer(palette = "PuBu")
+# Draw a scatter plot (with point geometry) with for the `diamonds_sample` set,
+# with the `carat` mapped to the x-position and `price` mapped to the y-position.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y =price), color = "blue")
+# Draw the same plot as above, but color each of the points based on their
+# clarity.
+ggplot(data = diamonds_sample) +
+geom_point(mapping = aes(x = carat, y = price, colour = clarity))
+cd /home/alejandro/Descargas
+%cd% "/home/alejandro/Descargas"
+cd "/home/alejandro/Descargas"
+df <- data.frame("/home/alejandro/Descargas/madrid_2001.csv.zip")
+ncols(df)
+ncol(df)
+df <- data.frame("/home/alejandro/Descargas/madrid_2001.csv")
+ncol(df)
+ncol(df)
+ncols(df)
+colnames(df)
+df <- data.frame("/home/alejandro/Descargas/madrid_2001.csv")
+colnames(df)
+df[1]
+df <- read.csv("/home/alejandro/Descargas/madrid_2001.csv")
+colnames(df)
+nrow(df)
+ncol(df)
+nrow(df)
+ncol(df), nrow(df)
+ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2001.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2002.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2003.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2004.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2005.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2006.csv"); ncol(df); nrow(df)
+f <-
+df <- read.csv("/home/alejandro/Descargas/madrid_2007.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2008.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2009.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2010.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2011.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2012.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2013.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2014.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2015.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2016.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2017.csv"); ncol(df); nrow(df)
+df <- read.csv("/home/alejandro/Descargas/madrid_2018.csv"); ncol(df); nrow(df)
+# Load necessary packages (`dplyr`, `ggplot2`, and `plotly`)
+library(dplyr)
+library(ggplot2)
+library(plotly)
+library(plotly)
+# Exercise 1: Creating a grouped bar chart of cancer rates in King County, WA
+# (using plotly)
+library(dplyr)
+library(ggplot2)
+library(plotly)
+# Load the `"data/IHME_WASHINGTON_MORTALITY_RATES_1980_2014.csv` file
+# into a variable `mortality_rates`
+# Make sure strings are *not* read in as factors
+mortality_rates <- read.csv("data/IHME_WASHINGTON_MORTALITY_RATES_1980_2014.csv")
+setwd("~/Dropbox/Data/UPM/1.DP/book-exercises/chapter-17-exercises/exercise-1")
+# Load the `"data/IHME_WASHINGTON_MORTALITY_RATES_1980_2014.csv` file
+# into a variable `mortality_rates`
+# Make sure strings are *not* read in as factors
+mortality_rates <- read.csv("data/IHME_WASHINGTON_MORTALITY_RATES_1980_2014.csv")
+# This is actually a very large and rich dataset, but we will only focus on
+# a small subset of it. Create a new data frame `plot_data` by filtering the
+# `mortality_rates` data to the following:
+# - The `location_name` is "King County"
+# - The `sex` is *not* "Both"
+# - The `cause_name` is "Neoplasms"
+# - The `year_id` is greater than 2004
+# - Only keep the columns `sex`, `year_id`, and `mortality_rate`
+plot_data <- mortality_rates %>%
+filter(location_name == "King County")
+# This is actually a very large and rich dataset, but we will only focus on
+# a small subset of it. Create a new data frame `plot_data` by filtering the
+# `mortality_rates` data to the following:
+# - The `location_name` is "King County"
+# - The `sex` is *not* "Both"
+# - The `cause_name` is "Neoplasms"
+# - The `year_id` is greater than 2004
+# - Only keep the columns `sex`, `year_id`, and `mortality_rate`
+plot_data <- mortality_rates %>%
+filter(location_name == "King County") %>%
+filter(sex != "Both")
+# This is actually a very large and rich dataset, but we will only focus on
+# a small subset of it. Create a new data frame `plot_data` by filtering the
+# `mortality_rates` data to the following:
+# - The `location_name` is "King County"
+# - The `sex` is *not* "Both"
+# - The `cause_name` is "Neoplasms"
+# - The `year_id` is greater than 2004
+# - Only keep the columns `sex`, `year_id`, and `mortality_rate`
+plot_data <- mortality_rates %>%
+filter(location_name == "King County") %>%
+filter(sex != "Both") %>%
+filter(cause_name == "Neoplasms") %>%
+filter(year_id > 2004) %>%
+select(sex, year_id, mortality_rate)
+colnames(plot_data)
+head(plot_data)
+# Using ggplot2 (recall chapter 16), make a grouped ("dodge") bar chart of
+# mortality rates each year, with different bars for each sex.
+# Store this plot in a variable `mort_plot`
+mort_plot <- ggplot(data = "plot_data") +
+geom_bar(mapping = aes(x = year, y = sex ))
+# Using ggplot2 (recall chapter 16), make a grouped ("dodge") bar chart of
+# mortality rates each year, with different bars for each sex.
+# Store this plot in a variable `mort_plot`
+mort_plot <- ggplot(data = plot_data) +
+geom_bar(mapping = aes(x = year, y = sex ))
+# To make this plot interactive, pass `mort_plot` to the `ggplotly()` function
+# (which is part of the `plotly` package). This will make your plot interactive!
+ggplotly(mort_plot)
+# Using ggplot2 (recall chapter 16), make a grouped ("dodge") bar chart of
+# mortality rates each year, with different bars for each sex.
+# Store this plot in a variable `mort_plot`
+mort_plot <- ggplot(data = plot_data) +
+geom_bar(mapping = aes(x = year_id, y = sex ))
+# To make this plot interactive, pass `mort_plot` to the `ggplotly()` function
+# (which is part of the `plotly` package). This will make your plot interactive!
+ggplotly(mort_plot)
+# Using ggplot2 (recall chapter 16), make a grouped ("dodge") bar chart of
+# mortality rates each year, with different bars for each sex.
+# Store this plot in a variable `mort_plot`
+mort_plot <- ggplot(data = plot_data) +
+geom_bar(mapping = aes(x = year_id, y = sex ))
+# To make this plot interactive, pass `mort_plot` to the `ggplotly()` function
+# (which is part of the `plotly` package). This will make your plot interactive!
+ggplotly(mort_plot)
+head(plot_data)
+tail(plot_data)
+# Using ggplot2 (recall chapter 16), make a grouped ("dodge") bar chart of
+# mortality rates each year, with different bars for each sex.
+# Store this plot in a variable `mort_plot`
+mort_plot <- ggplot(data = plot_data) +
+geom_bar(mapping = aes(x = year_id, y = mortality_rate ))
+# To make this plot interactive, pass `mort_plot` to the `ggplotly()` function
+# (which is part of the `plotly` package). This will make your plot interactive!
+ggplotly(mort_plot)
+# Using the `plot_ly()` function from the `plotly` package, pass in `plot_data`
+# as the data, and specify `year_id` as the x variable, mortality_rate as
+# the y variable, and `sex` as the color variable.
+# (make sure to specify these as *formulas*)
+# Also set the plot type to "bar". Store the result in a variable.
+plot_ly(data=plot_data)
diff --git a/chapter-17-exercises/exercise-1/exercise.R b/chapter-17-exercises/exercise-1/exercise.R
index 48c73172..6597779f 100755
--- a/chapter-17-exercises/exercise-1/exercise.R
+++ b/chapter-17-exercises/exercise-1/exercise.R
@@ -1,7 +1,9 @@
 # Exercise 1: Creating a grouped bar chart of cancer rates in King County, WA
 # (using plotly)
-
 # Load necessary packages (`dplyr`, `ggplot2`, and `plotly`)
+library(dplyr)
+library(ggplot2)
+library(plotly)
 
 
 # Set your working directory using the RStudio menu:
@@ -10,7 +12,7 @@
 # Load the `"data/IHME_WASHINGTON_MORTALITY_RATES_1980_2014.csv` file
 # into a variable `mortality_rates`
 # Make sure strings are *not* read in as factors
-
+mortality_rates <- read.csv("data/IHME_WASHINGTON_MORTALITY_RATES_1980_2014.csv", stringsAsFactors = FALSE)
 
 # This is actually a very large and rich dataset, but we will only focus on
 # a small subset of it. Create a new data frame `plot_data` by filtering the
@@ -20,26 +22,36 @@
 # - The `cause_name` is "Neoplasms"
 # - The `year_id` is greater than 2004
 # - Only keep the columns `sex`, `year_id`, and `mortality_rate`
+plot_data <- mortality_rates %>%
+  filter(
+    location_name == "King County", sex != "Both",
+    cause_name == "Neoplasms", year_id > 2004
+  ) %>% # comma-separated conditions are combined with AND
+  select(sex, year_id, mortality_rate)
 
+colnames(plot_data)
+tail(plot_data)
 
 # Using ggplot2 (recall chapter 16), make a grouped ("dodge") bar chart of
 # mortality rates each year, with different bars for each sex.
 # Store this plot in a variable `mort_plot`
-
+mort_plot <- ggplot(data = plot_data) + # geom_col: bar height comes from `y`
+  geom_col(aes(x = year_id, y = mortality_rate, fill = sex), position = "dodge")
 
 # To make this plot interactive, pass `mort_plot` to the `ggplotly()` function
 # (which is part of the `plotly` package). This will make your plot interactive!
-
+ggplotly(mort_plot)
 
 # As an alternative to making a ggplot chart interactive, we can build the same
 # plot using the plotly API directly
+#plot_ly
 
 # Using the `plot_ly()` function from the `plotly` package, pass in `plot_data`
 # as the data, and specify `year_id` as the x variable, mortality_rate as
 # the y variable, and `sex` as the color variable. 
 # (make sure to specify these as *formulas*)
 # Also set the plot type to "bar". Store the result in a variable.
-
+mort_plotly <- plot_ly(data = plot_data, x = ~year_id, y = ~mortality_rate, color = ~sex, type = "bar")
 
 
 # You should see that the cancer mortaility rates for female and males