library(tidyverse)
## ── Attaching core tidyverse packages ─────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tibble 3.2.1
## ── Conflicts ───────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Question 1:
glimpse(iris)
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
there are 5 columns or variables each referring to iris flower part (sepal width/length, petal width/length and then species of iris). It includes observations for 150 individual flowers
Question 2:
iris1 <- filter(iris, Species == c("virginica", "versicolor"), Sepal.Length > 6, Sepal.Width > 2.5)
glimpse(iris1)
## Rows: 28
## Columns: 5
## $ Sepal.Length <dbl> 6.4, 6.1, 6.7, 6.1, 6.1, 6.6, 6.7, 6.1, 6.2, 6.3, 7.1, 6.…
## $ Sepal.Width <dbl> 3.2, 2.9, 3.1, 2.8, 2.8, 3.0, 3.0, 3.0, 2.9, 3.3, 3.0, 3.…
## $ Petal.Length <dbl> 4.5, 4.7, 4.4, 4.0, 4.7, 4.4, 5.0, 4.6, 4.3, 6.0, 5.9, 5.…
## $ Petal.Width <dbl> 1.5, 1.4, 1.4, 1.3, 1.2, 1.4, 1.7, 1.4, 1.3, 2.5, 2.1, 2.…
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, versicolo…
there are observations and still 5 varibales because I used filter and did not manipulate/mutate any columns (variables)
Question 3:
iris2 <- select(iris1, Sepal.Length, Sepal.Width, Species)
glimpse(iris2)
## Rows: 28
## Columns: 3
## $ Sepal.Length <dbl> 6.4, 6.1, 6.7, 6.1, 6.1, 6.6, 6.7, 6.1, 6.2, 6.3, 7.1, 6.…
## $ Sepal.Width <dbl> 3.2, 2.9, 3.1, 2.8, 2.8, 3.0, 3.0, 3.0, 2.9, 3.3, 3.0, 3.…
## $ Species <fct> versicolor, versicolor, versicolor, versicolor, versicolo…
In this dataset there are 28 observations and 3 variables
Question 4:
iris3 <- arrange(iris2, by = desc(Sepal.Length))
head(iris3)
## Sepal.Length Sepal.Width Species
## 1 7.7 2.6 virginica
## 2 7.7 2.8 virginica
## 3 7.4 2.8 virginica
## 4 7.1 3.0 virginica
## 5 6.9 3.2 virginica
## 6 6.8 3.0 virginica
Question 5:
iris4 <- mutate(iris3, Sepal.Area= (Sepal.Length * Sepal.Width))
glimpse(iris4)
## Rows: 28
## Columns: 4
## $ Sepal.Length <dbl> 7.7, 7.7, 7.4, 7.1, 6.9, 6.8, 6.7, 6.7, 6.7, 6.7, 6.7, 6.…
## $ Sepal.Width <dbl> 2.6, 2.8, 2.8, 3.0, 3.2, 3.0, 3.1, 3.0, 3.3, 3.1, 3.3, 3.…
## $ Species <fct> virginica, virginica, virginica, virginica, virginica, vi…
## $ Sepal.Area <dbl> 20.02, 21.56, 20.72, 21.30, 22.08, 20.40, 20.77, 20.10, 2…
Observations are still 28 and variables are now 4
Question 6:
iris5 <- summarize(iris4, meanSepal.length = mean(Sepal.Length), meanSepal.Width = mean(Sepal.Width), number=n())
print(iris5)
## meanSepal.length meanSepal.Width number
## 1 6.575 3.003571 28
Question 7:
iris6 <- iris4 %>%
group_by(Species) %>%
summarize(meanSepal.length = mean(Sepal.Length), meanSepal.Width = mean(Sepal.Width), number=n())
print(iris6)
## # A tibble: 2 × 4
## Species meanSepal.length meanSepal.Width number
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.33 2.97 9
## 2 virginica 6.69 3.02 19
Question 8:
using the pipe for all prev questions
irisFinal <- iris %>%
filter(Species == c("virginica", "versicolor"), Sepal.Length > 6, Sepal.Width > 2.5) %>%
select(Sepal.Length, Sepal.Width, Species) %>%
arrange(by = desc(Sepal.Length)) %>%
mutate(Sepal.Area= (Sepal.Length * Sepal.Width)) %>%
group_by(Species) %>%
summarize(meanSepal.length = mean(Sepal.Length), meanSepal.Width = mean(Sepal.Width), number=n())
print(irisFinal)
## # A tibble: 2 × 4
## Species meanSepal.length meanSepal.Width number
## <fct> <dbl> <dbl> <int>
## 1 versicolor 6.33 2.97 9
## 2 virginica 6.69 3.02 19
Question 9:
Create a ‘longer’ data frame using the original iris data set with three columns named “Species”, “Measure”, “Value”. The column “Species” will retain the species names of the data set. The column “Measure” will include whether the value corresponds to Sepal.Length, Sepal.Width, Petal.Length, or Petal.Width and the column “Value” will include the numerical values of those measurements.
long_iris <- iris %>%
pivot_longer(col = Sepal.Length:Petal.Width, names_to = "Measure", values_to = "Value")
print(long_iris)
## # A tibble: 600 × 3
## Species Measure Value
## <fct> <chr> <dbl>
## 1 setosa Sepal.Length 5.1
## 2 setosa Sepal.Width 3.5
## 3 setosa Petal.Length 1.4
## 4 setosa Petal.Width 0.2
## 5 setosa Sepal.Length 4.9
## 6 setosa Sepal.Width 3
## 7 setosa Petal.Length 1.4
## 8 setosa Petal.Width 0.2
## 9 setosa Sepal.Length 4.7
## 10 setosa Sepal.Width 3.2
## # ℹ 590 more rows