library(tidyverse)
## ── Attaching core tidyverse packages ─────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ stringr   1.5.1
## ✔ purrr     1.0.2     ✔ tibble    3.2.1
## ── Conflicts ───────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Question 1:

# Compact, one-line-per-column preview of the built-in iris data.
iris %>% glimpse()
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
# Base-R structure summary of the same data frame, for comparison with glimpse().
iris %>% str()
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...

There are 5 columns (variables): four flower measurements (sepal length, sepal width, petal length, and petal width) and the species of iris. The data set contains observations for 150 individual flowers.

Question 2:

# Keep versicolor and virginica flowers whose sepals are longer than 6 and
# wider than 2.5.
#
# `%in%` tests every row for membership in the set of species names. Writing
# `Species == c("virginica", "versicolor")` instead recycles the two names down
# the column (odd rows compared with "virginica", even rows with "versicolor"),
# which silently drops about half of the matching rows.
#
# NOTE(review): outputs recorded further down that derive from iris1 were
# produced from the earlier 28-row result and need to be regenerated.
iris1 <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  )

glimpse(iris1)
## Rows: 56
## Columns: 5
## $ Sepal.Length <dbl> 7.0, 6.4, 6.9, 6.5, 6.3, 6.6, 6.1, 6.7, 6.1, 6.1, 6.4, 6.…
## $ Sepal.Width  <dbl> 3.2, 3.2, 3.1, 2.8, 3.3, 2.9, 2.9, 3.1, 2.8, 2.8, 2.9, 3.…
## $ Petal.Length <dbl> 4.7, 4.5, 4.9, 4.6, 4.7, 4.6, 4.7, 4.4, 4.0, 4.7, 4.3, 4.…
## $ Petal.Width  <dbl> 1.4, 1.5, 1.5, 1.5, 1.6, 1.3, 1.4, 1.4, 1.3, 1.2, 1.3, 1.…
## $ Species      <fct> versicolor, versicolor, versicolor, versicolor, versicolo…

Filtering reduces the number of observations (see the row count in the output above), but there are still 5 variables, because filter() only removes rows and does not add, drop, or change any columns (variables).

Question 3:

# Narrow the filtered data to the two sepal measurements plus the species label.
iris2 <- iris1 %>%
  select(Sepal.Length, Sepal.Width, Species)
iris2 %>% glimpse()
## Rows: 28
## Columns: 3
## $ Sepal.Length <dbl> 6.4, 6.1, 6.7, 6.1, 6.1, 6.6, 6.7, 6.1, 6.2, 6.3, 7.1, 6.…
## $ Sepal.Width  <dbl> 3.2, 2.9, 3.1, 2.8, 2.8, 3.0, 3.0, 3.0, 2.9, 3.3, 3.0, 3.…
## $ Species      <fct> versicolor, versicolor, versicolor, versicolor, versicolo…

In this dataset there are 28 observations and 3 variables

Question 4:

# Sort from the longest to the shortest sepal. arrange() takes its ordering
# expressions directly; it has no `by` argument, so the expression is passed
# unnamed.
iris3 <- iris2 %>%
  arrange(desc(Sepal.Length))
head(iris3)
##   Sepal.Length Sepal.Width   Species
## 1          7.7         2.6 virginica
## 2          7.7         2.8 virginica
## 3          7.4         2.8 virginica
## 4          7.1         3.0 virginica
## 5          6.9         3.2 virginica
## 6          6.8         3.0 virginica

Question 5:

# Add a derived column: sepal area as length times width.
iris4 <- iris3 %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width)
iris4 %>% glimpse()
## Rows: 28
## Columns: 4
## $ Sepal.Length <dbl> 7.7, 7.7, 7.4, 7.1, 6.9, 6.8, 6.7, 6.7, 6.7, 6.7, 6.7, 6.…
## $ Sepal.Width  <dbl> 2.6, 2.8, 2.8, 3.0, 3.2, 3.0, 3.1, 3.0, 3.3, 3.1, 3.3, 3.…
## $ Species      <fct> virginica, virginica, virginica, virginica, virginica, vi…
## $ Sepal.Area   <dbl> 20.02, 21.56, 20.72, 21.30, 22.08, 20.40, 20.77, 20.10, 2…

Observations are still 28 and variables are now 4

Question 6:

# Collapse the whole table to one row: mean sepal length, mean sepal width,
# and the number of flowers that went into those means.
iris5 <- iris4 %>%
  summarise(
    meanSepal.length = mean(Sepal.Length),
    meanSepal.Width = mean(Sepal.Width),
    number = n()
  )

print(iris5)
##   meanSepal.length meanSepal.Width number
## 1            6.575        3.003571     28

Question 7:

# Same summary statistics as before, but computed separately for each species.
iris6 <- group_by(iris4, Species) %>%
  summarise(
    meanSepal.length = mean(Sepal.Length),
    meanSepal.Width = mean(Sepal.Width),
    number = n()
  )

print(iris6)
## # A tibble: 2 × 4
##   Species    meanSepal.length meanSepal.Width number
##   <fct>                 <dbl>           <dbl>  <int>
## 1 versicolor             6.33            2.97      9
## 2 virginica              6.69            3.02     19

Question 8:

Using the pipe to chain the steps from all of the previous questions:

# The full analysis as a single pipeline: filter -> select -> arrange ->
# mutate -> per-species summary.
#
# `%in%` is used for the species test: `Species == c("virginica", "versicolor")`
# would recycle the two names down the column and silently drop about half of
# the matching rows. arrange() has no `by` argument, so the ordering expression
# is passed unnamed.
#
# NOTE(review): the recorded Question 7 output was produced with the recycled
# `==` comparison, so its counts differ from the corrected ones shown here.
irisFinal <- iris %>%
  filter(
    Species %in% c("virginica", "versicolor"),
    Sepal.Length > 6,
    Sepal.Width > 2.5
  ) %>%
  select(Sepal.Length, Sepal.Width, Species) %>%
  arrange(desc(Sepal.Length)) %>%
  mutate(Sepal.Area = Sepal.Length * Sepal.Width) %>%
  group_by(Species) %>%
  summarize(
    meanSepal.length = mean(Sepal.Length),
    meanSepal.Width = mean(Sepal.Width),
    number = n()
  )

print(irisFinal)
## # A tibble: 2 × 4
##   Species    meanSepal.length meanSepal.Width number
##   <fct>                 <dbl>           <dbl>  <int>
## 1 versicolor             6.48            2.99     17
## 2 virginica              6.79            3.06     39

Question 9:

Create a ‘longer’ data frame using the original iris data set with three columns named “Species”, “Measure”, “Value”. The column “Species” will retain the species names of the data set. The column “Measure” will include whether the value corresponds to Sepal.Length, Sepal.Width, Petal.Length, or Petal.Width and the column “Value” will include the numerical values of those measurements.

# Reshape iris from wide to long: the four measurement columns are stacked
# into a "Measure" (which measurement) / "Value" (its number) pair, while
# Species is carried along unchanged.
# The argument is spelled `cols` in full rather than relying on partial
# matching of `col`.
long_iris <- iris %>%
  pivot_longer(
    cols = Sepal.Length:Petal.Width,
    names_to = "Measure",
    values_to = "Value"
  )

print(long_iris)
## # A tibble: 600 × 3
##    Species Measure      Value
##    <fct>   <chr>        <dbl>
##  1 setosa  Sepal.Length   5.1
##  2 setosa  Sepal.Width    3.5
##  3 setosa  Petal.Length   1.4
##  4 setosa  Petal.Width    0.2
##  5 setosa  Sepal.Length   4.9
##  6 setosa  Sepal.Width    3  
##  7 setosa  Petal.Length   1.4
##  8 setosa  Petal.Width    0.2
##  9 setosa  Sepal.Length   4.7
## 10 setosa  Sepal.Width    3.2
## # ℹ 590 more rows