Load your dataset with the function below. The input is the date the dataset was issued, which you can look up with the tt_available()
function.
# Download the TidyTuesday 2020-07-07 coffee ratings CSV and store the parsed
# result in `coffee`; readr guesses the column types and reports them.
coffee <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv"
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## total_cup_points = col_double(),
## number_of_bags = col_double(),
## aroma = col_double(),
## flavor = col_double(),
## aftertaste = col_double(),
## acidity = col_double(),
## body = col_double(),
## balance = col_double(),
## uniformity = col_double(),
## clean_cup = col_double(),
## sweetness = col_double(),
## cupper_points = col_double(),
## moisture = col_double(),
## category_one_defects = col_double(),
## quakers = col_double(),
## category_two_defects = col_double(),
## altitude_low_meters = col_double(),
## altitude_high_meters = col_double(),
## altitude_mean_meters = col_double()
## )
## See spec(...) for full column specifications.
Given your initial exploration of the data, what was the question you wanted to answer?
Does processing method affect overall coffee rating?
# Print a per-column summary of `coffee`: missing counts and completeness for
# every column, plus string-length/uniqueness stats for character columns and
# mean/sd/quantiles/histogram for numeric columns.
skimr::skim(coffee)
Name | coffee |
Number of rows | 1339 |
Number of columns | 43 |
_______________________ | |
Column type frequency: | |
character | 24 |
numeric | 19 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
species | 0 | 1.00 | 7 | 7 | 0 | 2 | 0 |
owner | 7 | 0.99 | 3 | 50 | 0 | 315 | 0 |
country_of_origin | 1 | 1.00 | 4 | 28 | 0 | 36 | 0 |
farm_name | 359 | 0.73 | 1 | 73 | 0 | 571 | 0 |
lot_number | 1063 | 0.21 | 1 | 71 | 0 | 227 | 0 |
mill | 315 | 0.76 | 1 | 77 | 0 | 460 | 0 |
ico_number | 151 | 0.89 | 1 | 40 | 0 | 847 | 0 |
company | 209 | 0.84 | 3 | 73 | 0 | 281 | 0 |
altitude | 226 | 0.83 | 1 | 41 | 0 | 396 | 0 |
region | 59 | 0.96 | 2 | 76 | 0 | 356 | 0 |
producer | 231 | 0.83 | 1 | 100 | 0 | 691 | 0 |
bag_weight | 0 | 1.00 | 1 | 8 | 0 | 56 | 0 |
in_country_partner | 0 | 1.00 | 7 | 85 | 0 | 27 | 0 |
harvest_year | 47 | 0.96 | 3 | 24 | 0 | 46 | 0 |
grading_date | 0 | 1.00 | 13 | 20 | 0 | 567 | 0 |
owner_1 | 7 | 0.99 | 3 | 50 | 0 | 319 | 0 |
variety | 226 | 0.83 | 4 | 21 | 0 | 29 | 0 |
processing_method | 170 | 0.87 | 5 | 25 | 0 | 5 | 0 |
color | 218 | 0.84 | 4 | 12 | 0 | 4 | 0 |
expiration | 0 | 1.00 | 13 | 20 | 0 | 566 | 0 |
certification_body | 0 | 1.00 | 7 | 85 | 0 | 26 | 0 |
certification_address | 0 | 1.00 | 40 | 40 | 0 | 32 | 0 |
certification_contact | 0 | 1.00 | 40 | 40 | 0 | 29 | 0 |
unit_of_measurement | 0 | 1.00 | 1 | 2 | 0 | 2 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
total_cup_points | 0 | 1.00 | 82.09 | 3.50 | 0 | 81.08 | 82.50 | 83.67 | 90.58 | ▁▁▁▁▇ |
number_of_bags | 0 | 1.00 | 154.18 | 129.99 | 0 | 14.00 | 175.00 | 275.00 | 1062.00 | ▇▇▁▁▁ |
aroma | 0 | 1.00 | 7.57 | 0.38 | 0 | 7.42 | 7.58 | 7.75 | 8.75 | ▁▁▁▁▇ |
flavor | 0 | 1.00 | 7.52 | 0.40 | 0 | 7.33 | 7.58 | 7.75 | 8.83 | ▁▁▁▁▇ |
aftertaste | 0 | 1.00 | 7.40 | 0.40 | 0 | 7.25 | 7.42 | 7.58 | 8.67 | ▁▁▁▁▇ |
acidity | 0 | 1.00 | 7.54 | 0.38 | 0 | 7.33 | 7.58 | 7.75 | 8.75 | ▁▁▁▁▇ |
body | 0 | 1.00 | 7.52 | 0.37 | 0 | 7.33 | 7.50 | 7.67 | 8.58 | ▁▁▁▁▇ |
balance | 0 | 1.00 | 7.52 | 0.41 | 0 | 7.33 | 7.50 | 7.75 | 8.75 | ▁▁▁▁▇ |
uniformity | 0 | 1.00 | 9.83 | 0.55 | 0 | 10.00 | 10.00 | 10.00 | 10.00 | ▁▁▁▁▇ |
clean_cup | 0 | 1.00 | 9.84 | 0.76 | 0 | 10.00 | 10.00 | 10.00 | 10.00 | ▁▁▁▁▇ |
sweetness | 0 | 1.00 | 9.86 | 0.62 | 0 | 10.00 | 10.00 | 10.00 | 10.00 | ▁▁▁▁▇ |
cupper_points | 0 | 1.00 | 7.50 | 0.47 | 0 | 7.25 | 7.50 | 7.75 | 10.00 | ▁▁▁▇▁ |
moisture | 0 | 1.00 | 0.09 | 0.05 | 0 | 0.09 | 0.11 | 0.12 | 0.28 | ▃▇▅▁▁ |
category_one_defects | 0 | 1.00 | 0.48 | 2.55 | 0 | 0.00 | 0.00 | 0.00 | 63.00 | ▇▁▁▁▁ |
quakers | 1 | 1.00 | 0.17 | 0.83 | 0 | 0.00 | 0.00 | 0.00 | 11.00 | ▇▁▁▁▁ |
category_two_defects | 0 | 1.00 | 3.56 | 5.31 | 0 | 0.00 | 2.00 | 4.00 | 55.00 | ▇▁▁▁▁ |
altitude_low_meters | 230 | 0.83 | 1750.71 | 8669.44 | 1 | 1100.00 | 1310.64 | 1600.00 | 190164.00 | ▇▁▁▁▁ |
altitude_high_meters | 230 | 0.83 | 1799.35 | 8668.81 | 1 | 1100.00 | 1350.00 | 1650.00 | 190164.00 | ▇▁▁▁▁ |
altitude_mean_meters | 230 | 0.83 | 1775.03 | 8668.63 | 1 | 1100.00 | 1310.64 | 1600.00 | 190164.00 | ▇▁▁▁▁ |
# Frequency table of samples per country_of_origin (count, percent and
# valid_percent), sorted from most to least frequent and rendered with gt.
coffee %>%
  janitor::tabyl(country_of_origin) %>%
  dplyr::arrange(dplyr::desc(n)) %>%
  gt::gt()
country_of_origin | n | percent | valid_percent |
---|---|---|---|
Mexico | 236 | 0.176250934 | 0.1763826607 |
Colombia | 183 | 0.136669156 | 0.1367713004 |
Guatemala | 181 | 0.135175504 | 0.1352765321 |
Brazil | 132 | 0.098581031 | 0.0986547085 |
Taiwan | 75 | 0.056011949 | 0.0560538117 |
United States (Hawaii) | 73 | 0.054518297 | 0.0545590433 |
Honduras | 53 | 0.039581777 | 0.0396113602 |
Costa Rica | 51 | 0.038088125 | 0.0381165919 |
Ethiopia | 44 | 0.032860344 | 0.0328849028 |
Tanzania, United Republic Of | 40 | 0.029873040 | 0.0298953662 |
Uganda | 36 | 0.026885736 | 0.0269058296 |
Thailand | 32 | 0.023898432 | 0.0239162930 |
Nicaragua | 26 | 0.019417476 | 0.0194319880 |
Kenya | 25 | 0.018670650 | 0.0186846039 |
El Salvador | 21 | 0.015683346 | 0.0156950673 |
Indonesia | 20 | 0.014936520 | 0.0149476831 |
China | 16 | 0.011949216 | 0.0119581465 |
India | 14 | 0.010455564 | 0.0104633782 |
Malawi | 11 | 0.008215086 | 0.0082212257 |
Peru | 10 | 0.007468260 | 0.0074738416 |
United States | 10 | 0.007468260 | 0.0074738416 |
Myanmar | 8 | 0.005974608 | 0.0059790732 |
Vietnam | 8 | 0.005974608 | 0.0059790732 |
Haiti | 6 | 0.004480956 | 0.0044843049 |
Philippines | 5 | 0.003734130 | 0.0037369208 |
Panama | 4 | 0.002987304 | 0.0029895366 |
United States (Puerto Rico) | 4 | 0.002987304 | 0.0029895366 |
Ecuador | 3 | 0.002240478 | 0.0022421525 |
Laos | 3 | 0.002240478 | 0.0022421525 |
Burundi | 2 | 0.001493652 | 0.0014947683 |
Cote d?Ivoire | 1 | 0.000746826 | 0.0007473842 |
Japan | 1 | 0.000746826 | 0.0007473842 |
Mauritius | 1 | 0.000746826 | 0.0007473842 |
Papua New Guinea | 1 | 0.000746826 | 0.0007473842 |
Rwanda | 1 | 0.000746826 | 0.0007473842 |
Zambia | 1 | 0.000746826 | 0.0007473842 |
NA | 1 | 0.000746826 | NA |
total_cup_points versus processing_method
# Boxplots of total_cup_points for each processing_method, one fill colour per
# method; coord_flip() turns the boxes horizontal.
ggplot(
  data = coffee,
  mapping = aes(
    x = processing_method,
    y = total_cup_points,
    fill = processing_method
  )
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  coord_flip()
# Count plot: one point per country_of_origin / processing_method combination,
# sized by the number of samples (geom_count) and coloured by method.
# fct_rev() reverses the level order of country_of_origin before plotting.
coffee %>%
  mutate(country_of_origin = fct_rev(country_of_origin)) %>%
  ggplot(
    mapping = aes(
      x = processing_method,
      y = country_of_origin,
      colour = processing_method
    )
  ) +
  geom_count() +
  theme(axis.text.x = element_text(angle = 90))
Here’s a sortable table of the counts shown in the plot above
library(reactable)
# Cross-tabulate country_of_origin (rows) against processing_method (columns)
# and display the resulting counts as a reactable table.
reactable::reactable(
  janitor::tabyl(coffee, country_of_origin, processing_method)
)
total_cup_points
# Tile plot of the individual cupping scores: one row per sample, one column
# per score type, filled by score. Samples are ordered on the y axis by their
# total_cup_points.
# NOTE(review): aftertaste is not among the plotted scores -- confirm whether
# that omission is intentional.
coffee %>%
  # Row position as a character id, one per sample.
  mutate(sample_id = as.character(row_number())) %>%
  select(
    sample_id, country_of_origin, total_cup_points,
    aroma, flavor, acidity, body, balance,
    uniformity, clean_cup, sweetness, cupper_points
  ) %>%
  # After select() the score columns are contiguous, so a range covers them.
  pivot_longer(
    cols = aroma:cupper_points,
    names_to = "type",
    values_to = "score"
  ) %>%
  mutate(sample_id = fct_reorder(sample_id, total_cup_points)) %>%
  ggplot(aes(x = type, y = sample_id, fill = score)) +
  geom_tile()
library(heatmaply)
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Loading required package: viridis
## Loading required package: viridisLite
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust gclus
##
## ======================
## Welcome to heatmaply version 1.1.0
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## Or contact: <tal.galili@gmail.com>
## ======================
# Interactive heat map (heatmaply) of the individual cupping-score columns.
# Only the score columns are passed on, so no sample_id column is created
# beforehand: select() would discard it again before heatmaply() sees it.
# NOTE(review): aftertaste is not included -- confirm that is intentional.
coffee %>%
  select(
    aroma, flavor, acidity, body, balance,
    uniformity, clean_cup, sweetness, cupper_points
  ) %>%
  heatmaply()
Natural / Dry
# Per-country boxplots of total_cup_points, restricted to samples whose
# processing_method is "Natural / Dry". Countries are ordered by their median
# total_cup_points, the boxes are drawn horizontally, and the legend is hidden.
coffee %>%
  filter(processing_method == "Natural / Dry") %>%
  mutate(
    country_of_origin = fct_reorder(
      country_of_origin,
      total_cup_points,
      .fun = median
    )
  ) %>%
  ggplot(
    aes(
      x = country_of_origin,
      y = total_cup_points,
      fill = country_of_origin
    )
  ) +
  geom_boxplot() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  ) +
  coord_flip() +
  labs(title = "Tanzania leads with ratings in Natural/Dry")
Washed / Wet
# Per-country boxplots of total_cup_points, restricted to samples whose
# processing_method is "Washed / Wet". Countries are ordered by their median
# total_cup_points, the boxes are drawn horizontally, and the legend is hidden.
coffee %>%
  filter(processing_method == "Washed / Wet") %>%
  mutate(
    country_of_origin = fct_reorder(
      country_of_origin,
      total_cup_points,
      .fun = median
    )
  ) %>%
  ggplot(
    aes(
      x = country_of_origin,
      y = total_cup_points,
      fill = country_of_origin
    )
  ) +
  geom_boxplot() +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  ) +
  coord_flip() +
  labs(title = "US leads in Ratings in Washed/Wet")
# Boxplots of total_cup_points by processing_method for samples from Mexico
# only, with methods ordered by their median total_cup_points and the boxes
# drawn horizontally with black outlines.
coffee %>%
  filter(country_of_origin == "Mexico") %>%
  mutate(
    processing_method = fct_reorder(
      processing_method,
      total_cup_points,
      .fun = median
    )
  ) %>%
  ggplot(
    aes(
      x = processing_method,
      y = total_cup_points,
      fill = processing_method
    )
  ) +
  geom_boxplot(colour = "black") +
  coord_flip()
total_cup_points
# Fit a linear model of total_cup_points on country_of_origin and
# category_one_defects, tidy the coefficient table, and keep only the terms
# with p.value < 0.05, sorted from smallest to largest p-value.
# NOTE(review): the p-values are used as-is, with no multiple-comparison
# adjustment across the country terms.
lm(
  total_cup_points ~ country_of_origin + category_one_defects,
  data = coffee
) %>%
  broom::tidy() %>%
  filter(p.value < 0.05) %>%
  arrange(p.value)
## # A tibble: 9 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 82.4 0.286 288. 0
## 2 country_of_originHonduras -3.06 0.535 -5.72 0.0000000133
## 3 country_of_originEthiopia 3.10 0.573 5.41 0.0000000757
## 4 category_one_defects -0.163 0.0371 -4.40 0.0000116
## 5 country_of_originMexico -1.40 0.359 -3.90 0.000100
## 6 country_of_originHaiti -5.00 1.37 -3.64 0.000284
## 7 country_of_originNicaragua -1.92 0.706 -2.73 0.00649
## 8 country_of_originKenya 1.89 0.718 2.63 0.00859
## 9 country_of_originColombia 0.741 0.376 1.97 0.0488