knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(tidyverse)
charts <- read_csv("data/charts.csv", n_max = 100000)
head(charts)
title <chr> | rank <dbl> | date <date> | artist <chr> | |
|---|---|---|---|---|
| Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
| Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
| Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
| Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
| Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
| El Amante | 6 | 2017-02-01 | Nicky Jam |
charts %>%
filter(chart == 'top200', region == 'Sweden') %>%
group_by(artist) %>%
summarise(
n_dates = n_distinct(date),
n_songs = n_distinct(title)) %>%
arrange(desc(n_dates)) %>%
head(10)
artist <chr> | n_dates <int> | n_songs <int> | ||
|---|---|---|---|---|
| A R I Z O N A | 8 | 2 | ||
| Adele | 8 | 5 | ||
| AJR | 8 | 1 | ||
| Alan Walker | 8 | 3 | ||
| Alessia Cara | 8 | 2 | ||
| Alok, Bruno Martini, Zeeba | 8 | 1 | ||
| Ariana Grande, Nicki Minaj | 8 | 1 | ||
| Bebe Rexha | 8 | 1 | ||
| Big Sean | 8 | 1 | ||
| Bruno Mars | 8 | 2 |
library(DBI)
library(duckdb)
con = dbConnect(duckdb(), dbdir="duckdb_r_db")
dbExecute(con,"drop table if exists charts;")
## [1] 0
dbExecute(con, "
create table charts as
select * from read_csv_auto('data/charts.csv');
")
## [1] 25001801
dbGetQuery(con, "select * from charts limit 10")
title <chr> | rank <int> | date <date> | artist <chr> | |
|---|---|---|---|---|
| Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
| Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
| Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
| Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
| Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
| El Amante | 6 | 2017-02-01 | Nicky Jam | |
| Shape of You | 7 | 2017-02-01 | Ed Sheeran | |
| Vente Pa' Ca (feat. Maluma) | 8 | 2017-02-01 | Ricky Martin | |
| Safari | 9 | 2017-02-01 | J Balvin, Pharrell Williams, BIA, Sky | |
| Vacaciones | 10 | 2017-02-01 | Wisin |
dbGetQuery(con, "
select
artist,
count(distinct date) n_dates,
count(distinct title) n_songs
from charts
where chart = 'top200' and region = 'Sweden'
group by artist
order by n_dates desc
limit 10
")
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
|---|---|---|---|---|
| Hov1 | 1664 | 46 | ||
| Avicii | 1540 | 52 | ||
| Ed Sheeran | 1514 | 59 | ||
| Laleh | 1484 | 18 | ||
| Molly Sandén | 1468 | 37 | ||
| Journey | 1431 | 1 | ||
| Miriam Bryant | 1291 | 18 | ||
| James Arthur | 1271 | 16 | ||
| Post Malone | 1249 | 39 | ||
| Billie Eilish | 1205 | 40 |
duckdb_charts <- tbl(con, "charts")
head(duckdb_charts)
title <chr> | rank <int> | date <date> | artist <chr> | |
|---|---|---|---|---|
| Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
| Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
| Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
| Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
| Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
| El Amante | 6 | 2017-02-01 | Nicky Jam |
duckdb_charts <- tbl(con, "charts")
duckdb_charts %>%
filter(chart == 'top200', region == 'Sweden') %>%
group_by(artist) %>%
summarise(n_dates = n_distinct(date), n_songs = n_distinct(title)) %>%
arrange(desc(n_dates)) %>%
head(10)
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
|---|---|---|---|---|
| Hov1 | 1664 | 46 | ||
| Avicii | 1540 | 52 | ||
| Ed Sheeran | 1514 | 59 | ||
| Laleh | 1484 | 18 | ||
| Molly Sandén | 1468 | 37 | ||
| Journey | 1431 | 1 | ||
| Miriam Bryant | 1291 | 18 | ||
| James Arthur | 1271 | 16 | ||
| Post Malone | 1249 | 39 | ||
| Billie Eilish | 1205 | 40 |
dbDisconnect(con, shutdown=TRUE)
library(sparklyr)
library(tidyverse)
sc <- spark_connect(master = "local")
spark_charts <- spark_read_csv(sc, "charts", "data/charts.csv")
head(spark_charts)
title <chr> | rank <chr> | date <chr> | artist <chr> | |
|---|---|---|---|---|
| Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
| Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
| Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
| Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
| Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
| El Amante | 6 | 2017-02-01 | Nicky Jam |
spark_charts %>%
filter(chart == 'top200', region == 'Sweden') %>%
group_by(artist) %>%
summarise(n_dates = n_distinct(date), n_songs = n_distinct(title)) %>%
arrange(desc(n_dates)) %>%
head(10)
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
|---|---|---|---|---|
| Hov1 | 1664 | 46 | ||
| Avicii | 1540 | 52 | ||
| Ed Sheeran | 1514 | 59 | ||
| Laleh | 1484 | 18 | ||
| Molly Sandén | 1468 | 37 | ||
| Journey | 1431 | 1 | ||
| Miriam Bryant | 1291 | 18 | ||
| James Arthur | 1271 | 16 | ||
| Post Malone | 1249 | 39 | ||
| Billie Eilish | 1205 | 40 |
dbGetQuery(sc, "select * from charts limit 10")
title <chr> | rank <chr> | date <chr> | artist <chr> | |
|---|---|---|---|---|
| Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
| Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
| Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
| Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
| Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
| El Amante | 6 | 2017-02-01 | Nicky Jam | |
| Shape of You | 7 | 2017-02-01 | Ed Sheeran | |
| Vente Pa' Ca (feat. Maluma) | 8 | 2017-02-01 | Ricky Martin | |
| Safari | 9 | 2017-02-01 | J Balvin, Pharrell Williams, BIA, Sky | |
| Vacaciones | 10 | 2017-02-01 | Wisin |
dbGetQuery(sc, "
select
artist,
count(distinct date) n_dates,
count(distinct title) n_songs
from charts
where chart = 'top200' and region = 'Sweden'
group by artist
order by n_dates desc
limit 10
")
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
|---|---|---|---|---|
| Hov1 | 1664 | 46 | ||
| Avicii | 1540 | 52 | ||
| Ed Sheeran | 1514 | 59 | ||
| Laleh | 1484 | 18 | ||
| Molly Sandén | 1468 | 37 | ||
| Journey | 1431 | 1 | ||
| Miriam Bryant | 1291 | 18 | ||
| James Arthur | 1271 | 16 | ||
| Post Malone | 1249 | 39 | ||
| Billie Eilish | 1205 | 40 |
spark_disconnect(sc)
library("RJDBC")
library("DBI")
jdbcDriver <- JDBC(driverClass="com.snowflake.client.jdbc.SnowflakeDriver",
classPath=Sys.getenv("SNOWFLAKE_CLASS_PATH"))
con <- dbConnect(jdbcDriver, "jdbc:snowflake://kc75442.us-east-1.snowflakecomputing.com",
Sys.getenv("SNOWFLAKE_USER"),Sys.getenv("SNOWFLAKE_PASSWORD"))
dbSendQuery(con, "use role data_scientist")
## <JDBCResult>
dbSendQuery(con, "use schema analytics.ds")
## <JDBCResult>
dbGetQuery(con, "select * from charts limit 10")
TITLE <chr> | RANK <dbl> | DATE <chr> | ARTIST <chr> | ||
|---|---|---|---|---|---|
| 1 | #Ysya2020 Vol. 6 - Un Flow de Infarto | 29 | 2020-11-17 | YSY A, Bizarrap | |
| 2 | Reloj | 30 | 2020-11-17 | Rauw Alejandro, Anuel AA | |
| 3 | Make A Wish (Birthday Song) | 31 | 2020-11-17 | NCT U | |
| 4 | Bonita | 32 | 2020-11-17 | Jeeiph | |
| 5 | Una Locura - Remix | 33 | 2020-11-17 | Bebe DJ, Maxi Jayat | |
| 6 | La Tóxica | 34 | 2020-11-17 | Farruko | |
| 7 | Why Not? | 35 | 2020-11-17 | LOONA | |
| 8 | Sofia | 36 | 2020-11-17 | Clairo | |
| 9 | Jeans | 37 | 2020-11-17 | Justin Quiles | |
| 10 | Se Te Nota (with Guaynaa) | 38 | 2020-11-17 | Lele Pons |
dbGetQuery(con, "
select
artist,
count(distinct date) n_dates,
count(distinct title) n_songs
from charts
where chart = 'top200' and region = 'Sweden'
group by artist
order by n_dates desc
limit 10
")
ARTIST <chr> | N_DATES <dbl> | N_SONGS <dbl> | ||
|---|---|---|---|---|
| 1 | Hov1 | 1664 | 46 | |
| 2 | Avicii | 1540 | 52 | |
| 3 | Ed Sheeran | 1514 | 59 | |
| 4 | Laleh | 1484 | 18 | |
| 5 | Molly Sandén | 1468 | 37 | |
| 6 | Journey | 1431 | 1 | |
| 7 | Miriam Bryant | 1291 | 18 | |
| 8 | James Arthur | 1271 | 16 | |
| 9 | Post Malone | 1249 | 39 | |
| 10 | Billie Eilish | 1205 | 40 |
dbGetQuery(con, "
select *
from charts
where artist = 'Journey'
order by date desc
limit 100
")
TITLE <chr> | RANK <dbl> | DATE <chr> | ARTIST <chr> | ||
|---|---|---|---|---|---|
| 1 | Don't Stop Believin' | 103 | 2021-09-30 | Journey | |
| 2 | Don't Stop Believin' | 196 | 2021-09-30 | Journey | |
| 3 | Don't Stop Believin' | 101 | 2021-09-30 | Journey | |
| 4 | Don't Stop Believin' | 137 | 2021-09-30 | Journey | |
| 5 | Don't Stop Believin' | 193 | 2021-09-30 | Journey | |
| 6 | Don't Stop Believin' | 173 | 2021-09-30 | Journey | |
| 7 | Don't Stop Believin' | 139 | 2021-09-30 | Journey | |
| 8 | Don't Stop Believin' | 125 | 2021-09-29 | Journey | |
| 9 | Don't Stop Believin' | 149 | 2021-09-29 | Journey | |
| 10 | Don't Stop Believin' | 184 | 2021-09-29 | Journey |
dbDisconnect(con)
## [1] TRUE