knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(tidyverse)
charts <- read_csv("data/charts.csv", n_max = 100000)
head(charts)
title <chr> | rank <dbl> | date <date> | artist <chr> | |
---|---|---|---|---|
Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
El Amante | 6 | 2017-02-01 | Nicky Jam |
charts %>%
filter(chart == 'top200', region == 'Sweden') %>%
group_by(artist) %>%
summarise(
n_dates = n_distinct(date),
n_songs = n_distinct(title)) %>%
arrange(desc(n_dates)) %>%
head(10)
artist <chr> | n_dates <int> | n_songs <int> | ||
---|---|---|---|---|
A R I Z O N A | 8 | 2 | ||
Adele | 8 | 5 | ||
AJR | 8 | 1 | ||
Alan Walker | 8 | 3 | ||
Alessia Cara | 8 | 2 | ||
Alok, Bruno Martini, Zeeba | 8 | 1 | ||
Ariana Grande, Nicki Minaj | 8 | 1 | ||
Bebe Rexha | 8 | 1 | ||
Big Sean | 8 | 1 | ||
Bruno Mars | 8 | 2 |
library(DBI)
library(duckdb)
con = dbConnect(duckdb(), dbdir="duckdb_r_db")
dbExecute(con,"drop table if exists charts;")
## [1] 0
dbExecute(con, "
create table charts as
select * from read_csv_auto('data/charts.csv');
")
## [1] 25001801
dbGetQuery(con, "select * from charts limit 10")
title <chr> | rank <int> | date <date> | artist <chr> | |
---|---|---|---|---|
Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
El Amante | 6 | 2017-02-01 | Nicky Jam | |
Shape of You | 7 | 2017-02-01 | Ed Sheeran | |
Vente Pa' Ca (feat. Maluma) | 8 | 2017-02-01 | Ricky Martin | |
Safari | 9 | 2017-02-01 | J Balvin, Pharrell Williams, BIA, Sky | |
Vacaciones | 10 | 2017-02-01 | Wisin |
dbGetQuery(con, "
select
artist,
count(distinct date) n_dates,
count(distinct title) n_songs
from charts
where chart = 'top200' and region = 'Sweden'
group by artist
order by n_dates desc
limit 10
")
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
---|---|---|---|---|
Hov1 | 1664 | 46 | ||
Avicii | 1540 | 52 | ||
Ed Sheeran | 1514 | 59 | ||
Laleh | 1484 | 18 | ||
Molly Sandén | 1468 | 37 | ||
Journey | 1431 | 1 | ||
Miriam Bryant | 1291 | 18 | ||
James Arthur | 1271 | 16 | ||
Post Malone | 1249 | 39 | ||
Billie Eilish | 1205 | 40 |
duckdb_charts <- tbl(con, "charts")
head(duckdb_charts)
title <chr> | rank <int> | date <date> | artist <chr> | |
---|---|---|---|---|
Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
El Amante | 6 | 2017-02-01 | Nicky Jam |
duckdb_charts <- tbl(con, "charts")
duckdb_charts %>%
filter(chart == 'top200', region == 'Sweden') %>%
group_by(artist) %>%
summarise(n_dates = n_distinct(date), n_songs = n_distinct(title)) %>%
arrange(desc(n_dates)) %>%
head(10)
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
---|---|---|---|---|
Hov1 | 1664 | 46 | ||
Avicii | 1540 | 52 | ||
Ed Sheeran | 1514 | 59 | ||
Laleh | 1484 | 18 | ||
Molly Sandén | 1468 | 37 | ||
Journey | 1431 | 1 | ||
Miriam Bryant | 1291 | 18 | ||
James Arthur | 1271 | 16 | ||
Post Malone | 1249 | 39 | ||
Billie Eilish | 1205 | 40 |
dbDisconnect(con, shutdown=TRUE)
library(sparklyr)
library(tidyverse)
sc <- spark_connect(master = "local")
spark_charts <- spark_read_csv(sc, "charts", "data/charts.csv")
head(spark_charts)
title <chr> | rank <chr> | date <chr> | artist <chr> | |
---|---|---|---|---|
Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
El Amante | 6 | 2017-02-01 | Nicky Jam |
spark_charts %>%
filter(chart == 'top200', region == 'Sweden') %>%
group_by(artist) %>%
summarise(n_dates = n_distinct(date), n_songs = n_distinct(title)) %>%
arrange(desc(n_dates)) %>%
head(10)
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
---|---|---|---|---|
Hov1 | 1664 | 46 | ||
Avicii | 1540 | 52 | ||
Ed Sheeran | 1514 | 59 | ||
Laleh | 1484 | 18 | ||
Molly Sandén | 1468 | 37 | ||
Journey | 1431 | 1 | ||
Miriam Bryant | 1291 | 18 | ||
James Arthur | 1271 | 16 | ||
Post Malone | 1249 | 39 | ||
Billie Eilish | 1205 | 40 |
dbGetQuery(sc, "select * from charts limit 10")
title <chr> | rank <chr> | date <chr> | artist <chr> | |
---|---|---|---|---|
Despacito (Featuring Daddy Yankee) | 1 | 2017-02-01 | Luis Fonsi | |
Reggaetón Lento (Bailemos) | 2 | 2017-02-01 | CNCO | |
Chantaje (feat. Maluma) | 3 | 2017-02-01 | Shakira | |
Cuando Se Pone a Bailar | 4 | 2017-02-01 | Rombai | |
Otra vez (feat. J Balvin) | 5 | 2017-02-01 | Zion & Lennox | |
El Amante | 6 | 2017-02-01 | Nicky Jam | |
Shape of You | 7 | 2017-02-01 | Ed Sheeran | |
Vente Pa' Ca (feat. Maluma) | 8 | 2017-02-01 | Ricky Martin | |
Safari | 9 | 2017-02-01 | J Balvin, Pharrell Williams, BIA, Sky | |
Vacaciones | 10 | 2017-02-01 | Wisin |
dbGetQuery(sc, "
select
artist,
count(distinct date) n_dates,
count(distinct title) n_songs
from charts
where chart = 'top200' and region = 'Sweden'
group by artist
order by n_dates desc
limit 10
")
artist <chr> | n_dates <dbl> | n_songs <dbl> | ||
---|---|---|---|---|
Hov1 | 1664 | 46 | ||
Avicii | 1540 | 52 | ||
Ed Sheeran | 1514 | 59 | ||
Laleh | 1484 | 18 | ||
Molly Sandén | 1468 | 37 | ||
Journey | 1431 | 1 | ||
Miriam Bryant | 1291 | 18 | ||
James Arthur | 1271 | 16 | ||
Post Malone | 1249 | 39 | ||
Billie Eilish | 1205 | 40 |
spark_disconnect(sc)
library("RJDBC")
library("DBI")
jdbcDriver <- JDBC(driverClass="com.snowflake.client.jdbc.SnowflakeDriver",
classPath=Sys.getenv("SNOWFLAKE_CLASS_PATH"))
con <- dbConnect(jdbcDriver, "jdbc:snowflake://kc75442.us-east-1.snowflakecomputing.com",
Sys.getenv("SNOWFLAKE_USER"),Sys.getenv("SNOWFLAKE_PASSWORD"))
dbSendQuery(con, "use role data_scientist")
## <JDBCResult>
dbSendQuery(con, "use schema analytics.ds")
## <JDBCResult>
dbGetQuery(con, "select * from charts limit 10")
TITLE <chr> | RANK <dbl> | DATE <chr> | ARTIST <chr> | ||
---|---|---|---|---|---|
1 | #Ysya2020 Vol. 6 - Un Flow de Infarto | 29 | 2020-11-17 | YSY A, Bizarrap | |
2 | Reloj | 30 | 2020-11-17 | Rauw Alejandro, Anuel AA | |
3 | Make A Wish (Birthday Song) | 31 | 2020-11-17 | NCT U | |
4 | Bonita | 32 | 2020-11-17 | Jeeiph | |
5 | Una Locura - Remix | 33 | 2020-11-17 | Bebe DJ, Maxi Jayat | |
6 | La Tóxica | 34 | 2020-11-17 | Farruko | |
7 | Why Not? | 35 | 2020-11-17 | LOONA | |
8 | Sofia | 36 | 2020-11-17 | Clairo | |
9 | Jeans | 37 | 2020-11-17 | Justin Quiles | |
10 | Se Te Nota (with Guaynaa) | 38 | 2020-11-17 | Lele Pons |
dbGetQuery(con, "
select
artist,
count(distinct date) n_dates,
count(distinct title) n_songs
from charts
where chart = 'top200' and region = 'Sweden'
group by artist
order by n_dates desc
limit 10
")
ARTIST <chr> | N_DATES <dbl> | N_SONGS <dbl> | ||
---|---|---|---|---|
1 | Hov1 | 1664 | 46 | |
2 | Avicii | 1540 | 52 | |
3 | Ed Sheeran | 1514 | 59 | |
4 | Laleh | 1484 | 18 | |
5 | Molly Sandén | 1468 | 37 | |
6 | Journey | 1431 | 1 | |
7 | Miriam Bryant | 1291 | 18 | |
8 | James Arthur | 1271 | 16 | |
9 | Post Malone | 1249 | 39 | |
10 | Billie Eilish | 1205 | 40 |
dbGetQuery(con, "
select *
from charts
where artist = 'Journey'
order by date desc
limit 100
")
TITLE <chr> | RANK <dbl> | DATE <chr> | ARTIST <chr> | ||
---|---|---|---|---|---|
1 | Don't Stop Believin' | 103 | 2021-09-30 | Journey | |
2 | Don't Stop Believin' | 196 | 2021-09-30 | Journey | |
3 | Don't Stop Believin' | 101 | 2021-09-30 | Journey | |
4 | Don't Stop Believin' | 137 | 2021-09-30 | Journey | |
5 | Don't Stop Believin' | 193 | 2021-09-30 | Journey | |
6 | Don't Stop Believin' | 173 | 2021-09-30 | Journey | |
7 | Don't Stop Believin' | 139 | 2021-09-30 | Journey | |
8 | Don't Stop Believin' | 125 | 2021-09-29 | Journey | |
9 | Don't Stop Believin' | 149 | 2021-09-29 | Journey | |
10 | Don't Stop Believin' | 184 | 2021-09-29 | Journey |
dbDisconnect(con)
## [1] TRUE