https://nyu.databrary.org/volume/444
https://nyu.databrary.org/volume/254
pilot_demog <- summarize_demog(vol.id = pilot.vol.id, return.df = TRUE)
pilot_demog %>%
ggplot(.) +
aes(x = participant.gestational.age, y = participant.birth.weight, color = participant.race) +
geom_point()
## Warning: Removed 1 rows containing missing values (geom_point).
pilot_demog %>%
ggplot(.) +
aes(x = age.weeks, fill = participant.gender) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
survey.session.id <- 26295
list_assets_in_session(vol.id = pilot.vol.id, session.id = survey.session.id) %>%
select(asset.name, asset.type, asset.id)
(csv_fl <- list.files('csv', '\\.csv$', full.names = TRUE))
## [1] "csv/databrary444-Adolph-Tamis-LeMonda-Gilmore-PLAY-pilot-data-materials-26295-Parent_report_data-116787-language-exposure.csv"
## [2] "csv/databrary444-Adolph-Tamis-LeMonda-Gilmore-PLAY-pilot-data-materials-26295-Parent_report_data-116790-family.csv"
## [3] "csv/databrary444-Adolph-Tamis-LeMonda-Gilmore-PLAY-pilot-data-materials-26295-Parent_report_data-117092-locomotion.csv"
lang_exp <- read_csv(csv_fl[1])
## Parsed with column specification:
## cols(
## id = col_integer(),
## language = col_character(),
## exposure_context = col_character()
## )
names(lang_exp)
## [1] "id" "language" "exposure_context"
play.palette <- scale_fill_manual(values=c("blue2", "firebrick2", "chartreuse2", "darkorchid2"))
play.theme <-
theme_classic() +
theme(legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = rel(1.2)),
axis.title = element_text(size = rel(1.5), face ="bold"),
axis.text.x = element_text(size = rel(1.2)),
axis.text.y = element_text(size = rel(1.2)),
axis.line = element_blank(),
axis.title.y = element_blank())
lang_exp %>%
ggplot(.) +
aes(x = language) +
facet_grid(exposure_context ~ .) +
geom_histogram(stat='count') +
play.theme
## Warning: Ignoring unknown parameters: binwidth, bins, pad
loco_onset <- read_csv(csv_fl[3])
## Parsed with column specification:
## cols(
## id = col_integer(),
## hkcrawl_onset_date = col_character(),
## hkcrawl_onset_mos = col_double(),
## walk_onset_date = col_character(),
## walk_onset_mos = col_double(),
## walk_onset_src = col_character(),
## `Interview Comments` = col_character()
## )
names(loco_onset)
## [1] "id" "hkcrawl_onset_date" "hkcrawl_onset_mos"
## [4] "walk_onset_date" "walk_onset_mos" "walk_onset_src"
## [7] "Interview Comments"
loco_onset %>%
gather(key = milestone, value = age.mos, hkcrawl_onset_mos, walk_onset_mos) %>%
mutate(milestone = factor(milestone, labels = c("crawl", "walk"))) %>%
# arrange(desc(age.mos)) %>%
ggplot() +
aes(x = age.mos, y = id) +
geom_point(aes(shape = milestone, color = milestone)) +
geom_line(aes(group = id)) +
xlab("Age (months)") +
geom_rug(aes(x = age.mos, group = milestone, color = milestone),
sides = "b") +
play.theme
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_path).
https://nyu.databrary.org/volume/563
lego_demog <- databraryapi::summarize_demog(vol.id = lego.vol.id, return.df = TRUE)
## Warning in `[<-.factor`(`*tmp*`, unreported, value = "Unknown or not
## reported"): invalid factor level, NA generated
lego_demog %>%
ggplot(.) +
aes(y = age.weeks, x = group.name) +
geom_boxplot()
lego_demog %>%
ggplot(.) +
aes(participant.gender, fill = participant.gender) +
facet_grid(participant.race ~ group.name) +
geom_bar()
Let’s focus on NYU_019 in volume 254
https://nyu.databrary.org/slot/14514/69908000,73576800/asset/61054/download?inline=true
https://nyu.databrary.org/slot/14514/69908000,73623840/asset/61086/download?inline=true
vol_254_assets <- list_assets_in_session(254, 14514)
vol_254_assets %>%
select(., asset.name, asset.type, asset.id)
unlink('tmp', recursive = TRUE)
dv_info <- filter(vol_254_assets, asset.type == "Datavyu")
dv_dir <- download_datavyu(vol.id = 254, session.id = 14514, asset.id = dv_info$asset.id)
## Creating directory tmp/
## Successful HTML GET query.
## Content-type is application/vnd.datavyu
## File name unspecified. Generating unique name.
## Downloading Datavyu file as:
## tmp/254-14514-63635-2018-10-15-1859-55.opf
Now, let’s extract the components of the Datavyu file, including the code definitions.
extract_dv(in.dir = dv_dir, auto.write.over = TRUE)
## [1] "tmp"
dv_to_csv(dv.dir = dv_dir)
list.files('tmp')
## [1] "254-14514-63635-2018-10-15-1859-55.csv"
## [2] "254-14514-63635-2018-10-15-1859-55.opf"
## [3] "db"
## [4] "project"
Next, we can load the Datavyu file as a csv.
dv_fn <- list.files('tmp', '\\.csv$', full.names = TRUE)
vol_254_14514 <- read_csv(dv_fn)
## Parsed with column specification:
## cols(
## code = col_character(),
## onset = col_character(),
## offset = col_character(),
## code.value = col_character()
## )
names(vol_254_14514)
## [1] "code" "onset" "offset" "code.value"
vol_254_14514
extract_dv_code_defs(in.dir = 'tmp')
## [1] "tmp/254-14514-63635-2018-10-15-1859-55-code-defs.csv"
code_fn <- list.files('tmp', '-code-defs\\.csv$', full.names = TRUE)
vol_254_14514_codes <- read_csv(code_fn)
## Parsed with column specification:
## cols(
## code = col_character(),
## code_vals = col_character(),
## code_type = col_character()
## )
vol_254_14514_codes
Let’s look at the baby’s locomotion (babyloc
) code.
(babyloc <- filter(vol_254_14514, code == 'babyloc'))
# Surrounding assignment w/ parentheses also prints out the assigned value.)
Let’s look at the code values.
unique(babyloc$code.value)
## [1] "l" "h" "."
Focus on holds h
because there are fewer of them.
# Select segments for this code
(babyloc_segs <- dplyr::filter(babyloc, code.value == 'h'))
Let’s pick the 1st segment just for fun.
# What is the segment range for the video?
this_seg <- 1
seg_range <- get_asset_segment_range(vol.id = 254, session.id = 14514, asset.id = 61054)
onset_ms <- HHMMSSmmm_to_ms(babyloc_segs$onset[this_seg]) + seg_range[1]
offset_ms <- HHMMSSmmm_to_ms(babyloc_segs$offset[this_seg]) + seg_range[1]
# babyloc$onset_ms <- lapply(babyloc$onset, HHMMSSmmm_to_ms) + seg_range[1]
# babyloc$offset_ms <- lapply(babyloc$offset, HHMMSSmmm_to_ms) + seg_range[1]
# babyloc
# Add cols to babyloc_segs
# babyloc_segs <- babyloc_segs %>%
# mutate(., onset_ms = HHMMSSmmm_to_ms(babyloc_segs$onset) + seg_range[1],
# offset_ms = HHMMSSmmm_to_ms(babyloc_segs$onset) + seg_range[1])
selected_seg <- paste0(onset_ms, ",", offset_ms)
message(paste0('Picking segment ', selected_seg, '.'))
## Picking segment 71408031,71408535.
Now download the video.
download_video(254, 14514, 61054, out.dir = 'tmp', segment.id = selected_seg)
## [1] "tmp"
And see if we can display video or if needed, go to it on Databrary.
v <- list.files('tmp', '\\.mp4$', full.names = TRUE)
render_video_in_html(v[1], params = ' controls width=600px')