1 PLAY pilot summary data

https://nyu.databrary.org/volume/444

https://nyu.databrary.org/volume/254

1.1 Demographic summary

pilot_demog <- summarize_demog(vol.id = pilot.vol.id, return.df = TRUE)

pilot_demog %>% 
  ggplot(.) +
  aes(x = participant.gestational.age, y = participant.birth.weight, color = participant.race) +
  geom_point()
## Warning: Removed 1 rows containing missing values (geom_point).

pilot_demog %>%
  ggplot(.) +
  aes(x = age.weeks, fill = participant.gender) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

1.2 Parent-report variables

survey.session.id <- 26295
list_assets_in_session(vol.id = pilot.vol.id, session.id = survey.session.id) %>%
  select(asset.name, asset.type, asset.id)
(csv_fl <- list.files('csv', '\\.csv$', full.names = TRUE)) 
## [1] "csv/databrary444-Adolph-Tamis-LeMonda-Gilmore-PLAY-pilot-data-materials-26295-Parent_report_data-116787-language-exposure.csv"
## [2] "csv/databrary444-Adolph-Tamis-LeMonda-Gilmore-PLAY-pilot-data-materials-26295-Parent_report_data-116790-family.csv"           
## [3] "csv/databrary444-Adolph-Tamis-LeMonda-Gilmore-PLAY-pilot-data-materials-26295-Parent_report_data-117092-locomotion.csv"
lang_exp <- read_csv(csv_fl[1])
## Parsed with column specification:
## cols(
##   id = col_integer(),
##   language = col_character(),
##   exposure_context = col_character()
## )
names(lang_exp)
## [1] "id"               "language"         "exposure_context"
play.palette <- scale_fill_manual(values=c("blue2", "firebrick2", "chartreuse2", "darkorchid2"))

play.theme <-   
  theme_classic() +
  theme(legend.position = "bottom",
        legend.title = element_blank(),
        legend.text = element_text(size = rel(1.2)),
        axis.title = element_text(size = rel(1.5), face ="bold"),
        axis.text.x = element_text(size = rel(1.2)),
        axis.text.y = element_text(size = rel(1.2)),
        axis.line = element_blank(),
        axis.title.y = element_blank())

lang_exp %>%
  ggplot(.) +
  aes(x = language) +
  facet_grid(exposure_context ~ .) +
  geom_histogram(stat='count') +
  play.theme
## Warning: Ignoring unknown parameters: binwidth, bins, pad

loco_onset <- read_csv(csv_fl[3])
## Parsed with column specification:
## cols(
##   id = col_integer(),
##   hkcrawl_onset_date = col_character(),
##   hkcrawl_onset_mos = col_double(),
##   walk_onset_date = col_character(),
##   walk_onset_mos = col_double(),
##   walk_onset_src = col_character(),
##   `Interview Comments` = col_character()
## )
names(loco_onset)
## [1] "id"                 "hkcrawl_onset_date" "hkcrawl_onset_mos" 
## [4] "walk_onset_date"    "walk_onset_mos"     "walk_onset_src"    
## [7] "Interview Comments"
loco_onset %>%
  gather(key = milestone, value = age.mos, hkcrawl_onset_mos, walk_onset_mos) %>%
  mutate(milestone = factor(milestone, labels = c("crawl", "walk"))) %>%
  # arrange(desc(age.mos)) %>%
  ggplot() +
  aes(x = age.mos, y = id) +
  geom_point(aes(shape = milestone, color = milestone)) +
  geom_line(aes(group = id)) +
  xlab("Age (months)") +
  geom_rug(aes(x = age.mos, group = milestone, color = milestone),
           sides = "b") +
  play.theme
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_path).

2 LEGO

https://nyu.databrary.org/volume/563

lego_demog <- databraryapi::summarize_demog(vol.id = lego.vol.id, return.df = TRUE)
## Warning in `[<-.factor`(`*tmp*`, unreported, value = "Unknown or not
## reported"): invalid factor level, NA generated
lego_demog %>%
  ggplot(.) +
  aes(y = age.weeks, x = group.name) +
  geom_boxplot()

lego_demog %>%
  ggplot(.) +
  aes(participant.gender, fill = participant.gender) +
  facet_grid(participant.race ~ group.name) +
  geom_bar()

3 Individual-specific reports from PLAY

Let’s focus on NYU_019 in volume 254

3.2 Assets in session

vol_254_assets <- list_assets_in_session(254, 14514)
vol_254_assets %>%
  select(., asset.name, asset.type, asset.id)

3.3 Process Datavyu

3.3.1 Download

unlink('tmp', recursive = TRUE)
dv_info <- filter(vol_254_assets, asset.type == "Datavyu")
dv_dir <- download_datavyu(vol.id = 254, session.id = 14514, asset.id = dv_info$asset.id)
## Creating directory tmp/
## Successful HTML GET query.
## Content-type is application/vnd.datavyu
## File name unspecified. Generating unique name.
## Downloading Datavyu file as: 
## tmp/254-14514-63635-2018-10-15-1859-55.opf

3.3.2 Extract codes and code definitions

Now, let’s extract the components of the Datavyu file, including the code definitions.

extract_dv(in.dir = dv_dir, auto.write.over = TRUE)
## [1] "tmp"
dv_to_csv(dv.dir = dv_dir)
list.files('tmp')
## [1] "254-14514-63635-2018-10-15-1859-55.csv"
## [2] "254-14514-63635-2018-10-15-1859-55.opf"
## [3] "db"                                    
## [4] "project"

Next, we can load the Datavyu file as a csv.

dv_fn <- list.files('tmp', '\\.csv$', full.names = TRUE)
vol_254_14514 <- read_csv(dv_fn)
## Parsed with column specification:
## cols(
##   code = col_character(),
##   onset = col_character(),
##   offset = col_character(),
##   code.value = col_character()
## )
names(vol_254_14514)
## [1] "code"       "onset"      "offset"     "code.value"
vol_254_14514

3.3.3 Extract code definitions

extract_dv_code_defs(in.dir = 'tmp')
## [1] "tmp/254-14514-63635-2018-10-15-1859-55-code-defs.csv"
code_fn <- list.files('tmp', '-code-defs\\.csv$', full.names = TRUE)
vol_254_14514_codes <- read_csv(code_fn)
## Parsed with column specification:
## cols(
##   code = col_character(),
##   code_vals = col_character(),
##   code_type = col_character()
## )
vol_254_14514_codes

3.3.4 Visualize clips from specific codes

Let’s look at the baby’s locomotion (babyloc) code.

(babyloc <- filter(vol_254_14514, code == 'babyloc'))
# Surrounding assignment w/ parentheses also prints out the assigned value.)

Let’s look at the code values.

unique(babyloc$code.value) 
## [1] "l" "h" "."

Focus on holds h because there are fewer of them.

# Select segments for this code
(babyloc_segs <- dplyr::filter(babyloc, code.value == 'h'))

3.3.5 Download video segment corresponding to one or more of these codes

Let’s pick the 1st segment just for fun.

# What is the segment range for the video?
this_seg <- 1
seg_range <- get_asset_segment_range(vol.id = 254, session.id = 14514, asset.id = 61054)

onset_ms <- HHMMSSmmm_to_ms(babyloc_segs$onset[this_seg]) + seg_range[1]
offset_ms <- HHMMSSmmm_to_ms(babyloc_segs$offset[this_seg]) + seg_range[1]

# babyloc$onset_ms <- lapply(babyloc$onset, HHMMSSmmm_to_ms) + seg_range[1]
# babyloc$offset_ms <- lapply(babyloc$offset, HHMMSSmmm_to_ms) + seg_range[1]
# babyloc
# Add cols to babyloc_segs
# babyloc_segs <- babyloc_segs %>%
#   mutate(., onset_ms = HHMMSSmmm_to_ms(babyloc_segs$onset) + seg_range[1],
#          offset_ms = HHMMSSmmm_to_ms(babyloc_segs$onset) + seg_range[1])

selected_seg <- paste0(onset_ms, ",", offset_ms)
message(paste0('Picking segment ', selected_seg, '.'))
## Picking segment 71408031,71408535.

Now download the video.

download_video(254, 14514, 61054, out.dir = 'tmp', segment.id = selected_seg)
## [1] "tmp"

And see if we can display video or if needed, go to it on Databrary.

v <- list.files('tmp', '\\.mp4$', full.names = TRUE)
render_video_in_html(v[1], params = ' controls width=600px')

3.3.6 Another segment

this_seg <- 2
onset_ms <- HHMMSSmmm_to_ms(babyloc_segs$onset[this_seg]) + seg_range[1]
offset_ms <- HHMMSSmmm_to_ms(babyloc_segs$offset[this_seg]) + seg_range[1]
selected_seg <- paste0(onset_ms, ",", offset_ms)
download_video(254, 14514, 61054, out.dir = 'tmp', segment.id = selected_seg)

[1] “tmp”

v <- list.files('tmp', '\\.mp4$', full.names = TRUE)
render_video_in_html(v[this_seg], params = ' controls width=600px')