1 Purpose

This document summarizes the analyses conducted on the Play & Learning Across a Year (PLAY) project pilot data. The data are stored and shared at https://nyu.databrary.org/volume/444.

2 Set-up

# Set the default `echo` option for all knitr chunks to TRUE so that each
# chunk's source code is shown alongside its output.
knitr::opts_chunk$set(echo = TRUE)

# Load databraryapi package from github.com/PLAY-behaviorome/databraryapi
if (!require(databraryapi)) {
  devtools::install_github("PLAY-behaviorome/databraryapi")
  library(databraryapi)
}
## Loading required package: databraryapi
## Welcome to the databraryapi package
# Load other libraries
library(tidyverse)
## ── Attaching packages ─────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0       ✔ purrr   0.3.0  
## ✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.2       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## ── Conflicts ────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)

## Variables for this particular volume and session.
# Databrary volume that holds the PLAY pilot data.
play_vol_id <- 444
# Name of the session whose files are the parent report (survey) data.
survey_data_session_name <- "Parent report data"

The Databrary volume is 444.

We need to log in to Databrary to have access to these data. If you have logged in before on the machine where you are running this report, you will be prompted to enter only your Databrary ID (email address). If you have not logged in before, you will be prompted to enter both your Databrary ID (email address) and your Databrary password.

# Authenticate with Databrary. The account ID is read from `params$db_account`
# (presumably a parameter supplied when the report is rendered -- TODO confirm).
databraryapi::login_db(params$db_account)
## Login successful.
## [1] TRUE

3 Gather data

There are several data sources we need to gather and clean. Participant demographic data are stored in the Databrary spreadsheet. Parent report (survey) data are stored in the session with session_name == “Parent report data”. That name was stored in the variable `survey_data_session_name` during set-up for later use.

3.1 Participant demographics

We’ll first gather the participant demographic data stored in the Databrary spreadsheet.

# Download the volume's session spreadsheet (one row per session) as a data
# frame.
play_demog_raw <- databraryapi::download_session_csv(vol_id = play_vol_id)

# A NULL result means nothing was downloaded; halt here rather than let the
# later steps fail with a less helpful message.
if (is.null(play_demog_raw)) {
  stop("No data downloaded from Databrary. Are you logged in?")
}

# Keep only the rows whose release level marks an actual session, then pick the
# spreadsheet columns of interest. The dot-separated column names are renamed
# to underscore-separated ones as they are selected.
play_demog <- play_demog_raw %>%
  filter(session_release %in% c('EXCERPTS', 'PRIVATE', 'SHARED')) %>%
  select(
    session_id,
    session_release,
    participant_gender = participant.gender,
    participant_race = participant.race,
    participant_ethnicity = participant.ethnicity,
    participant_gestational_age = participant.gestational.age,
    participant_birth_weight = participant.birth.weight,
    participant_disability = participant.disability,
    participant_language = participant.language,
    group_name = group.name,
    context_language = context.language
  )

3.1.1 Structure of demographic data file

# Show the column names, types, and leading values of the cleaned demographics.
str(play_demog)
## 'data.frame':    20 obs. of  11 variables:
##  $ session_id                 : int  18801 18803 18805 18806 18807 18808 18810 18811 18813 18814 ...
##  $ session_release            : Factor w/ 4 levels "","EXCERPTS",..: 2 2 3 2 2 2 2 2 2 2 ...
##  $ participant_gender         : Factor w/ 3 levels "","Female","Male": 3 3 2 3 3 2 2 2 3 3 ...
##  $ participant_race           : Factor w/ 5 levels "","Asian","More than one",..: 4 5 3 5 5 5 5 5 5 5 ...
##  $ participant_ethnicity      : Factor w/ 4 levels "","Hispanic or Latino",..: 4 3 2 3 3 3 3 2 3 3 ...
##  $ participant_gestational_age: num  NA 40 39.5 40 40 38 40 40 41 41 ...
##  $ participant_birth_weight   : num  NA 8.25 8 7 8.38 ...
##  $ participant_disability     : Factor w/ 4 levels "","Acid Reflux",..: 4 3 3 3 3 3 3 3 3 3 ...
##  $ participant_language       : Factor w/ 9 levels "","English","English, German",..: 9 6 9 2 2 2 7 4 3 2 ...
##  $ group_name                 : Factor w/ 4 levels "","12 mos","18 mos",..: 3 3 3 3 4 3 3 2 3 3 ...
##  $ context_language           : Factor w/ 3 levels "","English","English, Spanish": 2 2 2 2 2 2 2 2 2 2 ...

3.2 Parent report data

First, we need to list the data files available in the session where the parent report data are stored.

# Look up the ID of the session that holds the parent report (survey) files.
# `pull()` extracts the column as a vector, so the numeric conversion works on
# a vector rather than on a one-column data frame.
parent_rpt_session_id <- play_demog_raw %>%
  filter(session_name == survey_data_session_name) %>%
  pull(session_id) %>%
  as.numeric()

# Exactly one session should match. Fail with a clear message otherwise,
# because the downstream asset listing expects a single session ID.
if (length(parent_rpt_session_id) != 1) {
  stop("Expected exactly one session named '", survey_data_session_name,
       "' but found ", length(parent_rpt_session_id), ".")
}

Now, we list the data files in that session.

# List every file (asset) stored in the parent report session.
surveys <- list_assets_in_session(
  session_id = parent_rpt_session_id,
  vol_id = play_vol_id
)
# Only the asset ID and file name are needed to download the surveys.
surveys <- select(surveys, asset_id, name)
# Render the listing as a table in the report.
knitr::kable(surveys)
asset_id name
159177 childcare
117092 locomotion
116791 child-birth
116790 family
116789 sleep
116787 language-exposure

Let’s download each of these separately.

# Return the asset ID of the survey file called `survey_name` in `surveys`.
# Looking files up by name keeps each data frame tied to the right survey even
# if the order of the asset listing changes.
survey_asset_id <- function(survey_name) {
  asset_id <- surveys$asset_id[which(surveys$name == survey_name)]
  if (length(asset_id) != 1) {
    stop("Expected exactly one asset named '", survey_name, "' but found ",
         length(asset_id), ".")
  }
  asset_id
}

childcare_df <- read_csv_data_as_df(session_id = parent_rpt_session_id,
                                    asset_id = survey_asset_id("childcare"))
## No encoding supplied: defaulting to UTF-8.
loco_df <- read_csv_data_as_df(session_id = parent_rpt_session_id,
                               asset_id = survey_asset_id("locomotion"))
## No encoding supplied: defaulting to UTF-8.
# NOTE(review): the "EOF within quoted string" warning below, together with
# this file yielding 11 rows where the other per-child files yield 17, suggests
# the CSV is being truncated at an unbalanced quote -- check the source file.
childbirth_df <- read_csv_data_as_df(session_id = parent_rpt_session_id,
                                     asset_id = survey_asset_id("child-birth"))
## No encoding supplied: defaulting to UTF-8.
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : EOF within quoted string
family_df <- read_csv_data_as_df(session_id = parent_rpt_session_id,
                                 asset_id = survey_asset_id("family"))
## No encoding supplied: defaulting to UTF-8.
sleep_df <- read_csv_data_as_df(session_id = parent_rpt_session_id,
                                asset_id = survey_asset_id("sleep"))
## No encoding supplied: defaulting to UTF-8.
lang_exp_df <- read_csv_data_as_df(
  session_id = parent_rpt_session_id,
  asset_id = survey_asset_id("language-exposure")
)
## No encoding supplied: defaulting to UTF-8.

3.2.1 Examining structure of parent report data files

3.2.1.1 Locomotor milestones

# Show the column names, types, and leading values of the locomotion survey.
str(loco_df)
## 'data.frame':    17 obs. of  7 variables:
##  $ id                : int  2 3 5 6 7 9 10 11 12 13 ...
##  $ hkcrawl_onset_date: chr  "8/13/15" "7/31/15" "5/2/15" "3/10/16" ...
##  $ hkcrawl_onset_mos : num  7.53 7.03 8.48 12.59 7.2 ...
##  $ walk_onset_date   : chr  "12/5/15" "11/30/15" "8/4/15" "7/9/16" ...
##  $ walk_onset_mos    : num  11.3 11.1 11.6 16.6 11.7 ...
##  $ walk_onset_src    : chr  "phone" NA "phone" "babybook" ...
##  $ Interview.Comments: chr  "" "" "" "" ...

3.2.1.2 Childbirth and health

# Show the column names, types, and leading values of the child-birth survey.
str(childbirth_df)
## 'data.frame':    11 obs. of  11 variables:
##  $ id                       : int  2 3 5 6 7 9 10 11 12 13 ...
##  $ bdate_child              : chr  "12/27/14" "12/29/14" "8/17/14" "2/21/15" ...
##  $ born_duedate             : chr  "yes" "yes" "yes" "yes" ...
##  $ child_age                : num  18.3 18.3 24.3 18.2 17.8 ...
##  $ agegroup                 : int  18 18 24 18 18 18 18 18 24 12 ...
##  $ birth_weight_pounds      : int  8 8 8 8 7 6 8 6 6 8 ...
##  $ birth_weight_ounces      : num  14 0.5 6 2 4 9 3 12 0 9 ...
##  $ newborn_complications_yn : chr  "no" "no" "no" "no" ...
##  $ newborn_complications    : chr  NA NA NA NA ...
##  $ illness_diagnosis_yn     : chr  "no" "no" "no" "no" ...
##  $ illness_diagnosis_details: chr  NA NA NA NA ...

3.2.1.3 Family structure

# Show the column names, types, and leading values of the family survey.
str(family_df)
## 'data.frame':    17 obs. of  21 variables:
##  $ id                : int  2 3 5 6 7 9 10 11 12 13 ...
##  $ mom_bdate         : chr  "12/23/75" "1/9/81" "6/30/69" "1/23/82" ...
##  $ mom_race          : chr  "5" "7" "5" "5" ...
##  $ mom_ethnicity     : chr  "no" "yes" "no" "no" ...
##  $ caregiver_country : int  4 4 1 1 1 4 1 1 1 1 ...
##  $ mom_education     : int  22 19 23 19 22 21 21 17 22 22 ...
##  $ mom_working       : chr  "ft" "ft" "pt" "ft" ...
##  $ mom_occupation    : chr  "Assistant professor" NA "Lawyer" "Editor for academic journal" ...
##  $ mom_training      : chr  "no" "no" "no" "yes" ...
##  $ caregiver_us_entry: chr  "1/1/87" "1/1/89" NA NA ...
##  $ partner_cohabitate: chr  "yes" "yes" "yes" "yes" ...
##  $ partner_race      : chr  "5" "7" "5" "5" ...
##  $ partner_ethnicity : chr  "no" "yes" "no" "no" ...
##  $ partner_education : int  21 21 21 19 22 NA 21 17 23 21 ...
##  $ partner_working   : chr  "ft" "ft" "ft" "ft" ...
##  $ partner_training  : chr  "no" "yes" "no" "no" ...
##  $ partner_occupation: chr  "Finance, Asset Manager" NA "Manager" "Sales manager" ...
##  $ partner_dob       : chr  NA "9/14/82" "3/29/80" "10/24/85" ...
##  $ family_sib_dob    : chr  NA NA NA NA ...
##  $ brothers          : int  NA NA NA NA NA NA NA NA 0 0 ...
##  $ siblings          : int  NA NA NA NA NA NA NA NA 0 0 ...

3.2.1.4 Sleep hygiene

# Show the column names, types, and leading values of the sleep survey.
str(sleep_df)
## 'data.frame':    17 obs. of  5 variables:
##  $ id          : int  2 3 5 6 7 9 10 11 12 13 ...
##  $ sleep.time  : chr  "20:30:00" "20:45:00" "19:30:00" "19:45:00" ...
##  $ wake.time   : chr  "07:00:00" "07:20:00" "06:30:00" "07:15:00" ...
##  $ naps.hours  : num  2 2 2.5 2 2 2 2.5 1.5 2 2.5 ...
##  $ sleeping.loc: chr  "crib_separate" "crib_separate" "toddlerbed" "crib_separate" ...

3.2.1.5 Childcare arrangements

# Show the column names, types, and leading values of the childcare survey.
str(childcare_df)
## 'data.frame':    17 obs. of  5 variables:
##  $ id                      : int  2 3 5 6 7 9 10 11 12 13 ...
##  $ childcare_alt           : chr  "person" "center" "both" "center" ...
##  $ childcare_hours         : int  60 40 52 50 NA 50 29 21 35 20 ...
##  $ childcare_other_children: int  1 11 15 9 NA 0 11 3 9 0 ...
##  $ childcare_init_age      : num  0 8 12 3 NA 4 1.5 14 1 6 ...

3.2.1.6 Language exposure

# Show the column names, types, and leading values of the language exposure
# survey.
str(lang_exp_df)
## 'data.frame':    66 obs. of  3 variables:
##  $ id              : int  2 2 3 5 6 9 10 11 11 12 ...
##  $ language        : chr  "Russian" "Armenian" "English" "English" ...
##  $ exposure_context: chr  "childcare" "childcare" "childcare" "childcare" ...

4 Plot data

Let’s set some plot thematic elements.

# Four-colour manual fill scale for PLAY figures.
play.palette <- scale_fill_manual(
  values = c("blue2", "firebrick2", "chartreuse2", "darkorchid2")
)

# Shared plot theme: theme_classic() with the axis lines removed, larger axis
# and legend text, bold axis titles, and an untitled legend placed below the
# plot. The y-axis title is blanked, so plots that add this theme show no
# y-axis title.
play.theme <- theme_classic() +
  theme(
    axis.line = element_blank(),
    axis.title = element_text(size = rel(1.5), face = "bold"),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = rel(1.2)),
    axis.text.y = element_text(size = rel(1.2)),
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = rel(1.2))
  )

4.1 Participant demographics

# Scatter plot of birth weight against gestational age, one point per
# session, coloured by the participant's reported race.
ggplot(
  play_demog,
  aes(
    x = participant_gestational_age,
    y = participant_birth_weight,
    color = participant_race
  )
) +
  geom_point()
## Warning: Removed 1 rows containing missing values (geom_point).

4.2 Parent surveys

These are some illustrative plots.

4.2.1 Sleep hygiene

We have to convert the sleep.time and wake.time variables so that they can be plotted. It looks like the lubridate package is a good option.

# sleep <- lubridate::ymd_hms("2001-01-01 20:30:00")
# awake <- lubridate::ymd_hms("2001-01-02 07:00:00")
# (night_hrs <- awake-sleep)

# Helper functions for conversion
#
# Clock times are anchored to arbitrary reference dates so that they can be
# subtracted: bedtimes fall on 2001-01-01 and wake times on 2001-01-02.

# Convert an "HH:MM:SS" bedtime string to a date-time on the reference night.
sleep_time <- function(hhmmss) {
  lubridate::ymd_hms(paste0("2001-01-01 ", hhmmss))
}

# Convert an "HH:MM:SS" wake time string to a date-time on the following day.
wake_time <- function(hhmmss) {
  # One date later than sleep time
  lubridate::ymd_hms(paste0("2001-01-02 ", hhmmss))
}

# Night-time sleep duration, always expressed in hours.
#
# sleep_tm: "HH:MM:SS" bedtime string(s).
# awake_tm: "HH:MM:SS" wake time string(s).
# Returns a difftime vector with units "hours".
time_asleep <- function(sleep_tm, awake_tm) {
  # Fix the units explicitly; plain subtraction lets R choose them from the
  # size of the differences.
  elapsed <- difftime(wake_time(awake_tm), sleep_time(sleep_tm),
                      units = "hours")
  # A bedtime after midnight sits a full day too early on the reference date,
  # so wrap the result into a single 24-hour span.
  as.difftime(as.numeric(elapsed) %% 24, units = "hours")
}

sleep_df <- sleep_df %>%
  mutate(sleep_tm = sleep_time(sleep.time),
         wake_tm = wake_time(wake.time),
         nightsleep_hrs = time_asleep(sleep.time, wake.time))

Now, we can plot.

# Histogram (10 bins) of night-time sleep duration across participants.
ggplot(sleep_df, aes(x = nightsleep_hrs)) +
  geom_histogram(bins = 10) +
  play.theme
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.

4.2.2 Childcare

# Histogram (30 bins) of childcare hours, with bars filled by the type of
# childcare arrangement.
ggplot(childcare_df, aes(x = childcare_hours, fill = childcare_alt)) +
  geom_histogram(bins = 30) +
  play.theme
## Warning: Removed 2 rows containing non-finite values (stat_bin).

4.2.3 Language exposure

# Count of reports per language, with one panel per exposure context.
# `language` is categorical, so geom_bar() (which counts rows per category) is
# the appropriate geom; geom_histogram(stat = "count") produced the same bars
# but raised an "Ignoring unknown parameters" warning.
lang_exp_df %>%
  ggplot(aes(x = language)) +
  geom_bar() +
  facet_grid(exposure_context ~ .) +
  play.theme

4.2.4 Locomotion onset

# Crawling and walking onset ages for each participant: one row per
# participant ID, with a line joining that participant's two milestones and a
# rug along the bottom showing the distribution of onset ages.
loco_df %>%
  # Stack the two onset-age columns into milestone / age.mos pairs.
  gather(key = milestone, value = age.mos,
         hkcrawl_onset_mos, walk_onset_mos) %>%
  # Name the levels explicitly so that each label is tied to its column rather
  # than to the alphabetical order of the column names.
  mutate(milestone = factor(milestone,
                            levels = c("hkcrawl_onset_mos", "walk_onset_mos"),
                            labels = c("crawl", "walk"))) %>%
  ggplot() +
  aes(x = age.mos, y = id) +
  geom_point(aes(shape = milestone, color = milestone)) +
  geom_line(aes(group = id)) +
  xlab("Age (months)") +
  ylab("Participant ID") +
  geom_rug(aes(x = age.mos, group = milestone, color = milestone),
           sides = "b") +
  play.theme +
  # play.theme blanks the y-axis title, which would hide the "Participant ID"
  # label set above; restore it for this plot only.
  theme(axis.title.y = element_text(angle = 90))
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_path).