diff --git a/facebook/qsf-tools/append-old-changelog-umd.R b/facebook/qsf-tools/append-old-changelog-umd.R index 675838a8b..8b53ef08a 100644 --- a/facebook/qsf-tools/append-old-changelog-umd.R +++ b/facebook/qsf-tools/append-old-changelog-umd.R @@ -26,10 +26,10 @@ add_rationales_from_old_changelog <- function(path_to_changelog, path_to_old_cha new_version = col_double(), old_version = col_double() )) %>% - select(new_version, old_version, variable_name, change_type,eu_version, notes) + select(new_version, old_version, variable_name, change_type, eu_noneu, notes) changelog <- changelog %>% select(-notes) %>% - left_join(old_changelog, by=c("new_version", "old_version", "variable_name", "change_type","eu_version")) + left_join(old_changelog, by=c("new_version", "old_version", "variable_name", "change_type", "eu_noneu")) write_excel_csv(changelog, path_to_changelog, quote="needed") diff --git a/facebook/qsf-tools/generate-changelog.R b/facebook/qsf-tools/generate-changelog.R index 3733fee01..da2ca7a55 100644 --- a/facebook/qsf-tools/generate-changelog.R +++ b/facebook/qsf-tools/generate-changelog.R @@ -11,6 +11,8 @@ suppressPackageStartupMessages({ library(tidyverse) + library(jsonlite) + library(stringr) }) # "old" = new @@ -46,7 +48,7 @@ WAVE_COMPARE_MAP <- list( DIFF_COLS <- c( "question", - "matrix_subquestion", + "subquestion", "response_options", "display_logic", "response_option_randomization", @@ -59,9 +61,9 @@ CHANGE_TYPE_MAP <- c( question = "Question wording changed", display_logic = "Display logic changed", response_options = "Answer choices changed", - matrix_subquestion = "Matrix subquestion text changed", + subquestion = "Matrix subquestion text changed", response_option_randomization = "Answer choice order changed", - respondent_group = "Respondent group changed" + respondent_group = "Display logic changed" ) @@ -151,33 +153,30 @@ generate_changelog <- function(path_to_codebook, select(-x_exists, -y_exists) combos <- added_items %>% - filter(question_type == "Matrix" | !is.na(new_matrix_base_name) | !is.na(new_matrix_subquestion)) %>% - distinct(old_version, new_matrix_base_name) + filter(question_type == "Matrix" | !is.na(new_originating_question) | !is.na(new_subquestion)) %>% + distinct(old_version, new_originating_question) for (i in seq_len(nrow(combos))) { wave = combos[i,] %>% pull(old_version) - base_name = combos[i,] %>% pull(new_matrix_base_name) + base_name = combos[i,] %>% pull(new_originating_question) tmp <- added_items %>% filter( - old_version == wave, new_matrix_base_name == base_name + old_version == wave, new_originating_question == base_name ) added_items <- anti_join(added_items, tmp) - if (nrow(filter(codebook_raw, version == wave, matrix_base_name == base_name)) == 0) { + if (nrow(filter(codebook_raw, version == wave, originating_question == base_name)) == 0) { # Dedup subqs so only report base question once tmp <- tmp %>% - group_by(old_matrix_base_name, new_matrix_base_name, new_version, old_version) %>% + group_by(old_originating_question, new_originating_question, new_version, old_version) %>% mutate( - variable_name = new_matrix_base_name, - old_matrix_subquestion = NA, - new_matrix_subquestion = "Differ by subquestion", - old_response_options = case_when( - length(unique(old_response_options)) == 1 ~ old_response_options, - TRUE ~ "Differ by subquestion" - ), + old_subquestion = NA, + new_subquestion = collapse_subq_elements(variable_name, new_subquestion, base_name), + old_response_options = NA, new_response_options = case_when( length(unique(new_response_options)) == 1 ~ new_response_options, - TRUE ~ "Differ by subquestion" - ) + TRUE ~ rep(collapse_subq_elements(variable_name, new_response_options, base_name), length(new_response_options)) + ), + variable_name = new_originating_question ) %>% slice_head() %>% ungroup() @@ -205,33 +204,30 @@ generate_changelog <- function(path_to_codebook, select(-x_exists, -y_exists) combos <- removed_items %>% - filter(question_type == "Matrix" | !is.na(old_matrix_base_name) | !is.na(old_matrix_subquestion)) %>% - distinct(new_version, old_matrix_base_name) + filter(question_type == "Matrix" | !is.na(old_originating_question) | !is.na(old_subquestion)) %>% + distinct(new_version, old_originating_question) for (i in seq_len(nrow(combos))) { wave = combos[i,] %>% pull(new_version) - base_name = combos[i,] %>% pull(old_matrix_base_name) + base_name = combos[i,] %>% pull(old_originating_question) tmp <- removed_items %>% filter( - new_version == wave, old_matrix_base_name == base_name + new_version == wave, old_originating_question == base_name ) removed_items <- anti_join(removed_items, tmp) - if (nrow(filter(codebook_raw, version == wave, matrix_base_name == base_name)) == 0) { + if (nrow(filter(codebook_raw, version == wave, originating_question == base_name)) == 0) { # Dedup subqs so only report base question once tmp <- tmp %>% - group_by(old_matrix_base_name, new_matrix_base_name, new_version, old_version) %>% + group_by(old_originating_question, new_originating_question, new_version, old_version) %>% mutate( - variable_name = old_matrix_base_name, - old_matrix_subquestion = "Differ by subquestion", - new_matrix_subquestion = NA, + old_subquestion = collapse_subq_elements(variable_name, old_subquestion, base_name), + new_subquestion = NA, old_response_options = case_when( length(unique(old_response_options)) == 1 ~ old_response_options, - TRUE ~ "Differ by subquestion" + TRUE ~ rep(collapse_subq_elements(variable_name, old_response_options, base_name), length(old_response_options)) ), - new_response_options = case_when( - length(unique(new_response_options)) == 1 ~ new_response_options, - TRUE ~ "Differ by subquestion" - ) + new_response_options = NA, + variable_name = old_originating_question ) %>% slice_head() %>% ungroup() @@ -270,11 +266,11 @@ generate_changelog <- function(path_to_codebook, ## Don't report all matrix subquestions when the change is shared between all ## of them, just report the base item. - # Group by matrix_base_name, change_type, and wave, as long as the change_type is relevant and matrix_base_name is not NA. + # Group by originating_question, change_type, and wave, as long as the change_type is relevant and originating_question is not NA. # Keep only one obs for each group. - # Set var name in kept obs to matrix_base_name for generality and to be able to join rationales on. + # Set var name in kept obs to originating_question for generality and to be able to join rationales on. combos <- changelog %>% - filter((question_type == "Matrix" | !is.na(old_matrix_base_name) | !is.na(old_matrix_subquestion)) & + filter((question_type == "Matrix" | !is.na(old_originating_question) | !is.na(old_subquestion)) & change_type %in% c( "Question wording changed", "Display logic changed", @@ -283,7 +279,7 @@ generate_changelog <- function(path_to_codebook, "Respondent group changed" ) ) %>% - distinct(new_version, old_version, new_matrix_base_name, old_matrix_base_name, change_type) + distinct(new_version, old_version, new_originating_question, old_originating_question, change_type) SPECIAL_HANDLING <- list( "Answer choices changed" = list("new_response_options", "old_response_options"), @@ -292,16 +288,16 @@ generate_changelog <- function(path_to_codebook, for (i in seq_len(nrow(combos))) { new_v <- combos[i,] %>% pull(new_version) old_v <- combos[i,] %>% pull(old_version) - new_base <- combos[i,] %>% pull(new_matrix_base_name) - old_base <- combos[i,] %>% pull(old_matrix_base_name) + new_base <- combos[i,] %>% pull(new_originating_question) + old_base <- combos[i,] %>% pull(old_originating_question) change <- combos[i,] %>% pull(change_type) tmp <- changelog %>% filter( new_version == new_v, old_version == old_v, - new_matrix_base_name == new_base, - old_matrix_base_name == old_base, + new_originating_question == new_base, + old_originating_question == old_base, change_type == change ) changelog <- anti_join(changelog, tmp) @@ -316,8 +312,8 @@ generate_changelog <- function(path_to_codebook, length(unique(tmp[[new_col]])) == 1 && length(unique(tmp[[old_col]])) == 1 && ( - nrow(tmp) == codebook_raw %>% filter(version == old_v, matrix_base_name == old_base) %>% nrow() || - nrow(tmp) == codebook_raw %>% filter(version == new_v, matrix_base_name == new_base) %>% nrow() + nrow(tmp) == codebook_raw %>% filter(version == old_v, originating_question == old_base) %>% nrow() || + nrow(tmp) == codebook_raw %>% filter(version == new_v, originating_question == new_base) %>% nrow() ) ) { combine_flag <- TRUE @@ -331,11 +327,11 @@ generate_changelog <- function(path_to_codebook, slice_head() %>% mutate( variable_name = case_when( - old_matrix_base_name != new_matrix_base_name ~ paste(old_matrix_base_name, new_matrix_base_name, sep="/"), - TRUE ~ old_matrix_base_name + old_originating_question != new_originating_question ~ paste(old_originating_question, new_originating_question, sep="/"), + TRUE ~ old_originating_question ), - old_matrix_subquestion = NA, - new_matrix_subquestion = NA + old_subquestion = NA, + new_subquestion = NA ) } @@ -365,8 +361,8 @@ generate_changelog <- function(path_to_codebook, rename( new_question_text = new_question, old_question_text = old_question, - new_matrix_subquestion_text = new_matrix_subquestion, - old_matrix_subquestion_text = old_matrix_subquestion + new_subquestion_text = new_subquestion, + old_subquestion_text = old_subquestion ) %>% select( new_version, @@ -374,16 +370,16 @@ generate_changelog <- function(path_to_codebook, variable_name, description, change_type, - new_matrix_base_name, + new_originating_question, new_question_text, - new_matrix_subquestion_text, + new_subquestion_text, new_response_options, new_display_logic, new_response_option_randomization, new_respondent_group, - old_matrix_base_name, + old_originating_question, old_question_text, - old_matrix_subquestion_text, + old_subquestion_text, old_response_options, old_display_logic, old_response_option_randomization, @@ -396,7 +392,7 @@ generate_changelog <- function(path_to_codebook, } rename_col <- function(col, prefix) { - if (col %in% c(DIFF_COLS, "matrix_base_name")) { + if (col %in% c(DIFF_COLS, "originating_question")) { paste(prefix, col, sep = "_") } else { col @@ -411,6 +407,16 @@ get_old_version <- function(new_version, compare_map) { ifelse(new_version %in% compare_map, compare_map[compare_map == new_version] %>% names(), NA_character_) } +collapse_subq_elements <- function(variable_name, matrix_field, base_name) { + subq_codes <- str_replace(variable_name, paste0(base_name, "_"), "") %>% + strsplit("_") %>% + # Get the first underscore-delimited chunk. Handles the C10 case, where + # matrix subqs are called C10__1. + purrr::map(~ .x[1]) + matrix_field <- as.list(matrix_field) + names(matrix_field) <- subq_codes + toJSON(matrix_field, auto_unbox = TRUE) +} args <- commandArgs(TRUE) diff --git a/facebook/qsf-tools/generate-codebook.R b/facebook/qsf-tools/generate-codebook.R index 17c5111e4..c03bc53fa 100644 --- a/facebook/qsf-tools/generate-codebook.R +++ b/facebook/qsf-tools/generate-codebook.R @@ -28,7 +28,7 @@ process_qsf <- function(path_to_qsf, path_to_rename_map <- localize_static_filepath(rename_map_file, survey_version) path_to_drop_columns <- localize_static_filepath(drop_columns_file, survey_version) - q <- read_json(path_to_qsf) + q <- read_json(path_to_qsf, encoding = "UTF-8") wave <- get_wave(path_to_qsf) displayed_questions <- subset_qsf_to_displayed(q) @@ -41,19 +41,6 @@ process_qsf <- function(path_to_qsf, item_names <- displayed_questions %>% map_chr(~ .x$Payload$DataExportTag) %>% patch_item_names(path_to_rename_map, wave) - - if (survey_version == "UMD") { - item_names[item_names == "D2_30" & qids == "QID294"] <- "D2_30_cheer" - item_names[item_names == "D2_30" & qids == "QID293"] <- "D2_30_calm" - item_names[item_names == "B13" & qids == "QID253"] <- "B13_likert" - item_names[item_names == "B13" & qids == "QID255"] <- "B13_profile" - item_names[item_names == "B14" & qids == "QID254"] <- "B14_likert" - item_names[item_names == "B14" & qids == "QID259"] <- "B14_profile" - item_names[item_names == "B12a" & qids == "QID250"] <- "B12a_likert" - item_names[item_names == "B12a" & qids == "QID258"] <- "B12a_profile" - item_names[item_names == "B12b" & qids == "QID251"] <- "B12b_likert" - item_names[item_names == "B12b" & qids == "QID257"] <- "B12b_profile" - } # get question text: questions <- displayed_questions %>% @@ -121,7 +108,7 @@ process_qsf <- function(path_to_qsf, } ) - # get the "answers". These are answer choices for matrix items, and missing for non-matrix items. + # get the "answers". These are answer choices for matrix and dropdown items, and missing for other questions. matrix_answers <- displayed_questions %>% map(~ .x$Payload$Answers) %>% map(~ map(.x, "Display")) @@ -136,9 +123,9 @@ process_qsf <- function(path_to_qsf, }) %>% unlist() %>% which() # Swap matrix answer choices into `choices` and matrix subquestion text into another variable - ii_matrix <- which(qtype == "Matrix") - matrix_subquestions <- rep(list(list()), length(choices)) - matrix_subquestions[ii_matrix] <- choices[ii_matrix] + ii_matrix <- which(qtype == "Matrix" | qtype == "Dropdown") + subquestions <- rep(list(list()), length(choices)) + subquestions[ii_matrix] <- choices[ii_matrix] choices[ii_matrix] <- matrix_answers[ii_matrix] # Recode response options if overriding Qualtrics auto-assigned coding. @@ -155,19 +142,23 @@ process_qsf <- function(path_to_qsf, map(~ .x$Payload$Answers) %>% map(~ map(.x, ~ map(.x, "Display"))) + # Null out dropdown choices to avoid including super long list. + ii_dropdown <- which(qtype == "Dropdown") + choices[ii_dropdown] <- rep(list("Not listed due to length, see notes"), length(ii_dropdown)) + # Get matrix subquestion field names as reported in microdata. NULL if not # defined (not a Matrix question); FALSE if not set; otherwise a list - matrix_subquestion_field_names <- displayed_questions %>% + subquestion_field_names <- displayed_questions %>% map(~ .x$Payload$ChoiceDataExportTags) # When subquestion field names are not set, generate incrementing names - ii_unset_matrix_subq_names <- (matrix_subquestion_field_names %>% + ii_unset_matrix_subq_names <- (subquestion_field_names %>% map(~ !inherits(.x, "list")) %>% - unlist() & qtype == "Matrix") %>% + unlist() & (qtype == "Matrix" | qtype == "Dropdown")) %>% which() - matrix_subquestion_field_names[ii_unset_matrix_subq_names] <- lapply(ii_unset_matrix_subq_names, function(ind){ + subquestion_field_names[ii_unset_matrix_subq_names] <- lapply(ii_unset_matrix_subq_names, function(ind){ paste( item_names[ind], - 1:length(matrix_subquestions[ind] %>% unlist()), + 1:length(subquestions[ind] %>% unlist()), sep = "_" ) %>% list() }) @@ -175,7 +166,7 @@ process_qsf <- function(path_to_qsf, if (survey_version == "CMU") { # Bodge E1_* names for Wave 11 if (wave == 11) { - matrix_subquestion_field_names[item_names == "E1"] <- list(c("E1_1", "E1_2", "E1_3", "E1_4")) + subquestion_field_names[item_names == "E1"] <- list(c("E1_1", "E1_2", "E1_3", "E1_4")) } } @@ -278,10 +269,10 @@ process_qsf <- function(path_to_qsf, question = questions, question_type = qtype, response_options = choices, - matrix_subquestions = matrix_subquestions, + subquestions = subquestions, display_logic = display_logic, response_option_randomization = response_option_randomization, - matrix_subquestion_field_names = matrix_subquestion_field_names) + subquestion_field_names = subquestion_field_names) if (file.exists(path_to_drop_columns)){ drop_cols <- read_csv(path_to_drop_columns, trim_ws = FALSE, col_types = cols(item = col_character() @@ -344,26 +335,26 @@ process_qsf <- function(path_to_qsf, # separate matrix subquestions into separate fields (to match exported data) nonmatrix_items <- qdf %>% - filter(question_type != "Matrix") %>% - mutate(matrix_base_name = NA_character_) %>% - select(-matrix_subquestion_field_names) + filter(question_type != "Matrix" & question_type != "Dropdown") %>% + mutate(originating_question = NA_character_) %>% + select(-subquestion_field_names) has_response_by_subq <- qdf %>% - filter(question_type == "Matrix") %>% + filter(question_type == "Matrix" | question_type == "Dropdown") %>% pull(response_options) %>% map_lgl(~ all(map_lgl(.x, ~ inherits(.x, "list"))) && !identical(.x, list())) matrix_items <- qdf %>% - filter(question_type == "Matrix") %>% + filter(question_type == "Matrix" | question_type == "Dropdown") %>% filter(!has_response_by_subq) %>% rowwise() %>% mutate(new = list( - tibble(matrix_base_name = variable, - variable = unlist(matrix_subquestion_field_names), + tibble(originating_question = variable, + variable = unlist(subquestion_field_names), qid = qid, question = question, - matrix_subquestion = unlist(matrix_subquestions), + subquestion = unlist(subquestions), question_type = question_type, response_option_randomization = ifelse( response_option_randomization == "randomized", "none", response_option_randomization), @@ -377,15 +368,15 @@ process_qsf <- function(path_to_qsf, matrix_items_resp_by_subq <- qdf %>% - filter(question_type == "Matrix") %>% + filter(question_type == "Matrix" | question_type == "Dropdown") %>% filter(has_response_by_subq) %>% rowwise() %>% mutate(new = list( - tibble(matrix_base_name = variable, - variable = unlist(matrix_subquestion_field_names), + tibble(originating_question = variable, + variable = unlist(subquestion_field_names), qid = qid, question = question, - matrix_subquestion = unlist(matrix_subquestions), + subquestion = unlist(subquestions), question_type = question_type, response_option_randomization = ifelse( response_option_randomization == "randomized", "none", response_option_randomization), @@ -398,7 +389,7 @@ process_qsf <- function(path_to_qsf, unnest(new) matrix_items <- rbind(matrix_items, matrix_items_resp_by_subq) %>% - select(variable, matrix_base_name, everything()) + select(variable, originating_question, everything()) # Custom matrix formatting if (survey_version == "CMU") { @@ -408,8 +399,6 @@ process_qsf <- function(path_to_qsf, mutate(variable = if_else(str_starts(variable, "C10"), paste0(variable, "_1"), variable), question_type = if_else(str_starts(variable, "A5|C10"), "Text", question_type), response_options = if_else(str_starts(variable, "A5|C10"), list(list()), response_options)) - } else if (survey_version == "UMD") { - # pass } qdf <- bind_rows(nonmatrix_items, matrix_items) @@ -430,11 +419,11 @@ process_qsf <- function(path_to_qsf, select(wave, variable, qid, - matrix_base_name, + originating_question, replaces, description, question, - matrix_subquestion, + subquestion, response_options, question_type, display_logic, @@ -510,7 +499,7 @@ add_qdf_to_codebook <- function(qdf, replaces = col_character(), description = col_character(), question = col_character(), - matrix_subquestion = col_character(), + subquestion = col_character(), question_type = col_character(), display_logic = col_character(), response_option_randomization = col_character() @@ -593,7 +582,7 @@ get_static_fields <- function(wave, replaces = col_character(), description = col_character(), question = col_character(), - matrix_subquestion = col_character(), + subquestion = col_character(), question_type = col_character(), response_option_randomization = col_character() )) %>% diff --git a/facebook/qsf-tools/qsf-differ.R b/facebook/qsf-tools/qsf-differ.R index 07350a30b..979aa8c72 100644 --- a/facebook/qsf-tools/qsf-differ.R +++ b/facebook/qsf-tools/qsf-differ.R @@ -126,8 +126,8 @@ get_qsf_file <- function(path, survey_version, # Recode subquestion names to match exported data. # FALSE if not set, otherwise a list - matrix_subquestion_field_names <- question_raw$ChoiceDataExportTags - if (!inherits(matrix_subquestion_field_names, "list")) { + subquestion_field_names <- question_raw$ChoiceDataExportTags + if (!inherits(subquestion_field_names, "list")) { # When subquestion field names are not set, generate incrementing names names(question$Subquestions) <- paste( question$DataExportTag, @@ -135,7 +135,7 @@ get_qsf_file <- function(path, survey_version, sep = "_" ) } else { - names(question$Subquestions) <- matrix_subquestion_field_names[names(question$Subquestions)] %>% unlist() + names(question$Subquestions) <- subquestion_field_names[names(question$Subquestions)] %>% unlist() } } } @@ -155,21 +155,7 @@ get_qsf_file <- function(path, survey_version, # Deduplicate some UMD items. - if (survey_version == "UMD") { - question$DataExportTag <- case_when( - question$DataExportTag == "D2_30" & question$QuestionID == "QID294" ~ "D2_30_cheer", - question$DataExportTag == "D2_30" & question$QuestionID == "QID293" ~ "D2_30_calm", - question$DataExportTag == "B13" & question$QuestionID == "QID253" ~ "B13_likert", - question$DataExportTag == "B13" & question$QuestionID == "QID255" ~ "B13_profile", - question$DataExportTag == "B14" & question$QuestionID == "QID254" ~ "B14_likert", - question$DataExportTag == "B14" & question$QuestionID == "QID259" ~ "B14_profile", - question$DataExportTag == "B12a" & question$QuestionID == "QID250" ~ "B12a_likert", - question$DataExportTag == "B12a" & question$QuestionID == "QID258" ~ "B12a_profile", - question$DataExportTag == "B12b" & question$QuestionID == "QID251" ~ "B12b_likert", - question$DataExportTag == "B12b" & question$QuestionID == "QID257" ~ "B12b_profile", - TRUE ~ question$DataExportTag - ) - } else if (survey_version == "CMU") { + if (survey_version == "CMU") { if (wave == 10) { question$DataExportTag <- case_when( question$DataExportTag == "C6" ~ "C6a", diff --git a/facebook/qsf-tools/qsf-utils.R b/facebook/qsf-tools/qsf-utils.R index 4ec7280f3..78d67cb88 100644 --- a/facebook/qsf-tools/qsf-utils.R +++ b/facebook/qsf-tools/qsf-utils.R @@ -50,23 +50,30 @@ get_block_item_map <- function(qsf) { #' Wave number as provided in the qsf name should be an integer or a float with #' one decimal place. #' -#' @param path_to_qsf +#' @param path_to_file #' #' @return (mostly) integer wave number -get_wave <- function(path_to_qsf) { - qsf_name_pattern <- "(.*[Ww]ave_?)([0-9]*([.][0-9])?)(.*qsf.*)" - if (!grepl(qsf_name_pattern, path_to_qsf)) { +get_wave <- function(path_to_file) { + wave_name_pattern <- "(.*[Ww]ave_?)([0-9]+([.][0-9])?)(_.*qsf.*)" + version_name_pattern <- "(.*v)([0-9]+([.][0-9])?)(_.*qsf.*)" + if (!grepl(wave_name_pattern, path_to_file) && !grepl(version_name_pattern, path_to_file)) { stop( - "The qsf filename must include the string 'qsf', and the wave number in ", - "the format 'Wave_XX', 'WaveXX', 'wave_XX', or 'waveXX' where 'XX' is an ", + "The QSF filename must include the string 'qsf', and the wave number in ", + "the format 'Wave_XX_', 'WaveXX_', 'wave_XX_', 'waveXX_', or 'vXX_' where 'XX' is an ", "integer or float. The wave specification can occur anywhere in the ", "filename but must precede the string 'qsf'." ) } - wave <- as.numeric( - sub(qsf_name_pattern, "\\2", path_to_qsf) - ) + if (grepl(wave_name_pattern, path_to_file)) { + wave <- as.numeric( + sub(wave_name_pattern, "\\2", path_to_file) + ) + } else if (grepl(version_name_pattern, path_to_file)) { + wave <- as.numeric( + sub(version_name_pattern, "\\2", path_to_file) + ) + } return(wave) } @@ -80,19 +87,25 @@ get_wave <- function(path_to_qsf) { #' #' @return (mostly) integer wave number get_wave_from_csv <- function(path_to_file) { - name_pattern <- "(.*[Ww]ave_?)([0-9]*([.][0-9])?)(.*csv.*)" - if (!grepl(name_pattern, path_to_file)) { + wave_name_pattern <- "(.*[Ww]ave_?)([0-9]+([.][0-9])?)(.*csv.*)" + version_name_pattern <- "(.*v)([0-9]+([.][0-9])?)(.*csv.*)" + if (!grepl(wave_name_pattern, path_to_file) && !grepl(version_name_pattern, path_to_file)) { stop( "The CSV filename must include the string 'csv', and the wave number in ", - "the format 'Wave_XX', 'WaveXX', 'wave_XX', or 'waveXX' where 'XX' is an ", + "the format 'Wave_XX_', 'WaveXX_', 'wave_XX_', 'waveXX_', or 'vXX_' where 'XX' is an ", "integer or float. The wave specification can occur anywhere in the ", "filename but must precede the string 'csv'." ) } - - wave <- as.numeric( - sub(name_pattern, "\\2", path_to_file) - ) + if (grepl(wave_name_pattern, path_to_file)) { + wave <- as.numeric( + sub(wave_name_pattern, "\\2", path_to_file) + ) + } else if (grepl(version_name_pattern, path_to_file)) { + wave <- as.numeric( + sub(version_name_pattern, "\\2", path_to_file) + ) + } return(wave) } diff --git a/facebook/qsf-tools/replace_translation_qids.R b/facebook/qsf-tools/replace_translation_qids.R index ab769a4df..319e1d5cd 100644 --- a/facebook/qsf-tools/replace_translation_qids.R +++ b/facebook/qsf-tools/replace_translation_qids.R @@ -19,18 +19,19 @@ suppressPackageStartupMessages({ replace_qid_wrapper <- function(path_to_translations, path_to_codebook) { if (dir.exists(path_to_translations)) { # Process all CSVs in directory - csvs <- list.files(path_to_translations, pattern = "*.csv$", full.names = TRUE) + csvs <- list.files(path_to_translations, pattern = "*.csv$", full.names = FALSE) for (csv in csvs) { - replace_qids(csv, path_to_codebook) + replace_qids(path_to_translations, csv, path_to_codebook) } } else if (file.exists(path_to_translations)) { - replace_qids(path_to_translations, path_to_codebook) + main_dir <- dirname(path_to_translations) + replace_qids(main_dir, path_to_translations, path_to_codebook) } else { stop(path_to_translations, " is not a valid file or directory") } } -replace_qids <- function(path_to_translation_file, path_to_codebook) { +replace_qids <- function(path_to_dir, path_to_translation_file, path_to_codebook) { wave <- get_wave_from_csv(path_to_translation_file) # Load codebook codebook <- read_csv(path_to_codebook, col_types = cols( @@ -40,12 +41,12 @@ replace_qids <- function(path_to_translation_file, path_to_codebook) { filter(!is.na(qid), version == wave) # Load translation file - translation <- read_csv(path_to_translation_file, show_col_types = FALSE) %>% + translation <- read_csv(file.path(path_to_dir, path_to_translation_file), show_col_types = FALSE) %>% # Drop survey ID line filter(!startsWith(PhraseID, "SV_")) # Use codebook to make a mapping of QID -> item name. - var_qid_pairs <- codebook %>% mutate(variable = coalesce(matrix_base_name, variable)) %>% distinct(qid, variable) + var_qid_pairs <- codebook %>% mutate(variable = coalesce(originating_question, variable)) %>% distinct(qid, variable) qid_item_map <- var_qid_pairs %>% pull(variable) names(qid_item_map) <- var_qid_pairs %>% pull(qid) @@ -57,8 +58,11 @@ replace_qids <- function(path_to_translation_file, path_to_codebook) { }) ) - # Save processed file back to CSV under the same name. - write_excel_csv(translation, path_to_translation_file, quote = "needed") + # Save processed file back to CSV under the same name in a new dir `modified_translations` (created + # if doesn't exist) within the parent directory of where the translation files live. + out_path <- file.path(path_to_dir, "..", "modified_translations") + if (!dir.exists(out_path)) { dir.create(out_path) } + write_excel_csv(translation, file.path(out_path, path_to_translation_file), quote = "needed") } args <- commandArgs(TRUE) diff --git a/facebook/qsf-tools/static/CMU/static_microdata_fields.csv b/facebook/qsf-tools/static/CMU/static_microdata_fields.csv index 482794f96..dd5fc8730 100644 --- a/facebook/qsf-tools/static/CMU/static_microdata_fields.csv +++ b/facebook/qsf-tools/static/CMU/static_microdata_fields.csv @@ -1,4 +1,4 @@ -variable,replaces,description,question,matrix_subquestion,question_type,response_option_randomization +variable,replaces,description,question,subquestion,question_type,response_option_randomization StartDatetime,NA,"survey start timestamp in Pacific time (UTC-7)",NA,NA,NA,NA EndDatetime,NA,"survey end timestamp in Pacific time (UTC-7)",NA,NA,NA,NA wave,NA,"survey version",NA,NA,NA,NA diff --git a/facebook/qsf-tools/static/UMD/item_shortquestion_map.csv b/facebook/qsf-tools/static/UMD/item_shortquestion_map.csv index f7a207255..710c91a78 100644 --- a/facebook/qsf-tools/static/UMD/item_shortquestion_map.csv +++ b/facebook/qsf-tools/static/UMD/item_shortquestion_map.csv @@ -19,19 +19,12 @@ B11a,wanted test 24h B11b,wanted to test sympdays B11c,wanted test 14d B12,reason not tested 14d -B12a_likert,reason not tested 14d -B12a_profile,reason not tested 14d +B12a,reason not tested 14d B12b,reason not tested 14d -B12b_likert,reason not tested 14d -B12b_profile,reason not tested 14d B12c,reason not tested 14d B12c_matrix,reason not tested 14d matrix B13,needed med services 30d -B13_likert,needed med services 30d -B13_profile,needed med services 30d B14,reason no med services 30d -B14_likert,reason no med services 30d -B14_profile,reason no med services 30d B15,reason tested 14d B1_matrix,symptoms B1b,unusual symptoms diff --git a/facebook/qsf-tools/static/UMD/static_microdata_fields.csv b/facebook/qsf-tools/static/UMD/static_microdata_fields.csv index 1ea10a2b1..96bbd7310 100644 --- a/facebook/qsf-tools/static/UMD/static_microdata_fields.csv +++ b/facebook/qsf-tools/static/UMD/static_microdata_fields.csv @@ -1,4 +1,4 @@ -variable,replaces,description,question,matrix_subquestion,question_type,response_option_randomization +variable,replaces,description,question,subquestion,question_type,response_option_randomization survey_region,NA,"survey version",NA,NA,NA,NA survey_version,NA,"survey version",NA,NA,NA,NA weight,NA,"Facebook respondent weight",NA,NA,NA,NA