The system of record stores characteristic values in a
numerically encoded format. This function can be used to translate those
values into a human-readable format. For example, EXT_WALL = 2 will become
EXT_WALL = "Masonry". Note that the values and their translations
must be specified via a user-defined dictionary. The default dictionary is
vars_dict.
Options for code_type
are:
"long"
, which transforms EXT_WALL = 1 to EXT_WALL = Frame
"short"
, which transforms EXT_WALL = 1 to EXT_WALL = FRME
"code"
, which keeps the original values (useful for removing
improperly coded values, see note below)
vars_recode(
data,
cols = dplyr::everything(),
code_type = "long",
as_factor = TRUE,
dictionary = ccao::vars_dict
)
A data frame or tibble with columns to have values replaced.
A <tidy-select>
column selection or vector of column
names. Looks for all columns with numerically encoded character
values by default.
Output/recode type. See description for options.
If TRUE
, re-encoded values will be returned as
factors with their levels pre-specified by the dictionary. Otherwise,
re-encoded values will be returned as characters only.
The dictionary used to translate encodings. Uses
vars_dict
by default. Use vars_dict_legacy
for
legacy data column encodings.
The input data frame with re-encoded values for the specified columns.
Values which are in the data but are NOT in the supplied dictionary (vars_dict
by default) will be converted to NA. For example, there is no numeric value 3
for AIR, so it will become NA.
Other vars_funs:
vars_rename()
# Recode all char columns
sample_data <- chars_sample_universe[1:5, 18:27]
sample_data
#> DT_PER_ASS CDU DT_KEY_PIN APTS EXT_WALL ROOF_CNST ROOMS BEDS BSMT BSMT_FIN
#> 1 1 <NA> <NA> 0 2 1 6 3 1 3
#> 2 1 <NA> <NA> 0 2 1 6 3 1 3
#> 3 1 AV <NA> 0 3 1 5 2 1 3
#> 4 1 AV <NA> 0 2 1 6 2 1 3
#> 5 1 <NA> <NA> 0 2 1 5 3 1 3
vars_recode(
data = sample_data,
dictionary = ccao::vars_dict_legacy
)
#> DT_PER_ASS CDU DT_KEY_PIN APTS EXT_WALL ROOF_CNST ROOMS BEDS
#> 1 1 <NA> <NA> <NA> Masonry Shingle + Asphalt 6 3
#> 2 1 <NA> <NA> <NA> Masonry Shingle + Asphalt 6 3
#> 3 1 AV <NA> <NA> Frame + Masonry Shingle + Asphalt 5 2
#> 4 1 AV <NA> <NA> Masonry Shingle + Asphalt 6 2
#> 5 1 <NA> <NA> <NA> Masonry Shingle + Asphalt 5 3
#> BSMT BSMT_FIN
#> 1 Full Unfinished
#> 2 Full Unfinished
#> 3 Full Unfinished
#> 4 Full Unfinished
#> 5 Full Unfinished
vars_recode(
data = sample_data,
code_type = "short",
dictionary = ccao::vars_dict_legacy
)
#> DT_PER_ASS CDU DT_KEY_PIN APTS EXT_WALL ROOF_CNST ROOMS BEDS BSMT BSMT_FIN
#> 1 1 <NA> <NA> <NA> MASR SHAS 6 3 FL UNF
#> 2 1 <NA> <NA> <NA> MASR SHAS 6 3 FL UNF
#> 3 1 AV <NA> <NA> FRMA SHAS 5 2 FL UNF
#> 4 1 AV <NA> <NA> MASR SHAS 6 2 FL UNF
#> 5 1 <NA> <NA> <NA> MASR SHAS 5 3 FL UNF
# Recode only the specified columns
gar_sample <- chars_sample_universe[1:5, 30:40]
gar_sample
#> AIR FRPL ATTIC_TYPE FBATH HBATH TP_PLAN TP_DSGN CNST_QLTY SITE GAR1_SIZE
#> 1 2 0 1 1 0 2 0 2 2 3
#> 2 1 0 1 1 0 2 2 2 2 7
#> 3 2 0 1 1 0 0 0 2 3 1
#> 4 2 0 3 1 1 2 2 2 3 1
#> 5 2 0 1 1 1 0 0 2 2 3
#> GAR1_CNST
#> 1 1
#> 2 0
#> 3 1
#> 4 1
#> 5 1
vars_recode(
data = gar_sample,
cols = dplyr::starts_with("GAR"),
dictionary = ccao::vars_dict_legacy
)
#> AIR FRPL ATTIC_TYPE FBATH HBATH TP_PLAN TP_DSGN CNST_QLTY SITE GAR1_SIZE
#> 1 2 0 1 1 0 2 0 2 2 2 cars
#> 2 1 0 1 1 0 2 2 2 2 0 cars
#> 3 2 0 1 1 0 0 0 2 3 1 cars
#> 4 2 0 3 1 1 2 2 2 3 1 cars
#> 5 2 0 1 1 1 0 0 2 2 2 cars
#> GAR1_CNST
#> 1 Frame
#> 2 <NA>
#> 3 Frame
#> 4 Frame
#> 5 Frame
vars_recode(
data = gar_sample,
cols = "GAR1_SIZE",
dictionary = ccao::vars_dict_legacy
)
#> AIR FRPL ATTIC_TYPE FBATH HBATH TP_PLAN TP_DSGN CNST_QLTY SITE GAR1_SIZE
#> 1 2 0 1 1 0 2 0 2 2 2 cars
#> 2 1 0 1 1 0 2 2 2 2 0 cars
#> 3 2 0 1 1 0 0 0 2 3 1 cars
#> 4 2 0 3 1 1 2 2 2 3 1 cars
#> 5 2 0 1 1 1 0 0 2 2 2 cars
#> GAR1_CNST
#> 1 1
#> 2 0
#> 3 1
#> 4 1
#> 5 1
# Using data from Athena
sample_data_athena <- chars_sample_athena[1:5, c(1:5, 10:20)]
sample_data_athena
#> pin year class char_yrblt char_bldg_sf char_hbath char_frpl
#> 1 10254170360000 2015 205 1948 1775 1 2
#> 2 09363230550000 2019 203 1923 1200 1 0
#> 3 09363230550000 2016 203 1923 1200 1 0
#> 4 14321260280000 2018 211 1878 2850 0 0
#> 5 10253190450000 2018 204 1951 2469 1 1
#> char_type_resd char_cnst_qlty char_apts char_tp_dsgn char_attic_fnsh
#> 1 2 2 6 2 3
#> 2 1 2 6 2 3
#> 3 1 2 6 2 3
#> 4 3 2 2 2 3
#> 5 1 2 6 0 3
#> char_gar1_att char_gar1_area char_gar1_size char_gar1_cnst
#> 1 2 2 7 0
#> 2 2 2 7 0
#> 3 2 2 7 0
#> 4 2 2 7 0
#> 5 1 2 3 2
vars_recode(
data = sample_data_athena,
code_type = "code",
dictionary = ccao::vars_dict_legacy
)
#> pin year class char_yrblt char_bldg_sf char_hbath char_frpl
#> 1 10254170360000 2015 205 1948 1775 1 2
#> 2 09363230550000 2019 203 1923 1200 1 0
#> 3 09363230550000 2016 203 1923 1200 1 0
#> 4 14321260280000 2018 211 1878 2850 0 0
#> 5 10253190450000 2018 204 1951 2469 1 1
#> char_type_resd char_cnst_qlty char_apts char_tp_dsgn char_attic_fnsh
#> 1 2 2 6 2 3
#> 2 1 2 6 2 3
#> 3 1 2 6 2 3
#> 4 3 2 2 2 3
#> 5 1 2 6 <NA> 3
#> char_gar1_att char_gar1_area char_gar1_size char_gar1_cnst
#> 1 2 2 7 <NA>
#> 2 2 2 7 <NA>
#> 3 2 2 7 <NA>
#> 4 2 2 7 <NA>
#> 5 1 2 3 2
vars_recode(
data = sample_data_athena,
code_type = "long",
dictionary = ccao::vars_dict_legacy
)
#> pin year class char_yrblt char_bldg_sf char_hbath char_frpl
#> 1 10254170360000 2015 205 1948 1775 1 2
#> 2 09363230550000 2019 203 1923 1200 1 0
#> 3 09363230550000 2016 203 1923 1200 1 0
#> 4 14321260280000 2018 211 1878 2850 0 0
#> 5 10253190450000 2018 204 1951 2469 1 1
#> char_type_resd char_cnst_qlty char_apts char_tp_dsgn char_attic_fnsh
#> 1 2 Story Average None No None
#> 2 1 Story Average None No None
#> 3 1 Story Average None No None
#> 4 3 Story + Average Three No None
#> 5 1 Story Average None <NA> None
#> char_gar1_att char_gar1_area char_gar1_size char_gar1_cnst
#> 1 No No 0 cars <NA>
#> 2 No No 0 cars <NA>
#> 3 No No 0 cars <NA>
#> 4 No No 0 cars <NA>
#> 5 Yes No 2 cars Masonry