The system of record stores characteristic values in a numerically encoded format. This function can be used to translate those values into a human-readable format. For example, EXT_WALL = 2 will become EXT_WALL = "Masonry". Note that the values and their translations must be specified via a user-defined dictionary. The default dictionary is vars_dict.

Options for type are:

  • "long", which transforms EXT_WALL = 1 to EXT_WALL = Frame

  • "short", which transforms EXT_WALL = 1 to EXT_WALL = FRME

  • "code", which keeps the original values (useful for removing improperly coded values, see note below)

vars_recode(
  data,
  cols = dplyr::everything(),
  type = "long",
  as_factor = TRUE,
  dict = ccao::vars_dict
)

Arguments

data

A data frame or tibble with columns to have values replaced.

cols

A <tidy-select> column selection or vector of column names. Looks for all columns with numerically encoded character values by default.

type

Output/recode type. See description for options.

as_factor

If TRUE, re-encoded values will be returned as factors with their levels pre-specified by the dictionary. Otherwise, will return re-encoded values as characters only.

dict

The dictionary used to translate encodings. Uses vars_dict by default. Use vars_dict_legacy for legacy data column encodings.

Value

The input data frame with re-encoded values for the specified columns.

Note

Values which are in the data but are NOT in the dictionary supplied via dict (vars_dict by default) will be converted to NA. For example, there is no numeric value 3 for AIR, so it will become NA.

See also

Other vars_funs: vars_check_class(), vars_rename()

Examples


# Recode all char columns
sample_data <- chars_sample_universe[1:5, 18:27]

sample_data
#>   DT_PER_ASS  CDU DT_KEY_PIN APTS EXT_WALL ROOF_CNST ROOMS BEDS BSMT BSMT_FIN
#> 1          1 <NA>       <NA>    0        2         1     6    3    1        3
#> 2          1 <NA>       <NA>    0        2         1     6    3    1        3
#> 3          1   AV       <NA>    0        3         1     5    2    1        3
#> 4          1   AV       <NA>    0        2         1     6    2    1        3
#> 5          1 <NA>       <NA>    0        2         1     5    3    1        3
vars_recode(
  data = sample_data,
  dict = ccao::vars_dict_legacy
)
#>   DT_PER_ASS  CDU DT_KEY_PIN APTS        EXT_WALL         ROOF_CNST ROOMS BEDS
#> 1          1 <NA>       <NA> <NA>         Masonry Shingle + Asphalt     6    3
#> 2          1 <NA>       <NA> <NA>         Masonry Shingle + Asphalt     6    3
#> 3          1   AV       <NA> <NA> Frame + Masonry Shingle + Asphalt     5    2
#> 4          1   AV       <NA> <NA>         Masonry Shingle + Asphalt     6    2
#> 5          1 <NA>       <NA> <NA>         Masonry Shingle + Asphalt     5    3
#>   BSMT   BSMT_FIN
#> 1 Full Unfinished
#> 2 Full Unfinished
#> 3 Full Unfinished
#> 4 Full Unfinished
#> 5 Full Unfinished
vars_recode(
  data = sample_data,
  type = "short",
  dict = ccao::vars_dict_legacy
)
#>   DT_PER_ASS  CDU DT_KEY_PIN APTS EXT_WALL ROOF_CNST ROOMS BEDS BSMT BSMT_FIN
#> 1          1 <NA>       <NA> <NA>     MASR      SHAS     6    3   FL      UNF
#> 2          1 <NA>       <NA> <NA>     MASR      SHAS     6    3   FL      UNF
#> 3          1   AV       <NA> <NA>     FRMA      SHAS     5    2   FL      UNF
#> 4          1   AV       <NA> <NA>     MASR      SHAS     6    2   FL      UNF
#> 5          1 <NA>       <NA> <NA>     MASR      SHAS     5    3   FL      UNF

# Recode only the specified columns
gar_sample <- chars_sample_universe[1:5, 30:40]

gar_sample
#>   AIR FRPL ATTIC_TYPE FBATH HBATH TP_PLAN TP_DSGN CNST_QLTY SITE GAR1_SIZE
#> 1   2    0          1     1     0       2       0         2    2         3
#> 2   1    0          1     1     0       2       2         2    2         7
#> 3   2    0          1     1     0       0       0         2    3         1
#> 4   2    0          3     1     1       2       2         2    3         1
#> 5   2    0          1     1     1       0       0         2    2         3
#>   GAR1_CNST
#> 1         1
#> 2         0
#> 3         1
#> 4         1
#> 5         1
vars_recode(
  data = gar_sample,
  cols = dplyr::starts_with("GAR"),
  dict = ccao::vars_dict_legacy
)
#>   AIR FRPL ATTIC_TYPE FBATH HBATH TP_PLAN TP_DSGN CNST_QLTY SITE GAR1_SIZE
#> 1   2    0          1     1     0       2       0         2    2    2 cars
#> 2   1    0          1     1     0       2       2         2    2    0 cars
#> 3   2    0          1     1     0       0       0         2    3    1 cars
#> 4   2    0          3     1     1       2       2         2    3    1 cars
#> 5   2    0          1     1     1       0       0         2    2    2 cars
#>   GAR1_CNST
#> 1     Frame
#> 2      <NA>
#> 3     Frame
#> 4     Frame
#> 5     Frame
vars_recode(
  data = gar_sample,
  cols = "GAR1_SIZE",
  dict = ccao::vars_dict_legacy
)
#>   AIR FRPL ATTIC_TYPE FBATH HBATH TP_PLAN TP_DSGN CNST_QLTY SITE GAR1_SIZE
#> 1   2    0          1     1     0       2       0         2    2    2 cars
#> 2   1    0          1     1     0       2       2         2    2    0 cars
#> 3   2    0          1     1     0       0       0         2    3    1 cars
#> 4   2    0          3     1     1       2       2         2    3    1 cars
#> 5   2    0          1     1     1       0       0         2    2    2 cars
#>   GAR1_CNST
#> 1         1
#> 2         0
#> 3         1
#> 4         1
#> 5         1

# Using data from Athena
sample_data_athena <- chars_sample_athena[1:5, c(1:5, 10:20)]
sample_data_athena
#>              pin year class char_yrblt char_bldg_sf char_hbath char_frpl
#> 1 10254170360000 2015   205       1948         1775          1         2
#> 2 09363230550000 2019   203       1923         1200          1         0
#> 3 09363230550000 2016   203       1923         1200          1         0
#> 4 14321260280000 2018   211       1878         2850          0         0
#> 5 10253190450000 2018   204       1951         2469          1         1
#>   char_type_resd char_cnst_qlty char_apts char_tp_dsgn char_attic_fnsh
#> 1              2              2         6            2               3
#> 2              1              2         6            2               3
#> 3              1              2         6            2               3
#> 4              3              2         2            2               3
#> 5              1              2         6            0               3
#>   char_gar1_att char_gar1_area char_gar1_size char_gar1_cnst
#> 1             2              2              7              0
#> 2             2              2              7              0
#> 3             2              2              7              0
#> 4             2              2              7              0
#> 5             1              2              3              2
vars_recode(
  data = sample_data_athena,
  type = "code",
  dict = ccao::vars_dict_legacy
)
#>              pin year class char_yrblt char_bldg_sf char_hbath char_frpl
#> 1 10254170360000 2015   205       1948         1775          1         2
#> 2 09363230550000 2019   203       1923         1200          1         0
#> 3 09363230550000 2016   203       1923         1200          1         0
#> 4 14321260280000 2018   211       1878         2850          0         0
#> 5 10253190450000 2018   204       1951         2469          1         1
#>   char_type_resd char_cnst_qlty char_apts char_tp_dsgn char_attic_fnsh
#> 1              2              2         6            2               3
#> 2              1              2         6            2               3
#> 3              1              2         6            2               3
#> 4              3              2         2            2               3
#> 5              1              2         6         <NA>               3
#>   char_gar1_att char_gar1_area char_gar1_size char_gar1_cnst
#> 1             2              2              7           <NA>
#> 2             2              2              7           <NA>
#> 3             2              2              7           <NA>
#> 4             2              2              7           <NA>
#> 5             1              2              3              2
vars_recode(
  data = sample_data_athena,
  type = "long",
  dict = ccao::vars_dict_legacy
)
#>              pin year class char_yrblt char_bldg_sf char_hbath char_frpl
#> 1 10254170360000 2015   205       1948         1775          1         2
#> 2 09363230550000 2019   203       1923         1200          1         0
#> 3 09363230550000 2016   203       1923         1200          1         0
#> 4 14321260280000 2018   211       1878         2850          0         0
#> 5 10253190450000 2018   204       1951         2469          1         1
#>   char_type_resd char_cnst_qlty char_apts char_tp_dsgn char_attic_fnsh
#> 1        2 Story        Average      None           No            None
#> 2        1 Story        Average      None           No            None
#> 3        1 Story        Average      None           No            None
#> 4      3 Story +        Average     Three           No            None
#> 5        1 Story        Average      None         <NA>            None
#>   char_gar1_att char_gar1_area char_gar1_size char_gar1_cnst
#> 1            No             No         0 cars           <NA>
#> 2            No             No         0 cars           <NA>
#> 3            No             No         0 cars           <NA>
#> 4            No             No         0 cars           <NA>
#> 5           Yes             No         2 cars        Masonry