These functions polish a tsibble by removing columns or rows; data are kept only if the proportion of overall missings is less than the cutoff:

  • na_polish_measures() polishes data by columns, removing whole measured variables.

  • na_polish_key() polishes data by rows or observations, removing all rows of a key series.

  • na_polish_index() polishes data by rows or observations, removing either the starting or the ending NA blocks (if any) within each key series.

  • na_polish_index2() polishes data by rows or observations, removing the ending NA blocks (if any) within each key series. It is a shortcut for na_polish_index(na_fun = na_ends_with).

na_polish_measures(data, cutoff)

na_polish_key(data, cutoff)

na_polish_index(data, cutoff, na_fun = na_starts_with)

na_polish_index2(data, cutoff)

Arguments

data

A tsibble.

cutoff

A numeric between 0 and 1. Rows/columns are kept if the proportion of overall missings is less than the cutoff.

na_fun

Either na_starts_with or na_ends_with.

Details

The proportion of overall missings is defined as the number of NAs divided by the number of measurements (i.e. excluding key and index).

See also

Other missing value polishing functions: na_polish_auto, na_polish_metrics

Examples

wdi_ts <- tsibble::as_tsibble(wdi, key = country_code, index = year)
(wdi_cols <- na_polish_measures(wdi_ts, cutoff = .7))
#> # A tsibble: 10,850 x 46 [1Y] #> # Key: country_code [217] #> country_code year ag_lnd_frst_k2 ag_srf_totl_k2 bx_klt_dinv_cd_… #> <fct> <int> <dbl> <dbl> <dbl> #> 1 BDI 1969 NA 27830 NA #> 2 BDI 1970 NA 27830 NA #> 3 BDI 1971 NA 27830 NA #> 4 BDI 1972 NA 27830 NA #> 5 BDI 1973 NA 27830 NA #> 6 BDI 1974 NA 27830 NA #> 7 BDI 1975 NA 27830 NA #> 8 BDI 1976 NA 27830 NA #> 9 BDI 1977 NA 27830 NA #> 10 BDI 1978 NA 27830 NA #> # … with 10,840 more rows, and 41 more variables: bx_trf_pwkr_cd_dt <dbl>, #> # dt_dod_dect_cd <dbl>, dt_oda_alld_cd <dbl>, dt_tds_dect_ex_zs <dbl>, #> # eg_use_elec_kh_pc <dbl>, eg_use_pcap_kg_oe <dbl>, en_atm_co2e_pc <dbl>, #> # en_pop_dnst <dbl>, fs_ast_doms_gd_zs <dbl>, gc_rev_xgrt_gd_zs <dbl>, #> # gc_tax_totl_gd_zs <dbl>, it_cel_sets_p2 <dbl>, ms_mil_xpnd_gd_zs <dbl>, #> # ne_exp_gnfs_zs <dbl>, ne_gdi_totl_zs <dbl>, ne_imp_gnfs_zs <dbl>, #> # nv_agr_totl_zs <dbl>, nv_ind_totl_zs <dbl>, ny_gdp_defl_kd_zg <dbl>, #> # ny_gdp_mktp_cd <dbl>, ny_gdp_mktp_kd_zg <dbl>, ny_gnp_atls_cd <dbl>, #> # ny_gnp_mktp_pp_cd <dbl>, ny_gnp_pcap_cd <dbl>, ny_gnp_pcap_pp_cd <dbl>, #> # se_enr_prsc_fm_zs <dbl>, se_prm_cmpt_zs <dbl>, se_prm_enrr <dbl>, #> # se_sec_enrr <dbl>, sh_dyn_aids_zs <dbl>, sh_dyn_mort <dbl>, #> # sh_imm_meas <dbl>, sp_ado_tfrt <dbl>, sp_dyn_le00_in <dbl>, #> # sp_dyn_tfrt_in <dbl>, sp_pop_grow <dbl>, sp_pop_totl <dbl>, #> # sp_urb_grow <dbl>, tg_val_totl_gd_zs <dbl>, tt_pri_mrch_xd_wd <dbl>, #> # tx_val_tech_mf_zs <dbl>
# columns removed
setdiff(names(wdi_ts), names(wdi_cols))
#> [1] "er_h2o_fwtl_zs" "er_ptd_totl_zs" "ic_reg_durs" "iq_sci_ovrl"
#> [5] "sh_sta_brtc_zs" "sh_sta_maln_zs" "si_dst_frst_20" "si_pov_dday"
#> [9] "si_pov_nahc" "sm_pop_netm" "sp_dyn_conu_zs"
na_polish_key(wdi_ts, cutoff = .7)
#> # A tsibble: 9,600 x 57 [1Y] #> # Key: country_code [192] #> country_code year ag_lnd_frst_k2 ag_srf_totl_k2 bx_klt_dinv_cd_… #> <fct> <int> <dbl> <dbl> <dbl> #> 1 BDI 1969 NA 27830 NA #> 2 BDI 1970 NA 27830 NA #> 3 BDI 1971 NA 27830 NA #> 4 BDI 1972 NA 27830 NA #> 5 BDI 1973 NA 27830 NA #> 6 BDI 1974 NA 27830 NA #> 7 BDI 1975 NA 27830 NA #> 8 BDI 1976 NA 27830 NA #> 9 BDI 1977 NA 27830 NA #> 10 BDI 1978 NA 27830 NA #> # … with 9,590 more rows, and 52 more variables: bx_trf_pwkr_cd_dt <dbl>, #> # dt_dod_dect_cd <dbl>, dt_oda_alld_cd <dbl>, dt_tds_dect_ex_zs <dbl>, #> # eg_use_elec_kh_pc <dbl>, eg_use_pcap_kg_oe <dbl>, en_atm_co2e_pc <dbl>, #> # en_pop_dnst <dbl>, er_h2o_fwtl_zs <dbl>, er_ptd_totl_zs <dbl>, #> # fs_ast_doms_gd_zs <dbl>, gc_rev_xgrt_gd_zs <dbl>, gc_tax_totl_gd_zs <dbl>, #> # ic_reg_durs <dbl>, iq_sci_ovrl <dbl>, it_cel_sets_p2 <dbl>, #> # ms_mil_xpnd_gd_zs <dbl>, ne_exp_gnfs_zs <dbl>, ne_gdi_totl_zs <dbl>, #> # ne_imp_gnfs_zs <dbl>, nv_agr_totl_zs <dbl>, nv_ind_totl_zs <dbl>, #> # ny_gdp_defl_kd_zg <dbl>, ny_gdp_mktp_cd <dbl>, ny_gdp_mktp_kd_zg <dbl>, #> # ny_gnp_atls_cd <dbl>, ny_gnp_mktp_pp_cd <dbl>, ny_gnp_pcap_cd <dbl>, #> # ny_gnp_pcap_pp_cd <dbl>, se_enr_prsc_fm_zs <dbl>, se_prm_cmpt_zs <dbl>, #> # se_prm_enrr <dbl>, se_sec_enrr <dbl>, sh_dyn_aids_zs <dbl>, #> # sh_dyn_mort <dbl>, sh_imm_meas <dbl>, sh_sta_brtc_zs <dbl>, #> # sh_sta_maln_zs <dbl>, si_dst_frst_20 <dbl>, si_pov_dday <dbl>, #> # si_pov_nahc <dbl>, sm_pop_netm <dbl>, sp_ado_tfrt <dbl>, #> # sp_dyn_conu_zs <dbl>, sp_dyn_le00_in <dbl>, sp_dyn_tfrt_in <dbl>, #> # sp_pop_grow <dbl>, sp_pop_totl <dbl>, sp_urb_grow <dbl>, #> # tg_val_totl_gd_zs <dbl>, tt_pri_mrch_xd_wd <dbl>, tx_val_tech_mf_zs <dbl>
na_polish_index(wdi_ts, cutoff = .7)
#> # A tsibble: 6,876 x 57 [1Y] #> # Key: country_code [197] #> country_code year ag_lnd_frst_k2 ag_srf_totl_k2 bx_klt_dinv_cd_… #> <fct> <int> <dbl> <dbl> <dbl> #> 1 BDI 1980 NA 27830 NA #> 2 BDI 1981 NA 27830 NA #> 3 BDI 1982 NA 27830 NA #> 4 BDI 1983 NA 27830 NA #> 5 BDI 1984 NA 27830 NA #> 6 BDI 1985 NA 27830 538567. #> 7 BDI 1986 NA 27830 1524029. #> 8 BDI 1987 NA 27830 1367714. #> 9 BDI 1988 NA 27830 1196624. #> 10 BDI 1989 NA 27830 567227. #> # … with 6,866 more rows, and 52 more variables: bx_trf_pwkr_cd_dt <dbl>, #> # dt_dod_dect_cd <dbl>, dt_oda_alld_cd <dbl>, dt_tds_dect_ex_zs <dbl>, #> # eg_use_elec_kh_pc <dbl>, eg_use_pcap_kg_oe <dbl>, en_atm_co2e_pc <dbl>, #> # en_pop_dnst <dbl>, er_h2o_fwtl_zs <dbl>, er_ptd_totl_zs <dbl>, #> # fs_ast_doms_gd_zs <dbl>, gc_rev_xgrt_gd_zs <dbl>, gc_tax_totl_gd_zs <dbl>, #> # ic_reg_durs <dbl>, iq_sci_ovrl <dbl>, it_cel_sets_p2 <dbl>, #> # ms_mil_xpnd_gd_zs <dbl>, ne_exp_gnfs_zs <dbl>, ne_gdi_totl_zs <dbl>, #> # ne_imp_gnfs_zs <dbl>, nv_agr_totl_zs <dbl>, nv_ind_totl_zs <dbl>, #> # ny_gdp_defl_kd_zg <dbl>, ny_gdp_mktp_cd <dbl>, ny_gdp_mktp_kd_zg <dbl>, #> # ny_gnp_atls_cd <dbl>, ny_gnp_mktp_pp_cd <dbl>, ny_gnp_pcap_cd <dbl>, #> # ny_gnp_pcap_pp_cd <dbl>, se_enr_prsc_fm_zs <dbl>, se_prm_cmpt_zs <dbl>, #> # se_prm_enrr <dbl>, se_sec_enrr <dbl>, sh_dyn_aids_zs <dbl>, #> # sh_dyn_mort <dbl>, sh_imm_meas <dbl>, sh_sta_brtc_zs <dbl>, #> # sh_sta_maln_zs <dbl>, si_dst_frst_20 <dbl>, si_pov_dday <dbl>, #> # si_pov_nahc <dbl>, sm_pop_netm <dbl>, sp_ado_tfrt <dbl>, #> # sp_dyn_conu_zs <dbl>, sp_dyn_le00_in <dbl>, sp_dyn_tfrt_in <dbl>, #> # sp_pop_grow <dbl>, sp_pop_totl <dbl>, sp_urb_grow <dbl>, #> # tg_val_totl_gd_zs <dbl>, tt_pri_mrch_xd_wd <dbl>, tx_val_tech_mf_zs <dbl>
na_polish_index2(wdi_ts, cutoff = .7)
#> # A tsibble: 8,997 x 57 [1Y] #> # Key: country_code [210] #> country_code year ag_lnd_frst_k2 ag_srf_totl_k2 bx_klt_dinv_cd_… #> <fct> <int> <dbl> <dbl> <dbl> #> 1 BDI 1969 NA 27830 NA #> 2 BDI 1970 NA 27830 NA #> 3 BDI 1971 NA 27830 NA #> 4 BDI 1972 NA 27830 NA #> 5 BDI 1973 NA 27830 NA #> 6 BDI 1974 NA 27830 NA #> 7 BDI 1975 NA 27830 NA #> 8 BDI 1976 NA 27830 NA #> 9 BDI 1977 NA 27830 NA #> 10 BDI 1978 NA 27830 NA #> # … with 8,987 more rows, and 52 more variables: bx_trf_pwkr_cd_dt <dbl>, #> # dt_dod_dect_cd <dbl>, dt_oda_alld_cd <dbl>, dt_tds_dect_ex_zs <dbl>, #> # eg_use_elec_kh_pc <dbl>, eg_use_pcap_kg_oe <dbl>, en_atm_co2e_pc <dbl>, #> # en_pop_dnst <dbl>, er_h2o_fwtl_zs <dbl>, er_ptd_totl_zs <dbl>, #> # fs_ast_doms_gd_zs <dbl>, gc_rev_xgrt_gd_zs <dbl>, gc_tax_totl_gd_zs <dbl>, #> # ic_reg_durs <dbl>, iq_sci_ovrl <dbl>, it_cel_sets_p2 <dbl>, #> # ms_mil_xpnd_gd_zs <dbl>, ne_exp_gnfs_zs <dbl>, ne_gdi_totl_zs <dbl>, #> # ne_imp_gnfs_zs <dbl>, nv_agr_totl_zs <dbl>, nv_ind_totl_zs <dbl>, #> # ny_gdp_defl_kd_zg <dbl>, ny_gdp_mktp_cd <dbl>, ny_gdp_mktp_kd_zg <dbl>, #> # ny_gnp_atls_cd <dbl>, ny_gnp_mktp_pp_cd <dbl>, ny_gnp_pcap_cd <dbl>, #> # ny_gnp_pcap_pp_cd <dbl>, se_enr_prsc_fm_zs <dbl>, se_prm_cmpt_zs <dbl>, #> # se_prm_enrr <dbl>, se_sec_enrr <dbl>, sh_dyn_aids_zs <dbl>, #> # sh_dyn_mort <dbl>, sh_imm_meas <dbl>, sh_sta_brtc_zs <dbl>, #> # sh_sta_maln_zs <dbl>, si_dst_frst_20 <dbl>, si_pov_dday <dbl>, #> # si_pov_nahc <dbl>, sm_pop_netm <dbl>, sp_ado_tfrt <dbl>, #> # sp_dyn_conu_zs <dbl>, sp_dyn_le00_in <dbl>, sp_dyn_tfrt_in <dbl>, #> # sp_pop_grow <dbl>, sp_pop_totl <dbl>, sp_urb_grow <dbl>, #> # tg_val_totl_gd_zs <dbl>, tt_pri_mrch_xd_wd <dbl>, tx_val_tech_mf_zs <dbl>