##########################################################
# IMPUTE FUTURE FOREIGN POPULATION COUNTS AND MEAN RENTS #
##########################################################################################

if (!file.exists("data/output/RANGE.RData")) 
  stop(
    "Parameter 'PAR$cv' is set to FALSE, while there are no saved results from a previous 
    cross-validation (expected path: ~/basismodell/data/output/RANGE.Rdata). 
    Set 'PAR$cv = TRUE' for at least one run to let the script '4_cross-validation.R' 
    produce and save the optimal year ranges for the extrapolation of the explanatory 
    variables.")

# Range of trend extrapolation points from cross-validation (see '4_cross-validation.R').
load("data/output/RANGE.RData")

# Set up list to collect results.
RES_LIST <- list()

# Imputation over rent type, sex, and domicile for the BFS reference scenario 'A'.
RES_LIST[["au"]] <- A_DATA %>%
  filter(scen %in% c("H", "A"), dom == "au") %>%
  mutate(t_n = ifelse(year >= RANGE$nau, 1, NA), 
         t_m = ifelse(year >= RANGE$mau, 1, NA)) %>%
  group_by(sex, type) %>%
  arrange(year, .by_group = TRUE) %>% 
  mutate(d_n = n - lag(n), d_m = m - lag(m)) %>%
  # Extrapolate pension counts and mean pension levels with a linear time trend over
  # the Cartesian product of pension type, sex, and domicile.
  ungroup() %>%
  impute_lm(d_n ~ 0 + t_n | sex + type) %>%
  impute_lm(d_m ~ 0 + t_m | sex + type) %>%
  group_by(sex, type) %>%
  mutate(d_n = ifelse(year > first(PAR$pint), d_n, 0),
         d_m = ifelse(year > first(PAR$pint), d_m, 0)) %>%
  fill(m, n) %>% 
  # Prevent eventual negative predictions, and enforce legal maximum pension of twice
  # the minimal pension (implicit assumption: pension supplements due to deferrals will
  # never increase the mean pension level above the upper limit).
  mutate(m = pmin(2, pmax(0, m + cumsum(d_m))),
         n =         pmax(0, n + cumsum(d_n))) %>%
  select(- t_n, - d_n, - t_m, - d_m, - scen)

RES_LIST[["ch"]] <- A_DATA %>%
  filter(scen %in% c("H", "A"), dom == "ch") %>%
  mutate(t_m = ifelse(year >= RANGE$mch, 1, NA)) %>%
  # Extrapolate mean pension levels with a linear time trend over the Cartesian product of 
  # pension type, sex, and domicile.
  group_by(sex, type) %>%
  arrange(year, .by_group = TRUE) %>% 
  mutate(d_m = m - lag(m)) %>%
  ungroup() %>%
  impute_lm(d_m ~ 0 + t_m | sex + type) %>%
  group_by(sex, type) %>%
  mutate(d_m = ifelse(year > first(PAR$pint), d_m, 0)) %>%
  fill(m) %>% 
  mutate(m = pmin(2, pmax(0, m + cumsum(d_m)))) %>%
  select(- t_m, - d_m, - scen)

# Import historical intermediate-year deaths of retirees and their average
# pensions (source: RR). Factor 'mean(1:11) / 12' in 'save_rel' approximates the
# ratio of savings due to within-year pensioner deaths under monthly versus
# yearly payout of 13th AHV pension payment according to the Liechtenstein model.
# For further explanations, please contact the BSV directly.
LIECHTENSTEIN <-
  read_delim(PAR$in_lim) %>%
  select(year = an_rr, sex = csex, dom = recoded_cdom, n = anzahl,
         m_dec = rentensumme_dez) %>%
  mutate(year = year + 1,
         sex  = recode(sex, `1` = "m", `2` = "f"),
         dom  = recode(dom, `100` = "ch", `900` = "au")) %>%
  # Multiplication by '11/12' corrects for death count in the preceding december
  # (implicit assumption: uniform death rate over months and pension levels).
  summarize(m = 11 / 12 * sum(m_dec) / sum(n), n = sum(n),
            .by = c("year", "sex", "dom")) %>%
  right_join(
    expand_grid(year = min(.$year):last(PAR$pint),
                sex  = c("m", "f"), dom = c("ch", "au")),
             by = c("year", "sex", "dom")
    ) %>%
  left_join(MINIMAL_PENSION, by = "year") %>%
  # Express mean monthly pensions as multiples of minimal rent.
  mutate(m = m / mp) %>%
  group_by(sex, dom) %>%
  arrange(year, .by_group = TRUE) %>% 
  mutate(d_n = c(0, diff(n)), d_m = c(0, diff(m))) %>%
  ungroup() %>%
  # Impute incremental death counts and mean pensions linearly and separately by sex and 
  # domicile.
  impute_lm(d_n + d_m ~ 1 | sex + dom) %>%  
  group_by(sex, dom) %>% 
  fill(m, n) %>% 
  mutate(d_n = cumsum((year > first(PAR$pint)) * d_n),
         d_m = cumsum((year > first(PAR$pint)) * d_m),
         m = pmin(2, pmax(0, m + d_m)), 
         n =         pmax(0, n + d_n),
         savings_tot = m * n * mp) %>%
  ungroup() %>% 
  summarize(savings_tot = sum(savings_tot), .by = c("sex", "dom", "year")) %>%
  filter(year >= first(PAR$pint)) %>%
  # Calculate effective savings relative to yearly payout.
  mutate(savings_rel = ifelse(year >= 2026, savings_tot * (1 - mean(1:11) / 12), 0),
         type = "alt") %>%
  select(year, sex, dom, type, savings_rel)

# Replace relative average pension projections for widows with exogenous vector based on 
# consolidated STATPOP & pension registry data (author: Thomas Borek @BSV, mathematics
# department).
PROJ_DATA <- RES_LIST %>% 
  bind_rows() %>%
  filter(!(type == "wit" & year > first(PAR$pint))) %>%
  bind_rows(
    WIDOWS %>% 
      filter(year %in% PAR$pint, scen == "A") %>%
      select(- scen)
  ) %>%
  left_join(LIECHTENSTEIN, by = c("sex", "dom", "type", "year")) %>%
  replace_na(list(savings_rel = 0)) %>% 
  # Incorporate Liechtenstein effect through an equivalent decrease of surviving 
  # retirees' average pension levels.
  mutate(m = ifelse(type == "alt" & year >= 2026,
                    m * (1 + PAR$ahv13 * 1/12) - 
                      PAR$ahv13 * savings_rel / (n * mp), m)) %>% 
  left_join(ZAS, by = "year") %>% 
  ungroup()

# Fit model for the incremental change in total expenditures using the extrapolated 
# pension sum increments as predictors.
FIT_DATA <- PROJ_DATA %>%
  summarize(m = weighted.mean(m, n), n = sum(n), .by = c("mp", "exp_tot", "year")) %>%
  filter(year %in% RANGE[["tot"]]:first(PAR$pint))

fit <-
  lm(diff(exp_tot) ~ 0 + diff(n * m * mp), FIT_DATA)

# Consolidate results.
RES_TAB <- PROJ_DATA %>%
  summarize(m = weighted.mean(m, n), n = sum(n),
            .by = c("year", "exp_tot", "mp")) %>%  
  filter(year %in% PAR$pint) %>%
  mutate(d_nmmp = c(0, cumsum(diff(n * m * mp)))) %>% 
  # Adjust projected pension sum increments by estimated cost top-up.
  mutate(exp_tot = 
           ifelse(year > first(PAR$pint), 
                  first(exp_tot) + d_nmmp * coef(fit), exp_tot)) %>% 
  rename(ref = exp_tot) %>% 
  select(- d_nmmp)

# Adjust the projections by the exogenous AHV 21 predictions of the cost effects, 
# apart from the reference age shifts.
RES_TAB <- RES_TAB %>%  
  left_join(AHV21_COST, by = "year") %>%
  # Assignment of reform cost differential to groups via variable 's' (only relevant
  # if 'PAR$agg' is set to FALSE).
  mutate(ref = ref + cost) %>% 
  select(- cost)

# Deflate results, if desired.
if (PAR$real)
  RES_TAB <- RES_TAB %>%  
    left_join(select(ECKWERTE, year, df), by = "year") %>% 
    mutate(ref = ref * df) %>% 
    select(- df)

# Express results in millions, round, and then round later projection horizons again to
# one hundred million in agreement with the website presentation of the 
# Finanzperspektiven.
RES_TAB <- RES_TAB %>% 
  mutate(ref = round(ref / 1e6),
         ref = ifelse(year >= last(PAR$pint - 5), round(ref, -2), ref))

# Print and save final projections.
if (PAR$write)
  write_delim(select(RES_TAB, year, ref), delim = ";",
              file = ifelse(PAR$real, "data/output/proj_exp_real.csv", 
                                      "data/output/proj_exp_nomi.csv"))

# Print reference projection (unless uncertainty bands are calculated as well).
if (!PAR$bands)
  select(RES_TAB, - mp, - m, - n, - s)

