###############################
# READ AND PROCESS INPUT DATA #
##########################################################################################

# Disable 'collapse' namespace to avoid collisions.
set_collapse(mask = NULL)

# STATPOP. -------------------------------------------------------------------------------

pop <- 
  loadRData(par$in_pop) %>% 
  filter(variable == "bevendejahr") %>% 
  select(year = jahr, sex, nat, age = alt, pop = value) %>% 
  filter(year %in% 2001:first(par$pint))

# Pension counts by year, sex, nationality, pension type and age. ------------------------

pen_n <-
  read_delim(par$in_rr) %>%
  filter(tsex    != "Sexe - total"       ,
         twohn   == "Suisse"             ,
         tnation != "Nationalité - total",
         lage    != - 99999) %>%
  select(year   = an_rr, sex = tsex, nat = tnation, age = lage,
         n_alt  = RAV ,
         n_wit  = RAS ,
         n_waiv = RAOp,
         n_waim = RAOm,
         n_waid = RAOd,
         n_kinv = RAEp,
         n_kinm = RAEm) %>%
  mutate(sex = recode(sex, "Homme"    = "m" , "Femme"  = "f"),
         nat = recode(nat, "Etranger" = "au", "Suisse" = "ch"),
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>%
  pivot_longer(cols = contains("n_"), names_to = "type", names_prefix = "n_",
               values_to = "n") %>%
  # Dispense with father-mother distinction in pension types.
  mutate(type = fct_collapse(type,
                             wai = c("waiv", "waim", "waid"),
                             kin = c("kinv", "kinm"))) %>% 
  # Consolidate groups regarding age 99+ and discarded pension type distinctions.
  dplyr::summarize(n = sum(n), .by = c("year", "sex", "nat", "age", "type")) %>% 
  # Complement unobserved categories.
  complete(year, sex, nat, age, type) %>%
  tidyr::replace_na(list(n = 0, m = 0))

# Pension counts and levels by year, sex, domicile and pension type. ---------------------

pen_nm <-
  read_delim(par$in_rr) %>%
  filter(tsex    != "Sexe - total"             ,
         twohn   != "Pays de résidence - total",
         tnation == "Nationalité - total"      ,
         lage    != - 99999) %>%
  select(year = an_rr, sex = tsex, dom = twohn, age = lage,
         # Prefix 'n' refers to pension count, 'm' to the mean pension level per 
         # recipient.
         n_alt  = RAV , m_alt  = RAV_moy ,
         n_wit  = RAS , m_wit  = RAS_moy ,
         n_waiv = RAOp, m_waiv = RAOp_moy,
         n_waim = RAOm, m_waim = RAOm_moy,
         n_waid = RAOd, m_waid = RAOd_moy,
         n_kinv = RAEp, m_kinv = RAEp_moy,
         n_kinm = RAEm, m_kinm = RAEm_moy) %>%
  pivot_longer(cols = contains(c("n_", "m_")), names_to = c("metric", "type"),
               names_sep = "_") %>%
  mutate(sex = recode(sex, "Homme"    = "m" , "Femme"  = "f") ,
         dom = recode(dom, "Etranger" = "au", "Suisse" = "ch")) %>%
  pivot_wider(names_from = metric, values_from = value, values_fn = sum) %>%
  select(year, sex, dom, type, n, m) %>%
  # Dispense with father-mother distinction in pension types.
  mutate(type = fct_collapse(type,
                             wai = c("waiv", "waim", "waid"),
                             kin = c("kinv", "kinm"))) %>% 
  # Consolidate groups regarding the discarded pension type distinctions. 
  dplyr::summarize(m = weighted.mean(m, n), n = sum(n), 
                   .by = c("year", "sex", "dom", "type"))

# Take-up rates for different pension types by nationality and age in current year. ------

sh <-
  left_join(pop, pen_n, by = c("year", "sex", "nat", "age"),
            relationship = "one-to-many") %>%
  # Select reference STATPOP year.
  filter(year == first(par$pint) - 2) %>%
  mutate(share = n / pop) %>%
  select(- year, - pop, - n)

# Attach reference age changes via parameter 'reftab'.
sh <- 
  bind_rows(par$pint %>% map(\(x) mutate(sh, year = x))) %>%
  relocate(year) %>%
  left_join(refage_table(par$reftab, par$pint), 
            by = c("year", "sex", "type"), relationship = "many-to-one") %>%
  tidyr::replace_na(list(dage = 0)) %>% 
  group_by(year, type, sex, nat) %>%
  arrange(year, type, sex, nat, age) %>% 
  # Shift take-up rates according to changes in reference ages. Implicit assumptions:
  # 1. birthdays are evenly distributed across months per year;
  # 2. take-up decisions shift in parallel to the reference age;
  # 3. distinction of partial early or late uptakes is negligible.
  mutate(
    share =
      ifelse(type == "alt",
    (1 - (dage - floor(dage))) * dplyr::lag(share, n =   first(floor(dage)), def = 0) +
         (dage - floor(dage))  * dplyr::lag(share, n = first(ceiling(dage)), def = 0),
    share)) %>%
  ungroup()

# BFS population scenario. ---------------------------------------------------------------

scen <-
  read_delim(par$in_scen) %>% 
  select(scen = scenario, year = jahr, sex, nat, age = alt, val = bevendejahr) %>%
  mutate(scen = 
           fct_recode(scen, 
                      "A" = "A_00_2025",
                      "B" = "B_00_2025",
                      "C" = "C_00_2025")) %>%
  filter(scen %in% c("A", "B", "C")) %>%
  mutate(scen = fct_drop(scen), 
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>% 
  dplyr::summarize(val = sum(val), 
                   .by = c("scen", "year", "sex", "nat", "age")) %>% 
  # Only keep latest observed year for adjustment purposes.
  filter(year %in% par$pint) %>% 
  left_join(sh, 
            by = c("year", "sex", "nat", "age"), relationship = "many-to-many") %>%
  tidyr::replace_na(list(share = 0)) %>% 
  mutate(heads = val * share) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "year", "sex", "nat", "type"))

# Adjust scenario-derived pension counts to the observed ones in the pension registry.
adj <-
  filter(scen, year == first(par$pint)) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "sex", "nat", "type")) %>%
  left_join(filter(pen_n, year == first(par$pint)) %>%
              dplyr::summarize(n = sum(n), .by = c("sex", "nat", "type")), 
            by = c("sex", "nat", "type")) %>%
  mutate(adj = n - heads) %>%
  select(- heads, - n)

# Additive adjustment (normally multiplicative; exception in 2025 due to Ukrainian 
# fugitives). 
scen %<>%
  left_join(adj, by = c("scen", "sex", "nat", "type")) %>%
  mutate(heads = heads + adj) %>%
  select(- adj) %>% 
  dplyr::summarize(heads = sum(heads), .by = c("scen", "year", "sex", "type")) %>%
  mutate(dom = "ch") %>%
  select(scen, year, sex, dom, type, n = heads) %>%
  mutate(m = NA) %>%
  filter(year > first(par$pint))

# Historic and projected minimal pensions, inflation and assumed AHV-Fonds returns. ------

mp <-
  read_delim(par$in_mp) %>% 
  select(year = jahr, mp = minimalrente) %>% 
  mutate(mp = 12 * mp)

inf <-
  read_delim(par$in_inf) %>% 
  select(year = jahr, inf = lik_basis_1977)

eck <-
  read_delim(par$in_eck) %>%
  filter(id == str_sub(par$in_eck, -11, - 5)) %>% 
  select(year = jahr, df = preis) %>% 
  complete(year = first(par$pint):last(par$pint)) %>% 
  fill(df) %>% 
  # Calculate discount factors for 'real' Finanzhaushalte.
  mutate(df = ifelse(year == first(par$pint), 0, df),
         df = cumprod(1 / (1 + df / 100)))

# Widow projections from complementary model. --------------------------------------------

wid <- 
  read_delim(par$in_wid) %>% 
  mutate(type = "wit") %>% 
  select(year, sex = csex_hila, dom = cdom, type, m = hila_rel, n = benefs_go) %>% 
  mutate(sex = recode(sex, `1`   = "m" , `2`   = "f"),
         dom = recode(dom, `100` = "ch", `900` = "au")) %>%  
  left_join(mp, by = "year")


# Consolidation into final analysis data for regression. ---------------------------------

a_dat <-
  bind_rows(mutate(pen_nm, scen = "H"), scen) %>%
  # Extend analysis data by missing 'dom == au' combinations.
  right_join(
    expand_grid(year = min(pen_nm$year):last(par$pint), 
                scen = c("H", "A", "B", "C"),
                dom  = c("ch", "au"), sex = c("m", "f"),
                type = c("alt", "kin", "wai", "wit")) %>% 
      filter(!(scen == "H" & year > first(par$pint)),
             !(scen %in% c("A", "B", "C") & year <= first(par$pint))),
    by = c("scen", "year", "sex", "dom", "type")
  ) %>% 
  relocate(year, scen, sex, dom, type, n) %>% 
  left_join(mp, by = "year") %>%
  arrange(dom, sex, type, scen, year) %>%
  # Express mean pensions as multiples of contemporaneous minimal pensions.
  mutate(m = 12 * m / mp) %>% 
  arrange(scen, year)

# Save for later usage in 'scenarios.R'.
write_delim(a_dat, "data/a_dat.csv")

# Yearly ZAS expenditure data. -----------------------------------------------------------

zas <-
  read_excel(par$in_zas, sheet = "daten", skip = 10) %>%
  select(year = jahr, exp_tot = aus_tot) %>% 
  mutate(exp_tot = exp_tot * 1e6)
