###############################
# READ AND PROCESS INPUT DATA #
##########################################################################################

# STATPOP. -------------------------------------------------------------------------------

POP <- 
  loadRData(PAR$in_pop) %>% 
  # Filter for 'ständige Wohnbevölkerung der Schweiz Ende Dezember'.
  filter(jahr %in% 2001:first(PAR$pint), variable == "bevendejahr") %>% 
  select(sex, nat, age = alt, year = jahr, pop = value)

# Pension counts by year, sex, nationality, pension type and age. ------------------------

PEN_N <-
  read_delim(PAR$in_rr) %>%
  filter(tsex    != "Sexe - total"       ,
         twohn   == "Suisse"             ,
         tnation != "Nationalité - total",
         lage    != - 99999) %>%
  select(year   = an_rr, sex = tsex, nat = tnation, age = lage,
         # Prefix 'n' refers to pension counts.
         n_alt  = RAV ,
         n_wit  = RAS ,
         n_waiv = RAOp,
         n_waim = RAOm,
         n_waid = RAOd,
         n_kinv = RAEp,
         n_kinm = RAEm) %>%
  mutate(sex = recode(sex, "Homme"    = "m" , "Femme"  = "f"),
         nat = recode(nat, "Etranger" = "au", "Suisse" = "ch"),
         # Consolidate age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>%
  pivot_longer(cols = contains("n_"), names_to = "type", names_prefix = "n_",
               values_to = "n") %>%
  # Dispense with father-mother distinction in pension types to improve projections.
  mutate(type = fct_collapse(type,
                             wai = c("waiv", "waim", "waid"),
                             kin = c("kinv", "kinm"))) %>% 
  # Consolidate groups regarding age 99+ and discarded pension type distinctions.
  summarize(n = sum(n), .by = c("sex", "nat", "type", "age", "year")) %>% 
  # Make implicit missing values explicit.
  complete(sex, nat, type, age, year, fill = list(n = 0, m = 0))

# Pension counts and levels by year, sex, domicile and pension type. ---------------------

PEN_NM <-
  read_delim(PAR$in_rr) %>%
  filter(tsex    != "Sexe - total"             ,
         twohn   != "Pays de résidence - total",
         tnation == "Nationalité - total"      ,
         lage    != - 99999) %>%
  select(year = an_rr, sex = tsex, dom = twohn, age = lage,
         # Prefix 'n' refers to pension counts and 'm' to the mean pension level per 
         # recipient relative to the legal minimum pension of the respective year.
         n_alt  = RAV , m_alt  = RAV_moy ,
         n_wit  = RAS , m_wit  = RAS_moy ,
         n_waiv = RAOp, m_waiv = RAOp_moy,
         n_waim = RAOm, m_waim = RAOm_moy,
         n_waid = RAOd, m_waid = RAOd_moy,
         n_kinv = RAEp, m_kinv = RAEp_moy,
         n_kinm = RAEm, m_kinm = RAEm_moy) %>%
  pivot_longer(cols = contains(c("n_", "m_")), names_to = c("metric", "type"),
               names_sep = "_") %>%
  mutate(sex = recode(sex, "Homme"    = "m" , "Femme"  = "f") ,
         dom = recode(dom, "Etranger" = "au", "Suisse" = "ch")) %>%
  pivot_wider(names_from = metric, values_from = value, values_fn = sum) %>%
  select(year, sex, dom, type, n, m) %>%
  # Dispense with father-mother distinction in pension types to improve projections.
  mutate(type = fct_collapse(type,
                             wai = c("waiv", "waim", "waid"),
                             kin = c("kinv", "kinm"))) %>% 
  # Consolidate groups regarding the discarded pension type distinctions. 
  summarize(m = weighted.mean(m, n), n = sum(n), 
            .by = c("sex", "dom", "type", "year"))

# Take-up rates for different pension types by nationality and age in current year. ------

PENSION_RATES <-
  left_join(POP, PEN_N, 
            by = c("sex", "nat", "age", "year"), relationship = "one-to-many") %>%
  # Select reference STATPOP year.
  filter(year == first(PAR$pint) - 2) %>%
  mutate(share = n / pop) %>%
  select(- year, - pop, - n)

# Attach reference age changes via parameter 'reftab'.
PENSION_RATES <- 
  bind_rows(PAR$pint %>% map(\(x) mutate(PENSION_RATES, year = x))) %>%
  relocate(year) %>%
  left_join(refage_table(PAR$reftab, PAR$pint), 
            by = c("sex", "type", "year"), relationship = "many-to-one") %>%
  tidyr::replace_na(list(dage = 0)) %>% 
  group_by(sex, nat, type, year) %>%
  arrange(age, .by_group = TRUE) %>% 
  # Shift take-up rates according to changes in reference ages. Implicit assumptions:
  # 1. birthdays are evenly distributed across months per year;
  # 2. take-up decisions shift in parallel to the reference age;
  # 3. distinction of PARtial early or late uptakes is negligible.
  mutate(
    share =
      ifelse(type == "alt",
    (1 - (dage - floor(dage))) * dplyr::lag(share, n =   first(floor(dage)), def = 0) +
         (dage - floor(dage))  * dplyr::lag(share, n = first(ceiling(dage)), def = 0),
    share)) %>%
  ungroup()

# BFS population scenario. ---------------------------------------------------------------

SCEN <-
  read_delim(PAR$in_scen) %>% 
  select(scen = scenario, year = jahr, sex, nat, age = alt, val = bevendejahr) %>%
  # Filter for relevant BFS demographic scenario edition.
  mutate(scen = 
           fct_recode(scen, 
                      "A" = paste0("A_00_", PAR$scen),
                      "B" = paste0("B_00_", PAR$scen),
                      "C" = paste0("C_00_", PAR$scen)
                      )
         ) %>%
  filter(scen %in% c("A", "B", "C")) %>%
  mutate(scen = fct_drop(scen), 
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>% 
  summarize(val = sum(val), 
                   .by = c("scen", "sex", "nat", "age", "year")) %>% 
  # Only keep latest observed year for adjustment purposes.
  filter(year %in% PAR$pint) %>% 
  left_join(PENSION_RATES, 
            by = c("year", "sex", "nat", "age"), relationship = "many-to-many") %>%
  replace_na(list(share = 0)) %>% 
  mutate(heads = val * share) %>%
  summarize(heads = sum(heads), 
            .by = c("scen", "sex", "nat", "type", "year"))

# Adjust scenario-derived pension counts to the observed ones in the pension registry.
ADJ <-
  filter(SCEN, year == first(PAR$pint)) %>%
  summarize(heads = sum(heads), 
            .by = c("scen", "sex", "nat", "type")) %>%
  left_join(filter(PEN_N, year == first(PAR$pint)) %>%
              summarize(n = sum(n), .by = c("sex", "nat", "type")), 
            by = c("sex", "nat", "type")) %>%
  mutate(adj = n - heads) %>%
  select(- heads, - n)

# Additive adjustment (normally multiplicative; exception in 2025 due to Ukrainian 
# fugitives). 
SCEN <- SCEN %>% 
  left_join(ADJ, by = c("scen", "sex", "nat", "type")) %>%
  mutate(heads = pmax(0, heads + adj)) %>%
  select(- adj) %>% 
  summarize(heads = sum(heads), .by = c("scen", "year", "sex", "type")) %>%
  mutate(dom = "ch") %>%
  select(scen, year, sex, dom, type, n = heads) %>%
  mutate(m = NA) %>%
  filter(year > first(PAR$pint))

# Historic and projected legal minimal pensions, inflation rates and supposed AHV-Fonds 
# returns. -------------------------------------------------------------------------------

MINIMAL_PENSION <-
  read_delim(PAR$in_mp) %>% 
  select(year = jahr, mp = minimalrente) %>%
  # Upscale to yearly numbers.
  mutate(mp = 12 * mp)

INFLATION <-
  read_delim(PAR$in_inf) %>% 
  select(year = jahr, inf = lik_basis_1977)

ECKWERTE <-
  read_delim(PAR$in_eck) %>%
  filter(id == str_sub(PAR$in_eck, -11, - 5)) %>% 
  select(year = jahr, df = preis) %>% 
  complete(year = first(PAR$pint):last(PAR$pint)) %>% 
  fill(df) %>% 
  # Calculate discount factors for 'real' Finanzhaushalte.
  mutate(df = ifelse(year == first(PAR$pint), 0, df),
         df = cumprod(1 / (1 + df / 100)))

# Widow projections from complementary model. --------------------------------------------

WIDOWS <- 
  read_delim(PAR$in_wid) %>% 
  mutate(type = "wit",
         scen = 
           fct_recode(scen, 
                      "A" = paste0("A_00_", PAR$scen),
                      "B" = paste0("B_00_", PAR$scen),
                      "C" = paste0("C_00_", PAR$scen)
           )) %>% 
  select(scen, sex, dom, type, year = jahr, m, n) %>% 
  left_join(MINIMAL_PENSION, by = "year")


# Consolidation into final analysis data for regression. ---------------------------------

A_DATA <-
  bind_rows(mutate(PEN_NM, scen = "H"), SCEN) %>%
  # Extend analysis data by missing 'dom == au' combinations.
  right_join(
    expand_grid(year = min(PEN_NM$year):last(PAR$pint), 
                scen = c("H", "A", "B", "C"),
                dom  = c("ch", "au"), sex = c("m", "f"),
                type = c("alt", "kin", "wai", "wit")) %>% 
      filter(!(scen == "H" & year > first(PAR$pint)),
             !(scen %in% c("A", "B", "C") & year <= first(PAR$pint))),
    by = c("scen", "sex", "dom", "type", "year")
  ) %>% 
  relocate(year, scen, sex, dom, type, n) %>% 
  left_join(MINIMAL_PENSION, by = "year") %>%
  # Express mean pensions as multiples of contemporaneous minimal pensions.
  mutate(m = 12 * m / mp) %>% 
  arrange(scen, year)

# Save analysis data for later usage in script '5_scenarios.R'.
write_delim(A_DATA, "data/output/A_DATA.csv")

# Yearly ZAS expenditure data. -----------------------------------------------------------

ZAS <-
  read_excel(PAR$in_zas, sheet = "daten", skip = 10) %>%
  select(year = jahr, exp_tot = aus_tot) %>%
  # Rescale to unit Frankenbeträge.
  mutate(exp_tot = exp_tot * 1e6) %>% 
  filter(year <= first(PAR$pint))
