######################################################################################
# VISUALIZE AND DECOMPOSE DIFFERENTIAL EFFECTS OF REFERENZALTERERHOEHUNG ACROSS TIME #
######################################################################################

# Preamble. ------------------------------------------------------------------------------

# Clear workspace and load necessary packages.
sapply(c("tidyverse", "magrittr", "simputation", "readxl", "tidymodels",
         "collapse" , "dqrng"   , "ggshadow"   , "strucchange"), 
       library, char = TRUE)

# Suppress column type guessing messages from 'readr' functions.
options(readr.show_col_types = FALSE)

# Set working directory to Basismodell folder.
setwd("~/basismodell")

# Calculations. --------------------------------------------------------------------------

# Load baseline parameters.
source("scripts/base_par.R")

# Define auxiliary functions.
source("scripts/aux_fun.R")


# Extract Erstrenten and minimal pension projections. -------------------------------

# STATPOP. -------------------------------------------------------------------------------

pop <- 
  loadRData(par$in_pop) %>% 
  filter(variable == "bevendejahr") %>% 
  select(year = jahr, sex, nat, age = alt, pop = value) %>% 
  filter(year %in% 2001:first(par$pint))

# Pension counts by year, sex, nationality, pension type and age. ------------------------

pen_n <-
  read_delim(par$in_rr) %>%
  filter(tsex    != "Sexe - total"       ,
         twohn   == "Suisse"             ,
         tnation != "Nationalité - total",
         lage    != - 99999) %>%
  select(year   = an_rr, sex = tsex, nat = tnation, age = lage,
         n_alt  = RAV ,
         n_wit  = RAS ,
         n_waiv = RAOp,
         n_waim = RAOm,
         n_waid = RAOd,
         n_kinv = RAEp,
         n_kinm = RAEm) %>%
  mutate(sex = recode(sex, "Homme"    = "m" , "Femme"  = "f"),
         nat = recode(nat, "Etranger" = "au", "Suisse" = "ch"),
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>%
  pivot_longer(cols = contains("n_"), names_to = "type", names_prefix = "n_",
               values_to = "n") %>%
  # Dispense with father-mother distinction in pension types.
  mutate(type = fct_collapse(type,
                             wai = c("waiv", "waim", "waid"),
                             kin = c("kinv", "kinm"))) %>% 
  # Consolidate groups regarding age 99+ and discarded pension type distinctions.
  dplyr::summarize(n = sum(n), .by = c("year", "sex", "nat", "age", "type")) %>% 
  # Complement unobserved categories.
  complete(year, sex, nat, age, type) %>%
  tidyr::replace_na(list(n = 0, m = 0))

# Take-up rates for different pension types by nationality and age in current year. ------

sh <-
  left_join(pop, pen_n, 
            by = c("year", "sex", "nat", "age"),
            relationship = "one-to-many") %>%
  # Select reference STATPOP year.
  filter(year == first(par$pint) - 2) %>%
  mutate(share = n / pop) %>%
  select(- year, - pop, - n)

# Attach reference age changes via parameter 'reftab'.
sh <- 
  bind_rows(par$pint %>% map(\(x) mutate(sh, year = x))) %>%
  relocate(year) %>%
  left_join(refage_table(par$reftab, par$pint), 
            by = c("year", "sex", "type"), relationship = "many-to-one") %>%
  tidyr::replace_na(list(dage = 0)) %>% 
  group_by(year, type, sex, nat) %>%
  arrange(year, type, sex, nat, age) %>% 
  # Shift take-up rates according to changes in reference ages. Implicit assumptions:
  # 1. birthdays are evenly distributed across months per year;
  # 2. take-up decisions shift in parallel to the reference age;
  # 3. distinction of partial early or late uptakes is negligible.
  mutate(
    share =
      ifelse(type == "alt",
             (1 - (dage - floor(dage))) * dplyr::lag(share, n =   first(floor(dage)), def = 0) +
                  (dage - floor(dage))  * dplyr::lag(share, n = first(ceiling(dage)), def = 0),
             share)) %>%
  ungroup()

# BFS population scenario. ---------------------------------------------------------------

scen <-
  read_delim(par$in_scen) %>% 
  select(scen = scenario, year = jahr, sex, nat, age = alt, val = bevendejahr) %>%
  mutate(scen = 
           fct_recode(scen, 
                      "A" = paste0("A_00_", par$scen),
                      "B" = paste0("B_00_", par$scen),
                      "C" = paste0("C_00_", par$scen)
           )
  ) %>%
  filter(scen %in% c("A", "B", "C")) %>%
  mutate(scen = fct_drop(scen), 
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>% 
  dplyr::summarize(val = sum(val), 
                   .by = c("scen", "year", "sex", "nat", "age")) %>% 
  # Only keep latest observed year for adjustment purposes.
  filter(year %in% par$pint) %>% 
  left_join(sh, 
            by = c("year", "sex", "nat", "age"), relationship = "many-to-many") %>%
  tidyr::replace_na(list(share = 0)) %>% 
  mutate(heads = val * share) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "year", "sex", "nat", "type", "age"))

# Adjust scenario-derived pension counts to the observed ones in the pension registry.
adj <-
  filter(scen, year == first(par$pint)) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "sex", "nat", "type", "age")) %>%
  left_join(filter(pen_n, year == first(par$pint)) %>%
              dplyr::summarize(n = sum(n), .by = c("sex", "nat", "type", "age")), 
            by = c("sex", "nat", "type", "age")) %>%
  mutate(adj = n - heads) %>%
  select(- heads, - n)

# Additive adjustment (normally multiplicative; exception in 2025 due to Ukrainian 
# fugitives). 
scen %<>%
  left_join(adj, by = c("scen", "sex", "nat", "type", "age")) %>%
  mutate(heads = pmax(0, heads + adj)) %>%
  select(- adj) %>% 
  dplyr::summarize(heads = sum(heads), .by = c("scen", "year", "sex", "type", "age")) %>%
  mutate(dom = "ch") %>%
  select(scen, year, sex, dom, type, age, n = heads) %>%
  mutate(m = NA) %>%
  filter(year > first(par$pint))

# Filter for old-age Erstrenten cohorts from 2028 onwards.
MP <-
  read_delim(par$in_mp) %>% 
  select(year = jahr, mp = minimalrente) %>% 
  filter(year >= 2028)

VIZ <- scen %>% 
  filter(year >= 2029, dom == "ch", type == "alt", age %in% 65, scen == "A") %>% 
  dplyr::summarize(n = sum(n), .by = c("year", "type")) %>% 
  left_join(MP, by = "year")

mult <- max(VIZ$n) / max(VIZ$mp)

ggplot(VIZ, aes(x = as.factor(year), group = type)) +
  geom_shadowstep(aes(y = mp * mult), alpha = .5, col = "darkred") +
  geom_shadowpoint(aes(y = mp * mult), col = "darkred") +
  geom_shadowline(aes(y = n), alpha = .5, col = "darkblue") +
  geom_shadowpoint(aes(y = n), col = "darkblue") +
  scale_y_continuous(
    name = "Anzahl Erstrentenbeziehender",
    sec.axis = sec_axis(~ . / mult, name = "Gesetzliche Minimalrente"),
    labels = comma
  ) +
  theme_grey(base_size = 14) +
  labs(x = NULL, title = "Projektionsvergleich: Grösse der Erstrentenkohorten versus Minimalrente",
       subtitle = "AHV-Altersrentenansprüche innerhalb der Schweiz,\nErstrentenbeziehende approximiert durch Anzahl Rentenansprüche im Alter 65") +
  theme(
    axis.title.y = element_text(color = "darkblue"),
    axis.title.y.right = element_text(color = "darkred")
  )


# DECOMPOSE DIFFERENTIAL EFFECT OF 1-YEAR SHIFT FOR 2039-2040 -----------------------

# Pension counts by year, sex, nationality, pension type and age. ------------------------

pen_n <-
  read_delim(par$in_rr) %>%
  filter(tsex    != "Sexe - total"       ,
         twohn   == "Suisse"             ,
         tnation != "Nationalité - total",
         lage    != - 99999) %>%
  select(year   = an_rr, sex = tsex, nat = tnation, age = lage,
         n_alt  = RAV ,
         n_wit  = RAS ,
         n_waiv = RAOp,
         n_waim = RAOm,
         n_waid = RAOd,
         n_kinv = RAEp,
         n_kinm = RAEm) %>%
  mutate(sex = recode(sex, "Homme"    = "m" , "Femme"  = "f"),
         nat = recode(nat, "Etranger" = "au", "Suisse" = "ch"),
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>%
  pivot_longer(cols = contains("n_"), names_to = "type", names_prefix = "n_",
               values_to = "n") %>%
  # Dispense with father-mother distinction in pension types.
  mutate(type = fct_collapse(type,
                             wai = c("waiv", "waim", "waid"),
                             kin = c("kinv", "kinm"))) %>% 
  # Consolidate groups regarding age 99+ and discarded pension type distinctions.
  dplyr::summarize(n = sum(n), .by = c("year", "sex", "nat", "age", "type")) %>% 
  # Complement unobserved categories.
  complete(year, sex, nat, age, type) %>%
  tidyr::replace_na(list(n = 0, m = 0))

# Historic and projected minimal pensions, inflation and assumed AHV-Fonds returns. ------

eck <-
  read_delim(par$in_eck) %>%
  filter(id == str_sub(par$in_eck, -11, - 5)) %>% 
  select(year = jahr, df = preis) %>% 
  complete(year = first(par$pint):last(par$pint)) %>% 
  fill(df) %>% 
  # Calculate discount factors for 'real' Finanzhaushalte.
  mutate(df = ifelse(year == first(par$pint), 0, df),
         df = cumprod(1 / (1 + df / 100)))

# Counterfactual (without Referenzaltererhöhung). -----------------------------------

# Take-up rates for different pension types by nationality and age in current year. ------

sh <-
  left_join(pop, pen_n, 
            by = c("year", "sex", "nat", "age"),
            relationship = "one-to-many") %>%
  # Select reference STATPOP year.
  filter(year == first(par$pint) - 2) %>%
  mutate(share = n / pop) %>%
  select(- year, - pop, - n)

# Attach reference age changes via parameter 'reftab'.
sh <- 
  bind_rows(par$pint %>% map(\(x) mutate(sh, year = x))) %>%
  relocate(year) %>%
  left_join(refage_table(par$reftab, par$pint), 
            by = c("year", "sex", "type"), relationship = "many-to-one") %>%
  tidyr::replace_na(list(dage = 0)) %>% 
  group_by(year, type, sex, nat) %>%
  arrange(year, type, sex, nat, age) %>% 
  # Shift take-up rates according to changes in reference ages. Implicit assumptions:
  # 1. birthdays are evenly distributed across months per year;
  # 2. take-up decisions shift in parallel to the reference age;
  # 3. distinction of partial early or late uptakes is negligible.
  mutate(
    share =
      ifelse(type == "alt",
             (1 - (dage - floor(dage))) * dplyr::lag(share, n =   first(floor(dage)), def = 0) +
                  (dage - floor(dage))  * dplyr::lag(share, n = first(ceiling(dage)), def = 0),
             share)) %>%
  ungroup()

# BFS population scenario. ---------------------------------------------------------------

scen <-
  read_delim(par$in_scen) %>% 
  select(scen = scenario, year = jahr, sex, nat, age = alt, val = bevendejahr) %>%
  mutate(scen = 
           fct_recode(scen, 
                      "A" = paste0("A_00_", par$scen),
                      "B" = paste0("B_00_", par$scen),
                      "C" = paste0("C_00_", par$scen)
           )
  ) %>%
  filter(scen %in% c("A", "B", "C")) %>%
  mutate(scen = fct_drop(scen), 
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>% 
  dplyr::summarize(val = sum(val), 
                   .by = c("scen", "year", "sex", "nat", "age")) %>% 
  # Only keep latest observed year for adjustment purposes.
  filter(year %in% par$pint) %>% 
  left_join(sh, 
            by = c("year", "sex", "nat", "age"), relationship = "many-to-many") %>%
  tidyr::replace_na(list(share = 0)) %>% 
  mutate(heads = val * share) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "year", "sex", "nat", "type", "age"))

# Adjust scenario-derived pension counts to the observed ones in the pension registry.
adj <-
  filter(scen, year == first(par$pint)) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "sex", "nat", "type", "age")) %>%
  left_join(filter(pen_n, year == first(par$pint)) %>%
              dplyr::summarize(n = sum(n), 
                               .by = c("sex", "nat", "type", "age")), 
            by = c("sex", "nat", "type", "age")) %>%
  mutate(adj = n - heads) %>%
  select(- heads, - n)

# Additive adjustment (normally multiplicative; exception in 2025 due to Ukrainian 
# fugitives). 
scen_c <- scen %>% 
  left_join(adj, by = c("scen", "sex", "nat", "type", "age")) %>%
  mutate(heads = pmax(0, heads + adj)) %>%
  select(- adj) %>% 
  dplyr::summarize(heads = sum(heads), .by = c("scen", "year", "sex", "type", "age")) %>%
  mutate(dom = "ch") %>%
  select(scen, year, sex, dom, type, age, n = heads) %>%
  mutate(m = NA) %>%
  filter(year > first(par$pint))


# Development with Referenzalter shift. ---------------------------------------------

# Take-up rates for different pension types by nationality and age in current year. ------

sh <-
  left_join(pop, pen_n, 
            by = c("year", "sex", "nat", "age"),
            relationship = "one-to-many") %>%
  # Select reference STATPOP year.
  filter(year == first(par$pint) - 2) %>%
  mutate(share = n / pop) %>%
  select(- year, - pop, - n)

# Implement one-year shift of Referenzalter.
par$reftab =
  tribble(
    ~ year, ~ sex,       ~ refage,
    2024,   "f",       64      ,
    
    # Reform AHV 21.
    2025,   "f",       64 + 1/4,
    2026,   "f",       64 + 2/4,
    2027,   "f",       64 + 3/4,
    2028,   "f",       64 + 4/4,
    2029,   "f",       66,
    
    2024,   "m",       65,
    2029,   "m",       66
  )

# Attach reference age changes via parameter 'reftab'.
sh <- 
  bind_rows(par$pint %>% map(\(x) mutate(sh, year = x))) %>%
  relocate(year) %>%
  left_join(refage_table(par$reftab, par$pint), 
            by = c("year", "sex", "type"), relationship = "many-to-one") %>%
  tidyr::replace_na(list(dage = 0)) %>% 
  group_by(year, type, sex, nat) %>%
  arrange(year, type, sex, nat, age) %>% 
  # Shift take-up rates according to changes in reference ages. Implicit assumptions:
  # 1. birthdays are evenly distributed across months per year;
  # 2. take-up decisions shift in parallel to the reference age;
  # 3. distinction of partial early or late uptakes is negligible.
  mutate(
    share =
      ifelse(type == "alt",
             (1 - (dage - floor(dage))) * dplyr::lag(share, n =   first(floor(dage)), def = 0) +
               (dage - floor(dage))  * dplyr::lag(share, n = first(ceiling(dage)), def = 0),
             share)) %>%
  ungroup()

# BFS population scenario. ---------------------------------------------------------------

scen <-
  read_delim(par$in_scen) %>% 
  select(scen = scenario, year = jahr, sex, nat, age = alt, val = bevendejahr) %>%
  mutate(scen = 
           fct_recode(scen, 
                      "A" = paste0("A_00_", par$scen),
                      "B" = paste0("B_00_", par$scen),
                      "C" = paste0("C_00_", par$scen)
           )
  ) %>%
  filter(scen %in% c("A", "B", "C")) %>%
  mutate(scen = fct_drop(scen), 
         # Consolidate higher age ranges for consistency with STATPOP.
         age = pmin(99, age)) %>% 
  dplyr::summarize(val = sum(val), 
                   .by = c("scen", "year", "sex", "nat", "age")) %>% 
  # Only keep latest observed year for adjustment purposes.
  filter(year %in% par$pint) %>% 
  left_join(sh, 
            by = c("year", "sex", "nat", "age"), relationship = "many-to-many") %>%
  tidyr::replace_na(list(share = 0)) %>% 
  mutate(heads = val * share) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "year", "sex", "nat", "type", "age"))

# Adjust scenario-derived pension counts to the observed ones in the pension registry.
adj <-
  filter(scen, year == first(par$pint)) %>%
  dplyr::summarize(heads = sum(heads), 
                   .by = c("scen", "sex", "nat", "type", "age")) %>%
  left_join(filter(pen_n, year == first(par$pint)) %>%
              dplyr::summarize(n = sum(n), 
                               .by = c("sex", "nat", "type", "age")), 
            by = c("sex", "nat", "type", "age")) %>%
  mutate(adj = n - heads) %>%
  select(- heads, - n)

# Additive adjustment (normally multiplicative; exception in 2025 due to Ukrainian 
# fugitives). 
scen_f <- scen %>% 
  left_join(adj, by = c("scen", "sex", "nat", "type", "age")) %>%
  mutate(heads = pmax(0, heads + adj)) %>%
  select(- adj) %>% 
  dplyr::summarize(heads = sum(heads), .by = c("scen", "year", "sex", "type", "age")) %>%
  mutate(dom = "ch") %>%
  select(scen, year, sex, dom, type, age, n = heads) %>%
  mutate(m = NA) %>%
  filter(year > first(par$pint))

# Visualize effect on number of pension entitlements in 2039.
VIZ_C <- scen_c %>% 
  filter(year %in% 2029:2040, type == "alt", age %in% 63:71, dom == "ch", scen == "A") %>% 
  dplyr::summarize(n = sum(n), .by = c("age", "year")) %>% 
  mutate(type = "C") %>% 
  select(type, age, year, n)

VIZ_F <- scen_f %>% 
  filter(year %in% 2029:2040, type == "alt", age %in% 63:71, dom == "ch", scen == "A") %>% 
  dplyr::summarize(n = sum(n), .by = c("age", "year")) %>% 
  mutate(type = "F", n = ifelse(age == 63, 0, n)) %>% 
  select(type, age, year, n) 

VIZ_T <- 
  bind_rows(VIZ_C, VIZ_F)

ggplot(VIZ_T %>% filter(year == 2039), aes(x = as.factor(age), y = n, fill = type)) +
  scale_fill_brewer(type = "qual") +
  geom_col(position = "dodge") +
  labs(x = "Lebensalter", y = "Anzahl Rentenansprüche", 
       title = "Vergleich projizierter Rentenansprüche nach Alter in 2039 (Inland)",
       subtitle = "Grün: Referenzalter 65, Violett: Referenzalter 66") +
  theme_grey(base_size = 14) +
  scale_y_continuous(labels = comma) +
  guides(fill = "none")

# Visualize differential effect of Referenzalter shift compared to 2029.
VIZ_29 <- VIZ_T %>% 
  filter(year == 2029, type == "F", age %in% 63:71) %>% 
  rename(n_ref = n) %>% 
  select(- year, - type)

VIZ_COMP <- VIZ_T %>% 
  filter(year != 2029, type == "F") %>% 
  left_join(VIZ_29, by = "age") %>% 
  na.omit() %>% 
  mutate(d_n = n_ref - n) %>% 
  dplyr::summarize(d_n = sum(d_n), .by = "year")


