---
title: "Visualizing indicators"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Visualizing indicators}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse   = TRUE,
  comment    = "#>",
  fig.width  = 6.5,
  fig.height = 4,
  fig.align  = "center"
)
```

```{r setup, message = FALSE}
library(DSIR)
library(dplyr)
library(ggplot2)
```

`gho_clean()` and `sdg_clean()` return data in the same 15-column
schema, so a handful of `ggplot2` recipes cover most routine indicator
charts. This vignette collects the recipes we reach for most often:
trend lines with confidence bands, sex/age breakdowns, forest-style
country comparisons, dumbbell progress plots, and progress tracking
with `aarr()`.

Chart-building helpers stay out of the package on purpose — each
recipe below is a few lines of plain `ggplot2` on top of the cleaned
schema, and your project code can adapt them freely.

## Example data

The recipes need data in the cleaned schema. A real workflow would
start from the APIs:

```{r, eval = FALSE}
ncd <- gho_data("NCDMORT3070", spatial_type = "country",
                area = c("PHL", "VNM", "KHM", "MNG", "FJI", "LAO")) |>
  gho_clean()
```

So that this vignette builds offline (and to keep the examples
reproducible), we simulate a small dataset with the same shape: an
NCD-mortality-style indicator for six Western Pacific countries,
2000--2023, with a sex breakdown in `dim1` and a confidence band in
`low` / `high`.

```{r simulate}
set.seed(416)

countries <- who_countries |>
  filter(iso3 %in% c("PHL", "VNM", "KHM", "MNG", "FJI", "LAO")) |>
  select(iso3, location_name = name_short)

grid <- expand.grid(
  iso3 = countries$iso3,
  year = 2000:2023,
  dim1 = c("SEX_BTSX", "SEX_MLE", "SEX_FMLE"),
  stringsAsFactors = FALSE
) |>
  as_tibble()

base_rate <- c(PHL = 30, VNM = 26, KHM = 28, MNG = 34, FJI = 31, LAO = 29)
decline   <- c(PHL = 0.010, VNM = 0.016, KHM = 0.012,
               MNG = 0.006, FJI = -0.002, LAO = 0.011)
sex_shift <- c(SEX_BTSX = 1, SEX_MLE = 1.25, SEX_FMLE = 0.8)

ncd <- grid |>
  left_join(countries, by = "iso3") |>
  mutate(
    value_num = base_rate[iso3] * (1 - decline[iso3])^(year - 2000) *
      sex_shift[dim1] * exp(rnorm(n(), sd = 0.01)),
    low       = value_num * 0.93,
    high      = value_num * 1.07,
    source    = "gho",
    id        = "NCDMORT3070",
    indicator = "Probability of premature death from NCDs (%)",
    location  = iso3,
    year      = as.integer(year),
    value     = as.character(round(value_num, 1)),
    series    = NA_character_,
    dim2      = NA_character_,
    dim3      = NA_character_
  ) |>
  select(source, id, indicator, location, iso3, location_name, year,
         value, value_num, low, high, series, dim1, dim2, dim3)

ncd
```

## Trend lines with a confidence band

`low` and `high` map directly onto `geom_ribbon()`. Filter to a single
stratum first — here the both-sexes rows (`dim1 == "SEX_BTSX"`).
`theme_dsi_facet()` is the facet-friendly variant of `theme_dsi()`.

```{r ribbon, fig.height = 4.5}
ncd |>
  filter(dim1 == "SEX_BTSX") |>
  ggplot(aes(year, value_num)) +
  geom_ribbon(aes(ymin = low, ymax = high), fill = "#0093D5", alpha = 0.2) +
  geom_line(color = "#0093D5", linewidth = 0.7) +
  facet_wrap(~ location_name) +
  labs(
    title    = "Premature NCD mortality is declining in most countries",
    subtitle = "Probability of premature death from NCDs, both sexes, 2000-2023",
    x = NULL, y = "%",
    caption  = "Simulated data for illustration."
  ) +
  theme_dsi_facet()
```

## Breakdowns from `dim1` / `dim2` / `dim3`

GHO breakdowns (sex, age group, ...) arrive in `dim1` / `dim2` /
`dim3`. Map the stratum to `colour` and keep palettes colorblind-safe
— `scale_color_viridis_d()` ships with `ggplot2`.

```{r dim-breakdown, fig.height = 4.5}
ncd |>
  filter(dim1 %in% c("SEX_MLE", "SEX_FMLE")) |>
  ggplot(aes(year, value_num, color = dim1)) +
  geom_line(linewidth = 0.7) +
  facet_wrap(~ location_name) +
  scale_color_viridis_d(end = 0.6,
                        labels = c(SEX_FMLE = "Female", SEX_MLE = "Male")) +
  labs(
    title    = "Men die prematurely from NCDs more often than women",
    x = NULL, y = "%", color = NULL,
    caption  = "Simulated data for illustration."
  ) +
  theme_dsi_facet()
```

## Forest-style country comparison

To compare countries at a single point in time with their uncertainty
intervals, take the latest year per country and use
`geom_pointrange()`. Ordering the axis by value makes the ranking
readable.

```{r forest}
latest <- ncd |>
  filter(dim1 == "SEX_BTSX") |>
  slice_max(year, by = iso3)

ggplot(latest, aes(value_num, reorder(location_name, value_num))) +
  geom_pointrange(aes(xmin = low, xmax = high),
                  color = "#0093D5", linewidth = 0.7) +
  labs(
    title   = "Premature NCD mortality, 2023",
    x = "%", y = NULL,
    caption = "Point estimate and 95% interval. Simulated data."
  ) +
  theme_dsi()
```

## Dumbbell progress plot

A dumbbell plot compares two time points per country. Build the wide
shape with two filtered slices and a join — no reshaping package
needed. The colors are from the colorblind-safe Okabe-Ito palette.

```{r dumbbell}
start <- ncd |>
  filter(dim1 == "SEX_BTSX", year == 2000) |>
  select(location_name, start = value_num)
end <- ncd |>
  filter(dim1 == "SEX_BTSX", year == 2023) |>
  select(location_name, end = value_num)

dumbbell <- start |>
  left_join(end, by = "location_name") |>
  mutate(
    direction     = ifelse(end < start, "Decline", "Increase"),
    location_name = reorder(location_name, start)
  )

ggplot(dumbbell) +
  geom_segment(aes(x = start, xend = end,
                   y = location_name, yend = location_name,
                   color = direction),
               linewidth = 1.5) +
  geom_point(aes(start, location_name), size = 3, color = "grey65") +
  geom_point(aes(end, location_name, color = direction), size = 3) +
  scale_color_manual(values = c(Decline = "#0072B2", Increase = "#D55E00")) +
  labs(
    title   = "Change in premature NCD mortality, 2000 to 2023",
    x = "%", y = NULL, color = NULL,
    caption = "Grey dot = 2000, colored dot = 2023. Simulated data."
  ) +
  theme_dsi()
```

## Tracking progress with `aarr()`

`aarr()` computes the average annual rate of reduction — the standard
WHO / UNICEF progress metric. It slots directly into a grouped
`summarise()` on the cleaned schema. Remember the sign convention:
positive means the indicator is declining.

```{r aarr}
progress <- ncd |>
  filter(dim1 == "SEX_BTSX") |>
  summarise(aarr = aarr(year, value_num), .by = c(iso3, location_name))

progress |>
  mutate(aarr_pct = round(100 * aarr, 1)) |>
  arrange(desc(aarr))
```

A back-of-envelope projection carries the latest value forward at the
observed AARR. This is a quick screening device, not a substitute for
the indicator-specific models behind official 2030 projections.

```{r projection}
latest |>
  left_join(progress, by = c("iso3", "location_name")) |>
  mutate(projected_2030 = value_num * (1 - aarr)^(2030 - year)) |>
  select(location_name, value_2023 = value_num, aarr, projected_2030) |>
  arrange(projected_2030)
```
