TL;DR

A trimmed version of this analysis (without code) is published in medium.

1 Background

In this analysis, we are using all the result data of all the published activities of Netherlands (NL) based publishers.

For those new to IATI Standard, please refer to Result page to understand the content of result element.

2 Setup

First we need to load the required packages.

library(glue)
library(RPostgreSQL)
library(tidyverse)
library(viridis)
library(lubridate)
library(jsonlite)
library(scales)
library(skimr)
library(gt)

cl.result.types <- read.csv(url("http://reference.iatistandard.org/203/codelists/downloads/clv1/codelist/ResultType.csv"), stringsAsFactors = FALSE)
cl.result.types.labels <- setNames(as.character(cl.result.types$name), cl.result.types$code)

cl.indicator.measures <- read.csv(url("http://reference.iatistandard.org/203/codelists/downloads/clv1/codelist/IndicatorMeasure.csv"))
cl.indicator.measures.labels <- setNames(as.character(cl.indicator.measures$name),
                                         cl.indicator.measures$code)

options(scipen = 999) # to show the numbers instead of scientific notations in ggplot2 graphs
options(warn = -1)

3 Data Preparation

3.1 Data Import

The following code imports necessary fields from the database. For simplicity, I saved the results from database to RDS file and loaded from RDS file for quick import.

knitr::opts_chunk$set(echo = TRUE)
organisations.all.rds <- here::here("aidstream","04-sessions","organisations.all.rds")
activities.all.rds <- here::here("aidstream","04-sessions","activities.all.rds")
results.all.rds <- here::here("aidstream","04-sessions","results.all.rds")

if(file.exists(organisations.all.rds) & 
   file.exists(activities.all.rds) &
   file.exists(results.all.rds)) {
  df.organizations.all <- readRDS(organisations.all.rds)
  df.activities.all <- readRDS(activities.all.rds)
  df.results.all <- readRDS(results.all.rds)
  } else {
  dbconn <- dbConnect(dbDriver("PostgreSQL"), 
                      host="localhost", 
                      dbname="aidstream_prod_local", 
                      user="postgres")
  fetch_query <- function(conn, query) {
    resultset <- dbSendQuery(conn, query)
    fetch(resultset, n=-1)
  }
  df.organizations.all <- fetch_query(dbconn, "select id, user_identifier, name, reporting_org, country,
                                  published_to_registry, created_at, updated_at 
                                  from organizations where country='NL';")
  df.activities.all <- fetch_query(dbconn, glue("select id, identifier, title, organization_id, 
                        activity_status, activity_date, recipient_country, recipient_region,
                        sector, published_to_registry,
                        created_at, updated_at 
                        from activity_data where
                        published_to_registry = 1 and 
                        organization_id in ({paste0(df.organizations.all$id, collapse=\",\")});"))
  df.results.all <- fetch_query(dbconn, glue("select id, activity_id, result, created_at
                           from activity_results where
                           activity_id in ({paste0(df.activities.all$id, collapse=\",\")});"))  
  saveRDS(df.organizations.all, file=organisations.all.rds)
  saveRDS(df.activities.all, file=activities.all.rds)
  saveRDS(df.results.all, file=results.all.rds)
  }
df.summary <- df.results.all %>% 
  left_join(df.activities.all, by=c("activity_id"="id")) %>% 
  left_join(df.organizations.all, by=c("organization_id"="id"))

Total Organisations: 79

Total unique Activities: 901

3.2 Preparing `result` element

First we need to convert JSON string to R list before we can further process the data.

There can be multiple indicators within a given result and multiple baseline and multiple period elements within an indicator. Lets extract the count of the indicators, baselines and periods mentioned inside each result element to see how many indicators, baselines and periods are reported under each result.

results.rds <- here::here("aidstream","04-sessions","results.rds")
if(file.exists(results.rds)) {
  df.results <- readRDS(results.rds)
  } else {
    df.results.all <- df.results.all %>% 
      mutate(result_json = map(result, fromJSON))
    get_indicator_count <- function(result_json) {
      return(nrow(result_json$indicator))
    }
    get_indicator_baseline_count <- function(result_json) {
      sum(map_int(result_json$indicator$baseline, nrow))
    }
    get_indicator_period_count <- function(result_json) {
      sum(map_int(result_json$indicator$period, nrow))
    }
    
    get_indicators <- function(result_json) {
      df <- data.frame(
        indicator_title = map_chr(result_json$indicator$title, function(.x) {
          .x$narrative[[1]]$narrative[1]
        }),
        indicator_measure = as.character(result_json$indicator$measure),
        indicator_ascending = as.character(result_json$indicator$ascending),
        stringsAsFactors = FALSE
      )
      df$baseline = result_json$indicator$baseline
      df$period = result_json$indicator$period
      df
    }
    
    df.results <- df.results.all %>% 
      transmute(id, activity_id,
                result_title = map_chr(result_json, function(.x) {
                  .x$title$narrative[[1]]$narrative[1]
                }),
                result_type = map_chr(result_json, "type"),
                indicators_count = map_int(result_json, get_indicator_count),
                indicators_baseline_count = map_int(result_json, get_indicator_baseline_count),
                indicators_period_count = map_int(result_json, get_indicator_period_count),
                indicator = map(result_json, get_indicators)
      )
    saveRDS(df.results, file = results.rds)
    }

3.2.1 `Result` quick summary

skim_with(integer = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL),
          character = list(min = NULL, max = NULL))
df.results %>% 
  select(starts_with("result")) %>% 
  skim() %>% 
  pander()

Skim summary statistics
n obs: 7237
n variables: 2

variable	missing	complete	n	empty	n_unique
result_title	5	7232	7237	0	3253
result_type	0	7237	7237	0	4

3.3 Preparing `indicator` sub-element

We create a new dataframe for indicators only.

df.indicators <- df.results %>% 
  unnest(indicator)

3.3.1 `indicator` quick summary

skim_with(integer = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL))
df.indicators %>% 
  select(indicator_measure, indicator_ascending, indicator_title,
         result_title, result_type) %>% 
  skim() %>% 
  pander()

Skim summary statistics
n obs: 11179
n variables: 5

variable	missing	complete	n	empty	n_unique
indicator_ascending	0	11179	11179	2432	5
indicator_measure	0	11179	11179	2	6
indicator_title	1	11178	11179	3	4723
result_title	5	11174	11179	0	3253
result_type	0	11179	11179	0	4

3.4 Preparing `baseline` and `period` sub-elements

Simiarly we prepare dataframes for periods and baselines.

get_baselines <- function(baseline_json) {
  df <- data.frame(
    baseline_year = as.character(baseline_json$year),
    baseline_value = as.character(baseline_json$value),
    baseline_comment = baseline_json$comment[[1]]$narrative[[1]]$narrative[1],
    stringsAsFactors = FALSE
  )
  df
}

get_periods <- function(period_json) {
  df <- data.frame(
    period_start = map_chr(period_json$period_start, "date"),
    period_end = map_chr(period_json$period_end, "date"),
    period_target = map_chr(period_json$target, "value"),
    period_actual = map_chr(period_json$actual, "value"),
    stringsAsFactors = FALSE
  )
  df
}

df.indicators <- df.indicators %>% 
  mutate(baseline_df = map(baseline, get_baselines),
         period_df = map(period, get_periods))

df.baselines <- df.indicators %>% 
  unnest(baseline_df)

df.periods <- df.indicators %>% 
  unnest(period_df)

df.periods$period_start <- parse_date_time(df.periods$period_start, "y-m-d", tz = "GMT")
df.periods$period_end <- parse_date_time(df.periods$period_end, "y-m-d", tz = "GMT")
df.periods$period_target <- as.numeric(df.periods$period_target)
df.periods$period_actual <- as.numeric(df.periods$period_actual)

3.4.1 `baseline` quick summary

skim_with(integer = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL))
df.baselines %>% 
  select(-ends_with("count"),-ends_with("id"), -baseline, -starts_with("period")) %>% 
  skim() %>% 
  pander(caption = "Baseline Summary")

Skim summary statistics
n obs: 11179
n variables: 8

variable	missing	complete	n	empty	n_unique
baseline_comment	9	11170	11179	9575	900
baseline_value	7	11172	11179	5365	450
baseline_year	0	11179	11179	5300	13
indicator_ascending	0	11179	11179	2432	5
indicator_measure	0	11179	11179	2	6
indicator_title	1	11178	11179	3	4723
result_title	5	11174	11179	0	3253
result_type	0	11179	11179	0	4

3.4.2 `period` quick summary

skim_with(numeric = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL, sd=NULL),
          POSIXct = list(median = NULL, min=NULL, max=NULL))
df.periods %>% 
  select(-ends_with("count"),-ends_with("id")) %>% 
  skim() %>% 
  pander()

Skim summary statistics
n obs: 15748
n variables: 9

variable	missing	complete	n	empty	n_unique
indicator_ascending	0	15748	15748	3062	5
indicator_measure	0	15748	15748	2	6
indicator_title	1	15747	15748	3	4723
result_title	8	15740	15748	0	3253
result_type	0	15748	15748	0	4

variable	missing	complete	n	mean
period_actual	5910	9838	15748	11856.91
period_target	6939	8809	15748	133389.73

variable	missing	complete	n	n_unique
period_end	458	15290	15748	260
period_start	458	15290	15748	236

Now we can proceed with the analysis.

4 Data analysis

We see in indicator summary that there are 11179 indicators mentioned in 7237 results. There’s an average of 1.54 indicators in a given result. Lets see which results have large number of indicators.

df.results %>% 
  group_by(indicators_count) %>% 
  count() %>% 
  ungroup() %>% 
  gt() %>% 
  cols_label(
     indicators_count = "Unique count of indicators in each <Result> element",
     n = "Count of Results containing the given number of indicators"
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Unique count of indicators in each <Result> element	Count of Results containing the given number of indicators
1	5543
2	664
3	490
4	289
5	94
6	82
7	16
8	30
9	6
10	4
11	12
12	4
30	1
32	2

We can see that there are 2 result elements, that contains as many as 32 indicators. That makes result very long and complex. But we see that majority of result elements contain one indicator only. Ideally a given result can have multiple relevant indicators.

There are activities which has number of result element with the same result metadata.

get_identifier <- function(identifier_text) {
  id_json <- fromJSON(identifier_text)
  id_json$iati_identifier_text
}
  
df.results %>% 
  group_by(result_title, activity_id) %>% 
  count() %>% 
  filter(n>1) %>% 
  arrange(desc(n)) %>% 
  ungroup() %>% 
  head(10) %>% 
  left_join(df.activities.all, by=c("activity_id"="id")) %>% 
  left_join(df.organizations.all, by=c("organization_id"="id")) %>% 
  mutate(iati_identifier = map_chr(identifier, get_identifier)) %>%
  select(name, iati_identifier, result_title, n) %>% 
  group_by(name) %>% 
  gt() %>% 
  cols_label(
    iati_identifier = "IATI Identifier",
    result_title = "Result Title",
    n = "Number of Result element in a given activity"
  ) %>% 
  cols_hide(
    columns = vars(activity_id)
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

IATI Identifier	Result Title	Number of Result element in a given activity
Solidaridad Netherlands
NL-KVK-51756811-SSEA-P1467	(Output) Stakeholder dynamics generate dialogue & debate on sustainability or land issues (land rights)	30
CHOICE for Youth and Sexuality
NL-KVK-32108345-NLBB26-8842-ID	Goal 2. Adolescent girls and boys are meaningfully engaged to claim their SHR rights	27
NL-KVK-32108345-NLBB26-8845-MZ	Goal 2. Adolescent girls and boys are meaningfully engaged to claim their SHR rights	22
Rutgers
NL-KVK-41193594-GUS7140ET	Increased utilisation of comprehensive SRHR information and education by all people	27
NL-KVK-41193594-GUS7130ID	Increased utilisation of comprehensive SRHR information and education by all people	23
NL-KVK-41193594-GUS7135UG	Increased utilisation of comprehensive SRHR information and education by all people	23
NL-KVK-41193594-GUS7145KE	Increased utilisation of comprehensive SRHR information and education by all people	23
NL-KVK-41193594-GUS7150MW	Increased utilisation of comprehensive SRHR information and education by all people	23
NL-KVK-41193594-GUS7075GH-GUS7050GH	Increased utilisation of comprehensive SRHR information and education by all people	23
NL-KVK-41193594-GUS7120PK	Increased utilisation of comprehensive SRHR information and education by all people	21

4.1 Number of `result` element based on `result/type`

df.results %>% 
  group_by(result_type) %>% 
  count() %>% 
  select(result_type, n) %>% 
  ggplot(aes(x=result_type, y=n)) +
  geom_bar(stat = "identity", fill="gray") +
  coord_flip() +
  scale_x_discrete(labels = cl.result.types.labels) +
  geom_text(aes(label=n), hjust=-0.1, size=3) +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  labs(x="Result Type", y="Count of <Result> Element")

We see that most of the result are Output-based and Outcome-based in the published activities.

4.2 `indicator` subelement count based on `indicator/measure`

indicator/measure defines the unit of measure in which the value is reported in the given indicator i.e. in baseline/value and values for period/target/value and period/actual/value.

df.indicators %>% 
  group_by(indicator_measure) %>% 
  count() %>% 
  ungroup() %>% 
  mutate(label = cl.indicator.measures.labels[indicator_measure]) %>% 
  select(indicator_measure, label, n) %>% 
  mutate(label = replace_na(label,"-")) %>% 
  gt() %>% 
  cols_label(
    label = "Indicator Measure",
    n = "Count of <indicator> sub-element"
  ) %>% 
  cols_hide(
    columns = vars(indicator_measure)
  ) %>% 
  tab_footnote(
    footnote = "Missing indicator-measure in <indicator>",
    locations = cells_data(
      columns = vars(label),
      rows = label == "-"
    )
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Indicator Measure	Count of <indicator> sub-element
-¹	2
Unit	9586
Percentage	1559
Nominal	10
Ordinal	7
Qualitative	15
¹ Missing indicator-measure in <indicator>

We see that most of the indicator/measure are of quantitative nature i.e. Unit and Percentage. There are 5 defined codes for indicator/measure. There’s missing value for indicator/measure as well.

4.3 `result/type` vs `indicator/measure`

Lets see what indicator/measures are used for each result/type.

df.indicators %>% 
  group_by(result_type, indicator_measure) %>% 
  count() %>% 
  spread(indicator_measure, n, fill=0) %>% 
  gather(indicator_measure, n, -result_type) %>% 
  ggplot(aes(x=result_type, y=indicator_measure)) +
  geom_tile(aes(fill=n), colour = "gray") +
  geom_text(aes(label=n)) +
  scale_x_discrete(labels = cl.result.types.labels) +
  scale_y_discrete(labels = c(cl.indicator.measures.labels, V1="Missing*")) +
  scale_fill_gradient(low = "white", high = "steelblue") +
  labs(x="Result Type", y="Indicator Measure", fill="Count of Indicators")

We see that bulk of quantitative measures (Unit and Percentage) are reported for Outcome and Outcome. So we can focus further analysis to quantitative measures only.

4.4 Exploring `baseline` element

4.4.1 Number of `baseline`s reported by `baseline/year`

Now we dive into baseline sub-element of indicator. From the baseline summary above, we see that majority of baseline/year values are empty.

df.baselines %>% 
  group_by(baseline_year) %>% 
  count() %>% 
  ungroup() %>% 
  gt() %>% 
  cols_label(
    baseline_year = "Baseline Year",
    n = "Number of baselines reported for that year"
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Baseline Year	Number of baselines reported for that year
	5300
2010	5
2011	7
2012	37
2013	180
2014	35
2015	258
2016	3581
2017	1244
2018	474
2019	19
2020	38
217	1

We see that there are few issues with the year as well like 217. Lets identify which organisation published 217 year in their result/indicator/baseline data.

df.baselines %>% 
  filter(indicator_measure==1 & baseline_year == 217) %>% 
  select(activity_id) %>% 
  left_join(df.activities.all, by=c("activity_id"="id")) %>% 
  left_join(df.organizations.all, by=c("organization_id"="id")) %>% 
  mutate(iati_identifier = fromJSON(identifier)$iati_identifier_text) %>% 
  select(name, iati_identifier) %>% 
  gt() %>% 
  cols_label(
    name = "Organisation",
    iati_identifier = "IATI Identifier"
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Organisation	IATI Identifier
War Child Holland	NL-KVK-41215393-PJ3516

http://preview.iatistandard.org/index.php?url=https%3A//aidstream.org/files/xml/warchildholland-activities.xml shows 217 in the data.

4.4.2 Exploring Unit Quantitative `baseline/value`s

For simplicity, we will explore the baseline/value for unit indicator/measure only. As per standard, baseline/value is mandatory for quantiative indicator/measure i.e. for both Unit[1] and Percentage[2].

Lets see how many baseline/value are present.

df.baselines %>% 
  filter(indicator_measure==1) %>% 
  group_by(baseline_value) %>% 
  count() %>% 
  mutate(baseline_value_present = ifelse(trimws(baseline_value)=="" | is.na(baseline_value),FALSE,TRUE)) %>% 
  group_by(baseline_value_present) %>% 
  summarize(n=sum(n)) %>% 
  ungroup() %>% 
  mutate(Percentage = round(100*n/sum(n), digits=2)) %>% 
  gt() %>% 
  cols_label(
    baseline_value_present = "Baseline value present?",
    n = "Count of baseline value",
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Baseline value present?	Count of baseline value	Percentage
FALSE	4920	51.32
TRUE	4666	48.68

4.4.3 `baseline/@value` type

We see only 48% of the reported baseline/value are present for Unit measure. Now we need to see if all those reported values are numeric or not. These values should be numeric as per standard.

The @value must be included for non-qualitative measures. The @value should be a valid number for all non-qualitative measures.

df.baselines %>% 
  filter(indicator_measure==1) %>% 
  group_by(baseline_value) %>% 
  count() %>% 
  filter(baseline_value !="") %>% 
  mutate(baseline_value_number = as.numeric(baseline_value)) %>% 
  mutate(baseline_value_type = ifelse(is.na(baseline_value_number),"Text","Number")) %>% 
  group_by(baseline_value_type) %>% 
  summarise(n=sum(n)) %>% 
  ungroup() %>% 
  gt() %>% 
  cols_label(
    baseline_value_type = "Baseline Value Type",
    n = "Count of Baseline Value",
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Baseline Value Type	Count of Baseline Value
Number	4635
Text	31

We see that there are 31 instances where baseline/value are mentioned in textual form.

4.4.3.1 `baseline/value` vs `baseline/year`

Lets plot the baseline/value against baseline/year and for Unit quantitative measure only i.e. indicator-measure = 1[Unit].

df.baselines %>% 
  filter(indicator_measure==1 & baseline_year != 217) %>% 
  group_by(baseline_value, baseline_year, result_type) %>% 
  count() %>% 
  filter(baseline_value !="") %>% 
  mutate(baseline_value_number = as.numeric(baseline_value)) %>% 
  mutate(baseline_value_type = ifelse(is.na(baseline_value_number),"Text","Number")) %>% 
  filter(baseline_value_type == "Number") %>% 
  ungroup() %>% 
  mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>% 
  ggplot(aes(x=baseline_year,y=baseline_value_number, color=result_type)) +
  geom_point(alpha = 0.5) +
  scale_y_log10(label=comma) +
  labs(x="Baseline year", y = "Baseline value", color="Result Type")

The log10 scale was used as the baseline/value ranges from 0 to 10,000,000. It’s not clear whether we can have future baseline/value, but apparently there are baseline/value for 2019 and 2020.

df.baselines %>% 
  filter(indicator_measure==1) %>% 
  group_by(baseline_value, baseline_year, result_type) %>% 
  count() %>% 
  filter(baseline_value !="") %>% 
  mutate(baseline_value_number = as.numeric(baseline_value)) %>% 
  mutate(baseline_value_type = ifelse(is.na(baseline_value_number),"Text","Number")) %>% 
  filter(baseline_value_type == "Number") %>% 
  ungroup() %>% 
  mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>% 
  ggplot(aes(x=baseline_year,y=baseline_value_number)) +
  geom_boxplot(alpha=0.5) +
  scale_y_log10(label=comma) +
  stat_summary(fun.y=mean,shape=4,color='red',geom='point') +
  labs(x="Baseline year", y = "Baseline value", color="Result Type")

From 2014 to 2017, medium (represented by and mean (represented by red x) of baseline/@values lie between 10 and 1000.

4.4.3.2 High value `baseline/value` result

There are few baseline/@value beyond 1,000,000. Lets list those values. It would help the organisations to verify and correct those large numbers if those are typos.

df.baselines %>% 
  filter(indicator_measure==1) %>% 
  mutate(baseline_value_number = as.numeric(baseline_value)) %>% 
  filter(baseline_value_number >1000000) %>% 
  select(result_title, indicator_title, baseline_comment, baseline_value) %>% 
  mutate(baseline_value = as.numeric(baseline_value)) %>% 
  arrange(desc(baseline_value)) %>% 
  gt() %>% 
  cols_label(
    result_title = "Result Title",
    indicator_title = "Indicator Title",
    baseline_comment = "Baseline Comment",
    baseline_value = "Baseline Value"
  ) %>% 
  fmt_number(
    columns = vars(baseline_value),
    decimals = 0
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Result Title	Indicator Title	Baseline Comment	Baseline Value
Reach: Membership of Trade Unions directly supported(OP3)	Membership male and female (DO31 + DO32)	2017 is start year of working with ToCs in new sectors and a new geographical focus of Mondiaal FNV	8,830,024
Number of people about which the study results are valid: 6,072,449	Number of people about which the study results are valid: 6,072,449		6,072,449
Number of people about which the study results are valid: 4,600,000	Number of people about which the study results are valid: 4,600,000		4,600,000
Outcome- Program Progress (MYP 2016-2020)	Area where trained practices are applied either on farmland or in protection area (hectares)		3,500,000
Output - Program Progress (MYP 2016-2020)	Volume of sustainably produced production		3,000,000
Output- Program Progress (MYP 2016-2020)	Volume of sustainably produced production (MT)	FEFAC Guidelines, RTRS or equivalent	2,000,000
I01. Access to safe water	I1 - Number of people having access to safe water	Baseline amount shows the number of people who already have access to safe water before the start of the project. Tha target value only shows the additional number of people targeted.	1,125,845

These could be correct numbers, but it would be good to verify the big numbers at least once.

4.5 Exploring `period` element

df.periods %>% 
  filter(indicator_measure != "") %>% 
  mutate(indicator_measure = paste0(indicator_measure, "-", cl.indicator.measures.labels[indicator_measure])) %>% 
  mutate(result_type = paste0(result_type, "-", cl.result.types.labels[result_type])) %>% 
  janitor::tabyl(result_type, indicator_measure) %>% 
  janitor::adorn_totals(c("col","row")) %>% 
  gt() %>% 
  cols_label(
    result_type =  "Result Type"
  )

Result Type	1-Unit	2-Percentage	3-Nominal	4-Ordinal	5-Qualitative	Total
1-Output	5511	731	1	4	6	6253
2-Outcome	7230	1938	8	3	9	9188
3-Impact	89	106	1	0	0	196
9-Other	107	2	0	0	0	109
Total	12937	2777	10	7	15	15746

  # tab_spanner(
  #   label = "Indicator Measure",
  #   columns = vars(`1-Nominal`, Ordinal, Percentage, Qualitative, Unit)
  # )

4.5.1 `period-start` vs `period-end`

Lets create an scatter plot of all the points against period-start and period-end dates.

df.periods %>% 
  mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>% 
  ggplot(aes(x=period_start, y=period_end)) +
  geom_point(alpha = 0.4, aes(color=result_type)) +
  labs(x="Period Start Date", y="Period End Date",color="Result Type")

We see that there are certain period which has very long duration, mostly those periods, which end after 2030. See top-left corner of the above chart.

Lets list those periods.

4.5.1.1 List of long period duration `result`

df.periods %>% 
  filter(year(period_end) > 2030) %>% 
  left_join(df.activities.all, by=c("activity_id"="id")) %>% 
  left_join(df.organizations.all, by=c("organization_id"="id")) %>% 
  select(name, identifier, result_title, period_start, period_end) %>% 
  mutate(iati_identifier = map_chr(identifier, get_identifier)) %>% 
  group_by(name, iati_identifier) %>% 
  gt() %>% 
  cols_label(
    result_title = "Result Title",
    period_start = "Period Start Date",
    period_end = "Period End Date"
  ) %>% 
  cols_hide(
    columns = "identifier"
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Result Title	Period Start Date	Period End Date
Stichting CARE Nederland - NL-KVK-41158230-NLRCGT0001
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM	2001-01-16	2031-12-20
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM	2001-01-16	2031-12-16
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM	2001-07-17	2031-12-17
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM	2001-01-16	2031-12-20
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM	2001-07-17	2031-12-17
Empowerment of women in the IRM approach	2001-01-16	2031-12-20
Empowerment of women in the IRM approach	2001-07-17	2031-12-17
Empowerment of women in the IRM approach	2001-01-16	2031-12-20
Empowerment of women in the IRM approach	2001-01-16	2031-12-16
Empowerment of women in the IRM approach	2001-07-17	2031-12-17
Stichting CARE Nederland - NL-KVK-41158230-NLRCET0001
Improving food security through the management of agriculture, livestock and rangeland practices	2001-01-16	2031-12-20
Improving food security through the management of agriculture, livestock and rangeland practices	2001-01-16	2031-12-16
Improving food security through the management of agriculture, livestock and rangeland practices	2001-07-17	2031-12-17
Investment strategies are based on the Environmental Impact Assessment	2001-01-16	2031-12-20
Investment strategies are based on the Environmental Impact Assessment	2001-01-16	2031-12-16
Investment strategies are based on the Environmental Impact Assessment	2001-07-17	2031-12-17
Stichting CARE Nederland - NL-KVK-41158230-NLRCID0001
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-01-16	2031-12-20
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-01-16	2031-12-16
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-07-17	2031-12-17
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-01-16	2031-12-20
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-01-16	2031-12-16
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-07-17	2031-12-17
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-01-16	2031-12-20
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-01-16	2031-12-16
Gender-sensitive community-based development plans incorporate and fund IRM initiatives	2001-07-17	2031-12-17
Stichting CARE Nederland - NL-KVK-41158230-NLRCPH0002
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-01-16	2031-12-20
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-01-16	2031-12-16
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-07-17	2031-12-17
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-01-16	2031-12-20
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-01-16	2031-12-16
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-07-17	2031-12-17
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-01-16	2031-12-20
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-01-16	2031-12-16
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines	2001-07-17	2031-12-17
Stichting CARE Nederland - NL-KVK-41158230-NLRCUG0001
Integrated risk management approach is mainstreamed in development policies re. climate change	2001-01-16	2031-12-20
Integrated risk management approach is mainstreamed in development policies re. climate change	2001-01-16	2031-12-16
Integrated risk management approach is mainstreamed in development policies re. climate change	2001-07-17	2031-12-17

It’s only “Stichting CARE Nederland” that seems to have longer periods duration. We can verify those numbers from the XML file as well. It’s worth to verify these dates at least once to ensure that we are projecting the right data to the audience at large.

4.5.2 `period/target` vs `period/actual`

Lets see how many target and actual values are present in period.

df.periods %>% 
  mutate(is_target_present = !is.na(period_target),
         is_actual_present = !is.na(period_actual)) %>% 
  group_by(is_target_present, is_actual_present) %>% 
  count() %>% 
  ungroup() %>% 
  spread(is_actual_present, n) %>% 
  gt() %>% 
  tab_spanner(
    label = "Is Actual Present?",
    columns = vars(FALSE, TRUE)
  ) %>% 
  cols_label(
    is_target_present = "Is Target Present?"
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Is Target Present?	Is Actual Present?
Is Target Present?	FALSE	TRUE
FALSE	3011	3928
TRUE	2899	5910

Lets narrow our analysis to unit quantitative measure only, i.e. indicator/measure = 1.

df.periods %>% 
  filter(indicator_measure==1) %>% 
  mutate(is_target_present = !is.na(period_target),
         is_actual_present = !is.na(period_actual)) %>% 
  group_by(is_target_present, is_actual_present) %>% 
  count() %>% 
  spread(is_actual_present, n) %>% 
  ungroup() %>% 
  gt() %>% 
  tab_spanner(
    label = "Is Actual Present?",
    columns = vars(FALSE, TRUE)
  ) %>% 
  cols_label(
    is_target_present = "Is Target Present?"
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Is Target Present?	Is Actual Present?
Is Target Present?	FALSE	TRUE
FALSE	2590	3069
TRUE	2320	4958

Now lets plot 4958 points of actual and target values in scatterplot.

df.periods %>% 
  filter(indicator_measure==1) %>% 
  mutate(is_target_present = !is.na(period_target),
         is_actual_present = !is.na(period_actual)) %>% 
  filter(is_target_present == TRUE & is_actual_present == TRUE) %>% 
  mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>% 
  ggplot(aes(x=as.numeric(period_target), y=as.numeric(period_actual), color=result_type)) +
  geom_point(alpha=0.5) +
  geom_rug(alpha=0.1) +
  scale_y_log10(oob=squish_infinite) +
  scale_x_log10(oob=squish_infinite) +
  labs(x = "Target value", y="Actual value",color="Result Type")

There’s lot of points.

Lets see these points separately for each result/type

df.periods %>% 
  filter(indicator_measure==1) %>% 
  mutate(is_target_present = !is.na(period_target),
         is_actual_present = !is.na(period_actual)) %>% 
  filter(is_target_present == TRUE & is_actual_present == TRUE) %>% 
  mutate(period_target_number = as.numeric(period_target), 
         period_actual_number = as.numeric(period_actual)) %>% 
  filter(period_target_number>0 & period_actual_number>0) %>% 
  mutate(percent_change = (period_actual_number-period_target_number)/period_target_number) %>% 
  mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>% 
  ggplot(aes(x=period_target_number, y=period_actual_number, color=result_type)) +
  geom_point(alpha=0.3) +
  scale_y_log10(oob=squish_infinite) +
  scale_x_log10(oob=squish_infinite) +
  facet_grid(result_type ~ .) +
  labs(x = "Target value", y="Actual value") +
  theme(
    legend.position = "none"
  )

Here we see that target and actual values are heavily mentioned for outputs and outcomes. It would be good to

4.5.3 Percent change between `target` and `actual` values

df.periods %>% 
  filter(indicator_measure==1) %>% 
  mutate(is_target_present = !is.na(period_target),
         is_actual_present = !is.na(period_actual)) %>% 
  filter(is_target_present == TRUE & is_actual_present == TRUE) %>% 
  mutate(period_target_number = as.numeric(period_target), 
         period_actual_number = as.numeric(period_actual)) %>% 
  filter(period_target_number>0 & period_actual_number>0) %>% 
  mutate(percent_change = 100*(period_actual_number-period_target_number)/period_target_number) %>% 
  mutate(percent_change_bin = ifelse(percent_change >=1000, "more than 1000% increase",
                                     ifelse(percent_change >=100, 
                                            "100% to 1000% increase",
                                            ifelse(percent_change <=0, 
                                                   "Negative change", "0 to 100% in increase")))
         ) %>% 
  ggplot(aes(x=period_target_number, y=period_actual_number, color = percent_change_bin)) +
  geom_point(alpha=0.5) +
  scale_y_log10(oob=squish_infinite) +
  scale_x_log10(oob=squish_infinite) +
  labs(x = "Target value", y = "Actual value", color = "Percent Change")

4.5.4 List of `target` and `actual` with more than 1000% increase

df.periods %>% 
  filter(indicator_measure==1) %>% 
  left_join(df.activities.all, by=c("activity_id"="id")) %>% 
  left_join(df.organizations.all, by=c("organization_id"="id")) %>% 
  mutate(is_target_present = !is.na(period_target),
         is_actual_present = !is.na(period_actual)) %>% 
  filter(is_target_present == TRUE & is_actual_present == TRUE) %>% 
  mutate(period_target_number = as.numeric(period_target), 
         period_actual_number = as.numeric(period_actual)) %>% 
  filter(period_target_number>0 & period_actual_number>0) %>% 
  mutate(percent_change = 100*(period_actual_number-period_target_number)/period_target_number) %>% 
  filter(abs(percent_change) >= 1000) %>% 
  select(name, result_title, period_target, period_actual, percent_change) %>% 
  group_by(name) %>% 
  gt() %>% 
  cols_label(
    result_title = "Result Title",
    period_target = "Target Value",
    period_actual = "Actual Value",
    percent_change = "Percent Increase"
  ) %>% 
  fmt_number(
    columns = vars(percent_change),
    decimals = 2
  ) %>% 
  fmt_number(
    columns = vars(period_target, period_actual),
    decimals = 0
  ) %>% 
  tab_style(
    style = cells_styles(
      text_size = px(12)
    ),
    locations = cells_data(
      columns = everything()
    )
  )

Result Title	Target Value	Actual Value	Percent Increase
ZOA
Output 1.1	100	1,530	1,430.00
Output 1.1	100	1,530	1,430.00
W.R.1	600	10,432	1,638.67
Stichting Vluchteling
P.R3 Protection concerns are analysed and identified and awareness-raising and advocacy interventions implemented	40	742	1,755.00
S.SO2 Provide basic non food items for general use and winter to vulnerable conflict affected people, IDPs and returnees.	7	6,512	92,928.57
FS.SO1 - Provide emergency assistance to enable vulnerable conflict affected people, IDPs and returnees to meet their basic food needs.	4	3,510	83,471.43
P.R1 Improved protection mechanisms in place for IDPs and host communities	800	17,966	2,145.75
Objective 2. Reinforced Protection for vulnerable IDPs, returneesand conflict affected host communities, especially those most at risk for GBV	1,232	23,752	1,827.92
Healthnettpo
3.1.1 Women are agents of change and have increased their capacities, abilities, knowledge, and resources for significant participation in the prevention and resolution of conflicts and in the construction, support, and recuperation of peace.	10	136	1,260.00
1.1. Increase in capacities, abilities, knowledge, and resources for the significant participation of women in the development of the security and justice sector.	3	59	1,866.67
Save the Children Netherlands
P.R2. Girls and boys coping mechanisms and resilience are strengthened and severely affected Children receive appropriate support	620	8,070	1,201.61
Output 5: Increased utilization of SRHR services that are responsive to the needs of young people	3	108	3,500.00
Rutgers
Increased utilisation of comprehensive SRHR information and education by all people	770	56,312	7,213.25
Increased utilisation of comprehensive SRHR information and education by all people	770	56,312	7,213.25
Result Area 4: OUTPUT - Targeted policy- and law-makers, donors or development partners are reached through advocacy actions for the engagement of (young) men in GBV prevention and gender justice	5	85	1,600.00
Result Area 4: OUTPUT - Targeted networks, movements and organisations collaborate on engaging (young) men in GBV prevention and gender justice	3	48	1,500.00
Pathway 1 Community members and gate-keepers have changed attitudes and take action to prevent CM FGC and TP	10	1,210	12,000.00
Pathway 1 Community members and gate-keepers have changed attitudes and take action to prevent CM FGC and TP	1	1,198	119,700.00
Pathway 1 Community members and gate-keepers have changed attitudes and take action to prevent CM FGC and TP	10	6,200	61,900.00
Pathway 3 Adolescent girls and boys take informed action on their sexual health	4	1,224	30,500.00
Result Area 4: OUTPUT - Targeted networks, movements and organisations collaborate on engaging (young) men and women in GBV prevention and gender justice	10	123	1,130.00
Cordaid
Water and Sanitation & Hygiene	1,200	19,360	1,513.33
FSL objective 1: Immediate WASH needs of conflict affected populations are met whilst protecting their safety and dignity	200	3,227	1,513.50
WASH	23,400	1,004,026	4,190.71
Defence for Children - ECPAT the Netherlands
# of community members (incl. community, religious and traditional leaders) participating in awareness raising sessions organized by alliance local partners	540	709,000	131,196.30
Pathway 3: Government	2	71	3,450.00
Hague Institute for the Innovation of Law
Number of people about which the study results are valid: 8676815	6,000	8,676,815	144,513.58
Plan Nederland
W.R3.3 Distribute hygiene kits to women ( gender appropriate)	2,000	22,000	1,000.00
Solidaridad Netherlands
(Output) Farmers/workers/miners are trained in good practices and administrative & entrepreneurial skills	13	163	1,153.85
(Output) SME supported through training, access to funds, business development advice and coaching	5	130	2,500.00
dance4life
Increased utilisation of comprehensive SRHR education and information by all people	126	2,557	1,929.37
Improve socio cultural, political and legal environment for young people's SRHR	4,300	3,003,500	69,748.84
Improve socio cultural, political and legal environment for young people's SRHR	102	172,275	168,797.06
COC Nederland
Empowered LGBTI Individuals	500	13,072	2,514.40
Empowered LGBTI Individuals	20	803	3,915.00
Empowered LGBTI Individuals	20	2,392	11,860.00
Strong LGBTI Communities.	20	578	2,790.00
Empowered LGBTI Individuals	20	10,260	51,200.00
CHOICE for Youth and Sexuality
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR	1,500	39,039	2,502.60
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR	500	524,621	104,824.20
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR	500	874,020	174,704.00
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR	500	215,520	43,004.00
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR	500	650,000	129,900.00
Goal 5. Policy makers and duty bearers develop and implement laws on CM and FGM-C	5	98	1,860.00
Health Action International
Output 2: Strengthened HAI/GSI	10,000	669,353	6,593.53
World Vision Netherlands
R.W1 Water and Sanitation services available for all the population	8,000	93,000	1,062.50
N.R1 Increase community based management of acute malnutrition for the most vulnerable and at risk	50	1,000	1,900.00
N.R1 Increase community based management of acute malnutrition for the most vulnerable and at risk	30	1,000	3,233.33
N.R1 Increase community based management of acute malnutrition for the most vulnerable and at risk	30	1,000	3,233.33
ACC-R1 Participation	9	250	2,677.78
Support Trust for Africa Development
Support community mobilization on gender equality through development and distribution of IEC materials, campaigns, events, radio messaging, etc.	9	100	1,011.11
Stichting CARE Nederland
WSH.R1 Restore or maintain sustainable water and sanitation systems to improve public health and resilience	10,500	202,203	1,825.74
War Child Holland
# unique project participants (war child holland and partners)	30	437	1,356.67
Les initiatives des jeunes sont renforcé à travers le renforcement de leurs capacités entreprenariales et austres compétences necessaires pour leurs developpement	42	538	1,180.95

Thank you for reading. Any questions or suggestions? Please ask me @twitter

AidStream IATI Result Element Analysis for Netherland based Organisations

Anjesh

2019-03-22

1 Background

2 Setup

3 Data Preparation

3.1 Data Import

3.2 Preparing `result` element

3.2.1 `Result` quick summary

3.3 Preparing `indicator` sub-element

3.3.1 `indicator` quick summary

3.4 Preparing `baseline` and `period` sub-elements

3.4.1 `baseline` quick summary

3.4.2 `period` quick summary

4 Data analysis

4.1 Number of `result` element based on `result/type`

4.2 `indicator` subelement count based on `indicator/measure`

4.3 `result/type` vs `indicator/measure`

4.4 Exploring `baseline` element

4.4.1 Number of `baseline`s reported by `baseline/year`

4.4.2 Exploring Unit Quantitative `baseline/value`s

4.4.3 `baseline/@value` type

4.4.3.1 `baseline/value` vs `baseline/year`

4.4.3.2 High value `baseline/value` result

4.5 Exploring `period` element

4.5.1 `period-start` vs `period-end`

4.5.1.1 List of long period duration `result`

4.5.2 `period/target` vs `period/actual`

4.5.3 Percent change between `target` and `actual` values

4.5.4 List of `target` and `actual` with more than 1000% increase

AidStream IATI Result Element Analysis for Netherland based Organisations

Anjesh

2019-03-22

1 Background

2 Setup

3 Data Preparation

3.1 Data Import

3.2 Preparing result element

3.2.1 Result quick summary

3.3 Preparing indicator sub-element

3.3.1 indicator quick summary

3.4 Preparing baseline and period sub-elements

3.4.1 baseline quick summary

3.4.2 period quick summary

4 Data analysis

4.1 Number of result element based on result/type

4.2 indicator subelement count based on indicator/measure

4.3 result/type vs indicator/measure

4.4 Exploring baseline element

4.4.1 Number of baselines reported by baseline/year

4.4.2 Exploring Unit Quantitative baseline/values

4.4.3 baseline/@value type

4.4.3.1 baseline/value vs baseline/year

4.4.3.2 High value baseline/value result

4.5 Exploring period element

4.5.1 period-start vs period-end

4.5.1.1 List of long period duration result

4.5.2 period/target vs period/actual

4.5.3 Percent change between target and actual values

4.5.4 List of target and actual with more than 1000% increase

3.2 Preparing `result` element

3.2.1 `Result` quick summary

3.3 Preparing `indicator` sub-element

3.3.1 `indicator` quick summary

3.4 Preparing `baseline` and `period` sub-elements

3.4.1 `baseline` quick summary

3.4.2 `period` quick summary

4.1 Number of `result` element based on `result/type`

4.2 `indicator` subelement count based on `indicator/measure`

4.3 `result/type` vs `indicator/measure`

4.4 Exploring `baseline` element

4.4.1 Number of `baseline`s reported by `baseline/year`

4.4.2 Exploring Unit Quantitative `baseline/value`s

4.4.3 `baseline/@value` type

4.4.3.1 `baseline/value` vs `baseline/year`

4.4.3.2 High value `baseline/value` result

4.5 Exploring `period` element

4.5.1 `period-start` vs `period-end`

4.5.1.1 List of long period duration `result`

4.5.2 `period/target` vs `period/actual`

4.5.3 Percent change between `target` and `actual` values

4.5.4 List of `target` and `actual` with more than 1000% increase