TL;DR
A trimmed version of this analysis (without code) is published in medium.
In this analysis, we are using all the result
data of all the published activities of Netherlands (NL) based publishers.
For those new to IATI Standard, please refer to Result page to understand the content of result
element.
First we need to load the required packages.
library(glue)
library(RPostgreSQL)
library(tidyverse)
library(viridis)
library(lubridate)
library(jsonlite)
library(scales)
library(skimr)
library(gt)
cl.result.types <- read.csv(url("http://reference.iatistandard.org/203/codelists/downloads/clv1/codelist/ResultType.csv"), stringsAsFactors = FALSE)
cl.result.types.labels <- setNames(as.character(cl.result.types$name), cl.result.types$code)
cl.indicator.measures <- read.csv(url("http://reference.iatistandard.org/203/codelists/downloads/clv1/codelist/IndicatorMeasure.csv"))
cl.indicator.measures.labels <- setNames(as.character(cl.indicator.measures$name),
cl.indicator.measures$code)
options(scipen = 999) # to show the numbers instead of scientific notations in ggplot2 graphs
options(warn = -1)
The following code imports necessary fields from the database. For simplicity, I saved the results from database to RDS file and loaded from RDS file for quick import.
knitr::opts_chunk$set(echo = TRUE)
organisations.all.rds <- here::here("aidstream","04-sessions","organisations.all.rds")
activities.all.rds <- here::here("aidstream","04-sessions","activities.all.rds")
results.all.rds <- here::here("aidstream","04-sessions","results.all.rds")
if(file.exists(organisations.all.rds) &
file.exists(activities.all.rds) &
file.exists(results.all.rds)) {
df.organizations.all <- readRDS(organisations.all.rds)
df.activities.all <- readRDS(activities.all.rds)
df.results.all <- readRDS(results.all.rds)
} else {
dbconn <- dbConnect(dbDriver("PostgreSQL"),
host="localhost",
dbname="aidstream_prod_local",
user="postgres")
fetch_query <- function(conn, query) {
resultset <- dbSendQuery(conn, query)
fetch(resultset, n=-1)
}
df.organizations.all <- fetch_query(dbconn, "select id, user_identifier, name, reporting_org, country,
published_to_registry, created_at, updated_at
from organizations where country='NL';")
df.activities.all <- fetch_query(dbconn, glue("select id, identifier, title, organization_id,
activity_status, activity_date, recipient_country, recipient_region,
sector, published_to_registry,
created_at, updated_at
from activity_data where
published_to_registry = 1 and
organization_id in ({paste0(df.organizations.all$id, collapse=\",\")});"))
df.results.all <- fetch_query(dbconn, glue("select id, activity_id, result, created_at
from activity_results where
activity_id in ({paste0(df.activities.all$id, collapse=\",\")});"))
saveRDS(df.organizations.all, file=organisations.all.rds)
saveRDS(df.activities.all, file=activities.all.rds)
saveRDS(df.results.all, file=results.all.rds)
}
df.summary <- df.results.all %>%
left_join(df.activities.all, by=c("activity_id"="id")) %>%
left_join(df.organizations.all, by=c("organization_id"="id"))
Total Organisations: 79
Total unique Activities: 901
result
elementFirst we need to convert JSON string to R list before we can further process the data.
There can be multiple indicator
s within a given result
and multiple baseline
and multiple period
elements within an indicator
. Lets extract the count of the indicators, baselines and periods mentioned inside each result element to see how many indicators, baselines and periods are reported under each result.
results.rds <- here::here("aidstream","04-sessions","results.rds")
if(file.exists(results.rds)) {
df.results <- readRDS(results.rds)
} else {
df.results.all <- df.results.all %>%
mutate(result_json = map(result, fromJSON))
get_indicator_count <- function(result_json) {
return(nrow(result_json$indicator))
}
get_indicator_baseline_count <- function(result_json) {
sum(map_int(result_json$indicator$baseline, nrow))
}
get_indicator_period_count <- function(result_json) {
sum(map_int(result_json$indicator$period, nrow))
}
get_indicators <- function(result_json) {
df <- data.frame(
indicator_title = map_chr(result_json$indicator$title, function(.x) {
.x$narrative[[1]]$narrative[1]
}),
indicator_measure = as.character(result_json$indicator$measure),
indicator_ascending = as.character(result_json$indicator$ascending),
stringsAsFactors = FALSE
)
df$baseline = result_json$indicator$baseline
df$period = result_json$indicator$period
df
}
df.results <- df.results.all %>%
transmute(id, activity_id,
result_title = map_chr(result_json, function(.x) {
.x$title$narrative[[1]]$narrative[1]
}),
result_type = map_chr(result_json, "type"),
indicators_count = map_int(result_json, get_indicator_count),
indicators_baseline_count = map_int(result_json, get_indicator_baseline_count),
indicators_period_count = map_int(result_json, get_indicator_period_count),
indicator = map(result_json, get_indicators)
)
saveRDS(df.results, file = results.rds)
}
Result
quick summaryskim_with(integer = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL),
character = list(min = NULL, max = NULL))
df.results %>%
select(starts_with("result")) %>%
skim() %>%
pander()
Skim summary statistics
n obs: 7237
n variables: 2
variable | missing | complete | n | empty | n_unique |
---|---|---|---|---|---|
result_title | 5 | 7232 | 7237 | 0 | 3253 |
result_type | 0 | 7237 | 7237 | 0 | 4 |
indicator
sub-elementWe create a new dataframe for indicator
s only.
indicator
quick summaryskim_with(integer = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL))
df.indicators %>%
select(indicator_measure, indicator_ascending, indicator_title,
result_title, result_type) %>%
skim() %>%
pander()
Skim summary statistics
n obs: 11179
n variables: 5
variable | missing | complete | n | empty | n_unique |
---|---|---|---|---|---|
indicator_ascending | 0 | 11179 | 11179 | 2432 | 5 |
indicator_measure | 0 | 11179 | 11179 | 2 | 6 |
indicator_title | 1 | 11178 | 11179 | 3 | 4723 |
result_title | 5 | 11174 | 11179 | 0 | 3253 |
result_type | 0 | 11179 | 11179 | 0 | 4 |
baseline
and period
sub-elementsSimiarly we prepare dataframes for period
s and baseline
s.
get_baselines <- function(baseline_json) {
df <- data.frame(
baseline_year = as.character(baseline_json$year),
baseline_value = as.character(baseline_json$value),
baseline_comment = baseline_json$comment[[1]]$narrative[[1]]$narrative[1],
stringsAsFactors = FALSE
)
df
}
get_periods <- function(period_json) {
df <- data.frame(
period_start = map_chr(period_json$period_start, "date"),
period_end = map_chr(period_json$period_end, "date"),
period_target = map_chr(period_json$target, "value"),
period_actual = map_chr(period_json$actual, "value"),
stringsAsFactors = FALSE
)
df
}
df.indicators <- df.indicators %>%
mutate(baseline_df = map(baseline, get_baselines),
period_df = map(period, get_periods))
df.baselines <- df.indicators %>%
unnest(baseline_df)
df.periods <- df.indicators %>%
unnest(period_df)
df.periods$period_start <- parse_date_time(df.periods$period_start, "y-m-d", tz = "GMT")
df.periods$period_end <- parse_date_time(df.periods$period_end, "y-m-d", tz = "GMT")
df.periods$period_target <- as.numeric(df.periods$period_target)
df.periods$period_actual <- as.numeric(df.periods$period_actual)
baseline
quick summaryskim_with(integer = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL))
df.baselines %>%
select(-ends_with("count"),-ends_with("id"), -baseline, -starts_with("period")) %>%
skim() %>%
pander(caption = "Baseline Summary")
Skim summary statistics
n obs: 11179
n variables: 8
variable | missing | complete | n | empty | n_unique |
---|---|---|---|---|---|
baseline_comment | 9 | 11170 | 11179 | 9575 | 900 |
baseline_value | 7 | 11172 | 11179 | 5365 | 450 |
baseline_year | 0 | 11179 | 11179 | 5300 | 13 |
indicator_ascending | 0 | 11179 | 11179 | 2432 | 5 |
indicator_measure | 0 | 11179 | 11179 | 2 | 6 |
indicator_title | 1 | 11178 | 11179 | 3 | 4723 |
result_title | 5 | 11174 | 11179 | 0 | 3253 |
result_type | 0 | 11179 | 11179 | 0 | 4 |
period
quick summaryskim_with(numeric = list(hist = NULL, p0=NULL, p25=NULL, p50=NULL, p75=NULL, p100=NULL, sd=NULL),
POSIXct = list(median = NULL, min=NULL, max=NULL))
df.periods %>%
select(-ends_with("count"),-ends_with("id")) %>%
skim() %>%
pander()
Skim summary statistics
n obs: 15748
n variables: 9
variable | missing | complete | n | empty | n_unique |
---|---|---|---|---|---|
indicator_ascending | 0 | 15748 | 15748 | 3062 | 5 |
indicator_measure | 0 | 15748 | 15748 | 2 | 6 |
indicator_title | 1 | 15747 | 15748 | 3 | 4723 |
result_title | 8 | 15740 | 15748 | 0 | 3253 |
result_type | 0 | 15748 | 15748 | 0 | 4 |
variable | missing | complete | n | mean |
---|---|---|---|---|
period_actual | 5910 | 9838 | 15748 | 11856.91 |
period_target | 6939 | 8809 | 15748 | 133389.73 |
variable | missing | complete | n | n_unique |
---|---|---|---|---|
period_end | 458 | 15290 | 15748 | 260 |
period_start | 458 | 15290 | 15748 | 236 |
Now we can proceed with the analysis.
We see in indicator
summary that there are 11179 indicator
s mentioned in 7237 result
s. There’s an average of 1.54 indicator
s in a given result
. Lets see which result
s have large number of indicator
s.
df.results %>%
group_by(indicators_count) %>%
count() %>%
ungroup() %>%
gt() %>%
cols_label(
indicators_count = "Unique count of indicators in each <Result> element",
n = "Count of Results containing the given number of indicators"
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Unique count of indicators in each <Result> element | Count of Results containing the given number of indicators |
---|---|
1 | 5543 |
2 | 664 |
3 | 490 |
4 | 289 |
5 | 94 |
6 | 82 |
7 | 16 |
8 | 30 |
9 | 6 |
10 | 4 |
11 | 12 |
12 | 4 |
30 | 1 |
32 | 2 |
We can see that there are 2 result
elements, that contains as many as 32 indicator
s. That makes result
very long and complex. But we see that majority of result
elements contain one indicator
only. Ideally a given result can have multiple relevant indicators.
There are activities which has number of result
element with the same result metadata.
get_identifier <- function(identifier_text) {
id_json <- fromJSON(identifier_text)
id_json$iati_identifier_text
}
df.results %>%
group_by(result_title, activity_id) %>%
count() %>%
filter(n>1) %>%
arrange(desc(n)) %>%
ungroup() %>%
head(10) %>%
left_join(df.activities.all, by=c("activity_id"="id")) %>%
left_join(df.organizations.all, by=c("organization_id"="id")) %>%
mutate(iati_identifier = map_chr(identifier, get_identifier)) %>%
select(name, iati_identifier, result_title, n) %>%
group_by(name) %>%
gt() %>%
cols_label(
iati_identifier = "IATI Identifier",
result_title = "Result Title",
n = "Number of Result element in a given activity"
) %>%
cols_hide(
columns = vars(activity_id)
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
IATI Identifier | Result Title | Number of Result element in a given activity |
---|---|---|
Solidaridad Netherlands | ||
NL-KVK-51756811-SSEA-P1467 | (Output) Stakeholder dynamics generate dialogue & debate on sustainability or land issues (land rights) | 30 |
CHOICE for Youth and Sexuality | ||
NL-KVK-32108345-NLBB26-8842-ID | Goal 2. Adolescent girls and boys are meaningfully engaged to claim their SHR rights | 27 |
NL-KVK-32108345-NLBB26-8845-MZ | Goal 2. Adolescent girls and boys are meaningfully engaged to claim their SHR rights | 22 |
Rutgers | ||
NL-KVK-41193594-GUS7140ET | Increased utilisation of comprehensive SRHR information and education by all people | 27 |
NL-KVK-41193594-GUS7130ID | Increased utilisation of comprehensive SRHR information and education by all people | 23 |
NL-KVK-41193594-GUS7135UG | Increased utilisation of comprehensive SRHR information and education by all people | 23 |
NL-KVK-41193594-GUS7145KE | Increased utilisation of comprehensive SRHR information and education by all people | 23 |
NL-KVK-41193594-GUS7150MW | Increased utilisation of comprehensive SRHR information and education by all people | 23 |
NL-KVK-41193594-GUS7075GH-GUS7050GH | Increased utilisation of comprehensive SRHR information and education by all people | 23 |
NL-KVK-41193594-GUS7120PK | Increased utilisation of comprehensive SRHR information and education by all people | 21 |
result
element based on result/type
df.results %>%
group_by(result_type) %>%
count() %>%
select(result_type, n) %>%
ggplot(aes(x=result_type, y=n)) +
geom_bar(stat = "identity", fill="gray") +
coord_flip() +
scale_x_discrete(labels = cl.result.types.labels) +
geom_text(aes(label=n), hjust=-0.1, size=3) +
theme_minimal() +
theme(axis.text.x = element_blank()) +
labs(x="Result Type", y="Count of <Result> Element")
We see that most of the result
are Output-based and Outcome-based in the published activities.
indicator
subelement count based on indicator/measure
indicator/measure
defines the unit of measure in which the value is reported in the given indicator i.e. in baseline/value
and values for period/target/value
and period/actual/value
.
df.indicators %>%
group_by(indicator_measure) %>%
count() %>%
ungroup() %>%
mutate(label = cl.indicator.measures.labels[indicator_measure]) %>%
select(indicator_measure, label, n) %>%
mutate(label = replace_na(label,"-")) %>%
gt() %>%
cols_label(
label = "Indicator Measure",
n = "Count of <indicator> sub-element"
) %>%
cols_hide(
columns = vars(indicator_measure)
) %>%
tab_footnote(
footnote = "Missing indicator-measure in <indicator>",
locations = cells_data(
columns = vars(label),
rows = label == "-"
)
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Indicator Measure | Count of <indicator> sub-element |
---|---|
-1 | 2 |
Unit | 9586 |
Percentage | 1559 |
Nominal | 10 |
Ordinal | 7 |
Qualitative | 15 |
1 Missing indicator-measure in <indicator> |
We see that most of the indicator/measure
are of quantitative nature i.e. Unit and Percentage. There are 5 defined codes for indicator/measure
. There’s missing value for indicator/measure
as well.
result/type
vs indicator/measure
Lets see what indicator/measure
s are used for each result/type
.
df.indicators %>%
group_by(result_type, indicator_measure) %>%
count() %>%
spread(indicator_measure, n, fill=0) %>%
gather(indicator_measure, n, -result_type) %>%
ggplot(aes(x=result_type, y=indicator_measure)) +
geom_tile(aes(fill=n), colour = "gray") +
geom_text(aes(label=n)) +
scale_x_discrete(labels = cl.result.types.labels) +
scale_y_discrete(labels = c(cl.indicator.measures.labels, V1="Missing*")) +
scale_fill_gradient(low = "white", high = "steelblue") +
labs(x="Result Type", y="Indicator Measure", fill="Count of Indicators")
We see that bulk of quantitative measures (Unit and Percentage) are reported for Outcome and Outcome. So we can focus further analysis to quantitative measures only.
baseline
elementbaseline
s reported by baseline/year
Now we dive into baseline
sub-element of indicator
. From the baseline
summary above, we see that majority of baseline/year
values are empty.
df.baselines %>%
group_by(baseline_year) %>%
count() %>%
ungroup() %>%
gt() %>%
cols_label(
baseline_year = "Baseline Year",
n = "Number of baselines reported for that year"
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Baseline Year | Number of baselines reported for that year |
---|---|
5300 | |
2010 | 5 |
2011 | 7 |
2012 | 37 |
2013 | 180 |
2014 | 35 |
2015 | 258 |
2016 | 3581 |
2017 | 1244 |
2018 | 474 |
2019 | 19 |
2020 | 38 |
217 | 1 |
We see that there are few issues with the year as well like 217. Lets identify which organisation published 217 year in their result/indicator/baseline
data.
df.baselines %>%
filter(indicator_measure==1 & baseline_year == 217) %>%
select(activity_id) %>%
left_join(df.activities.all, by=c("activity_id"="id")) %>%
left_join(df.organizations.all, by=c("organization_id"="id")) %>%
mutate(iati_identifier = fromJSON(identifier)$iati_identifier_text) %>%
select(name, iati_identifier) %>%
gt() %>%
cols_label(
name = "Organisation",
iati_identifier = "IATI Identifier"
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Organisation | IATI Identifier |
---|---|
War Child Holland | NL-KVK-41215393-PJ3516 |
http://preview.iatistandard.org/index.php?url=https%3A//aidstream.org/files/xml/warchildholland-activities.xml shows 217 in the data.
baseline/value
sFor simplicity, we will explore the baseline/value
for unit indicator/measure
only. As per standard, baseline/value
is mandatory for quantiative indicator/measure
i.e. for both Unit[1] and Percentage[2].
Lets see how many baseline/value
are present.
df.baselines %>%
filter(indicator_measure==1) %>%
group_by(baseline_value) %>%
count() %>%
mutate(baseline_value_present = ifelse(trimws(baseline_value)=="" | is.na(baseline_value),FALSE,TRUE)) %>%
group_by(baseline_value_present) %>%
summarize(n=sum(n)) %>%
ungroup() %>%
mutate(Percentage = round(100*n/sum(n), digits=2)) %>%
gt() %>%
cols_label(
baseline_value_present = "Baseline value present?",
n = "Count of baseline value",
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Baseline value present? | Count of baseline value | Percentage |
---|---|---|
FALSE | 4920 | 51.32 |
TRUE | 4666 | 48.68 |
baseline/@value
typeWe see only 48% of the reported baseline/value
are present for Unit measure. Now we need to see if all those reported values are numeric or not. These values should be numeric as per standard.
The @value must be included for non-qualitative measures. The @value should be a valid number for all non-qualitative measures.
df.baselines %>%
filter(indicator_measure==1) %>%
group_by(baseline_value) %>%
count() %>%
filter(baseline_value !="") %>%
mutate(baseline_value_number = as.numeric(baseline_value)) %>%
mutate(baseline_value_type = ifelse(is.na(baseline_value_number),"Text","Number")) %>%
group_by(baseline_value_type) %>%
summarise(n=sum(n)) %>%
ungroup() %>%
gt() %>%
cols_label(
baseline_value_type = "Baseline Value Type",
n = "Count of Baseline Value",
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Baseline Value Type | Count of Baseline Value |
---|---|
Number | 4635 |
Text | 31 |
We see that there are 31 instances where baseline/value
are mentioned in textual form.
baseline/value
vs baseline/year
Lets plot the baseline/value
against baseline/year
and for Unit quantitative measure only i.e. indicator-measure
= 1[Unit].
df.baselines %>%
filter(indicator_measure==1 & baseline_year != 217) %>%
group_by(baseline_value, baseline_year, result_type) %>%
count() %>%
filter(baseline_value !="") %>%
mutate(baseline_value_number = as.numeric(baseline_value)) %>%
mutate(baseline_value_type = ifelse(is.na(baseline_value_number),"Text","Number")) %>%
filter(baseline_value_type == "Number") %>%
ungroup() %>%
mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>%
ggplot(aes(x=baseline_year,y=baseline_value_number, color=result_type)) +
geom_point(alpha = 0.5) +
scale_y_log10(label=comma) +
labs(x="Baseline year", y = "Baseline value", color="Result Type")
The log10 scale was used as the baseline/value
ranges from 0 to 10,000,000. It’s not clear whether we can have future baseline/value
, but apparently there are baseline/value
for 2019 and 2020.
df.baselines %>%
filter(indicator_measure==1) %>%
group_by(baseline_value, baseline_year, result_type) %>%
count() %>%
filter(baseline_value !="") %>%
mutate(baseline_value_number = as.numeric(baseline_value)) %>%
mutate(baseline_value_type = ifelse(is.na(baseline_value_number),"Text","Number")) %>%
filter(baseline_value_type == "Number") %>%
ungroup() %>%
mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>%
ggplot(aes(x=baseline_year,y=baseline_value_number)) +
geom_boxplot(alpha=0.5) +
scale_y_log10(label=comma) +
stat_summary(fun.y=mean,shape=4,color='red',geom='point') +
labs(x="Baseline year", y = "Baseline value", color="Result Type")
From 2014 to 2017, medium (represented by and mean (represented by red x) of baseline/@value
s lie between 10 and 1000.
baseline/value
resultThere are few baseline/@value
beyond 1,000,000. Lets list those values. It would help the organisations to verify and correct those large numbers if those are typos.
df.baselines %>%
filter(indicator_measure==1) %>%
mutate(baseline_value_number = as.numeric(baseline_value)) %>%
filter(baseline_value_number >1000000) %>%
select(result_title, indicator_title, baseline_comment, baseline_value) %>%
mutate(baseline_value = as.numeric(baseline_value)) %>%
arrange(desc(baseline_value)) %>%
gt() %>%
cols_label(
result_title = "Result Title",
indicator_title = "Indicator Title",
baseline_comment = "Baseline Comment",
baseline_value = "Baseline Value"
) %>%
fmt_number(
columns = vars(baseline_value),
decimals = 0
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Result Title | Indicator Title | Baseline Comment | Baseline Value |
---|---|---|---|
Reach: Membership of Trade Unions directly supported(OP3) | Membership male and female (DO31 + DO32) | 2017 is start year of working with ToCs in new sectors and a new geographical focus of Mondiaal FNV | 8,830,024 |
Number of people about which the study results are valid: 6,072,449 | Number of people about which the study results are valid: 6,072,449 | 6,072,449 | |
Number of people about which the study results are valid: 4,600,000 | Number of people about which the study results are valid: 4,600,000 | 4,600,000 | |
Outcome- Program Progress (MYP 2016-2020) | Area where trained practices are applied either on farmland or in protection area (hectares) | 3,500,000 | |
Output - Program Progress (MYP 2016-2020) | Volume of sustainably produced production | 3,000,000 | |
Output- Program Progress (MYP 2016-2020) | Volume of sustainably produced production (MT) | FEFAC Guidelines, RTRS or equivalent | 2,000,000 |
I01. Access to safe water | I1 - Number of people having access to safe water | Baseline amount shows the number of people who already have access to safe water before the start of the project. Tha target value only shows the additional number of people targeted. | 1,125,845 |
These could be correct numbers, but it would be good to verify the big numbers at least once.
period
elementdf.periods %>%
filter(indicator_measure != "") %>%
mutate(indicator_measure = paste0(indicator_measure, "-", cl.indicator.measures.labels[indicator_measure])) %>%
mutate(result_type = paste0(result_type, "-", cl.result.types.labels[result_type])) %>%
janitor::tabyl(result_type, indicator_measure) %>%
janitor::adorn_totals(c("col","row")) %>%
gt() %>%
cols_label(
result_type = "Result Type"
)
Result Type | 1-Unit | 2-Percentage | 3-Nominal | 4-Ordinal | 5-Qualitative | Total |
---|---|---|---|---|---|---|
1-Output | 5511 | 731 | 1 | 4 | 6 | 6253 |
2-Outcome | 7230 | 1938 | 8 | 3 | 9 | 9188 |
3-Impact | 89 | 106 | 1 | 0 | 0 | 196 |
9-Other | 107 | 2 | 0 | 0 | 0 | 109 |
Total | 12937 | 2777 | 10 | 7 | 15 | 15746 |
# tab_spanner(
# label = "Indicator Measure",
# columns = vars(`1-Nominal`, Ordinal, Percentage, Qualitative, Unit)
# )
period-start
vs period-end
Lets create an scatter plot of all the points against period-start
and period-end
dates.
df.periods %>%
mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>%
ggplot(aes(x=period_start, y=period_end)) +
geom_point(alpha = 0.4, aes(color=result_type)) +
labs(x="Period Start Date", y="Period End Date",color="Result Type")
We see that there are certain period
which has very long duration, mostly those periods, which end after 2030. See top-left corner of the above chart.
Lets list those periods.
result
df.periods %>%
filter(year(period_end) > 2030) %>%
left_join(df.activities.all, by=c("activity_id"="id")) %>%
left_join(df.organizations.all, by=c("organization_id"="id")) %>%
select(name, identifier, result_title, period_start, period_end) %>%
mutate(iati_identifier = map_chr(identifier, get_identifier)) %>%
group_by(name, iati_identifier) %>%
gt() %>%
cols_label(
result_title = "Result Title",
period_start = "Period Start Date",
period_end = "Period End Date"
) %>%
cols_hide(
columns = "identifier"
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Result Title | Period Start Date | Period End Date |
---|---|---|
Stichting CARE Nederland - NL-KVK-41158230-NLRCGT0001 | ||
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM | 2001-01-16 | 2031-12-20 |
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM | 2001-01-16 | 2031-12-16 |
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM | 2001-07-17 | 2031-12-17 |
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM | 2001-01-16 | 2031-12-20 |
Interagency Strategic Agenda (ISA) as a space for dialogue for IRM | 2001-07-17 | 2031-12-17 |
Empowerment of women in the IRM approach | 2001-01-16 | 2031-12-20 |
Empowerment of women in the IRM approach | 2001-07-17 | 2031-12-17 |
Empowerment of women in the IRM approach | 2001-01-16 | 2031-12-20 |
Empowerment of women in the IRM approach | 2001-01-16 | 2031-12-16 |
Empowerment of women in the IRM approach | 2001-07-17 | 2031-12-17 |
Stichting CARE Nederland - NL-KVK-41158230-NLRCET0001 | ||
Improving food security through the management of agriculture, livestock and rangeland practices | 2001-01-16 | 2031-12-20 |
Improving food security through the management of agriculture, livestock and rangeland practices | 2001-01-16 | 2031-12-16 |
Improving food security through the management of agriculture, livestock and rangeland practices | 2001-07-17 | 2031-12-17 |
Investment strategies are based on the Environmental Impact Assessment | 2001-01-16 | 2031-12-20 |
Investment strategies are based on the Environmental Impact Assessment | 2001-01-16 | 2031-12-16 |
Investment strategies are based on the Environmental Impact Assessment | 2001-07-17 | 2031-12-17 |
Stichting CARE Nederland - NL-KVK-41158230-NLRCID0001 | ||
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-01-16 | 2031-12-20 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-01-16 | 2031-12-16 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-07-17 | 2031-12-17 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-01-16 | 2031-12-20 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-01-16 | 2031-12-16 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-07-17 | 2031-12-17 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-01-16 | 2031-12-20 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-01-16 | 2031-12-16 |
Gender-sensitive community-based development plans incorporate and fund IRM initiatives | 2001-07-17 | 2031-12-17 |
Stichting CARE Nederland - NL-KVK-41158230-NLRCPH0002 | ||
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-01-16 | 2031-12-20 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-01-16 | 2031-12-16 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-07-17 | 2031-12-17 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-01-16 | 2031-12-20 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-01-16 | 2031-12-16 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-07-17 | 2031-12-17 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-01-16 | 2031-12-20 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-01-16 | 2031-12-16 |
Key stakeholders mainstream Integrated Risk Management in inclusive planning guidelines | 2001-07-17 | 2031-12-17 |
Stichting CARE Nederland - NL-KVK-41158230-NLRCUG0001 | ||
Integrated risk management approach is mainstreamed in development policies re. climate change | 2001-01-16 | 2031-12-20 |
Integrated risk management approach is mainstreamed in development policies re. climate change | 2001-01-16 | 2031-12-16 |
Integrated risk management approach is mainstreamed in development policies re. climate change | 2001-07-17 | 2031-12-17 |
It’s only “Stichting CARE Nederland” that seems to have longer periods duration. We can verify those numbers from the XML file as well. It’s worth to verify these dates at least once to ensure that we are projecting the right data to the audience at large.
period/target
vs period/actual
Lets see how many target
and actual
values are present in period
.
df.periods %>%
mutate(is_target_present = !is.na(period_target),
is_actual_present = !is.na(period_actual)) %>%
group_by(is_target_present, is_actual_present) %>%
count() %>%
ungroup() %>%
spread(is_actual_present, n) %>%
gt() %>%
tab_spanner(
label = "Is Actual Present?",
columns = vars(FALSE, TRUE)
) %>%
cols_label(
is_target_present = "Is Target Present?"
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Is Target Present? | Is Actual Present? | |
---|---|---|
FALSE | TRUE | |
FALSE | 3011 | 3928 |
TRUE | 2899 | 5910 |
Lets narrow our analysis to unit quantitative measure only, i.e. indicator/measure
= 1.
df.periods %>%
filter(indicator_measure==1) %>%
mutate(is_target_present = !is.na(period_target),
is_actual_present = !is.na(period_actual)) %>%
group_by(is_target_present, is_actual_present) %>%
count() %>%
spread(is_actual_present, n) %>%
ungroup() %>%
gt() %>%
tab_spanner(
label = "Is Actual Present?",
columns = vars(FALSE, TRUE)
) %>%
cols_label(
is_target_present = "Is Target Present?"
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Is Target Present? | Is Actual Present? | |
---|---|---|
FALSE | TRUE | |
FALSE | 2590 | 3069 |
TRUE | 2320 | 4958 |
Now lets plot 4958 points of actual
and target
values in scatterplot.
df.periods %>%
filter(indicator_measure==1) %>%
mutate(is_target_present = !is.na(period_target),
is_actual_present = !is.na(period_actual)) %>%
filter(is_target_present == TRUE & is_actual_present == TRUE) %>%
mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>%
ggplot(aes(x=as.numeric(period_target), y=as.numeric(period_actual), color=result_type)) +
geom_point(alpha=0.5) +
geom_rug(alpha=0.1) +
scale_y_log10(oob=squish_infinite) +
scale_x_log10(oob=squish_infinite) +
labs(x = "Target value", y="Actual value",color="Result Type")
There’s lot of points.
Lets see these points separately for each result/type
df.periods %>%
filter(indicator_measure==1) %>%
mutate(is_target_present = !is.na(period_target),
is_actual_present = !is.na(period_actual)) %>%
filter(is_target_present == TRUE & is_actual_present == TRUE) %>%
mutate(period_target_number = as.numeric(period_target),
period_actual_number = as.numeric(period_actual)) %>%
filter(period_target_number>0 & period_actual_number>0) %>%
mutate(percent_change = (period_actual_number-period_target_number)/period_target_number) %>%
mutate(result_type = factor(cl.result.types.labels[result_type], levels = cl.result.types.labels)) %>%
ggplot(aes(x=period_target_number, y=period_actual_number, color=result_type)) +
geom_point(alpha=0.3) +
scale_y_log10(oob=squish_infinite) +
scale_x_log10(oob=squish_infinite) +
facet_grid(result_type ~ .) +
labs(x = "Target value", y="Actual value") +
theme(
legend.position = "none"
)
Here we see that target and actual values are heavily mentioned for outputs and outcomes. It would be good to
target
and actual
valuesdf.periods %>%
filter(indicator_measure==1) %>%
mutate(is_target_present = !is.na(period_target),
is_actual_present = !is.na(period_actual)) %>%
filter(is_target_present == TRUE & is_actual_present == TRUE) %>%
mutate(period_target_number = as.numeric(period_target),
period_actual_number = as.numeric(period_actual)) %>%
filter(period_target_number>0 & period_actual_number>0) %>%
mutate(percent_change = 100*(period_actual_number-period_target_number)/period_target_number) %>%
mutate(percent_change_bin = ifelse(percent_change >=1000, "more than 1000% increase",
ifelse(percent_change >=100,
"100% to 1000% increase",
ifelse(percent_change <=0,
"Negative change", "0 to 100% in increase")))
) %>%
ggplot(aes(x=period_target_number, y=period_actual_number, color = percent_change_bin)) +
geom_point(alpha=0.5) +
scale_y_log10(oob=squish_infinite) +
scale_x_log10(oob=squish_infinite) +
labs(x = "Target value", y = "Actual value", color = "Percent Change")
target
and actual
with more than 1000% increasedf.periods %>%
filter(indicator_measure==1) %>%
left_join(df.activities.all, by=c("activity_id"="id")) %>%
left_join(df.organizations.all, by=c("organization_id"="id")) %>%
mutate(is_target_present = !is.na(period_target),
is_actual_present = !is.na(period_actual)) %>%
filter(is_target_present == TRUE & is_actual_present == TRUE) %>%
mutate(period_target_number = as.numeric(period_target),
period_actual_number = as.numeric(period_actual)) %>%
filter(period_target_number>0 & period_actual_number>0) %>%
mutate(percent_change = 100*(period_actual_number-period_target_number)/period_target_number) %>%
filter(abs(percent_change) >= 1000) %>%
select(name, result_title, period_target, period_actual, percent_change) %>%
group_by(name) %>%
gt() %>%
cols_label(
result_title = "Result Title",
period_target = "Target Value",
period_actual = "Actual Value",
percent_change = "Percent Increase"
) %>%
fmt_number(
columns = vars(percent_change),
decimals = 2
) %>%
fmt_number(
columns = vars(period_target, period_actual),
decimals = 0
) %>%
tab_style(
style = cells_styles(
text_size = px(12)
),
locations = cells_data(
columns = everything()
)
)
Result Title | Target Value | Actual Value | Percent Increase |
---|---|---|---|
ZOA | |||
Output 1.1 | 100 | 1,530 | 1,430.00 |
Output 1.1 | 100 | 1,530 | 1,430.00 |
W.R.1 | 600 | 10,432 | 1,638.67 |
Stichting Vluchteling | |||
P.R3 Protection concerns are analysed and identified and awareness-raising and advocacy interventions implemented | 40 | 742 | 1,755.00 |
S.SO2 Provide basic non food items for general use and winter to vulnerable conflict affected people, IDPs and returnees. | 7 | 6,512 | 92,928.57 |
FS.SO1 - Provide emergency assistance to enable vulnerable conflict affected people, IDPs and returnees to meet their basic food needs. | 4 | 3,510 | 83,471.43 |
P.R1 Improved protection mechanisms in place for IDPs and host communities | 800 | 17,966 | 2,145.75 |
Objective 2. Reinforced Protection for vulnerable IDPs, returneesand conflict affected host communities, especially those most at risk for GBV | 1,232 | 23,752 | 1,827.92 |
Healthnettpo | |||
3.1.1 Women are agents of change and have increased their capacities, abilities, knowledge, and resources for significant participation in the prevention and resolution of conflicts and in the construction, support, and recuperation of peace. | 10 | 136 | 1,260.00 |
1.1. Increase in capacities, abilities, knowledge, and resources for the significant participation of women in the development of the security and justice sector. | 3 | 59 | 1,866.67 |
Save the Children Netherlands | |||
P.R2. Girls and boys coping mechanisms and resilience are strengthened and severely affected Children receive appropriate support | 620 | 8,070 | 1,201.61 |
Output 5: Increased utilization of SRHR services that are responsive to the needs of young people | 3 | 108 | 3,500.00 |
Rutgers | |||
Increased utilisation of comprehensive SRHR information and education by all people | 770 | 56,312 | 7,213.25 |
Increased utilisation of comprehensive SRHR information and education by all people | 770 | 56,312 | 7,213.25 |
Result Area 4: OUTPUT - Targeted policy- and law-makers, donors or development partners are reached through advocacy actions for the engagement of (young) men in GBV prevention and gender justice | 5 | 85 | 1,600.00 |
Result Area 4: OUTPUT - Targeted networks, movements and organisations collaborate on engaging (young) men in GBV prevention and gender justice | 3 | 48 | 1,500.00 |
Pathway 1 Community members and gate-keepers have changed attitudes and take action to prevent CM FGC and TP | 10 | 1,210 | 12,000.00 |
Pathway 1 Community members and gate-keepers have changed attitudes and take action to prevent CM FGC and TP | 1 | 1,198 | 119,700.00 |
Pathway 1 Community members and gate-keepers have changed attitudes and take action to prevent CM FGC and TP | 10 | 6,200 | 61,900.00 |
Pathway 3 Adolescent girls and boys take informed action on their sexual health | 4 | 1,224 | 30,500.00 |
Result Area 4: OUTPUT - Targeted networks, movements and organisations collaborate on engaging (young) men and women in GBV prevention and gender justice | 10 | 123 | 1,130.00 |
Cordaid | |||
Water and Sanitation & Hygiene | 1,200 | 19,360 | 1,513.33 |
FSL objective 1: Immediate WASH needs of conflict affected populations are met whilst protecting their safety and dignity | 200 | 3,227 | 1,513.50 |
WASH | 23,400 | 1,004,026 | 4,190.71 |
Defence for Children - ECPAT the Netherlands | |||
# of community members (incl. community, religious and traditional leaders) participating in awareness raising sessions organized by alliance local partners | 540 | 709,000 | 131,196.30 |
Pathway 3: Government | 2 | 71 | 3,450.00 |
Hague Institute for the Innovation of Law | |||
Number of people about which the study results are valid: 8676815 | 6,000 | 8,676,815 | 144,513.58 |
Plan Nederland | |||
W.R3.3 Distribute hygiene kits to women ( gender appropriate) | 2,000 | 22,000 | 1,000.00 |
Solidaridad Netherlands | |||
(Output) Farmers/workers/miners are trained in good practices and administrative & entrepreneurial skills | 13 | 163 | 1,153.85 |
(Output) SME supported through training, access to funds, business development advice and coaching | 5 | 130 | 2,500.00 |
dance4life | |||
Increased utilisation of comprehensive SRHR education and information by all people | 126 | 2,557 | 1,929.37 |
Improve socio cultural, political and legal environment for young people's SRHR | 4,300 | 3,003,500 | 69,748.84 |
Improve socio cultural, political and legal environment for young people's SRHR | 102 | 172,275 | 168,797.06 |
COC Nederland | |||
Empowered LGBTI Individuals | 500 | 13,072 | 2,514.40 |
Empowered LGBTI Individuals | 20 | 803 | 3,915.00 |
Empowered LGBTI Individuals | 20 | 2,392 | 11,860.00 |
Strong LGBTI Communities. | 20 | 578 | 2,790.00 |
Empowered LGBTI Individuals | 20 | 10,260 | 51,200.00 |
CHOICE for Youth and Sexuality | |||
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR | 1,500 | 39,039 | 2,502.60 |
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR | 500 | 524,621 | 104,824.20 |
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR | 500 | 874,020 | 174,704.00 |
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR | 500 | 215,520 | 43,004.00 |
OUTCOME AREA 5: Improved socio-cultural political and legal environment for young people SRHR | 500 | 650,000 | 129,900.00 |
Goal 5. Policy makers and duty bearers develop and implement laws on CM and FGM-C | 5 | 98 | 1,860.00 |
Health Action International | |||
Output 2: Strengthened HAI/GSI | 10,000 | 669,353 | 6,593.53 |
World Vision Netherlands | |||
R.W1 Water and Sanitation services available for all the population | 8,000 | 93,000 | 1,062.50 |
N.R1 Increase community based management of acute malnutrition for the most vulnerable and at risk | 50 | 1,000 | 1,900.00 |
N.R1 Increase community based management of acute malnutrition for the most vulnerable and at risk | 30 | 1,000 | 3,233.33 |
N.R1 Increase community based management of acute malnutrition for the most vulnerable and at risk | 30 | 1,000 | 3,233.33 |
ACC-R1 Participation | 9 | 250 | 2,677.78 |
Support Trust for Africa Development | |||
Support community mobilization on gender equality through development and distribution of IEC materials, campaigns, events, radio messaging, etc. | 9 | 100 | 1,011.11 |
Stichting CARE Nederland | |||
WSH.R1 Restore or maintain sustainable water and sanitation systems to improve public health and resilience | 10,500 | 202,203 | 1,825.74 |
War Child Holland | |||
# unique project participants (war child holland and partners) | 30 | 437 | 1,356.67 |
Les initiatives des jeunes sont renforcé à travers le renforcement de leurs capacités entreprenariales et austres compétences necessaires pour leurs developpement | 42 | 538 | 1,180.95 |
Thank you for reading. Any questions or suggestions? Please ask me @twitter