Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated EBI26, formatted SPSS and R, added Stata and Python #175

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions Indicators/Environmental-Benefit-Indicator-CRF-26/ebi26.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
*------------------------------------------------------------------------------*
* WFP Standardized Scripts
* Calculating Environmental Benefit Indicator (EBI) 26
*------------------------------------------------------------------------------*

* This script calculates the Environmental Benefit Indicator (EBI) based on various
* environmental-related questions. It recodes the responses, calculates percentages,
* and computes the EBI for each community and overall.

* Attach the survey question wording to each EBI variable
label variable EBIFFAPart "Have you or any of your household member participated in the asset creation activities and received a food assistance transfer?"
label variable EBISoilFertility "Do you think that the assets that were built or rehabilitated in your community have allowed to increase agricultural potential due to greater water availability and/or soil fertility (e.g. increased or diversified production not requiring expanded irrigation)?"
label variable EBIStabilization "Do you think that the assets that were built or rehabilitated in your community have improved natural environment due to land stabilization and restoration (e.g. more natural vegetal cover, increase in indigenous flora/fauna, less erosion or siltation, etc.)?"
label variable EBISanitation "Do you think that the assets that were built or rehabilitated in your community have improved environmental surroundings due to enhanced water and sanitation measures (i.e., greater availability/longer duration of water for domestic non-human consumption, improved hygiene practices – less open defecation)?"

* Value labels: participation is Yes/No; the three EBI questions also allow "Not applicable"
label define EBIFFAPart_lbl 0 "No" 1 "Yes"
label define EBI_lbl 0 "No" 1 "Yes" 9999 "Not applicable"
label values EBIFFAPart EBIFFAPart_lbl
label values EBISoilFertility EBIStabilization EBISanitation EBI_lbl

* Count "Not applicable" (9999) as "No" (0); all other values, including missing, are left untouched
recode EBISoilFertility EBIStabilization EBISanitation (9999 = 0)

* Share of "Yes" answers to each question within each ADMIN5Name
* (collapse replaces the data in memory with one row per ADMIN5Name)
collapse (mean) EBISoilFertility EBIStabilization EBISanitation, by(ADMIN5Name)
foreach q of varlist EBISoilFertility EBIStabilization EBISanitation {
    gen `q'_perc = `q' * 100
}

* Number of questions asked in each community (stays missing for any community not listed here)
gen EBIdenom = cond(ADMIN5Name == "Community A", 2, cond(ADMIN5Name == "Community B", 3, .))

* EBI per community: sum of the three percentages divided by the number of questions asked
gen EBI_ADMIN5Name = (EBISoilFertility_perc + EBIStabilization_perc + EBISanitation_perc) / EBIdenom

* Overall EBI: unweighted mean of the community-level values
summarize EBI_ADMIN5Name, meanonly
gen EBI_overall = r(mean)

* End of Scripts
54 changes: 54 additions & 0 deletions Indicators/Environmental-Benefit-Indicator-CRF-26/ebi26.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#------------------------------------------------------------------------------#
# WFP Standardized Scripts
# Calculating Environmental Benefit Indicator (EBI) 26
#------------------------------------------------------------------------------#

# This script calculates the Environmental Benefit Indicator (EBI) based on various
# environmental-related questions. It recodes the responses, calculates percentages,
# and computes the EBI for each community and overall.

# Load Packages
import pandas as pd

# Import dataset
#data = pd.read_csv("~/GitHub/RAMResourcesScripts/Static/EBI_Sample_Survey.csv")

# Assign variable and value labels
# pandas has no native variable labels, so the question wording is kept in a
# lookup dict. The columns are deliberately NOT renamed: every step below
# refers to the short variable names, which must therefore stay in place.
variable_labels = {
    'EBIFFAPart': 'Have you or any of your household member participated in the asset creation activities and received a food assistance transfer?',
    'EBISoilFertility': 'Do you think that the assets that were built or rehabilitated in your community have allowed to increase agricultural potential due to greater water availability and/or soil fertility (e.g. increased or diversified production not requiring expanded irrigation)?',
    'EBIStabilization': 'Do you think that the assets that were built or rehabilitated in your community have improved natural environment due to land stabilization and restoration (e.g. more natural vegetal cover, increase in indigenous flora/fauna, less erosion or siltation, etc.)?',
    'EBISanitation': 'Do you think that the assets that were built or rehabilitated in your community have improved environmental surroundings due to enhanced water and sanitation measures (i.e., greater availability/longer duration of water for domestic non-human consumption, improved hygiene practices – less open defecation)?'
}

# Define value labels
# Kept for reference/reporting only. The answer columns must remain numeric
# (0/1/9999) so that the recode and the means below can be computed; mapping
# them to text here would make both steps fail.
value_labels = {
    0: "No",
    1: "Yes",
    9999: "Not applicable"
}

ebi_columns = ['EBISoilFertility', 'EBIStabilization', 'EBISanitation']

# Recode 9999 to 0
data[ebi_columns] = data[ebi_columns].replace(9999, 0)

# Create 3 tables with the % of yes responses to each of the 3 questions by ADMIN5Name
table_perc_soilfert = (
    data.groupby('ADMIN5Name')['EBISoilFertility'].mean().mul(100)
    .reset_index(name='EBISoilFertility_perc')
)
table_perc_stab = (
    data.groupby('ADMIN5Name')['EBIStabilization'].mean().mul(100)
    .reset_index(name='EBIStabilization_perc')
)
table_perc_san = (
    data.groupby('ADMIN5Name')['EBISanitation'].mean().mul(100)
    .reset_index(name='EBISanitation_perc')
)

# Join together the perc values of each of the three tables
table_allperc = (
    table_perc_soilfert
    .merge(table_perc_stab, on='ADMIN5Name')
    .merge(table_perc_san, on='ADMIN5Name')
)

# Create table with the denominator of questions asked for each community
# These values are specific to the sample data: 2 questions were asked in
# 'Community A'; every other community is given 3. Adjust to your own survey.
ebi_denominators = {'Community A': 2}
table_allperc['EBIdenom'] = table_allperc['ADMIN5Name'].map(
    lambda name: ebi_denominators.get(name, 3)
)

# Calculate EBI by community
table_allperc['EBI_ADMIN5Name'] = (
    table_allperc['EBISoilFertility_perc']
    + table_allperc['EBIStabilization_perc']
    + table_allperc['EBISanitation_perc']
) / table_allperc['EBIdenom']

# Calculate total EBI combining all communities
EBI_overall = table_allperc['EBI_ADMIN5Name'].mean()

# End of Scripts
80 changes: 42 additions & 38 deletions Indicators/Environmental-Benefit-Indicator-CRF-26/ebi26.sps
Original file line number Diff line number Diff line change
@@ -1,63 +1,67 @@
* Encoding: UTF-8.
*------------------------------------------------------------------------------*
* WFP Standardized Scripts
* Calculating Environmental Benefit Indicator (EBI) 26
*------------------------------------------------------------------------------*

* This script calculates the Environmental Benefit Indicator (EBI) based on various
* environmental-related questions. It recodes the responses, calculates percentages,
* and computes the EBI for each community and overall.

* NOTE: every comment that is directly followed by a command ends with a period.
* Without it the comment runs on and absorbs the command on the next line.

* Define variable and value labels.
VARIABLE LABELS
  EBIFFAPart 'Have you or any of your household member participated in the asset creation activities and received a food assistance transfer?'
  /EBISoilFertility 'Do you think that the assets that were built or rehabilitated in your community have allowed to increase agricultural potential due to greater water availability and/or soil fertility (e.g. increased or diversified production not requiring expanded irrigation)?'
  /EBIStabilization 'Do you think that the assets that were built or rehabilitated in your community have improved natural environment due to land stabilization and restoration (e.g. more natural vegetal cover, increase in indigenous flora/fauna, less erosion or siltation, etc.)?'
  /EBISanitation 'Do you think that the assets that were built or rehabilitated in your community have improved environmental surroundings due to enhanced water and sanitation measures (i.e., greater availability/longer duration of water for domestic non-human consumption, improved hygiene practices – less open defecation)?'.

* Variable lists with different label sets must be separated by a slash.
VALUE LABELS
  EBIFFAPart 1 'Yes' 0 'No'
  /EBISoilFertility EBIStabilization EBISanitation 1 'Yes' 0 'No' 9999 'Not applicable'.

* Take a look at responses by community and note how many questions were answered for each community.
CROSSTABS
  /TABLES=EBISoilFertility EBIStabilization EBISanitation BY ADMIN5Name
  /CELLS=COUNT
  /COUNT ROUND CELL.

* Recode 9999 to 0.
RECODE EBISoilFertility EBIStabilization EBISanitation (9999=0) (0=0) (1=1).
EXECUTE.

* Create table of % of yes responses to each of the 3 questions by ADMIN5Name.
DATASET DECLARE table_allperc.
SORT CASES BY ADMIN5Name.
AGGREGATE
  /OUTFILE='table_allperc'
  /PRESORTED
  /BREAK=ADMIN5Name
  /EBISoilFertility_mean=MEAN(EBISoilFertility)
  /EBIStabilization_mean=MEAN(EBIStabilization)
  /EBISanitation_mean=MEAN(EBISanitation).

DATASET ACTIVATE table_allperc.
COMPUTE EBISoilFertility_perc = EBISoilFertility_mean * 100.
COMPUTE EBIStabilization_perc = EBIStabilization_mean * 100.
COMPUTE EBISanitation_perc = EBISanitation_mean * 100.
EXECUTE.

* Create values with the denominator of questions asked for each community.
* These values are specific to the sample data - scan the crosstabs above to set them for your survey.
DATASET ACTIVATE table_allperc.
DO IF ADMIN5Name = "Community A".
  COMPUTE EBIdenom = 2.
ELSE.
  COMPUTE EBIdenom = 3.
END IF.
EXECUTE.

* Calculate EBI by community.
COMPUTE EBI_ADMIN5Name = (EBISoilFertility_perc + EBIStabilization_perc + EBISanitation_perc) / EBIdenom.
EXECUTE.

* Calculate total EBI average across all communities.
DESCRIPTIVES VARIABLES=EBI_ADMIN5Name
  /STATISTICS=MEAN.

* End of Scripts.
115 changes: 69 additions & 46 deletions Indicators/Environmental-Benefit-Indicator-CRF-26/ebi26_tidyverse.R
Original file line number Diff line number Diff line change
@@ -1,62 +1,85 @@
#------------------------------------------------------------------------------#
# WFP Standardized Scripts
# Calculating Environmental Benefit Indicator (EBI) 26
#------------------------------------------------------------------------------#

# This script calculates the Environmental Benefit Indicator (EBI) based on various
# environmental-related questions. It recodes the responses, calculates percentages,
# and computes the EBI for each community and overall.

# Load Packages
library(tidyverse)
library(labelled)
library(expss)

# Import dataset
# `data` must be loaded before the steps below are run; uncomment and adapt
# the path to your own survey export.
#data <- read_csv("~/GitHub/RAMResourcesScripts/Static/EBI_Sample_Survey.csv")

# Assign variable and value labels
var_label(data$EBIFFAPart) <- "Have you or any of your household member participated in the asset creation activities and received a food assistance transfer?"
var_label(data$EBISoilFertility) <- "Do you think that the assets that were built or rehabilitated in your community have allowed to increase agricultural potential due to greater water availability and/or soil fertility (e.g. increased or diversified production not requiring expanded irrigation)?"
var_label(data$EBIStabilization) <- "Do you think that the assets that were built or rehabilitated in your community have improved natural environment due to land stabilization and restoration (e.g. more natural vegetal cover, increase in indigenous flora/fauna, less erosion or siltation, etc.)?"
var_label(data$EBISanitation) <- "Do you think that the assets that were built or rehabilitated in your community have improved environmental surroundings due to enhanced water and sanitation measures (i.e., greater availability/longer duration of water for domestic non-human consumption, improved hygiene practices – less open defecation)?"

val_lab(data$EBIFFAPart) <- num_lab("
0 No
1 Yes
")

# The three EBI questions share one label set; "Not applicable" is coded 9999
data <- data %>%
  mutate(across(c(EBISoilFertility, EBIStabilization, EBISanitation), ~labelled(., labels = c(
    "No" = 0,
    "Yes" = 1,
    "Not applicable" = 9999
  ))))

# Recode 9999 to 0
data <- data %>%
  mutate(across(c(EBISoilFertility, EBIStabilization, EBISanitation), ~ dplyr::recode(.x, "0" = 0, "1" = 1, "9999" = 0)))

# Percentage of "yes" answers per ADMIN5Name, one table per EBI question
# (number of "yes" divided by the number of rows in the community, rounded to 1 decimal)
table_perc_soilfert <- data %>%
  group_by(ADMIN5Name) %>%
  summarize(EBISoilFertility_perc = round(sum(EBISoilFertility) / n() * 100, 1))

table_perc_stab <- data %>%
  group_by(ADMIN5Name) %>%
  summarize(EBIStabilization_perc = round(sum(EBIStabilization) / n() * 100, 1))

table_perc_san <- data %>%
  group_by(ADMIN5Name) %>%
  summarize(EBISanitation_perc = round(sum(EBISanitation) / n() * 100, 1))

# Combine the three percentage tables into one, keyed on ADMIN5Name
table_allperc <- purrr::reduce(
  list(table_perc_soilfert, table_perc_stab, table_perc_san),
  left_join,
  by = "ADMIN5Name"
)

# Number of questions asked in each community (NA for any community not listed)
num_quest_table <- data %>%
  count(ADMIN5Name) %>%
  transmute(
    ADMIN5Name,
    EBIdenom = case_when(
      ADMIN5Name == "Community A" ~ 2,
      ADMIN5Name == "Community B" ~ 3
    )
  )

# Attach the denominator to the percentages
perc_denom_table <- left_join(table_allperc, num_quest_table, by = "ADMIN5Name")

# EBI per community: sum of the three percentages over the number of questions asked
EBI_ADMIN5Name <- perc_denom_table %>%
  mutate(
    EBI_ADMIN5Name =
      (EBISoilFertility_perc + EBIStabilization_perc + EBISanitation_perc) / EBIdenom
  )

# Overall EBI: mean of the community values, rounded to 1 decimal
EBI_overall <- EBI_ADMIN5Name %>%
  summarize(EBI_overall = round(mean(EBI_ADMIN5Name), 1))

# End of Scripts