********************************************************************************
********************************************************************************
* Date: November 2025
** Do: Code to reproduce Chapter 2 of World Inequality Report 2026
*** Author: Ricardo Gómez-Carrera
	* 1. Figure 2.1. The least populated regions have higher average incomes
	* 2. Figure 2.2. Global economic weight is shifting back toward Asia
	* 3. Figure 2.3. A person in North America & Oceania earns about 13 times more than someone in Sub-Saharan Africa
	* 4. Figure 2.4. Incomes are very unequal across countries
	* 5. Figure 2.5.  Most individuals who earn below the global average are in SSAF and SSEA
	* 6. Figure 2.6. Extreme concentration of income at the very top is a defining feature of the global economy
	* 7. Figure 2.7. The composition of top earners and other groups has shifted over time
	* 8. Figure 2.8. Bottom 50% income shares are very low everywhere
	* 9. Figure 2.9. Middle 40% shares are never higher than 50%
	*10. Figure 2.10. Top 10% income shares are very large everywhere
	*11. Figure 2.11. Top 1% income shares are very large
	*12. Figure 2.12. Some countries face the double burden of low incomes and very high inequality
	*13. Figure 2.13. Redistribution decreases inequality within countries but with large variations
	*14. Figure 2.14 Transfers account for a larger share of redistribution than taxes
	*15. Figure 2.15. Taxes alone tend to have minimal or even regressive effects on inequality in many countries
	*16. Figure 2.16. Transfers consistently reduce inequality across all regions, but with large variations across countries
	*17. Figure Box 2.1. Country rankings for large countries according to per capita national income
	*18. Figure Box 2.2. Country rankings according to per capita national income
	*19. Figure B2.2.2. Per capita national income by country size
********************************************************************************
********************************************************************************

/*
For information on how to use the widcode and the World Inequality Database, visit:
	*1. https://wid.world/codes-dictionary/
	*2. https://github.com/world-inequality-database/wid-stata-tool
	*3. https://github.com/world-inequality-database/wid-r-tool
	*4. https://wid.world/document/distributional-national-accounts-dina-guidelines-2025-methods-and-concepts-used-in-the-world-inequality-database/
	
	For other data inputs visit:
	*1. https://github.com/world-inequality-database/wid-world/tree/master/data-input
	
*/



**# Figure 2.1.a., Figure 2.2.a, and Figure 1.1
* ---------------------------------------------------------------------------- *
* Sums national income (mnninc999i, divided by the output-year xlceup999i
* factor) and population (npopul999i) over the main countries of each region,
* adds a world total, derives income per capita (anninc999i) and exports the
* per-capita series in wide form, one column per region.
* ---------------------------------------------------------------------------- *

* Step 1: output-year conversion factor, one row per main country
use if inrange(year, $year_output, $year_output) ///
	& p == "p0p100" ///
	& inlist(widcode, "xlceup999i") ///
	using $wid2024, clear

generate byte ismain = 0
foreach ctry of global maincountries {
	replace ismain = 1 if iso == "`ctry'"
}
keep if ismain == 1

keep year iso value widcode
greshape wide value, i(year iso) j(widcode) string
renvars value*, pred(5)
* year is dropped so the factor can be merged onto every year of a country
drop year
tempfile ppp
save `ppp', replace


* Step 2: national income and population, all years, main countries only
use if inlist(widcode, "mnninc999i", "npopul999i") ///
	using $wid2024, clear

generate byte ismain = 0
foreach ctry of global maincountries {
	replace ismain = 1 if iso == "`ctry'"
}
keep if ismain == 1

keep year iso value widcode
greshape wide value, i(year iso) j(widcode) string
renvars value*, pred(5)

merge m:1 iso using `ppp', nogenerate
replace mnninc999i = mnninc999i/xlceup999i
drop xlceup999i


* Step 3: attach a two-letter region code to every country.
* The k-th global in reg_globals maps to the k-th code in reg_codes; the
* globals are processed in this order, so a later list wins on overlap.
generate region = ""
local reg_globals EURO NAOC LATA MENA SSAF RUCA EASA SSEA
local reg_codes   QE   XB   XL   XN   XF   XR   QL   XS
forvalues k = 1/8 {
	local members : word `k' of `reg_globals'
	local code    : word `k' of `reg_codes'
	foreach ctry of global `members' {
		replace region = "`code'" if iso == "`ctry'"
	}
}

* Aggregate iso codes that are assigned to a region directly
replace region = "QE" if inlist(iso, "QM", "OC")
local agg_iso  OH OD OE OJ OA OB OI
local agg_code XB XL XN XF XR QL XS
forvalues k = 1/7 {
	local src : word `k' of `agg_iso'
	local dst : word `k' of `agg_code'
	replace region = "`dst'" if iso == "`src'"
}


* Step 4: regional totals, then a world total built from the regional totals
collapse (sum) mnninc999i npopul999i, by(year region)

preserve
	collapse (sum) mnninc999i npopul999i, by(year)
	generate region = "WO"
	generate anninc999i = mnninc999i/npopul999i
	tempfile world_nni
	save `world_nni', replace
restore

generate anninc999i = mnninc999i/npopul999i
append using `world_nni'


* Step 5: export income per capita, one column per region
preserve
	drop mnninc999i npopul999i

	greshape wide anninc999i, i(year) j(region) string
	renvars anninc999i*, pred(10)

	* the k-th label below belongs to the k-th column in cols
	local cols QE QL XB XF XL XN XR XS WO
	local k = 0
	foreach lab in "Europe" "East Asia" "North America & Oceania" "Sub-Saharan Africa" "Latin America" "MENA" "Russia & Central Asia" "South & Southeast Asia" "World" {
		local ++k
		local col : word `k' of `cols'
		label variable `col' "`lab'"
	}

	order year `cols'
	export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.1.a") sheetmod cell(A1) first(varl)
restore
 
 
**# Figure 2.1.b., Figure 2.2.b, and Figure 1.1
* ---------------------------------------------------------------------------- *
* Exports population by region (npopul999i divided by 1,000,000) in wide
* form. Works on the region-by-year data left in memory by the previous section.
* ---------------------------------------------------------------------------- *
preserve
	drop mnninc999i anninc999i
	replace npopul999i = npopul999i/1000000

	greshape wide npopul999i, i(year) j(region) string
	renvars npopul999i*, pred(10)

	* the k-th label below belongs to the k-th column in cols
	local cols QE QL XB XF XL XN XR XS WO
	local k = 0
	foreach lab in "Europe" "East Asia" "North America & Oceania" "Sub-Saharan Africa" "Latin America" "MENA" "Russia & Central Asia" "South & Southeast Asia" "World" {
		local ++k
		local col : word `k' of `cols'
		label variable `col' "`lab'"
	}

	order year `cols'
	export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.1.b") sheetmod cell(A1) first(varl)
restore
 

 
**# Figure 2.3. A person in North America & Oceania earns about 13 times more than someone in Sub-Saharan Africa
* ---------------------------------------------------------------------------- *
* Exports income per capita divided by 12 for the output year, with region
* codes replaced by readable names. Uses the region-by-year data in memory.
* ---------------------------------------------------------------------------- *
preserve
	drop mnninc999i npopul999i
	sort anninc999i
	replace anninc999i = anninc999i/12

	keep if year == $year_output

	* the k-th name below replaces the k-th code in codes
	local codes QE QL XB XF XL XN XR XS WO
	local k = 0
	foreach name in "Europe" "East Asia" "North America and Oceania" "Sub-Saharan Africa" "Latin America" "MENA" "Russia and Central Asia" "South and Southeast Asia" "World" {
		local ++k
		local code : word `k' of `codes'
		replace region = "`name'" if region == "`code'"
	}

	export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.3") sheetmod cell(A1) first(varl)
restore

**# Figure 2.4., Box 2.1., Box 2.2.
* ---------------------------------------------------------------------------- 
* Builds a country-level file for the output year with per capita national
* income converted with xlceup999i (anninc999i) and with xlceux999i
* (anninc999i_mer), plus population and a display name, restricted to the
* countries listed in $corecountries.
* ---------------------------------------------------------------------------- *

* Country display names, with a few names overridden
import excel using "$raw/country-codes-new.xlsx", firstrow clear
renvars code shortname / iso isoname
keep iso isoname 
replace isoname="Turkiye" if iso=="TR"
replace isoname="Czechia" if iso=="CZ"
replace isoname="Russia" if iso=="RU"
replace isoname="Eswatini" if iso=="SZ"
replace isoname="Syria" if iso=="SY"
replace isoname="South Korea" if iso=="KR"
tempfile isoname 
save `isoname', replace

* One-observation dataset so that the data source switch can be tested with
* a plain -if- on the variable (evaluated on the first observation)
 clear 
insobs 1 
g use_widcode="$use_widcode"


if use_widcode=="YES"{
	* Fix: request and keep every series used below. Previously only
	* anninc999i survived this branch, so the divisions by xlceux999i and
	* xlceup999i further down could not run; xlceux was not even requested.
	wid, ind(anninc xlceup xlceux npopul) age(999) pop(i) p( p0p100) year($year_output) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "anninc999i","xlceup999i","npopul999i","xlceux999i") 
}
else{
use if inlist(widcode, "anninc999i","xlceup999i","npopul999i","xlceux999i") & ///
       year == $year_output & inlist(p, "p0p100") using $wid2024, clear
}	

drop p currency 

* One row per country, one column per series
reshape wide value, i(iso) j(widcode) string
renvars value*, predrop(5)

merge m:1 iso using `isoname', nogen keep(master matched) keepusing(isoname)

g corecountry=.
foreach country in $corecountries{
	replace corecountry=1 if iso=="`country'"
}
keep if corecountry==1
drop corecountry

* anninc999i_mer must be computed before anninc999i is overwritten
g anninc999i_mer=anninc999i/xlceux999i
drop xlceux999i
replace anninc999i = anninc999i/xlceup999i 
drop xlceup999i

save "$work_data/per_capita_income_map.dta", replace


**# Figure 2.5.  Most individuals who earn below the global average are in SSAF and SSEA
* ---------------------------------------------------------------------------- 
* Population weights: share of each of the eight regions in their combined
* npopul992i for the output year, stored wide (one column per region) in the
* tempfile `population'.
* ---------------------------------------------------------------------------- *
 clear 
insobs 1 
g use_widcode="$use_widcode"

if use_widcode=="YES"{
	* Fix: use $year_output, like the else branch and every other call in this
	* file. The previous year($year) referred to a global that is only
	* mentioned in a commented-out line of this script.
	wid, ind(npopul) age(992) pop(i) area(QE QL XB XF XL XN XR XS) year($year_output) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "npopul992i") 
}
else{
use if widcode == "npopul992i" & year == $year_output & inlist(iso, "QE", "QL", "XB", "XF", "XL", "XN", "XR", "XS") using $wid2024, clear
}	


ren value npopul992i

* Turn levels into shares of the eight-region total
egen totpop = total(npopul992i)
replace npopul992i = npopul992i/totpop

reshape wide npopul992i, i(year) j(iso) string
 
keep year npopul992i*
 
tempfile population
save `population', replace


 clear
insobs 1
generate use_widcode = "$use_widcode"

* Regional averages by percentile: aptinc992j and ahweal992j, output year
if use_widcode == "YES" {
	wid, ind(aptinc ahweal) age(992) pop(j) area(QE QL XB XF XL XN XR XS) year($year_output) clear
	rename (variable country percentile) (widcode iso p)
	drop age pop
	generate currency = ""
	keep if inlist(widcode, "aptinc992j", "ahweal992j")
}
else {
	use if inlist(widcode, "aptinc992j", "ahweal992j") ///
		& year == $year_output ///
		& inlist(iso, "QE", "QL", "XB", "XF", "XL", "XN", "XR", "XS") ///
		using $wid2024, clear
}

reshape wide value, i(iso year p) j(widcode) string
renvars value*, predrop(5)

* Bounds of each percentile group, multiplied by 1000 and stored as integers
generate long p_min = round(1000*real(regexs(1))) if regexm(p, "^p([0-9\.]+)p([0-9\.]+)$")
generate long p_max = round(1000*real(regexs(2))) if regexm(p, "^p([0-9\.]+)p([0-9\.]+)$")

* Keep only the groups whose width is exactly 1000 on that scale
generate n = round(p_max - p_min, 1)
keep if n == 1000

drop p p_max n currency
rename p_min p
sort iso year p

* Regional population shares, one column per region
merge m:1 year using `population', nogen

* Evaluation grids (200 points each) and the logged series:
* annual income, monthly income (annual/12) and wealth
range dinc 2.5 14 200
generate expdinc = exp(dinc)
generate loginc = log(aptinc992j)

generate aptinc992jm = aptinc992j/12

range dincm 0 11 200
generate expdincm = exp(dincm)
generate logincm = log(aptinc992jm)

range dweal 1.5 16 200
generate expdweal = exp(dweal)
generate logweal = log(ahweal992j)

* Kernel density of each logged series per region, evaluated on the grid and
* scaled by the population share of that region (read from observation 1)
foreach reg in QE QL XB XF XL XN XR XS {
	kdensity loginc if iso == "`reg'", gen(dinc`reg' inc`reg') at(dinc) nograph
	replace inc`reg' = inc`reg'*npopul992i`reg'[1]

	kdensity logincm if iso == "`reg'", gen(dincm`reg' incm`reg') at(dincm) nograph
	replace incm`reg' = incm`reg'*npopul992i`reg'[1]

	kdensity logweal if iso == "`reg'", gen(dweal`reg' weal`reg') at(dweal) nograph
	replace weal`reg' = weal`reg'*npopul992i`reg'[1]
}


* monthly income: export the population-scaled densities on the monthly grid
preserve
	keep dincm expdincm incm*
	renvars incm*, predrop(4)

	* Round the grid values more coarsely as they grow: first to 0.1, then
	* to the k-th step for every value at or above the k-th floor
	replace expdincm = round(expdincm, .1)
	local steps  1 5  10  50  100  500  1000  5000
	local floors 1 20 100 200 1000 2000 10000 20000
	forvalues k = 1/8 {
		local step  : word `k' of `steps'
		local floor : word `k' of `floors'
		replace expdincm = round(expdincm, `step') if expdincm >= `floor'
	}

	* the k-th label below belongs to the k-th column in cols
	local cols XB QE QL XR XN XL XS XF
	local k = 0
	foreach lab in "North America & Oceania" "Europe" "East Asia" "Russia & Central Asia" "MENA" "Latin America" "South & Southeast Asia" "Sub-Saharan Africa" {
		local ++k
		local col : word `k' of `cols'
		label variable `col' "`lab'"
	}

	order dincm expdincm `cols'

	* quick visual check of the curves
	line `cols' dincm

	export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.5") sheetmod cell(A2) first(varl)
	putexcel set "$outputs/WIR_2026_Chapter2.xlsx", modify sheet("data-F2.5")
	putexcel (C3:J202), nformat(percent)
restore


* keep the full set of grids and densities for later use
save "$work_data/temp_density.dta", replace




**# Figure 2.6. Extreme concentration of income at the very top is a defining feature of the global economy
* ---------------------------------------------------------------------------- *
* Exports sptinc992j for the bottom 50%, middle 40%, top 10% and top 1% of
* each of the eight regions in the output year.
* ---------------------------------------------------------------------------- *

 clear
insobs 1
generate use_widcode = "$use_widcode"

if use_widcode == "YES" {
	wid, ind(sptinc) age(992) pop(j) area(XN XF QE XB XL XR XS QL) p(p0p50 p50p90 p90p100 p99p100 ) year($year_output) clear
	rename (variable country percentile) (widcode iso p)
	drop age pop
	generate currency = ""
	keep if inlist(widcode, "sptinc992j")
}
else {
	use if widcode == "sptinc992j" ///
		& inlist(iso, "XN", "XF", "QE", "XB", "XL", "XR", "XS", "QL") ///
		& year == $year_output ///
		& inlist(p, "p0p50", "p50p90", "p90p100", "p99p100" ) ///
		using $wid2024, clear
}

* Replace region codes by readable names (k-th name replaces k-th code)
local codes XF QE XB XL XN XR XS QL
local k = 0
foreach name in "Sub-Saharan Africa" "Europe" "North America & Oceania" "Latin America" "MENA" "Russia & Central Asia" "South & Southeast Asia" "East Asia" {
	local ++k
	local code : word `k' of `codes'
	replace iso = "`name'" if iso == "`code'"
}

drop widcode currency
* dots are not allowed in variable names created by the reshape
replace p = subinstr(p, ".", "_", .)
reshape wide value, i(iso) j(p) string
renvars value*, predrop(5)

label variable p0p50   "Bottom 50%"
label variable p50p90  "Middle 40%"
label variable p90p100 "Top 10%"
label variable p99p100 "Top 1%"

order year iso p0p50 p50p90 p90p100 p99p100
gsort p90p100

export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.6") sheetmod cell(A1) first(varl)
putexcel set "$outputs/WIR_2026_Chapter2.xlsx", modify sheet("data-F2.6")
putexcel (C2:J9), nformat(percent)

tempfile F1p3
save `F1p3', replace



**# Input for Figure 2.7
* ---------------------------------------------------------------------------- *
* Builds a synthetic "OA" aggregate from the countries listed in $OA: for each
* year from 1980, country-level percentile averages (income ai, wealth aw) are
* divided by xlceup999i, pooled, re-ranked and re-cut into brackets. The result,
* together with the summed npopul992i of those countries, is saved to
* $work_data/OtherAsia-ptinc-hweal.
* ---------------------------------------------------------------------------- *
clear all

// global year 2021
global OA  "HK JP KP KR MN MO TW AF BD BN BT ID KH LA LK MM MV MY NP PG PH PK SG TH TL VN" // Asia without China India CA


* Empty accumulator file; each pass of the loop below appends to it
tempfile combined
save `combined', emptyok

* One-observation dataset so the data-source switch can be tested with -if-
 clear 
insobs 1 
g use_widcode="$use_widcode"


if use_widcode=="YES"{
	wid, ind(ahweal anninc npopul xlceup) age(992 999) pop(i) p(p0p100) area($OA)  clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "ahweal992i", "anninc992i", "npopul992i", "xlceup999i")
}
else{
use $wid2024, clear
keep if inlist(widcode, "ahweal992i", "anninc992i", "npopul992i", "xlceup999i")
keep if p == "p0p100"
}	




* NOTE(review): p and currency are still in memory during this reshape and are
* only dropped afterwards; reshape wide needs them to be constant within
* iso-year. Other sections drop them before reshaping -- confirm this is fine.
reshape wide value, i(iso year) j(widcode) string
renvars value*, predrop(5)

* Keep the conversion factor of the output year only, then copy it to every
* year of the same country (mean over a single non-missing value)
replace xlceup999i = . if year != $year_output

egen xlceup999i2 = mean(xlceup999i), by(iso)
drop xlceup999i 
rename xlceup999i2 xlceup999i


drop p currency

tempfile aggregates
save "`aggregates'"

 clear 
insobs 1 
g use_widcode="$use_widcode"


if use_widcode=="YES"{
	wid, ind(aptinc sptinc ahweal shweal) age(992) pop(j) area($OA) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "aptinc992j", "sptinc992j", "ahweal992j", "shweal992j")
}
else{
use $wid2024, clear
}	


* Drop iso codes starting with X, Q (except QA) or O (except OM), and codes
* containing a hyphen -- presumably regional aggregates and sub-units; verify
drop if (substr(iso, 1, 1) == "X" | substr(iso, 1, 1) == "Q") & iso != "QA"
drop if substr(iso, 1, 1) == "O" & iso != "OM"
drop if strpos(iso, "-")

keep if inlist(widcode, "aptinc992j", "sptinc992j", "ahweal992j", "shweal992j")

// Parse percentiles
* p_min / p_max are the bounds of the percentile group multiplied by 1000
generate long p_min = round(1000*real(regexs(1))) if regexm(p, "^p([0-9\.]+)p([0-9\.]+)$")
generate long p_max = round(1000*real(regexs(2))) if regexm(p, "^p([0-9\.]+)p([0-9\.]+)$")

* Codes with a single bound ("pXX") only provide p_min
replace p_min = round(1000*real(regexs(1))) if regexm(p, "^p([0-9\.]+)$")

* Every missing upper bound is set to 100% here ...
replace p_max = 1000*100 if missing(p_max)

* ... NOTE(review): so the four conditional replacements below can never
* match, because p_max is no longer missing at this point
replace p_max = p_min + 1000 if missing(p_max) & inrange(p_min, 0, 98000)
replace p_max = p_min + 100  if missing(p_max) & inrange(p_min, 99000, 99800)
replace p_max = p_min + 10   if missing(p_max) & inrange(p_min, 99900, 99980)
replace p_max = p_min + 1    if missing(p_max) & inrange(p_min, 99990, 99999)

replace p = "p" + string(round(p_min/1e3, 0.001)) + "p" + string(round(p_max/1e3, 0.001)) if !missing(p_max)

// Keep only g-percentiles
* n is the width of the group on the x1000 scale; groups of width 1000, 100
* and 10 are kept only below 99000, 99900 and 99990 respectively
generate n = round(p_max - p_min, 1)
keep if inlist(n, 1, 10, 100, 1000)
drop if n == 1000 & p_min >= 99000
drop if n == 100  & p_min >= 99900
drop if n == 10   & p_min >= 99990
drop p p_max currency
rename p_min p
duplicates drop iso year p widcode, force
sort iso year widcode p

reshape wide value, i(iso year p) j(widcode) string

* ai/si: average and share of income; aw/sw: average and share of wealth
rename valueaptinc992j ai
rename valuesptinc992j si
rename valueahweal992j aw
rename valueshweal992j sw

* Attach the country totals, population and conversion factor saved above
merge n:1 iso year using "`aggregates'", nogenerate keep(master match)

rename anninc992i itot
rename ahweal992i wtot

drop if year<1980

* Weight of each bracket: its width times the country population
generate pop = n*npopul992i
gen keep = 0

// PPP
rename xlceup999i PPP


* z = i (income) then w (wealth); y is the conversion factor; x is the
* name of the global holding the country list
foreach z in i w {

foreach y in PPP {
// 	local y PPP
	
	* Converted versions of the bracket average and of the country total
	foreach v of varlist a`z' `z'tot  {
		gen `v'_`y' = `v'/`y'
	}

// all regions and world
foreach x in OA {
	
preserve

	* Restrict to the countries of the list
	foreach q in $`x' {
		replace keep = 1 if iso == "`q'"	
	}
	keep if keep == 1

	levelsof iso
	drop if missing(a`z')
	* Within each year, order brackets from the highest to the lowest
	* converted average and turn the running population total into a
	* position between 0 and 1e5 (1e5 minus the cumulated share)
	gsort year -a`z'_`y' 
	by year: generate rank = sum(pop)
	by year: replace rank = 1e5*(1 - rank/rank[_N])

	* Assign each country bracket to a bracket of the pooled distribution
	egen bracket = cut(rank), at(0(1000)99000 99100(100)99900 99910(10)99990 99991(1)99999 200000)

	* Population-weighted mean of the converted average per pooled bracket
	collapse (mean) a`z'_`y' [pw=pop], by(year bracket)

	generate iso = "`x'-`y'"
	levelsof iso
	rename bracket p
	rename a`z'_`y' a`z'

	* NOTE(review): this tempfile is declared but not used in the visible code
	tempfile `x'_`y'_`z'
	append using `combined'
	save "`combined'", replace
restore
}

}

}

use "`combined'", clear

* Income and wealth rows were appended separately; copy the non-missing value
* (sorted first) onto both rows of a cell, then keep one row per cell
bys iso year p (aw): replace aw = aw[1]
bys iso year p (ai): replace ai = ai[1]

duplicates drop iso year p, force
replace iso = "OA"
* Total npopul992i of the listed countries, by year from 1980
preserve 
 clear 
insobs 1 
g use_widcode="$use_widcode"

if use_widcode=="YES"{
	wid, ind(npopul) age(992) pop(i) area($OA) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "npopul992i")
}
else{
	use if widcode == "npopul992i" using $wid2024, clear
	
}	


drop if year<1980
	gen keep = 0
	foreach q in $OA {
		replace keep = 1 if iso == "`q'"	
	}
	keep if keep == 1
	drop keep currency widcode p
	rename value npopul992i
	collapse (sum) npopul992i, by(year)
	generate iso = "OA"
	
	tempfile npopul992i
	save `npopul992i'
restore 
merge m:1 iso year using "`npopul992i'", nogenerate 


save "$work_data/OtherAsia-ptinc-hweal", replace




**# Figure 2.7. The composition of top earners and other groups has shifted over time
* ---------------------------------------------------------------------------- *
*If necessary, change year
* For the output year, pools the percentile averages (aptinc992j) of six
* regions, China, India and the "OA" aggregate saved above, ranks all brackets
* together and exports, for each bracket of the pooled ranking, the smoothed
* share of its population that belongs to each area.
* ---------------------------------------------------------------------------- *

* One-observation dataset so the data-source switch can be tested with -if-
 clear 
insobs 1 
g use_widcode="$use_widcode"


if use_widcode=="YES"{
	wid, ind(aptinc) age(992) pop(j) area(QE XB XR XL XF XN CN IN) year($year_output) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "aptinc992j")
}
else{
use if inlist(widcode, "aptinc992j") & year==$year_output using $wid2024, clear
drop if missing(value)
keep if inlist(iso, "QE", "XB", "XR", "XL") ///
	  | inlist(iso, "XF", "XN", "CN", "IN")
}	


// Parse percentiles
* p_min / p_max are the bounds of the percentile group multiplied by 1000
generate long p_min = round(1000*real(regexs(1))) if regexm(p, "^p([0-9\.]+)p([0-9\.]+)$")
generate long p_max = round(1000*real(regexs(2))) if regexm(p, "^p([0-9\.]+)p([0-9\.]+)$")

replace p_min = round(1000*real(regexs(1))) if regexm(p, "^p([0-9\.]+)$")

* Every missing upper bound is set to 100% here ...
replace p_max = 1000*100 if missing(p_max)

* ... NOTE(review): so the four conditional replacements below can never
* match, because p_max is no longer missing at this point
replace p_max = p_min + 1000 if missing(p_max) & inrange(p_min, 0, 98000)
replace p_max = p_min + 100  if missing(p_max) & inrange(p_min, 99000, 99800)
replace p_max = p_min + 10   if missing(p_max) & inrange(p_min, 99900, 99980)
replace p_max = p_min + 1    if missing(p_max) & inrange(p_min, 99990, 99999)

replace p = "p" + string(round(p_min/1e3, 0.001)) + "p" + string(round(p_max/1e3, 0.001)) if !missing(p_max)

// Keep only g-percentiles
* n is the width of the group on the x1000 scale; groups of width 1000, 100
* and 10 are kept only below 99000, 99900 and 99990 respectively
generate n = round(p_max - p_min, 1)
keep if inlist(n, 1, 10, 100, 1000)
drop if n == 1000 & p_min >= 99000
drop if n == 100  & p_min >= 99900
drop if n == 10   & p_min >= 99990
drop p p_max currency
rename p_min p
duplicates drop iso year p widcode, force
sort iso year widcode p

reshape wide value, i(iso year p) j(widcode) string

rename valueaptinc992j a

* Output-year xlceup999i for China and India only
preserve 
 clear 
insobs 1 
g use_widcode="$use_widcode"

if use_widcode=="YES"{
	wid, ind(xlceup) age(999) pop(i) area(CN IN) year($year_output) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "xlceup999i")
}
else{
	use if widcode == "xlceup999i" & inlist(iso, "CN", "IN") & year == $year_output using $wid2024, clear
}	



	drop p widcode currency year
	rename value ppp
	
	tempfile ppp
	save `ppp'
restore 
merge m:1 iso using `ppp', nogen

* Only rows with a factor (CN and IN) are converted; the others are left as is
replace a = a/ppp if !missing(ppp)
drop ppp

keep if year == $year_output


// rank and compute new bracket for each region
* Output-year npopul992i of the eight areas
preserve 
 clear 
insobs 1 
g use_widcode="$use_widcode"


if use_widcode=="YES"{
	wid, ind(npopul) age(992) pop(i) area(QE XB XR XL XF XN CN IN) year($year_output) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode, "npopul992i")
}
else{
		use if widcode == "npopul992i" & year == $year_output using $wid2024, clear
	keep if inlist(iso, "QE", "XB", "XR", "XL") ///
		  | inlist(iso, "XF", "XN", "CN", "IN")
}	

	drop p widcode currency
	rename value npopul992i
	
	tempfile npopul992i
	save `npopul992i'
restore 
merge m:1 iso year using `npopul992i', nogen
// (author note) I stopped here
* Add the "OA" aggregate built in the previous section (income average only)
preserve 
	use "$work_data/OtherAsia-ptinc-hweal", clear
	keep year iso p ai npopul992i
	rename ai a
	keep if year == $year_output
	
	tempfile OAsia
	save `OAsia'
restore
append using `OAsia'
drop n

// recompute size of the bracket
* n is now the bracket width as a fraction of the population, by position of p
gen n=0.01
replace n=0.001 if p>=99000 & p<99900
replace n=0.0001 if p>=99900 & p<99990
replace n=0.00001 if p>=99990

generate pop = n*npopul992i

* Order all brackets from the highest to the lowest average and turn the
* running population total into a position between 0 and 1e5
gsort year -a
bys year : generate rank = sum(pop)
bys year : replace rank = 1e5*(1 - rank/rank[_N])

egen bracket = cut(rank), at(0(1000)99000 99100(100)99900 99910(10)99990 99991(1)99999 200000)

* Population of each area inside each pooled bracket, as a percentage of the
* bracket total
collapse (sum) pop, by(year bracket iso)
bys year bracket : egen tot = sum(pop)
gen share = (pop/tot)*100
// gen share = (pop/tot)
keep iso year bracket share 
reshape wide share, i( year bracket) j(iso) string
* Areas absent from a bracket get a share of zero
ds year bracket, not
foreach l in `r(varlist)' {
replace `l' = 0 if missing(`l')
}

* perc numbers the brackets within the year and is the x variable of the smoother
bys year : gen perc = _n
tsset year perc
ds year bracket, not

foreach x in `r(varlist)' {
	lowess `x' perc, bwidth(.125) gen(`x'_sm)
}


ren bracket p 


* Keep the smoothed shares only and rescale them from percentages to fractions
keep p share*sm
foreach i in XF CN IN XL XN XR QE XB OA{
	replace share`i'_sm = share`i'_sm/100
}
label var shareIN_sm "India" 
label var shareOA_sm "Other Asia" 
label var shareCN_sm "China" 
label var shareXF_sm "SSA" 
label var shareXL_sm "Latin America" 
label var shareXN_sm "MENA" 
label var shareXR_sm "Russia & Central Asia" 
label var shareQE_sm "Europe" 
label var shareXB_sm "North America & Oceania"

* just for correct graduations on graph
* p is blanked on all but every tenth row, then truncated for the top brackets
replace p = p/1000
replace p = . if mod(_n-1,10)!=0 

replace p = round(floor(p),.1) if p>99 & p<99.91 
replace p = round(floor(p*10)/10,.01) if p>=99.91 & p<99.991 
replace p = round(floor(p*100)/100,.001) if p>=99.991 

order p shareIN_sm shareOA_sm shareCN_sm shareXF_sm shareXL_sm shareXN_sm shareXR_sm shareQE_sm shareXB_sm    

export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.7") sheetmod cell(A2) first(varl)
putexcel set "$outputs/WIR_2026_Chapter2.xlsx", modify sheet("data-F2.7")
putexcel (B3:J129), nformat(percent)



**# Figure 2.8-2.12. 
* ---------------------------------------------------------------------------- *      
* Builds a country-level file for the output year with the aptinc992j values
* of the bottom 50% (bot50) and top 10% (top10), the rptinc992j value stored
* as T10B50, and a display name, restricted to $corecountries.
* ---------------------------------------------------------------------------- *

* Country display names, with a few names overridden
import excel using "$raw/country-codes-new.xlsx", firstrow clear
renvars code shortname / iso isoname
keep iso isoname 
replace isoname="Turkiye" if iso=="TR"
replace isoname="Czechia" if iso=="CZ"
replace isoname="Russia" if iso=="RU"
replace isoname="Eswatini" if iso=="SZ"
replace isoname="Syria" if iso=="SY"
replace isoname="South Korea" if iso=="KR"
tempfile isoname 
save `isoname', replace

* One-observation dataset so the data-source switch can be tested with -if-
 clear 
insobs 1 
g use_widcode="$use_widcode"


if use_widcode=="YES"{
	wid, ind(aptinc) age(992) pop(j) p( p0p50 p90p100 ) year($year_output) clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inlist(widcode,"aptinc992j") 
}
else{
use if widcode == "aptinc992j" & ///
       year == $year_output & inlist(p, "p0p50", "p90p100") using $wid2024, clear
}	


drop widcode currency 
replace p = "bot50" if p == "p0p50"
replace p = "top10" if p == "p90p100"


* One row per country with columns bot50 and top10
reshape wide value, i(iso) j(p) string
renvars value*, predrop(5)
gen T10B50 = top10/(bot50) 
 

merge m:1 iso using `isoname', nogen keep(master matched) keepusing(isoname)

* The ratio computed above is replaced by the rptinc992j series of the database
preserve
use if inlist(widcode,"rptinc992j") & year == $year_output  using $wid2024, clear
keep iso value
ren value T10B50
tempfile bd2
save `bd2', replace
restore 

drop T10B50
merge 1:1 iso using `bd2' , nogen keep(master matched) 

g corecountry=.
foreach country in $corecountries{
	replace corecountry=1 if iso=="`country'"
}
keep if corecountry==1
drop corecountry

* Fix: the previous lists also named top1, a variable that is never created in
* this section. It only resolved through variable abbreviation (to top10) and
* errors under -set varabbrev off-. The saved variables are unchanged.
* NOTE(review): if a top 1% column is wanted, p99p100 has to be loaded above.
keep iso bot50  top10 year T10B50  isoname
order iso bot50  top10 year T10B50  isoname

save "$work_data/T10B50_map.dta", replace



**# Figure 2.13. Redistribution decreases inequality within countries but with large variations
* ---------------------------------------------------------------------------- *
* Exports, for the eight "-PPP" regional codes in the output year, the
* rptinc992j value (T10B50) next to the rdiinc992j value (T10B50post).
* ---------------------------------------------------------------------------- *

use if inlist(widcode, "rdiinc992j", "rptinc992j") ///
	& inlist(iso, "XN-PPP", "XF-PPP", "QE-PPP", "XB-PPP", "XL-PPP", "XR-PPP", "XS-PPP", "QL-PPP") ///
	& year == $year_output ///
	using $wid2024, clear
*can retrieve data from wid, using wid, ind() age() pop() p() year() area() clear

drop p currency
reshape wide value, i(iso year) j(widcode) string
renvars value*, predrop(5)
rename (rptinc992j rdiinc992j) (T10B50 T10B50post)

* Relative fall of the ratio between the two series
generate decreaseprepost = (T10B50 - T10B50post)/T10B50

order year iso T10B50 T10B50post decreaseprepost
sort T10B50
keep year iso T10B50 T10B50post decreaseprepost

* Replace region codes by readable names (k-th name replaces k-th code)
local codes XF QE XB XL XN XR XS QL
local k = 0
foreach name in "Sub-Saharan Africa" "Europe" "North America & Oceania" "Latin America" "MENA" "Russia & Central Asia" "South & Southeast Asia" "East Asia" {
	local ++k
	local code : word `k' of `codes'
	replace iso = "`name'" if iso == "`code'-PPP"
}

preserve
	keep year iso T10B50 T10B50post
	order year iso T10B50post T10B50
	gsort T10B50

	export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.13") sheetmod cell(A1) first(var)
	putexcel set "$outputs/WIR_2026_Chapter2.xlsx", modify sheet("data-F2.13")
	* NOTE(review): only columns A-D are exported above (decreaseprepost is
	* not kept), so this formats a column without data -- confirm intent
	putexcel (E2:E9), nformat(percent)
restore



**# Figure 2.14.a Transfers account for a larger share of redistribution than taxes
* ---------------------------------------------------------------------------- *
*                      F7 from Fisher-Post & Gethin, 2023
/*
Data provided by Amory Gethin (wir2026-gethin-redistribution.dta)
*Paper: https://amory-gethin.fr/files/pdf/FisherGethin2023.pdf

Figure 7: Tax Progressivity by World Region, 1980-2019:
Percent Reduction in Top 10% to Bottom 50% Average Income Ratio 
(Pretax versus Net-of-tax Income)
*/
* ---------------------------------------------------------------------------- *
* Loads the country-year redistribution data, tags each country with its
* region, attaches npopul992i and exports the population-weighted regional
* mean of diff_tax by year. The country-level data stay in memory afterwards
* and are reused by the following sections.
*Data provided by Amory Gethin
use "$raw/gethin/wir2026-gethin-redistribution.dta", clear

* Region of each country, taken from the region globals; countries that
* belong to none of them are dropped
g region=""
foreach reg in MENA RUCA SSAF SSEA EURO LATA NAOC EASA{
	foreach c in $`reg'{
		replace region="`reg'" if iso=="`c'"
	}	
}
keep if region!=""

* Population by country and year, used as weight
preserve
	use if inlist(widcode,"npopul992i")  using $wid2024, clear
	keep iso value year
	ren value npopul992i
	duplicates report
	tempfile pops
	save `pops',replace
restore

* Fix: keep only rows that exist in the redistribution data. Population rows
* without a match carry an empty region and no diff_* values; previously they
* stayed in memory and entered the collapse/reshape below as an unnamed group.
merge m:1 year iso using `pops', keep(master match) nogenerate
g corecountry=.
foreach country in $corecountries{
	replace corecountry=1 if iso=="`country'"
}
keep if corecountry==1
drop corecountry

preserve
*aggregate at region level weighting by population
collapse (mean) diff_tax diff_tot [w=npopul992i], by(year region)
ren (region diff_tax) (iso value)
 
keep iso value year
greshape wide value, i(year) j(iso) string
renvars value*, predrop(5)

label var EASA 	"East Asia"
label var EURO	  "Europe"
label var LATA	 "Latin America"
label var MENA	 "MENA"
label var NAOC 	"North America & Oceania"
label var SSEA 	"South & Southeast Asia"
label var RUCA	 "Russia & Central Asia"
label var SSAF "Sub-Saharan Africa"
order year EURO EASA  NAOC  SSAF  LATA MENA RUCA  SSEA 

export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.14a") sheetmod cell(A1) first(varl)
restore



**# Figure 2.14.b Transfers account for a larger share of redistribution than taxes
* ---------------------------------------------------------------------------- *
*                      F11 from Fisher-Post & Gethin, 2023
/*
Data provided by Amory Gethin
*Paper: https://amory-gethin.fr/files/pdf/FisherGethin2023.pdf

Figure 11 – Extent of Redistribution by World Region, 1980-2019:
Percent Reduction in Top 10% to Bottom 50% Income Ratio, Pretax - Posttax
*/
* ---------------------------------------------------------------------------- *
* Exports the population-weighted regional mean of diff_tot by year, computed
* from the country-level redistribution data currently in memory.
preserve
*aggregate at region level weighting by population
collapse (mean) diff_tax diff_tot [w=npopul992i], by(year region)

ren (region diff_tot) (iso value)
 
keep iso value year
greshape wide value, i(year) j(iso) string
renvars value*, predrop(5)

label var EASA 	"East Asia"
label var EURO	  "Europe"
label var LATA	 "Latin America"
label var MENA	 "MENA"
label var NAOC 	"North America & Oceania"
label var SSEA 	"South & Southeast Asia"
label var RUCA	 "Russia & Central Asia"
label var SSAF "Sub-Saharan Africa"
order year EURO EASA  NAOC  SSAF  LATA MENA RUCA  SSEA 

* Fix: the file name was missing the dot before the extension
* ("WIR_2026_Chapter2xlsx"), so this sheet did not go into the chapter workbook.
export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-F2.14b") sheetmod cell(A1) first(varl)
restore



**# Figure 2.15. Taxes alone tend to have minimal or even regressive effects on inequality in many countries
* ---------------------------------------------------------------------------- *
* 				   	                  F4 from Fisher-Post & Gethin, 2023
/*
Data provided by Amory Gethin
*Paper: https://amory-gethin.fr/files/pdf/FisherGethin2023.pdf

Figure 4 – Tax Progressivity Around the World:
Percent Reduction in Top 10% to Bottom 50% Average Income Ratio (Pretax versus Net-of-tax Income)
*/
* ---------------------------------------------------------------------------- *
* Saves a country-level file with the 2023 value of diff_tax, a display name
* and a text category, for the countries in $corecountries. Starts from the
* country-level redistribution data currently in memory.

* Country display names (k-th name replaces the name of the k-th code)
preserve
	import excel using "$raw/country-codes-new.xlsx", firstrow clear
	renvars code shortname / iso isoname
	keep iso isoname

	local codes TR CZ RU SZ SY KR
	local k = 0
	foreach name in "Turkiye" "Czechia" "Russia" "Eswatini" "Syria" "South Korea" {
		local ++k
		local code : word `k' of `codes'
		replace isoname = "`name'" if iso == "`code'"
	}

	tempfile isoname
	save `isoname', replace
restore

preserve
	keep if year == 2023
	keep iso diff_tax
	rename diff_tax reduc

	merge 1:1 iso using `isoname', nogenerate

	sort reduc
	drop if reduc == .
	rename reduc value

	* Category of each value; the ranges do not overlap, and "No data" is
	* assigned last so that it takes precedence for missing values
	generate value_cat = ""
	replace value_cat = "[-40,-20]"   if value <= -20
	replace value_cat = "(-20,-10]"   if value > -20  & value <= -10
	replace value_cat = "(-10,-5]"    if value > -10  & value <= -5
	replace value_cat = "(-5,0]"      if value > -5   & value <= 0
	replace value_cat = "(0,5]"       if value > 0    & value <= 5
	replace value_cat = "(5,10]"      if value > 5    & value <= 10
	replace value_cat = "(10,20]"     if value > 10   & value <= 20
	replace value_cat = "(20,30]"     if value > 20   & value <= 30
	replace value_cat = "(30,40]"     if value > 30   & value <= 40
	replace value_cat = "(40,50]"     if value > 40   & value <= 50
	replace value_cat = "(50,60]"     if value > 50   & value <= 60
	replace value_cat = "(60,70]"     if value > 60
	replace value_cat = "No data"     if missing(value)

	* Restrict to the core countries
	generate byte iscore = 0
	foreach ctry in $corecountries {
		replace iscore = 1 if iso == "`ctry'"
	}
	keep if iscore == 1
	drop iscore

	sort value
	summarize value
	save "$work_data/tax_progressivity_map.dta", replace
restore



**# Figure 2.16. Transfers consistently reduce inequality across all regions, but with large variations across countries
* ---------------------------------------------------------------------------- *
* 				   	                  F9 from Fisher-Post & Gethin, 2023
/*
Data provided by Amory Gethin
*Paper: https://amory-gethin.fr/files/pdf/FisherGethin2023.pdf

Figure 9 – A Global Map of Redistribution
Percent Reduction in Top 10% to Bottom 50% Income Ratio, Pretax - Posttax
*/
* ---------------------------------------------------------------------------- *

preserve
* Country-level map data for 2023 based on diff_tot, saved for the mapping step.
* NOTE(review): the section title refers to transfers, while the series used
* here is diff_tot - confirm this is the intended variable.
keep if year==2023
keep iso diff_tot
rename diff_tot value

* Attach the display names from the isoname tempfile created earlier in the file
merge 1:1 iso using `isoname', nogenerate

sort value
drop if value==.

* Legend classes. The interior bins are half-open (lower,upper] intervals built
* from consecutive cut-points; the two outer bins are open-ended.
gen value_cat = ""
replace value_cat = "[-40,-20]" if value <= -20
local lower -20
foreach upper in -10 -5 0 5 10 20 30 40 50 60 {
	replace value_cat = "(`lower',`upper']" if value > `lower' & value <= `upper'
	local lower `upper'
}
replace value_cat = "(60,70]" if value > 60
* Must come last: missing values also satisfy value > 60
replace value_cat = "No data" if missing(value)

* Restrict to the countries listed in the corecountries global
gen byte keep_core = 0
foreach c in $corecountries {
	replace keep_core = 1 if iso=="`c'"
}
keep if keep_core==1
drop keep_core

sort value
summarize value
save "$work_data/tax_progressivity_map_f9.dta", replace
restore



**# Figure B2.2.2. Per capita national income by country size
* ---------------------------------------------------------------------------- *
* ---------------------------------------------------------------------------- *

* One row per core country with the xlceup999i value of the output year, stored
* in the tempfile ppp (presumably the PPP conversion factor - confirm).
use if year==$year_output & p=="p0p100" & widcode=="xlceup999i" ///
		using $wid2024, clear

* Restrict to the countries listed in the corecountries global
gen byte keep_core = 0
foreach c in $corecountries {
	replace keep_core = 1 if iso=="`c'"
}
keep if keep_core==1
drop keep_core

* Reshape so the value column carries the widcode as its name
keep year iso value widcode
greshape wide value, i(year iso) j(widcode) string
renvars value*, predrop(5)

* The factor is merged on iso only, so the year is not needed
drop year
tempfile ppp
save `ppp', replace


 * Load total national income (mnninc999i) and population (npopul999i) from 1970
* to the output year, either through the wid command or from the local extract.
* The use_widcode switch is tested directly on the global. The previous version
* stored it in a one-row scratch dataset and ran the if command on that variable,
* which only works because the if command evaluates the first observation.
if "$use_widcode"=="YES" {
	wid, ind(mnninc npopul) age(992 999) pop(i)   clear
	ren (variable country percentile) (widcode iso p)
	drop age pop
	g currency=""
	keep if inrange(year, 1970, $year_output)
	keep if inlist(widcode, "mnninc999i", "npopul999i")
}
else {
	use if inlist(widcode,"mnninc999i", "npopul999i") & ///
		inrange(year, 1970, $year_output) using $wid2024, clear
}

* Restrict to the countries listed in the corecountries global
g corecountry=.
foreach country in $corecountries {
	replace corecountry=1 if iso=="`country'"
}
keep if corecountry==1
drop corecountry
	
* One row per country-year with income and population side by side
keep year iso value	widcode	
greshape wide value, i(year iso) j(widcode) string
renvars value*, pred(5)

* Divide income by the output-year factor stored in the ppp tempfile
merge m:1 iso using `ppp'
drop _merge
replace mnninc999i=(mnninc999i/xlceup999i) 
drop xlceup999i


preserve
* Classify each country by its population in the output year and store the
* iso -> size lookup in the tempfile sizes.
keep if year==$year_output
* Population in millions
g poptot=npopul999i/1000000 

g size=""

* The inrange bounds overlap at the cut-points; because later lines overwrite
* earlier ones, a value exactly on a cut-point lands in the larger class
* (except 500, which stays in s100m_500m).
replace size = "s0_100k"     if inrange(poptot, 0, 0.1)
replace size = "s100k_1m"    if inrange(poptot, 0.1, 1)
replace size = "s1m_10m"     if inrange(poptot, 1, 10)
replace size = "s10m_50m"    if inrange(poptot, 10, 50)
replace size = "s50m_100m"   if inrange(poptot, 50, 100)
replace size = "s100m_500m"  if inrange(poptot, 100, 500)
* Fix: Stata treats missing as larger than any number, so without the guard a
* country with no population figure was classified as over 500 million.
* Such a country is now left unclassified (empty size).
replace size = "sover_500m"  if poptot > 500 & !missing(poptot)
keep iso size
tempfile sizes
save `sizes',replace
restore

preserve
* World per capita income by year: summed income over summed population across
* all rows in memory, stored in the tempfile world.
collapse (sum) mnninc999i npopul999i, by(year)
gen world = mnninc999i/npopul999i
keep year world
tempfile world
save `world', replace
restore

* Attach the size class of each country (fixed at its output-year population)
* to every year of the panel.
merge m:1 iso using `sizes'
drop _merge

* Per capita income of each size class: summed income over summed population
collapse (sum) mnninc999i npopul999i ,by(year size)
g anninc999i=mnninc999i /npopul999i 
drop mnninc999i npopul999i 
reshape wide anninc999i, i(year) j(size) string
renvars anninc999i*, pred(10)

* Express each class relative to the world average of the same year
merge m:1 year using `world'
drop _merge
foreach var in s0_100k s100k_1m s1m_10m s10m_50m s50m_100m s100m_500m sover_500m{
replace `var'=`var'/world	
}
drop world

* Fix: three labels were attached to the wrong variables (s100m_500m carried
* "1m-10m", s1m_10m carried "50m-100m", s50m_100m carried "100m-500m"), so the
* exported column headers did not match the data. Each label now matches its class.
label var s0_100k "0-100k"
label var s100k_1m "100k-1m"
label var s1m_10m "1m-10m"
label var s10m_50m "10m-50m"
label var s50m_100m "50m-100m"
label var s100m_500m "100m-500m"
label var sover_500m "over 500m"
* Columns in increasing size order rather than the alphabetical order left by reshape
order year s0_100k s100k_1m s1m_10m s10m_50m s50m_100m s100m_500m sover_500m
tsset year



export excel using "$outputs/WIR_2026_Chapter2.xlsx", sheet("data-Box 2.2.2.", replace) firstrow(varl) sheetmod cell(A1) 
