Lognormal Income Distributions

Incomes within countries typically follow a skewed distribution with a long high tail. It has been found that on average national income distributions fit a lognormal distribution pretty well. Since I sometimes develop scenarios of income distribution and poverty as part of my work, I've put together a library for calculating standard poverty measures for income distributions.

EKB This library is now being used for the Greenhouse Development Rights (GDRs) Calculator (http://gdrs.sourceforge.net/ ). The GDRs calculator is a supporting tool for the GDRs framework for allocating the burden for greenhouse gas mitigation and adaptation.

EKB I've submitted this to the tcllib math library. While doing that I've made some modifications, which I'm posting below. One change is to the license, which is now BSD to make it consistent with tcllib. Also, I've added some examples (above the code) and a test suite (below the code).

The input variables are:

yave: mean per capita income
yc: an income cutoff
p: the proportion of people below the cutoff
x: cumulative fraction of the population (for the Lorenz Curve)
gini: The Gini coefficient

Some background:

Lognormal distributions on Wikipedia [L1 ]
Lorenz curves on Wikipedia [L2 ]
Gini coefficients on Wikipedia [L3 ]
A World Bank working paper justifying the use of lognormals for income distributions [L4 ]

Some examples:

    # Calculate the poverty headcount and poverty gap for a
    #  country with a mean income of 15000, a poverty line of
    #  3000, and a Gini coefficient of 0.45
    puts [math::lognorm::headcount 3000 15000 0.45]
    puts [math::lognorm::gap 3000 15000 0.45]

    # Calculate the share of income held by the lowest-earning
    #  50% in a country with a Gini coefficient of 0.35
    puts [math::lognorm::L 0.50 0.35]

    # Calculate the ratio of the highest-earning p percent to
    #  the lowest-earning p percent in a country with the
    #  given Gini coefficient
    proc highlow {p Gini} {
        set low [math::lognorm::L $p $Gini]
        set high [expr {1.0 - [math::lognorm::L [expr {(1.0 - $p)}] $Gini]}]
        return [expr {$high/$low}]
    }
    # Apply "highlow"
    puts [highlow 0.20 0.35]
    puts [highlow 0.20 0.55]



 ##
 ## Lognormal income distributions
 ##
 # Copyright 2007 Eric Kemp-Benedict
 # Released under the BSD license under any terms
 # that allow it to be compatible with tcllib

 package provide math::lognorm 0.1

 package require math::statistics

 namespace eval math::lognorm {
    # Maximum iterations for finding inverse normal
    variable maxiter 100
    variable epsilon 1.0e-9

    namespace export norminv gini2sdlog sdlog2gini \
        medmean_ratio L headcount gap cutoff

 }

 proc math::lognorm::quicknorminv {p} {
    variable epsilon

    if {$p < $epsilon || $p > 1-$epsilon} {
        error "Value out of bounds for inverse normal: $p"
        return 0
    }

    set sign -1
    if {$p > 0.5} {
        set p [expr {1 - $p}]
        set sign 1
    }        

    set t [expr {sqrt(-2.0 * log($p))}]

    set a0 2.30753
    set a1 0.27061
    set b1 0.99229
    set b2 0.04481

    set num [expr {$a0 + $a1 * $t}]
    set denom [expr {1 + $b1 * $t + $b2 * $t * $t}]

    return [expr {$sign * ($t - $num/$denom)}]

 }

 proc math::lognorm::norminv {p} {
    variable maxiter
    variable epsilon

    set deltax [expr {100 * $epsilon}]

    # Initial value for x
    set x [quicknorminv $p]

    set niter 0
    while {abs([::math::statistics::pnorm $x] - $p) > $epsilon} {
        set pstar [::math::statistics::pnorm $x]
        set pl [::math::statistics::pnorm [expr {$x - $deltax}]]
        set ph [::math::statistics::pnorm [expr {$x + $deltax}]]

        set x [expr {$x + 2.0 * $deltax * ($p - $pstar)/($ph - $pl)}]

        incr niter
        if {$niter == $maxiter} {
            error "Inverse normal distribution did not converge after $niter iterations"
            return {}
        }
    }

    return $x

 }

 # Gini must be 0..1
 proc math::lognorm::gini2sdlog {gini} {
    set p [expr {0.5 * (1.0 + $gini)}]
    return [expr {sqrt(2.0) * [norminv $p]}]
 }

 proc math::lognorm::sdlog2gini {sdlog} {
    set x [expr {$sdlog/sqrt(2.0)}]
    return [expr {2.0 * [::math::statistics::pnorm $x] - 1.0}]
 }

 # Ratio of median to mean
 proc math::lognorm::medmean_ratio {gini} {
    set sdlog [gini2sdlog $gini]
    return [expr {exp(-0.5 * $sdlog * $sdlog)}]
 }

 # Lorenz curve
 proc math::lognorm::L {x gini} {
    variable epsilon

    if {$x < $epsilon || $x > 1.0 - $epsilon} {
        return $x
    }
    set p [norminv $x]
    set sdlog [gini2sdlog $gini]
    return [::math::statistics::pnorm [expr {$p - $sdlog}]]
 }

 proc math::lognorm::headcount {yc yave gini} {
    variable epsilon

    set sdlog [gini2sdlog $gini]
    set z [expr {1.0 * $yc / $yave}]
    if {$z < $epsilon} {
        return 0.0
    }
    set x [expr {(1.0/$sdlog) * log($z) + 0.5 * $sdlog}]
    return [::math::statistics::pnorm $x]
 }

 # Analogous to poverty gap
 proc math::lognorm::gap {yc yave gini} {
    variable epsilon

    if {$yc < $epsilon * $yave} {
        return 0.0
    }
    set hc [headcount $yc $yave $gini]
    set Lhc [L $hc $gini]
    return [expr {$hc - (1.0 * $yave/$yc) * $Lhc}]
 }

 # Inverse of headcount
 proc math::lognorm::cutoff {p yave gini} {
    variable epsilon

    if {$p < $epsilon} {
        return 0.0
    }
    set sdlog [gini2sdlog $gini]
    set x [norminv $p]
    return [expr {$yave * exp($sdlog * ($x - 0.5 * $sdlog))}]
 }

Test Suite

    package require math::lognorm 0.1

    namespace import math::lognorm::*

    puts "--- math::lognorm::norminv ---"
    puts {[norminv 0.5] should give 0.0}
    puts [norminv 0.5]
    puts {[norminv 0.7] should give 0.5244}
    puts [norminv 0.7]
    puts {[norminv 0.05] should give -1.6448}
    puts [norminv 0.05]

    puts "\n"
    puts "--- math::lognorm::gini2sdlog ---"
    puts {[gini2sdlog 0.45] should give 0.84536}
    puts [gini2sdlog 0.45]
    puts {[gini2sdlog 0.23] should give 0.413481}
    puts [gini2sdlog 0.23]
    puts {[gini2sdlog 0.90] should give 2.326174}
    puts [gini2sdlog 0.90]
    puts {[gini2sdlog 0.05] should give 0.088681}
    puts [gini2sdlog 0.05]

    puts "\n"
    puts "--- math::lognorm::sdlog2gini ---"
    puts {[sdlog2gini [gini2sdlog 0.52]] should give 0.52}
    puts [sdlog2gini [gini2sdlog 0.52]]
    puts {[sdlog2gini [gini2sdlog 0.01]] should give 0.01}
    puts [sdlog2gini [gini2sdlog 0.01]]
    puts {[sdlog2gini [gini2sdlog 0.92]] should give 0.92}
    puts [sdlog2gini [gini2sdlog 0.92]]
    puts {[sdlog2gini [gini2sdlog 0.27]] should give 0.27}
    puts [sdlog2gini [gini2sdlog 0.27]]

    puts "\n"
    puts "--- math::lognorm::medmean_ratio ---"
    puts {[medmean_ratio 0.45] should give 0.699551}
    puts [medmean_ratio 0.45]
    puts {[medmean_ratio 0.70] should give 0.341573}
    puts [medmean_ratio 0.70]
    puts {[medmean_ratio 0.23] should give 0.918069}
    puts [medmean_ratio 0.23]

    puts "\n"
    puts "--- math::lognorm::L ---"
    puts {[L 0.20 0.55] should give 0.02807}
    puts [L 0.20 0.55]
    puts {[L 0.90 0.70] should give 0.426934}
    puts [L 0.90 0.70]

    puts "\n"
    puts "--- math::lognorm::headcount ---"
    puts {[headcount 7000 23000 0.35] should give 0.062651}
    puts [headcount 7000 23000 0.35]
    puts {[headcount 365 5000 0.55] should give 0.027698}
    puts [headcount 365 5000 0.55]
    puts {[headcount 10000 15000 0.55] should give 0.561441}
    puts [headcount 10000 15000 0.55]

    puts "\n"
    puts "--- math::lognorm::gap ---"
    puts {[gap 7000 23000 0.35] should give 0.013925}
    puts [gap 7000 23000 0.35]
    puts {[gap 365 5000 0.55] should give 0.008215}
    puts [gap 365 5000 0.55]
    puts {[gap 10000 15000 0.55] should give 0.290783}
    puts [gap 10000 15000 0.55]

    puts "\n"
    puts "--- math::lognorm::cutoff ---"
    puts {[cutoff 0.20 5000 0.55] should give 1149.892}
    puts [cutoff 0.20 5000 0.55]
    puts {[cutoff 0.05 20000 0.70] should give 613.0022}
    puts [cutoff 0.05 20000 0.70]
    puts {[cutoff 0.80 15000 0.25] should give 19801.82}
    puts [cutoff 0.80 15000 0.25]

Category Statistics