forre.st | binomconf

binomconf

Calculating Binomial Confidence Intervals in Python

Gives exact, optimal confidence intervals

from __future__ import division

import math
import random # only used for n=0 case

from scipy import special


def find_root(y_over_dy, start, steps=10, bounds=(None, None)):
    guess = start
    for i in xrange(steps):
        prev, guess = guess, guess - y_over_dy(guess)
        if bounds[0] is not None and guess < bounds[0]: guess = bounds[0]
        if bounds[1] is not None and guess > bounds[1]: guess = bounds[1]
        if guess == prev:
            break
    return guess

def binomial_conf_interval(x, n, conf=0.95):
    assert 0 <= x <= n and 0 <= conf < 1
    if n == 0:
        left = random.random()*(1 - conf)
        return left, left + conf
    bl = float(special.betaln(x+1, n-x+1))
    def f(left_a):
        left = max(1e-8, float(special.betaincinv(x+1, n-x+1, left_a)))
        right = min(1-1e-8, float(special.betaincinv(x+1, n-x+1, left_a + conf)))
        top = math.exp(
            math.log(right)*(x+1) + math.log(1-right)*(n-x+1)
            + math.log(left) + math.log(1-left) - bl
        ) - math.exp(
            math.log(left)*(x+1) + math.log(1-left)*(n-x+1)
            + math.log(right) + math.log(1-right) - bl
        )
        bottom = (x - n*right)*left*(1-left) - (x - n*left)*right*(1-right)
        return top/bottom
    left_a = find_root(f, (1-conf)/2, bounds=(0, 1-conf))
    return (
        float(special.betaincinv(x+1, n-x+1, left_a)),
        float(special.betaincinv(x+1, n-x+1, left_a + conf)),
    )

minmax = lambda x: (min(x), max(x))

def format_binomial_conf(x, n, conf=0.95, f=lambda x: x):
    if n == 0:
        return '???'
    left, right = minmax(map(f, binomial_conf_interval(x, n, conf)))
    return '~%.1f%% (%.f-%.f%%)' % (100*f(x/n),
        math.floor(100*left), math.ceil(100*right))

print "(95% confidence level)"
print
for x, n in [(1, 2), (0, 10), (3, 4), (1000, 1000), (0, 0)]:
    print "x =", x, "n =", n
    print binomial_conf_interval(x, n)
    print format_binomial_conf(x, n)
    print

Output:

(95% confidence level)

x = 1 n = 2
(0.09429932405024612, 0.9057006759497539)
~50.0% (9-91%)

x = 0 n = 10
(0.0, 0.23840419038085267)
~0.0% (0-24%)

x = 3 n = 4
(0.3298504422802088, 0.9739692154994636)
~75.0% (32-98%)

x = 1000 n = 1000
(0.9970117342468727, 1.0)
~100.0% (99-100%)

x = 0 n = 0
(0.001202340017706206, 0.9512023400177062)
???