attachment:Benford.py of progress report

Attachment 'Benford.py'

   1 """
   2 Benny Distribution Checker
   3 
   4 AUTHORS:
   5 
   6 - Amy Feaver (2013-07-13)
   7 
   8 - Anne Ho (2013-07-13)
   9 
  10 Michelle Manes, Alina Bucur, Kate Thompson
  11 
  12 """
  13 
  14 import sage.libs.all
  15 
  16 import scipy
  17 import numpy
  18 
  19 from scipy import stats
  20 from numpy import array
  21 
  22 import matplotlib.pyplot
  23 
  24 
  25 
  26 
  27 def create_Benford_prediction(DataList, base):
  28     """
  29     INPUT:
  30 
  31     - ``DataList`` -- a list of real numbers
  32 
  33     - ``base`` -- a real number greater than or equal to 2
  34 
  35     OUTPUT:
  36 
  37     A list of length ceil(b), such that for 0 <= i <= ceil(b)-1, the ith position of the list contains the predicted number of items in the list
  38     which should start with the digit i under the Benford distribution (when the numbers in DataList are converted to the specified base).
  39 
  40     EXAMPLES:
  41 
  42         sage: Benford_prediction([i for i in range(40)],10)
  43         [0, 12.0411998265592, 7.04365036222725, 4.99754946433200, 3.87640052032226, 3.16724984190499, 2.67787158522453, 2.31967787910747, 2.04610089789525, 1.83029962242701]
  44 
  45         sage: Benford_prediction([sqrt(2)*i^2 for i in range(3,81)],5.4402)
  46         [0, 31.9193381529151, 18.6716158672934, 13.2477222856217, 10.2757317216334, 8.39588414565996]
  47 
  48     """
  49     b = base
  50     size = len(DataList)
  51     first_digit_Benford = []
  52     first_digit_Benford.append(0)
  53     c = ceil(b)
  54     for d in range(1,c):
  55         q =log(1+1.0/d, b).n()
  56         first_digit_Benford.append((q*size/log(c,b)).n())
  57     return first_digit_Benford
  58 
  59 
  60 
  61 def get_first_digit_count(DataList, base):
  62     """
  63     INPUT:
  64 
  65     - ``DataList`` -- a list of real numbers
  66 
  67     - ``base`` -- a real number greater than or equal to 2
  68 
  69     OUTPUT:
  70 
  71     A list of length ceil(b), such that for 0 <= i <= ceil(b)-1, the ith position of the list contains the  number of items in the list which
  72     start with the digit i (when the numbers in DataList are converted to the specified base).
  73 
  74     EXAMPLES:
  75 
  76         sage: Benford_count([0,-1,2,3,75],35/4)
  77         [1, 1, 1, 1, 0, 0, 0, 0, 1]
  78 
  79         sage: Benford_count([i for i in range(30)],10)
  80         [1, 11, 11, 1, 1, 1, 1, 1, 1, 1]
  81 
  82     """
  83 
  84     b = base
  85     c = ceil(b)
  86 
  87     #Create first_digit_data; a list of length ceil(b), where each entry is initialized to 0.
  88     first_digit_data = []
  89     for i in range(c):
  90         first_digit_data.append(0)
  91 
  92     for num in DataList:
  93         if num == 0:
  94            first_digit_data[0] += 1
  95         else:
  96             
  97             
  98 
  99             #We are trying to avoid loss of precision, so each number num in DataList is viewed as a real number, n.
 100             #The value e is the highest power of b in n, and M is the mantissa (i.e. n=M*b^e).
 101             n = RR(abs(num))
 102 
 103             l = RR(log(n,b)).n()
 104             
 105             if (l-l.floor())<0.0000000005  or (l.ceil()-l)<0.0000000005:
 106                 power_of_b=1.0
 107                 new_e = 0.0
 108 
 109                 while power_of_b <= n/b:
 110                     power_of_b = power_of_b*b
 111                     new_e += 1.0
 112                 e = new_e
 113             else:
 114                 e = l.floor()
 115             
 116             M = (n/b**e).n()
 117             first_digit= M.floor()
 118             first_digit_data[first_digit] += 1
 119     return first_digit_data
 120 
 121 
 122 
 123 class BenfordChecker:
 124     
 125 
 126     def __init__(self,data_list,base):
 127 
 128         if len(data_list)==0:
 129             raise ValueError, "data list must be nonempty"
 130 
 131         if base<=1:
 132             raise ValueError, "base must be greater than 1"
 133 
 134         self.__data_list = data_list
 135         self.__base = base
 136         self.__Benny = create_Benford_prediction(data_list,base)
 137         self.__first_digit_count = get_first_digit_count(data_list,base)
 138 
 139 
 140     def data_list(self):
 141         return self.__data_list
 142 
 143     def base(self):
 144         return self.__base
 145 
 146     def Benford_prediction(self):
 147         return self.__Benny
 148 
 149     def first_digit_count(self):
 150         return self.__first_digit_count
 151 
 152 
 153     def Benford_chi_square(self):
 154         """
 155         INPUT:
 156 
 157         - DataList -- a list of real numbers
 158 
 159         - base -- a real number greater than or equal to 2
 160 
 161         OUTPUT:
 162 
 163         A list containing, first, the chi-squared value and, second, the associated p-value.
 164 
 165         EXAMPLES:
 166 
 167             sage: Benford_chi_square([1,2,3,4,5],3)
 168             (0.61378226057799101, 0.4333672659230694)
 169 
 170             sage: Benford_chi_square([i for i in range(30)],10)
 171             (11.78779211948263, 0.16092617606270504)
 172 
 173         """
 174         prediction = list(self.__Benny)
 175         count = list(self.__first_digit_count)
 176         del prediction[0]
 177         del count[0]
 178         cs = scipy.stats.chisquare(numpy.array(count),numpy.array(prediction))
 179         return cs
 180 
 181 
 182     def bar_graph(self):
 183         N = ceil(self.__base)
 184         ind = numpy.arange(N)
 185         width = .35
 186         p1 = matplotlib.pyplot.bar(ind, self.__Benny, width, color='#669966')
 187         p2 = matplotlib.pyplot.bar(ind+.35, self.__first_digit_count, width, color='#66CCCC')
 188         matplotlib.pyplot.title('Benford Distribution')
 189         matplotlib.pyplot.legend( (p1[0], p2[0]), ('Prediction', 'Count') )
 190         matplotlib.pyplot.savefig('Graph.png')
 191         matplotlib.pyplot.close()
 192 
 193     def line_graph(self):
 194         p1 = matplotlib.pyplot.plot(self.__Benny, color='#669966')
 195         p2 = matplotlib.pyplot.plot(self.__first_digit_count, color='#66CCCC')
 196         matplotlib.pyplot.title('Benford Distribution')
 197         matplotlib.pyplot.legend( (p1[0], p2[0]), ('Prediction', 'Count') )
 198         matplotlib.pyplot.savefig('Graph.png')
 199         matplotlib.pyplot.close()
 200     
 201
Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
You are not allowed to attach a file to this page.