@@ -121,27 +121,29 @@ def test_median():
121121 assert "4.50" == "%.2f" % median ([4.0 , 5 , 2 , 1 , 9 , 10 ])
122122
123123
124- def histogram (stream , options ):
124+ def _histogram (stream , minimum = None , maximum = None , num_buckets = None , logscale = False ,
125+ custbuckets = None , calc_mvsd = True ,
126+ bucket_format = '%10.4f' , calc_percentage = False , dot = '∎' ):
125127 """
126128 Loop over the stream and add each entry to the dataset, printing out at the
127129 end.
128130
129131 stream yields Decimal()
130132 """
131- if not options . min or not options . max :
133+ if not minimum or not maximum :
132134 # glob the iterator here so we can do min/max on it
133135 data = list (stream )
134136 else :
135137 data = stream
136138 bucket_scale = 1
137139
138- if options . min :
139- min_v = Decimal (options . min )
140+ if minimum :
141+ min_v = Decimal (minimum )
140142 else :
141143 min_v = min (data , key = lambda x : x .value )
142144 min_v = min_v .value
143- if options . max :
144- max_v = Decimal (options . max )
145+ if maximum :
146+ max_v = Decimal (maximum )
145147 else :
146148 max_v = max (data , key = lambda x : x .value )
147149 max_v = max_v .value
@@ -151,11 +153,9 @@ def histogram(stream, options):
151153 diff = max_v - min_v
152154
153155 boundaries = []
154- bucket_counts = []
155- buckets = 0
156156
157- if options . custbuckets :
158- bound = options . custbuckets .split (',' )
157+ if custbuckets :
158+ bound = custbuckets .split (',' )
159159 bound_sort = sorted (map (Decimal , bound ))
160160
161161 # if the last value is smaller than the maximum, replace it
@@ -174,8 +174,8 @@ def histogram(stream, options):
174174 # so no need to do a -1!
175175 bucket_counts = [0 for x in range (len (boundaries ))]
176176 buckets = len (boundaries )
177- elif options . logscale :
178- buckets = options . buckets and int (options . buckets ) or 10
177+ elif logscale :
178+ buckets = num_buckets and int (num_buckets ) or 10
179179 if buckets <= 0 :
180180 raise ValueError ('# of buckets must be > 0' )
181181
@@ -202,7 +202,7 @@ def log_steps(k, n):
202202 for step in log_steps (buckets , diff ):
203203 boundaries .append (min_v + step )
204204 else :
205- buckets = options . buckets and int (options . buckets ) or 10
205+ buckets = num_buckets and int (num_buckets ) or 10
206206 if buckets <= 0 :
207207 raise ValueError ('# of buckets must be > 0' )
208208 step = diff / buckets
@@ -216,7 +216,7 @@ def log_steps(k, n):
216216 accepted_data = []
217217 for record in data :
218218 samples += record .count
219- if options . mvsd :
219+ if calc_mvsd :
220220 mvsd .add (record .value , record .count )
221221 accepted_data .append (record )
222222 # find the bucket this goes in
@@ -237,29 +237,39 @@ def log_steps(k, n):
237237 if skipped :
238238 print ("# %d value%s outside of min/max" %
239239 (skipped , skipped > 1 and 's' or '' ))
240- if options . mvsd :
240+ if calc_mvsd :
241241 print ("# Mean = %f; Variance = %f; SD = %f; Median %f" %
242242 (mvsd .mean (), mvsd .var (), mvsd .sd (),
243243 median (accepted_data , key = lambda x : x .value )))
244- print "# each " + options .dot + " represents a count of %d" % bucket_scale
245- bucket_min = min_v
244+ print "# each " + dot + " represents a count of %d" % bucket_scale
246245 bucket_max = min_v
247246 percentage = ""
248- format_string = options . format + ' - ' + options . format + ' [%6d]: %s%s'
247+ format_string = bucket_format + ' - ' + bucket_format + ' [%6d]: %s%s'
249248 for bucket in range (buckets ):
250249 bucket_min = bucket_max
251250 bucket_max = boundaries [bucket ]
252251 bucket_count = bucket_counts [bucket ]
253252 star_count = 0
254253 if bucket_count :
255254 star_count = bucket_count / bucket_scale
256- if options . percentage :
255+ if calc_percentage :
257256 percentage = " (%0.2f%%)" % (100 * Decimal (bucket_count ) /
258257 Decimal (samples ))
259- print format_string % (bucket_min , bucket_max , bucket_count , options . dot *
258+ print format_string % (bucket_min , bucket_max , bucket_count , dot *
260259 star_count , percentage )
261260
262261
def histogram(stream, options):
    """
    Adapter that unpacks an options object and delegates to _histogram().

    stream is handed to _histogram() untouched. options must expose the
    attributes min, max, buckets, logscale, custbuckets, mvsd, format,
    percentage and dot; each one is forwarded to the matching _histogram()
    parameter. Nothing is returned.
    """
    # Bind by keyword so the attribute-to-parameter mapping is explicit and
    # does not depend on the parameter order of _histogram().
    _histogram(
        stream,
        minimum=options.min,
        maximum=options.max,
        num_buckets=options.buckets,
        logscale=options.logscale,
        custbuckets=options.custbuckets,
        calc_mvsd=options.mvsd,
        bucket_format=options.format,
        calc_percentage=options.percentage,
        dot=options.dot,
    )
266+
267+
def print_histogram(samples, **kwargs):
    """
    Print a histogram for an in-memory iterable of samples.

    Each sample is converted with str(), the resulting list is run through
    load_stream(), and the result is handed to _histogram(). Any keyword
    arguments are forwarded to _histogram() unchanged (minimum, maximum,
    num_buckets, logscale, custbuckets, calc_mvsd, bucket_format,
    calc_percentage, dot).
    """
    text_samples = list(map(str, samples))
    # NOTE(review): the two False flags are passed positionally to
    # load_stream(); their meaning is defined by load_stream -- confirm there.
    records = load_stream(text_samples, False, False)
    _histogram(records, **kwargs)
271+
272+
263273if __name__ == "__main__" :
264274 parser = OptionParser ()
265275 parser .usage = "cat data | %prog [options]"
0 commit comments