def __init__(self, T, N, k):
    """Configure a sampling experiment.

    T -- number of trials to repeat
    N -- number of characters read in each trial
    k -- number of samples returned by each trial; asserted to satisfy
         1 <= k <= 25 and k <= N
    """
    self.T, self.N = T, N
    # Validation stays as assertions so failures surface as AssertionError.
    assert k >= 1 and k <= 25
    assert N >= k
    self.k = k
    # Tally of generated characters (character -> occurrences); stream()
    # is what fills it in.
    self.source = dict()
def stream(self):
    """Feed one trial's worth of random characters into a new sampler.

    Creates a Sampling(self.k) instance, then self.N times draws a random
    character in the range 'A'..'K', records it in self.source and hands
    it to the sampler's read(). Returns the sampler.
    """
    sampler = Sampling(self.k)
    tally = self.source
    base = ord('A')
    for _ in range(self.N):
        # randint includes both endpoints, so the offset is 0..10 ('A'..'K').
        ch = chr(base + random.randint(0, 10))
        tally[ch] = tally.get(ch, 0) + 1
        sampler.read(ch)
    return sampler
def count(self):
    """Run self.T trials and tally how often each item gets sampled.

    Every trial takes a fresh sampler from self.stream() and asks it for
    its samples via sampling(). Returns a dict mapping each sampled item
    to the number of times it appeared across all trials.
    """
    tally = {}
    for _ in range(self.T):
        for item in self.stream().sampling():
            tally[item] = tally.get(item, 0) + 1
    return tally
def statistic(self):
    """Print the sampled distribution, then the source distribution.

    For each of the two tallies (the result of self.count() and
    self.source) this prints a header line with the grand total followed
    by one line per key, in sorted key order, showing the key, its count
    and its share of the total to three decimal places.
    """
    def _report(label, counts):
        # Print the total line, then one "key count fraction" line per key.
        total = sum(counts.values())
        # A single-argument print(...) behaves the same on Python 2 and 3;
        # "%s %s" reproduces the separating space that the print statement
        # inserted between its two items.
        print("%s %s" % (label, total))
        for key, hits in sorted(counts.items()):
            print("%c %d %0.3f" % (key, hits, float(hits) / total))

    # count() has to run before self.source is reported: the trials it
    # drives are what populate self.source.
    _report("total: ", self.count())
    _report("source total: ", self.source)
if __name__ == '__main__':
    # Script entry point: 100000 trials, 10 characters per trial,
    # 5 samples kept per trial.
    trials, chars_per_trial, samples_per_trial = 100000, 10, 5
    stat_char = Stat(trials, chars_per_trial, samples_per_trial)
    stat_char.statistic()