"""Script to filter family scores """ import sys import cPickle for f in sys.argv[1:]: print f, stem = f[:-3] # .pck pa_scores = cPickle.load(open(f)) filtered_scores = {} nmax = 0 for child, padict in pa_scores.items(): levels = {} for parentset in padict: try: levels[len(parentset)].append(parentset) except KeyError: levels[len(parentset)] = [parentset] sorted_levels = sorted(levels) for paset_size in sorted_levels: for parentset in levels[paset_size]: parentset_score = padict[parentset] def tmp(other): return(padict[other] > parentset_score or not(other >= parentset)) for higher_level in sorted_levels[paset_size+1:]: levels[higher_level] = filter(tmp,levels[higher_level]) filtered_scores_child = {} for parentsets in levels.values(): for parent in parentsets: filtered_scores_child[parent] = padict[parent] filtered_scores[child] = filtered_scores_child n = len(filtered_scores_child) if n > nmax: nmax = n print nmax, sum([len(x) for x in filtered_scores.values()]) cPickle.dump(filtered_scores,open('%s_filtered.pck' % stem,'w'))