#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Compute Cheesecake scores for all packages on PyPI.
#
# The code sticks to constructs that are valid on both Python 2 and
# Python 3: ``print`` is always called with a single parenthesised argument,
# so it behaves the same as a statement and as a function.
#

import datetime
import os
import re
import sys
import time

try:
    import urllib2
except ImportError:
    # Python 3 moved urlopen() to urllib.request; keep the old module name
    # so the rest of the file does not need to care.
    import urllib.request as urllib2

current_dir = os.path.dirname(__file__)
# Must happen before the subprocess fallback below so that the bundled
# ``cheesecake`` package one directory up is importable.
sys.path.insert(0, os.path.join(current_dir, '../'))

try:
    import subprocess
except ImportError:
    from cheesecake import subprocess

CHEESECAKE_PATH = os.path.abspath(os.path.join(current_dir, '../cheesecake_index'))
LOG_PATH = '/tmp/cheesecake_pypi_results'


def read_file_contents(filename):
    """Return the whole contents of `filename`.

    The file is closed even when reading fails.
    """
    fd = open(filename)
    try:
        return fd.read()
    finally:
        fd.close()


def replace_chars(string):
    """Translate selected URL percent-escapes found in `string`.

    ``%20`` becomes an underscore, ``%3A`` a colon, ``%C3%B1`` the letter
    ``ñ``; quotes, parentheses, ``*`` and ``?`` are emitted preceded by a
    backslash. Any other text, including percent-escapes not listed here,
    is returned untouched.
    """
    # No replacement text contains '%', so the order of substitutions
    # cannot influence the result.
    replacements = (
        ('%20', '_'),
        ('%27', "\\'"),
        ('%28', '\\('),
        ('%29', '\\)'),
        ('%2A', '\\*'),
        ('%3A', ':'),
        ('%3F', '\\?'),
        ('%C3%B1', 'ñ'),
    )

    for old, new in replacements:
        string = string.replace(old, new)

    return string


def get_package_names():
    """Get list of all packages on PyPI.

    For each package return (name, version) tuple.
    """
    # NOTE(review): the pattern is empty in this source. The loop below reads
    # groups 1 (name) and 2 (version), so it has to define two capture
    # groups; with an empty pattern m.group(1) raises IndexError.
    package_regex = r''

    pypi = urllib2.urlopen("http://python.org/pypi?%3Aaction=index")
    try:
        html_lines = pypi.readlines()
    finally:
        # Release the connection even if the download fails half way.
        pypi.close()

    for line in html_lines:
        if not isinstance(line, str):
            # Python 3 hands back bytes; a str pattern cannot search them.
            line = line.decode('utf-8', 'replace')

        m = re.search(package_regex, line)
        if m:
            # To make setuptools download a package, convert all spaces
            # to underscores.
            yield (replace_chars(m.group(1)), replace_chars(m.group(2)))


def score_one_package(package_name, log_template):
    """Score one package leaving information in logs along the way.

    `log_template` must contain a single ``%s`` placeholder, which is filled
    with the log kind.

    :Logs:
      * .stdout -> Cheesecake stdout
      * .stderr -> Cheesecake stderr
      * .log -> Cheesecake log for given package

    Return the relative Cheesecake index as an integer, or -1 when the
    Cheesecake process exits with a non-zero status or its output contains
    no score line.
    """
    log_file = log_template % 'log'
    stdout_path = log_template % 'stdout'

    stdout_fd = open(stdout_path, 'w')
    try:
        stderr_fd = open(log_template % 'stderr', 'w')
        try:
            # NOTE(review): the command is interpolated into a string and run
            # through the shell. replace_chars() backslash-escapes only a few
            # characters, so any other shell metacharacter in a package name
            # would be interpreted by the shell. Moving to an argument list
            # means revisiting that escaping as well.
            process = subprocess.Popen(
                '%s -l %s -n %s' % (CHEESECAKE_PATH, log_file, package_name),
                stdout=stdout_fd,
                stderr=stderr_fd,
                shell=True)
            result = process.wait()
        finally:
            stderr_fd.close()
    finally:
        stdout_fd.close()

    if result == 0:
        score_regex = r'OVERALL CHEESECAKE INDEX \(RELATIVE\) \.\.\.\.\s+([\d]+)'
        m = re.search(score_regex, read_file_contents(stdout_path))
        if m:
            return int(m.group(1))

    return -1


def time2datetime(t):
    """Convert a timestamp `t` into a local-time datetime, dropping fractions of a second."""
    t = time.localtime(t)
    return datetime.datetime(t.tm_year, t.tm_mon, t.tm_mday,
                             t.tm_hour, t.tm_min, t.tm_sec)


def time_delta(start, end):
    """Return the time elapsed between timestamps `start` and `end` as text.

    Both ends are truncated to whole seconds before subtracting; the result
    is the ``str()`` of the resulting timedelta, e.g. ``'0:01:05'``.
    """
    return str(time2datetime(end) - time2datetime(start))


def score_all_packages():
    """Score every package listed on PyPI and print a report.

    Per-package logs are written under LOG_PATH. The report lists the
    packages that could not be scored, all scores in ascending order and a
    short summary.
    """
    packages_failed = []
    packages_scores = []

    if not os.path.exists(LOG_PATH):
        os.mkdir(LOG_PATH)

    for name, version in get_package_names():
        name_and_version = '%s-%s' % (name, version)
        # The template is later filled with the ``%`` operator, so a literal
        # '%' in the name (a percent-escape replace_chars() left alone) has
        # to be doubled or the formatting would fail.
        log_template = os.path.join(
            LOG_PATH, name_and_version.replace('%', '%%') + '.%s')

        start = time.time()
        result = score_one_package('%s==%s' % (name, version), log_template)
        end = time.time()

        if result == -1:
            packages_failed.append(name_and_version)
        else:
            packages_scores.append(
                (name_and_version, result, time_delta(start, end)))

    print("=== Packages that Cheesecake failed to score ===")
    for failed in packages_failed:
        print(failed)

    print("")
    print("=== All packages scores ===")
    # Sort by score (the sort is stable, so ties keep discovery order).
    packages_scores.sort(key=lambda entry: entry[1])
    for name, score, timing in packages_scores:
        print("%s SCORE:%s (in %s time)" % (name, score, timing))

    above_half = [entry for entry in packages_scores if entry[1] > 50]

    print("")
    print("=== Summary ===")
    print("Checked %d packages in overall."
          % (len(packages_scores) + len(packages_failed)))
    print("Failed for %d." % len(packages_failed))
    print("%d packages got more than 50%% Cheesecake score." % len(above_half))


if __name__ == '__main__':
    score_all_packages()