From 0947ff0b9e9160441212c6d7c16423a271dfd58e Mon Sep 17 00:00:00 2001 From: y-p Date: Sat, 6 Oct 2012 07:13:08 +0200 Subject: [PATCH 1/2] BLD: added caching to setup.py - speeds up testing dramatically. Cythoning results and compiled extension are cached. This is disabled by default, to enable it uncomment the line specifying the cache directory at the top of setup.py. you need to occasionally empty the cache manually, to keep it from from growing too much. BLD: added caching to setup.sys - speeds up testing dramatically. Cythoning results and compiled extension are cached. This is disabled by default, to enable it uncomment the line specifying the cache directory at the top of setup.py. you need to occasionally empty the cache manually, to keep it from from growing too much. --- setup.py | 253 +++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 225 insertions(+), 28 deletions(-) diff --git a/setup.py b/setup.py index d39fcd73ad17c..9efa7334bdc84 100755 --- a/setup.py +++ b/setup.py @@ -11,6 +11,21 @@ import shutil import warnings +try: + BUILD_CACHE_DIR=None + + # uncomment to activate the build cache + #BUILD_CACHE_DIR="/tmp/.pandas_build_cache/" + + if os.isdir(BUILD_CACHE_DIR): + print("--------------------------------------------------------") + print("BUILD CACHE ACTIVATED. be careful, this is experimental.") + print("--------------------------------------------------------") + else: + BUILD_CACHE_DIR=None +except : + pass + # may need to work around setuptools bug by providing a fake Pyrex try: import Cython @@ -87,8 +102,15 @@ from distutils.extension import Extension from distutils.command.build import build -from distutils.command.build_ext import build_ext from distutils.command.sdist import sdist +from distutils.command.build_ext import build_ext + +try: + from Cython.Distutils import build_ext + #from Cython.Distutils import Extension # to get pyrex debugging symbols + cython=True +except ImportError: + cython=False from os.path import splitext, basename, join as pjoin @@ -314,38 +336,213 @@ def build_extensions(self): for ext in self.extensions: self.build_extension(ext) +class CompilationCacheMixin(object): + def __init__(self,*args,**kwds): + cache_dir=kwds.pop("cache_dir",BUILD_CACHE_DIR) + self.cache_dir=cache_dir + if not os.path.isdir(cache_dir): + raise Exception("Error: path to Cache directory [%s] is not a dir"); + + def _copy_from_cache(self,hash,target): + src=os.path.join(self.cache_dir,hash) + if os.path.exists(src): + # print("Cache HIT: asked to copy file %s in %s" % (src,os.path.abspath(target))) + s="." + for d in target.split(os.path.sep)[:-1]: + s=os.path.join(s,d) + if not os.path.exists(s): + os.mkdir(s) + shutil.copyfile(src,target) + + return True + + return False + + def _put_to_cache(self,hash,src): + target=os.path.join(self.cache_dir,hash) + # print( "Cache miss: asked to copy file from %s to %s" % (src,target)) + s="." + for d in target.split(os.path.sep)[:-1]: + s=os.path.join(s,d) + if not os.path.exists(s): + os.mkdir(s) + shutil.copyfile(src,target) + + def _hash_obj(self,obj): + """ + you should override this method to provide a sensible + implementation of hashing functions for your intended objects + """ + try: + return hash(obj) + except: + raise NotImplementedError("You must override this method") + + # this is missing in 2.5, mro will do the right thing + def get_ext_fullpath(self, ext_name): + """Returns the path of the filename for a given extension. + + The file is located in `build_lib` or directly in the package + (inplace option). + """ + import string + # makes sure the extension name is only using dots + all_dots = string.maketrans('/'+os.sep, '..') + ext_name = ext_name.translate(all_dots) + + fullname = self.get_ext_fullname(ext_name) + modpath = fullname.split('.') + filename = self.get_ext_filename(ext_name) + filename = os.path.split(filename)[-1] + + if not self.inplace: + # no further work needed + # returning : + # build_dir/package/path/filename + filename = os.path.join(*modpath[:-1]+[filename]) + return os.path.join(self.build_lib, filename) + + # the inplace option requires to find the package directory + # using the build_py command for that + package = '.'.join(modpath[0:-1]) + build_py = self.get_finalized_command('build_py') + package_dir = os.path.abspath(build_py.get_package_dir(package)) + + # returning + # package_dir/filename + return os.path.join(package_dir, filename) + +class CompilationCacheExtMixin(CompilationCacheMixin): + def __init__(self,*args,**kwds): + CompilationCacheMixin.__init__(self,*args,**kwds) + + def _hash_file(self,fname): + from hashlib import sha1 + try: + hash=sha1() + hash.update(self.build_lib.encode('utf-8')) + try: + if sys.version_info[0] >= 3: + import io + f=io.open(fname,"rb") + else: + f=open(fname) + + first_line=f.readline() + # ignore cython generation timestamp header + if "Generated by Cython" not in first_line.decode('utf-8'): + hash.update(first_line) + hash.update(f.read()) + return hash.hexdigest() + + except: + raise + return None + finally: + f.close() + + except IOError: + return None + + def _hash_obj(self,ext): + from hashlib import sha1 + + sources = ext.sources + if sources is None or \ + (not hasattr(sources,'__iter__') ) or \ + isinstance(sources,str) or \ + sys.version[0]==2 and isinstance(sources,unicode): #argh + return False + + sources = list(sources) + ext.depends + hash=sha1() + try: + for fname in sources: + fhash=self._hash_file(fname) + if fhash: + hash.update(fhash.encode('utf-8')) + except: + return None + + return hash.hexdigest() + +class CachingBuildExt(build_ext,CompilationCacheExtMixin): + def __init__(self,*args,**kwds): + CompilationCacheExtMixin.__init__(self,*args,**kwds) + kwds.pop("cache_dir",None) + build_ext.__init__(self,*args,**kwds) + + def build_extension(self, ext,*args,**kwds): + ext_path = self.get_ext_fullpath(ext.name) + build_path = os.path.join(self.build_lib,os.path.basename(ext_path)) + + hash=self._hash_obj(ext) + if hash and self._copy_from_cache(hash,ext_path): + return + + build_ext.build_extension(self,ext,*args,**kwds) + + hash=self._hash_obj(ext) + if os.path.exists(build_path): + self._put_to_cache(hash,build_path) # build_ext + if os.path.exists(ext_path): + self._put_to_cache(hash,ext_path) # develop + + + def cython_sources(self, sources, extension): + import re + cplus = self.cython_cplus or getattr(extension, 'cython_cplus', 0) or \ + (extension.language and extension.language.lower() == 'c++') + target_ext = '.c' + if cplus: + target_ext = '.cpp' + + for i,s in enumerate(sources): + if not re.search("\.(pyx|pxi|pxd)$",s): + continue + ext_dir=os.path.dirname(s) + ext_basename=re.sub("\.[^\.]+$","",os.path.basename(s)) + ext_basename += target_ext + target= os.path.join(ext_dir,ext_basename) + hash=self._hash_file(s) + sources[i]=target + if hash and self._copy_from_cache(hash,target): + continue + build_ext.cython_sources(self,[s],extension) + self._put_to_cache(hash,target) + + return sources + +class CythonCommand(build_ext): + """Custom distutils command subclassed from Cython.Distutils.build_ext + to compile pyx->c, and stop there. All this does is override the + C-compile method build_extension() with a no-op.""" + def build_extension(self, ext): + pass + +class DummyBuildSrc(Command): + """ numpy's build_src command interferes with Cython's build_ext. + """ + user_options = [] + def initialize_options(self): + self.py_modules_dict = {} + def finalize_options(self): + pass + def run(self): + pass + cmdclass = {'clean': CleanCommand, 'build': build} - -try: - from Cython.Distutils import build_ext - #from Cython.Distutils import Extension # to get pyrex debugging symbols - cython=True -except ImportError: - cython=False - suffix = '.c' - cmdclass['build_ext'] = CheckingBuildExt -else: +if cython: suffix = '.pyx' - class CythonCommand(build_ext): - """Custom distutils command subclassed from Cython.Distutils.build_ext - to compile pyx->c, and stop there. All this does is override the - C-compile method build_extension() with a no-op.""" - def build_extension(self, ext): - pass - - class DummyBuildSrc(Command): - """ numpy's build_src command interferes with Cython's build_ext. - """ - user_options = [] - def initialize_options(self): - self.py_modules_dict = {} - def finalize_options(self): - pass - def run(self): - pass + cmdclass['build_ext'] = build_ext + if BUILD_CACHE_DIR: # use the cache + cmdclass['build_ext'] = CachingBuildExt +else: + suffix = '.c' cmdclass['build_src'] = DummyBuildSrc + cmdclass['cython'] = CythonCommand cmdclass['build_ext'] = build_ext cmdclass['sdist'] = CheckSDist From c8e877900a5d5f2a09a7c97a9fbd44cbeb496b7c Mon Sep 17 00:00:00 2001 From: y-p Date: Sat, 6 Oct 2012 07:00:46 +0200 Subject: [PATCH 2/2] TST: Added Script for running tox instances in parallel. --- tox_prll.ini | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ tox_prll.sh | 23 ++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 tox_prll.ini create mode 100755 tox_prll.sh diff --git a/tox_prll.ini b/tox_prll.ini new file mode 100644 index 0000000000000..a9e54bacd281e --- /dev/null +++ b/tox_prll.ini @@ -0,0 +1,54 @@ +# Tox (http://tox.testrun.org/) is a tool for running tests +# in multiple virtualenvs. This configuration file will run the +# test suite on all supported python versions. To use it, "pip install tox" +# and then run "tox" from this directory. + +[tox] +envlist = py25, py26, py27, py31, py32 +sdistsrc = {env:DISTFILE} + +[testenv] +deps = + cython + numpy >= 1.6.1 + nose + pytz + +# cd to anything but the default {toxinidir} which +# contains the pandas subdirectory and confuses +# nose away from the fresh install in site-packages +changedir = {envdir} + +commands = + # TODO: --exe because of GH #761 + {envbindir}/nosetests --exe pandas.tests + # cleanup the temp. build dir created by the tox build + /bin/rm -rf {toxinidir}/build + + # quietly rollback the install. + # Note this line will only be reached if the tests + # previous lines succeed (in particular, the tests), + # but an uninstall is really only required when + # files are removed from source tree, in which case, + # stale versions of files will will remain in the venv, + # until the next time uninstall is run. + # + # tox should provide a preinstall-commands hook. + pip uninstall pandas -qy + + +[testenv:py25] +deps = + cython + numpy >= 1.6.1 + nose + pytz + simplejson + +[testenv:py26] + +[testenv:py27] + +[testenv:py31] + +[testenv:py32] diff --git a/tox_prll.sh b/tox_prll.sh new file mode 100755 index 0000000000000..66311aaf1991e --- /dev/null +++ b/tox_prll.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# tox has an undocumented (as of 1.4.2) config option called "sdistsrc" +# which can make a run use a pre-prepared sdist file. +# we prepare the sdist once , then launch the tox runs in parallel using it. +# +# currently (tox 1.4.2) We have to skip sdist generation when running in parallel +# or we get a race. +# + + +ENVS=$(cat tox.ini | grep envlist | tr "," " " | cut -d " " -f 3-) +TOX_INI_PAR="tox_prll.ini" + +echo "[Creating distfile]" +tox --sdistonly +export DISTFILE="$(find .tox/dist -type f )" + +echo -e "[Starting tests]\n" +for e in $ENVS; do + echo "[launching tox for $e]" + tox -c "$TOX_INI_PAR" -e "$e" & +done