diff --git a/repo2docker/buildpacks/base.py b/repo2docker/buildpacks/base.py
index 41fea076d..d7a6b5c56 100644
--- a/repo2docker/buildpacks/base.py
+++ b/repo2docker/buildpacks/base.py
@@ -4,6 +4,7 @@
 import io
 import os
 import re
+import pathspec
 import logging
 import docker
 
@@ -170,7 +171,7 @@ def get_base_packages(self):
             # FIXME: Use npm from nodesource!
             # Everything seems to depend on npm these days, unfortunately.
             "npm",
-
+            "unzip",
         }
 
 
@@ -349,7 +350,33 @@ def _filter_tar(tar):
             src_path = os.path.join(os.path.dirname(__file__), *src_parts)
             tar.add(src_path, src, filter=_filter_tar)
 
-        tar.add('.', 'src/', filter=_filter_tar)
+        ignorespec = None
+        if os.path.exists(".gitignore"):
+            with open(".gitignore") as gitignore_fh:
+                ignorespec = pathspec.PathSpec.from_lines('gitignore',
+                                                          gitignore_fh)
+
+        def _filter_src(tarinfo):
+            # Conditionally exclude files based on the pathspecs
+            # mentioned in the `.gitignore` file, then hand the
+            # remaining entries to `_filter_tar`.
+            # Note that, the behaviour of this function is
+            # not **exactly** same as the way `git` excludes files
+            # based on `.gitignore`.
+            #
+            # https://github.com/cpburnz/python-path-specification/issues/19
+            #
+            # This is done in the `filter` callback because the `exclude`
+            # argument of `TarFile.add` is deprecated since Python 3.2 and
+            # was removed in Python 3.7.
+            if ignorespec is not None:
+                # Entries are named `src/<path>`; match on `<path>`.
+                relpath = os.path.relpath(tarinfo.name, 'src')
+                if ignorespec.match_file(relpath):
+                    return None
+            return _filter_tar(tarinfo)
+
+        tar.add('.', 'src/', filter=_filter_src)
 
         tar.close()
         tarf.seek(0)
diff --git a/setup.py b/setup.py
index f8fe5fdd7..e32b4b4ef 100644
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,7 @@
         'escapism',
         'jinja2',
         'ruamel.yaml>=0.15',
+        'pathspec>=0.5.5',
     ],
     python_requires='>=3.4',
     author='Yuvi Panda',
diff --git a/tests/gitignore.py b/tests/gitignore.py
new file mode 100644
index 000000000..a3566dc06
--- /dev/null
+++ b/tests/gitignore.py
@@ -0,0 +1,103 @@
+"""
+Local builds respect .gitignore
+Tests that files excluded in gitignore are not packaged inside the image
+"""
+import getpass
+import os
+import subprocess
+import tempfile
+import time
+
+GITIGNORE_CONTENT = """
+*.csv
+!A/0.csv
+A/1.tsv
+!/B/2.csv
+B/1.tsv
+C/
+"""
+
+# The trailing backslash keeps the shebang on the first line of the script.
+VERIFY_SCRIPT = """\
+#!/usr/bin/env python
+import os
+
+assert os.path.exists("A/0.csv")
+assert not os.path.exists("A/1.csv")
+assert not os.path.exists("A/2.csv")
+
+assert os.path.exists("A/0.tsv")
+assert not os.path.exists("A/1.tsv")
+assert os.path.exists("A/2.tsv")
+
+assert not os.path.exists("B/0.csv")
+assert not os.path.exists("B/1.csv")
+assert os.path.exists("B/2.csv")
+
+assert os.path.exists("B/0.tsv")
+assert not os.path.exists("B/1.tsv")
+assert os.path.exists("B/2.tsv")
+
+assert not os.path.exists("C/0.csv")
+assert not os.path.exists("C/1.csv")
+assert not os.path.exists("C/2.csv")
+
+assert not os.path.exists("C/0.tsv")
+assert not os.path.exists("C/1.tsv")
+assert not os.path.exists("C/2.tsv")
+"""
+
+def test_gitignore():
+    """
+    Local builds respect .gitignore
+
+    Builds an image from a temporary directory that holds a `.gitignore`
+    and runs `verify.py` in it to check which files were packaged.
+    """
+    def create_directory_structure():
+        # Create Directories and files
+        #
+        # It is important to create the directory structure programmatically
+        # because if we include a folder with a .gitignore in the repository,
+        # then git uses it to exclude files from being checked into
+        # the repository
+        for dir_name in ['A', 'B', 'C']:
+            os.mkdir(dir_name)
+            for file_id in range(3):
+                # Write one .csv and one .tsv file per id
+                for extension in ('.csv', '.tsv'):
+                    file_name = str(file_id) + extension
+                    with open(os.path.join(dir_name, file_name), "w") as fp:
+                        fp.write("test")
+        # Create .gitignore
+        with open(".gitignore", "w") as fp:
+            fp.write(GITIGNORE_CONTENT)
+        # Create a verify script
+        with open("verify.py", "w") as fp:
+            fp.write(VERIFY_SCRIPT)
+            # Make the file executable
+            mode = os.fstat(fp.fileno()).st_mode
+            mode |= 0o111
+            os.fchmod(fp.fileno(), mode & 0o7777)
+
+    original_cwd = os.getcwd()
+    with tempfile.TemporaryDirectory() as tmpdir:
+        os.chdir(tmpdir)
+        try:
+            create_directory_structure()
+            # getpass.getuser() also works without a controlling terminal
+            # (e.g. on CI), where os.getlogin() raises OSError.
+            username = getpass.getuser()
+            # `--user-id` takes the user id, so use geteuid(), not getegid().
+            euid = os.geteuid()
+            subprocess.check_output([
+                'repo2docker',
+                '--user-id', str(euid),
+                '--user-name', username,
+                tmpdir,
+                'python',
+                '/home/{}/verify.py'.format(username)
+            ], stderr=subprocess.STDOUT)
+        finally:
+            # Leave the temporary directory before it gets removed.
+            os.chdir(original_cwd)