Skip to content
This repository was archived by the owner on Feb 10, 2021. It is now read-only.

add crc=True|False parameter to HDFileSystem(...) #164

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion continuous_integration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ docker build -t daskdev/hdfs3dev continuous_integration/
- Start the container and wait for it to be ready:

```bash
source continuous_integration/startup_hdfs.sh
source continuous_integration/setup_hdfs.sh
```

- Start a bash session in the running container:
Expand All @@ -36,3 +36,10 @@ docker exec -it $CONTAINER_ID bash
python setup.py install
py.test hdfs3 -s -vv
```

To run the tests on Python 2.7:

```bash
/opt/conda/envs/py27/bin/python setup.py install
/opt/conda/envs/py27/bin/py.test hdfs3 -s -vv
```
6 changes: 5 additions & 1 deletion hdfs3/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class HDFileSystem(object):
_first_pid = None

def __init__(self, host=MyNone, port=MyNone, connect=True, autoconf=True,
pars=None, **kwargs):
pars=None, crc=True, **kwargs):
"""
Parameters
----------
Expand All @@ -52,6 +52,8 @@ def __init__(self, host=MyNone, port=MyNone, connect=True, autoconf=True,
autoconf: bool (True)
Whether to use the configuration found in the conf module as
the set of defaults
crc: bool (True)
Enable/disable CRC verification
pars : {str: str}
any parameters for hadoop, that you can find in hdfs-site.xml,
https://hadoop.apache.org/docs/r2.6.0/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml
Expand All @@ -77,6 +79,8 @@ def __init__(self, host=MyNone, port=MyNone, connect=True, autoconf=True,
self.conf['host'] = host
if port is not MyNone:
self.conf['port'] = port
if not crc:
self.conf['input.read.default.verify'] = '0'

self._handle = None

Expand Down
25 changes: 25 additions & 0 deletions hdfs3/tests/test_hdfs3.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,31 @@ def test_idempotent_connect(hdfs):
hdfs.connect()


def test_disable_crc():
    """Round-trip data through a filesystem created with ``crc=False``.

    Checks that passing ``crc=False`` records ``input.read.default.verify``
    as ``'0'`` in the connection configuration, and that data written to
    and read back from HDFS is unchanged with that setting.

    This test builds its own ``HDFileSystem`` rather than using the shared
    fixture, so it is responsible for removing ``/tmp/test`` and closing the
    connection itself, even when an assertion or I/O call fails.
    """
    hdfs = HDFileSystem(host=test_host, port=test_port,
                        pars={'rpc.client.connect.retry': '2'}, crc=False)

    try:
        # crc=False must be translated into the libhdfs3 setting, with the
        # value that actually switches verification off.
        assert 'input.read.default.verify' in hdfs.conf
        assert hdfs.conf['input.read.default.verify'] == '0'

        # Start from an empty scratch directory.
        if hdfs.exists('/tmp/test'):
            hdfs.rm('/tmp/test', recursive=True)
        hdfs.mkdir('/tmp/test')

        data = b'a' * (10 * 2**20)

        with hdfs.open(a, 'wb', replication=1) as f:
            f.write(data)

        with hdfs.open(a, 'rb') as f:
            out = f.read(len(data))
            assert len(data) == len(out)
            assert out == data
    finally:
        # Always clean up so a failure here cannot contaminate later tests;
        # the nested finally guarantees the disconnect even if removal fails.
        try:
            if hdfs.exists('/tmp/test'):
                hdfs.rm('/tmp/test', recursive=True)
        finally:
            hdfs.disconnect()


def test_ls_touch(hdfs):
assert not hdfs.ls('/tmp/test')
hdfs.touch(a)
Expand Down