
Commit 43019db

[SPARK-18652] Include the data in pyspark package.
1 parent: c24076d

2 files changed: 10 additions, 0 deletions


python/MANIFEST.in

1 addition, 0 deletions

@@ -17,6 +17,7 @@
 global-exclude *.py[cod] __pycache__ .DS_Store
 recursive-include deps/jars *.jar
 graft deps/bin
+recursive-include deps/data *
 recursive-include deps/examples *.py
 recursive-include lib *.zip
 include README.md
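
For context, the new recursive-include directive tells setuptools to pull every file under deps/data into the source distribution. A quick way to sanity-check a built sdist is sketched below; the dist/pyspark-*.tar.gz path is an assumption about the local build output, not something this commit creates:

```python
# Hypothetical check, not part of the commit: list files under deps/data inside a built sdist.
import glob
import tarfile

# Assumes an sdist was already built, e.g. via `python setup.py sdist` in python/.
sdist = sorted(glob.glob("dist/pyspark-*.tar.gz"))[-1]
with tarfile.open(sdist) as tar:
    data_files = [m.name for m in tar.getmembers() if "/deps/data/" in m.name]
print("%d data files packaged in %s" % (len(data_files), sdist))
```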

python/setup.py

9 additions, 0 deletions
@@ -69,9 +69,11 @@
 
 EXAMPLES_PATH = os.path.join(SPARK_HOME, "examples/src/main/python")
 SCRIPTS_PATH = os.path.join(SPARK_HOME, "bin")
+DATA_PATH = os.path.join(SPARK_HOME, "data")
 SCRIPTS_TARGET = os.path.join(TEMP_PATH, "bin")
 JARS_TARGET = os.path.join(TEMP_PATH, "jars")
 EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples")
+DATA_TARGET = os.path.join(TEMP_PATH, "data")
 
 
 # Check and see if we are under the spark path in which case we need to build the symlink farm.
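
SPARK_HOME and TEMP_PATH are defined earlier in setup.py and are not part of this hunk. The sketch below only illustrates the source/target layout the two new constants assume; both path derivations here are assumptions for illustration, not lines from this diff:

```python
# Illustrative only: how DATA_PATH and DATA_TARGET relate to each other.
import os

SPARK_HOME = os.path.abspath("../")  # assumption: setup.py is run from python/ inside a Spark checkout
TEMP_PATH = "deps"                   # assumption: staging directory used during packaging

DATA_PATH = os.path.join(SPARK_HOME, "data")    # source: the data/ directory shipped in the Spark tree
DATA_TARGET = os.path.join(TEMP_PATH, "data")   # target: staged copy/symlink that setuptools can see
print(DATA_PATH, "->", DATA_TARGET)
```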
@@ -114,11 +116,13 @@ def _supports_symlinks():
             os.symlink(JARS_PATH, JARS_TARGET)
             os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET)
             os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET)
+            os.symlink(DATA_PATH, DATA_TARGET)
         else:
             # For windows fall back to the slower copytree
             copytree(JARS_PATH, JARS_TARGET)
             copytree(SCRIPTS_PATH, SCRIPTS_TARGET)
             copytree(EXAMPLES_PATH, EXAMPLES_TARGET)
+            copytree(DATA_PATH, DATA_TARGET)
     else:
         # If we are not inside of SPARK_HOME verify we have the required symlink farm
         if not os.path.exists(JARS_TARGET):
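
The hunk extends the existing symlink-or-copy pattern: on platforms with symlink support the data directory is linked into the staging area, on Windows it is copied. A minimal standalone sketch of that pattern follows; the helper names and the simplified symlink check are illustrative, not taken from setup.py:

```python
import os
from shutil import copytree


def _supports_symlinks():
    # Simplified check for illustration: POSIX systems generally support symlinks, Windows often does not.
    return getattr(os, "symlink", None) is not None and os.name == "posix"


def stage(src, dst):
    """Link src into dst when possible, otherwise fall back to a (slower) copy."""
    if _supports_symlinks():
        os.symlink(src, dst)
    else:
        copytree(src, dst)
```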
@@ -161,18 +165,21 @@ def _supports_symlinks():
                   'pyspark.jars',
                   'pyspark.python.pyspark',
                   'pyspark.python.lib',
+                  'pyspark.data',
                   'pyspark.examples.src.main.python'],
         include_package_data=True,
         package_dir={
             'pyspark.jars': 'deps/jars',
             'pyspark.bin': 'deps/bin',
             'pyspark.python.lib': 'lib',
+            'pyspark.data': 'deps/data',
             'pyspark.examples.src.main.python': 'deps/examples',
         },
         package_data={
             'pyspark.jars': ['*.jar'],
             'pyspark.bin': ['*'],
             'pyspark.python.lib': ['*.zip'],
+            'pyspark.data': ['*'],
             'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
         scripts=scripts,
         license='http://www.apache.org/licenses/LICENSE-2.0',
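
With 'pyspark.data' registered in packages, package_dir, and package_data, the staged deps/data files ship inside the installed pyspark distribution as the pyspark.data subpackage. A rough illustration of how a user might locate those files after installing such a build; this locator snippet is an assumption about the installed layout, not an API added by this commit:

```python
# Hypothetical: resolve the on-disk location of the bundled data files after `pip install`.
import os
import pyspark

data_dir = os.path.join(os.path.dirname(pyspark.__file__), "data")
if os.path.isdir(data_dir):
    print("bundled data files:", sorted(os.listdir(data_dir))[:5])
else:
    print("no bundled data directory found at", data_dir)
```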
@@ -202,8 +209,10 @@ def _supports_symlinks():
             os.remove(os.path.join(TEMP_PATH, "jars"))
             os.remove(os.path.join(TEMP_PATH, "bin"))
             os.remove(os.path.join(TEMP_PATH, "examples"))
+            os.remove(os.path.join(TEMP_PATH, "data"))
         else:
             rmtree(os.path.join(TEMP_PATH, "jars"))
             rmtree(os.path.join(TEMP_PATH, "bin"))
             rmtree(os.path.join(TEMP_PATH, "examples"))
+            rmtree(os.path.join(TEMP_PATH, "data"))
         os.rmdir(TEMP_PATH)
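
The cleanup mirrors the staging step: os.remove deletes the data symlink where one was created, while rmtree removes the copied tree on Windows. A minimal sketch of that symmetric teardown, with an illustrative helper name that does not appear in setup.py:

```python
import os
from shutil import rmtree


def unstage(path, used_symlink):
    """Undo a staged entry: a symlink is a single filesystem entry, a copy is a whole tree."""
    if used_symlink:
        os.remove(path)   # removes only the link, leaving the original data in place
    else:
        rmtree(path)      # removes the copied directory recursively
```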
