Skip to content

Commit 5831033

Browse files
Anatoli Shein authored and omalley committed
ORC-17. Support HDFS as a C++ plugin module.
Fixes #134 Signed-off-by: Owen O'Malley <[email protected]>
1 parent 10c0a85 commit 5831033

22 files changed

+465
-64
lines changed

.travis.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@ matrix:
1212
- compiler: clang
1313
os: osx
1414
osx_image: xcode6.4
15+
- compiler: clang
16+
os: osx
17+
osx_image: xcode8.3
18+
script:
19+
- mkdir build
20+
- cd build
21+
- cmake -DOPENSSL_ROOT_DIR=`brew --prefix openssl` ..
22+
- make package test-out
1523

1624
jdk:
1725
- openjdk7
@@ -22,4 +30,4 @@ script:
2230
- mkdir build
2331
- cd build
2432
- cmake ..
25-
- make package test-out
33+
- make package test-out

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ option (BUILD_JAVA
3030
"Include ORC Java library in the build process"
3131
ON)
3232

33+
option (BUILD_LIBHDFSPP
34+
"Include LIBHDFSPP library in the build process"
35+
ON)
36+
3337
# Make sure that a build type is selected
3438
if (NOT CMAKE_BUILD_TYPE)
3539
message(STATUS "No build type selected, default to ReleaseWithDebugInfo")
@@ -84,6 +88,7 @@ endif ()
8488

8589
enable_testing()
8690

91+
INCLUDE(CheckSourceCompiles)
8792
INCLUDE(ThirdpartyToolchain)
8893

8994
set (EXAMPLE_DIRECTORY ${CMAKE_SOURCE_DIR}/examples)

c++/include/CMakeLists.txt

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -10,61 +10,6 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS} ${WARN_FLAGS}")
14-
15-
INCLUDE(CheckCXXSourceCompiles)
16-
17-
CHECK_CXX_SOURCE_COMPILES("
18-
#include <initializer_list>
19-
struct A {
20-
A(std::initializer_list<int> list);
21-
};
22-
int main(int,char*[]){
23-
}"
24-
ORC_CXX_HAS_INITIALIZER_LIST
25-
)
26-
27-
CHECK_CXX_SOURCE_COMPILES("
28-
int main(int,char*[]) noexcept {
29-
return 0;
30-
}"
31-
ORC_CXX_HAS_NOEXCEPT
32-
)
33-
34-
CHECK_CXX_SOURCE_COMPILES("
35-
int main(int,char* argv[]){
36-
return argv[0] != nullptr;
37-
}"
38-
ORC_CXX_HAS_NULLPTR
39-
)
40-
41-
CHECK_CXX_SOURCE_COMPILES("
42-
struct A {
43-
virtual ~A();
44-
virtual void foo();
45-
};
46-
struct B: public A {
47-
virtual void foo() override;
48-
};
49-
int main(int,char*[]){
50-
}"
51-
ORC_CXX_HAS_OVERRIDE
52-
)
53-
54-
CHECK_CXX_SOURCE_COMPILES("
55-
#include<memory>
56-
int main(int,char* []){
57-
std::unique_ptr<int> ptr(new int);
58-
}"
59-
ORC_CXX_HAS_UNIQUE_PTR
60-
)
61-
62-
CHECK_CXX_SOURCE_COMPILES("
63-
#include <cstdint>
64-
int main(int, char*[]) { }"
65-
ORC_CXX_HAS_CSTDINT
66-
)
67-
6813
configure_file (
6914
"orc/orc-config.hh.in"
7015
"${CMAKE_CURRENT_BINARY_DIR}/orc/orc-config.hh"

c++/include/orc/OrcFile.hh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,24 @@ namespace orc {
102102
virtual void close() = 0;
103103
};
104104

105+
/**
106+
* Create a stream to an HDFS file if path begins with "hdfs://", or to a local file otherwise.
107+
* @param path the name of the file in the local file system or HDFS
108+
*/
109+
ORC_UNIQUE_PTR<InputStream> readFile(const std::string& path);
110+
105111
/**
106112
* Create a stream to a local file.
107113
* @param path the name of the file in the local file system
108114
*/
109115
ORC_UNIQUE_PTR<InputStream> readLocalFile(const std::string& path);
110116

117+
/**
118+
* Create a stream to an HDFS file.
119+
* @param path the uri of the file in HDFS
120+
*/
121+
ORC_UNIQUE_PTR<InputStream> readHdfsFile(const std::string& path);
122+
111123
/**
112124
* Create a reader for the ORC file.
113125
* @param stream the stream to read
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Wed Aug 30 10:56:51 EDT 2017
2+
HDFS-10787
3+
commit 9587bb04a818a2661e264f619b09c15ce10ff38e
4+
Author: Anatoli Shein <[email protected]>
5+
Date: Wed Aug 30 10:49:42 2017 -0400
6+
7+
fixed warnings3
8+
diffs: --------------
9+
--------------
10+
Wed Aug 30 10:56:51 EDT 2017
928 KB
Binary file not shown.

c++/libs/libhdfspp/pull_hdfs.sh

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
if [ -z "$1" ]; then
2+
echo "Usage: pull_hdfs [path_to_hdfs_git_root]"
3+
exit 1;
4+
fi
5+
if [ ! -d "$1" ]; then
6+
echo "$1 is not a directory"
7+
fi
8+
if [ ! -d "$1/hadoop-hdfs-project" ]; then
9+
echo "$1 is not the root of a hadoop git checkout"
10+
fi
11+
12+
HADOOP_ROOT=$1
13+
echo HADOOP_ROOT=$HADOOP_ROOT
14+
OUT=$(readlink -m `dirname $0`)
15+
echo OUT=$OUT
16+
TS=$OUT/imported_timestamp
17+
18+
cd $HADOOP_ROOT &&
19+
mvn -pl :hadoop-hdfs-native-client -Pnative compile -Dnative_make_args="copy_hadoop_files"
20+
(date > $TS; git rev-parse --abbrev-ref HEAD >> $TS; git log -n 1 >> $TS; \
21+
echo "diffs: --------------" >> $TS; git diff HEAD >> $TS; \
22+
echo " --------------" >> $TS)
23+
cd $OUT &&
24+
#Delete everything except for pull_hdfs.sh and imported_timestamp
25+
find . ! -name 'pull_hdfs.sh' ! -name 'imported_timestamp' ! -name '.' ! -name '..' -exec rm -rf {} + &&
26+
cp -R $HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp . &&
27+
cp -R $HADOOP_ROOT/hadoop-hdfs-project/hadoop-hdfs-native-client/target/main/native/libhdfspp/extern libhdfspp/ &&
28+
cd libhdfspp &&
29+
tar -czf ../libhdfspp.tar.gz * &&
30+
cd .. &&
31+
rm -rf libhdfspp &&
32+
date >> $TS

c++/src/CMakeLists.txt

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ include_directories (
123123
${ZLIB_INCLUDE_DIRS}
124124
${SNAPPY_INCLUDE_DIRS}
125125
${LZ4_INCLUDE_DIRS}
126+
${LIBHDFSPP_INCLUDE_DIRS}
126127
)
127128

128129
add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
@@ -132,7 +133,7 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
132133
"${CMAKE_SOURCE_DIR}/proto/orc_proto.proto"
133134
)
134135

135-
add_library (orc STATIC
136+
set(SOURCE_FILES
136137
"${CMAKE_CURRENT_BINARY_DIR}/Adaptor.hh"
137138
orc_proto.pb.h
138139
io/InputStream.cc
@@ -161,13 +162,24 @@ add_library (orc STATIC
161162
Writer.cc
162163
)
163164

165+
if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
166+
set(SOURCE_FILES ${SOURCE_FILES} OrcHdfsFile.cc)
167+
endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
168+
169+
add_library (orc STATIC ${SOURCE_FILES})
170+
164171
install(TARGETS orc DESTINATION lib)
165172

166173
target_link_libraries (orc
167174
${PROTOBUF_LIBRARIES}
168175
${ZLIB_LIBRARIES}
169176
${SNAPPY_LIBRARIES}
170177
${LZ4_LIBRARIES}
178+
${LIBHDFSPP_LIBRARIES}
171179
)
172180

173181
add_dependencies(orc protobuf)
182+
183+
if(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)
184+
add_definitions(-DBUILD_LIBHDFSPP)
185+
endif(ORC_CXX_HAS_THREAD_LOCAL AND BUILD_LIBHDFSPP)

c++/src/OrcFile.cc

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include <sys/stat.h>
2929
#include <sys/types.h>
3030
#include <unistd.h>
31+
#include <string.h>
3132

3233
namespace orc {
3334

@@ -86,8 +87,20 @@ namespace orc {
8687
close(file);
8788
}
8889

90+
std::unique_ptr<InputStream> readFile(const std::string& path) {
91+
#ifdef BUILD_LIBHDFSPP
92+
if(strncmp (path.c_str(), "hdfs://", 7) == 0){
93+
return orc::readHdfsFile(std::string(path));
94+
} else {
95+
#endif
96+
return orc::readLocalFile(std::string(path));
97+
#ifdef BUILD_LIBHDFSPP
98+
}
99+
#endif
100+
}
101+
89102
std::unique_ptr<InputStream> readLocalFile(const std::string& path) {
90-
return std::unique_ptr<InputStream>(new FileInputStream(path));
103+
return std::unique_ptr<InputStream>(new FileInputStream(path));
91104
}
92105

93106
OutputStream::~OutputStream() {

0 commit comments

Comments
 (0)