Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ language: cpp
compiler:
- gcc
- clang
jdk:
- openjdk7
before_install:
- sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
- sudo apt-get -qq update
Expand All @@ -12,5 +14,7 @@ before_script:
- mkdir build
- cd build
- cmake ..
env:
- MAVEN_OPTS=-Xmx2g MAVEN_SKIP_RC=true
script:
- make package test-out
5 changes: 5 additions & 0 deletions java/bench/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.*.crc
*.json.gz
*.avro
*.parquet
*.orc
6 changes: 6 additions & 0 deletions java/bench/fetch-data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
mkdir -p data/sources/taxi
(cd data/sources/taxi; wget https://s3.amazonaws.com/nyc-tlc/trip+data/yellow_tripdata_2015-{11,12}.csv)
(cd data/sources/taxi; gzip *.csv)
mkdir -p data/sources/github
(cd data/sources/github; wget http://data.githubarchive.org/2015-11-{01..15}-{0..23}.json.gz)
159 changes: 159 additions & 0 deletions java/bench/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.orc</groupId>
<artifactId>orc</artifactId>
<version>1.4.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>

<artifactId>orc-benchmarks</artifactId>
<packaging>jar</packaging>
<name>ORC Benchmarks</name>
<description>
Benchmarks for comparing ORC, Parquet, and Avro performance.
</description>

<dependencies>
<dependency>
<groupId>org.apache.orc</groupId>
<artifactId>orc-core</artifactId>
</dependency>

<!-- inter-project -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
</dependency>
<dependency>
<groupId>io.airlift</groupId>
<artifactId>aircompressor</artifactId>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
</dependency>
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro-mapred</artifactId>
<classifier>hadoop2</classifier>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-csv</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<classifier>core</classifier>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-storage-api</artifactId>
</dependency>
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-hadoop-bundle</artifactId>
</dependency>
<dependency>
<groupId>org.jodd</groupId>
<artifactId>jodd-core</artifactId>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
</dependency>
</dependencies>

<build>
<sourceDirectory>${basedir}/src/java</sourceDirectory>
<testSourceDirectory>${basedir}/src/test</testSourceDirectory>
<testResources>
<testResource>
<directory>${basedir}/src/test/resources</directory>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.apache.orc.bench.Driver</mainClass>
</manifest>
</archive>
<descriptors>
<descriptor>src/assembly/uber.xml</descriptor>
</descriptors>
</configuration>
<executions>
<execution>
<id>make-assembly</id> <!-- this is used for inheritance merges -->
<phase>package</phase> <!-- bind to the packaging phase -->
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>

<profiles>
<profile>
<id>cmake</id>
<build>
<directory>${build.dir}/bench</directory>
</build>
</profile>
</profiles>
</project>
33 changes: 33 additions & 0 deletions java/bench/src/assembly/uber.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly>
<id>uber</id>
<formats>
<format>jar</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<unpack>true</unpack>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
<containerDescriptorHandlers>
<containerDescriptorHandler>
<handlerName>metaInf-services</handlerName>
</containerDescriptorHandler>
</containerDescriptorHandlers>
</assembly>
25 changes: 25 additions & 0 deletions java/bench/src/findbugs/exclude.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter>
<Match>
<Bug pattern="EI_EXPOSE_REP,EI_EXPOSE_REP2"/>
</Match>
<Match>
<Class name="~org\.openjdk\.jmh\.infra\.generated.*"/>
</Match>
<Match>
<Class name="~org\.apache\.orc\.bench\.generated.*"/>
</Match>
</FindBugsFilter>
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;

import java.io.FileNotFoundException;
import java.io.IOException;

public class TrackingLocalFileSystem extends RawLocalFileSystem {

class TrackingFileInputStream extends RawLocalFileSystem.LocalFSFileInputStream {
public TrackingFileInputStream(Path f) throws IOException {
super(f);
}

public int read() throws IOException {
statistics.incrementReadOps(1);
return super.read();
}

public int read(byte[] b, int off, int len) throws IOException {
statistics.incrementReadOps(1);
return super.read(b, off, len);
}

public int read(long position, byte[] b, int off, int len) throws IOException {
statistics.incrementReadOps(1);
return super.read(position, b, off, len);
}
}

public FSDataInputStream open(Path f, int bufferSize) throws IOException {
if (!exists(f)) {
throw new FileNotFoundException(f.toString());
}
return new FSDataInputStream(new BufferedFSInputStream(
new TrackingFileInputStream(f), bufferSize));
}

public FileSystem.Statistics getLocalStatistics() {
return statistics;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.orc;

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Writable;
import org.apache.orc.OrcProto;
import org.apache.orc.OrcUtils;
import org.apache.orc.TypeDescription;

import java.util.List;

/**
* Utilities that need the non-public methods from Hive.
*/
public class OrcBenchmarkUtilities {

public static StructObjectInspector createObjectInspector(TypeDescription schema) {
List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
return (StructObjectInspector) OrcStruct.createObjectInspector(0, types);
}

public static Writable nextObject(VectorizedRowBatch batch,
TypeDescription schema,
int rowId,
Writable obj) {
OrcStruct result = (OrcStruct) obj;
if (result == null) {
result = new OrcStruct(batch.cols.length);
}
List<TypeDescription> childrenTypes = schema.getChildren();
for(int c=0; c < batch.cols.length; ++c) {
result.setFieldValue(c, RecordReaderImpl.nextValue(batch.cols[c], rowId,
childrenTypes.get(c), result.getFieldValue(c)));
}
return result;
}
}
Loading