Skip to content
This repository was archived by the owner on Apr 10, 2024. It is now read-only.

Commit bad99a1

Browse files
committed
Adding operator+[=] and operator/[=] for FloatingArray and IntegerArray.
This includes a design change that obviates the need for an ArrayView. Instead, every array has an internal offset. A shallow copy is made with the copy constructor, though the current set of copy constructors doesn't yet support a slice. A deep copy is still made through the Copy virtual function.

More detailed explanation of the changes:
* Adding copy/move constructors for {Floating,Integer,Numeric}Array
* Adding various methods for marking/getting nulls (valid bits) in integer arrays
* Changing data() and mutable_data() in NumericArray so that they return a pointer that starts at the array's offset
* Adding Addable/Divisable classes (similar to Boost operators) for easy support of operator+ and operator/
* Adding unit test scaffolding for testing permutations of left/right-hand-side types on arithmetic operators
* Implementing IntegerArray::operator/, IntegerArray::operator+=, FloatingArray::operator/=, and FloatingArray::operator+=
1 parent 29df124 commit bad99a1

File tree

12 files changed

+607
-56
lines changed

12 files changed

+607
-56
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ endif()
7474
# GCC cannot always verify whether strict aliasing rules are indeed followed due to
7575
# fundamental limitations in escape analysis, which can result in subtle bad code generation.
7676
# This has a small perf hit but worth it to avoid hard to debug crashes.
77-
set(CXX_COMMON_FLAGS "-std=c++11 -fno-strict-aliasing -msse4.2 -Wall -Wno-sign-compare -Wno-deprecated -pthread -D__STDC_FORMAT_MACROS")
77+
set(CXX_COMMON_FLAGS "-std=c++1y -fno-strict-aliasing -msse4.2 -Wall -Wno-sign-compare -Wno-deprecated -pthread -D__STDC_FORMAT_MACROS")
7878

7979
# compiler flags for different build types (run 'cmake -DCMAKE_BUILD_TYPE=<type> .')
8080
# For all builds:

pandas/native.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ cdef extern from "pandas/api.h" namespace "pandas":
7474
c_bool Equals(const DataType& other)
7575
string ToString()
7676

77-
ctypedef shared_ptr[DataType] TypePtr
77+
ctypedef shared_ptr[const DataType] TypePtr
7878

7979
cdef cppclass Int8Type(DataType):
8080
pass

pandas/native.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ cdef Array wrap_array(const lp.ArrayPtr& arr):
277277

278278
cdef PandasType wrap_type(const lp.TypePtr& sp_type):
279279
cdef:
280-
lp.DataType* type = sp_type.get()
280+
const lp.DataType* type = sp_type.get()
281281
PandasType result
282282

283283
if type.type() == lp.TypeId_CATEGORY:

src/pandas/array-test.cc

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,177 @@ TEST_F(TestArray, Attrs) {
4545
ASSERT_EQ(values_.size(), array_->length());
4646
}
4747

48+
template <template <typename> class LEFT_ARRAY_TYPE, typename LEFT_DATA_TYPE,
49+
template <typename> class RIGHT_ARRAY_TYPE, typename RIGHT_DATA_TYPE,
50+
std::size_t LENGTH = 10>
51+
class OperatorTest {
52+
public:
53+
OperatorTest()
54+
: left_buffer_(std::make_shared<Buffer>(
55+
reinterpret_cast<const std::uint8_t*>(Initialize(left_data_)),
56+
LENGTH * sizeof(LEFT_C_TYPE))),
57+
right_buffer_(std::make_shared<Buffer>(
58+
reinterpret_cast<const std::uint8_t*>(Initialize(right_data_)),
59+
LENGTH * sizeof(RIGHT_C_TYPE))),
60+
left_array_(LENGTH, left_buffer_),
61+
right_array_(LENGTH, right_buffer_) {}
62+
63+
template <typename OPERATOR>
64+
void TestOperator(OPERATOR& operation) {
65+
auto result = operation(left_array_, right_array_);
66+
for (auto ii = 0; ii < left_array_.length(); ++ii) {
67+
ASSERT_EQ(result.data()[ii], operation(left_data_[ii], right_data_[ii]));
68+
}
69+
}
70+
71+
template <typename OPERATOR, typename INPLACE_OPERATOR>
72+
void TestInplaceOperator(OPERATOR& operation, INPLACE_OPERATOR& inplace_operation) {
73+
auto result = operation(left_array_, right_array_);
74+
for (auto ii = 0; ii < left_array_.length(); ++ii) {
75+
ASSERT_EQ(result.data()[ii], operation(left_data_[ii], right_data_[ii]));
76+
}
77+
inplace_operation(left_array_, right_array_);
78+
for (auto ii = 0; ii < left_array_.length(); ++ii) {
79+
ASSERT_EQ(left_array_.data()[ii], operation(left_data_[ii], right_data_[ii]));
80+
}
81+
for (auto ii = 0; ii < left_array_.length(); ++ii) {
82+
ASSERT_EQ(left_array_.data()[ii], result.data()[ii]);
83+
}
84+
}
85+
86+
private:
87+
template <typename C_TYPE>
88+
static C_TYPE* Initialize(C_TYPE (&value)[LENGTH]) {
89+
for (auto ii = 0; ii < LENGTH; ++ii) {
90+
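// Start at 1 so that no element is zero: the same values are used as divisors by operator/, and an integer divide-by-zero would raise a floating-point exception (FPE)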
91+
value[ii] = static_cast<C_TYPE>(ii + 1);
92+
}
93+
return value;
94+
}
95+
96+
using LEFT_C_TYPE = typename LEFT_DATA_TYPE::c_type;
97+
98+
using RIGHT_C_TYPE = typename RIGHT_DATA_TYPE::c_type;
99+
100+
LEFT_C_TYPE left_data_[LENGTH];
101+
102+
RIGHT_C_TYPE right_data_[LENGTH];
103+
104+
std::shared_ptr<Buffer> left_buffer_;
105+
106+
std::shared_ptr<Buffer> right_buffer_;
107+
108+
LEFT_ARRAY_TYPE<LEFT_DATA_TYPE> left_array_;
109+
110+
RIGHT_ARRAY_TYPE<RIGHT_DATA_TYPE> right_array_;
111+
};
112+
113+
TEST(TestArrayOperators, Addition) {
114+
auto plus = [](auto const& left, auto const& right) { return left + right; };
115+
auto plus_inplace = [](auto& left, auto const& right) { left += right; };
116+
117+
OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt16Type>().TestInplaceOperator(
118+
plus, plus_inplace);
119+
OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt8Type>().TestInplaceOperator(
120+
plus, plus_inplace);
121+
OperatorTest<IntegerArray, Int8Type, IntegerArray, Int16Type>().TestInplaceOperator(
122+
plus, plus_inplace);
123+
OperatorTest<IntegerArray, Int16Type, IntegerArray, Int8Type>().TestInplaceOperator(
124+
plus, plus_inplace);
125+
OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt16Type>().TestInplaceOperator(
126+
plus, plus_inplace);
127+
OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt32Type>().TestInplaceOperator(
128+
plus, plus_inplace);
129+
OperatorTest<IntegerArray, Int32Type, IntegerArray, Int64Type>().TestInplaceOperator(
130+
plus, plus_inplace);
131+
OperatorTest<IntegerArray, Int64Type, IntegerArray, Int32Type>().TestInplaceOperator(
132+
plus, plus_inplace);
133+
134+
OperatorTest<IntegerArray, Int8Type, IntegerArray, Int8Type>().TestInplaceOperator(
135+
plus, plus_inplace);
136+
OperatorTest<IntegerArray, Int16Type, IntegerArray, Int16Type>().TestInplaceOperator(
137+
plus, plus_inplace);
138+
OperatorTest<IntegerArray, Int32Type, IntegerArray, Int32Type>().TestInplaceOperator(
139+
plus, plus_inplace);
140+
OperatorTest<IntegerArray, Int64Type, IntegerArray, Int64Type>().TestInplaceOperator(
141+
plus, plus_inplace);
142+
OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt8Type>().TestInplaceOperator(
143+
plus, plus_inplace);
144+
OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt16Type>().TestInplaceOperator(
145+
plus, plus_inplace);
146+
OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt32Type>().TestInplaceOperator(
147+
plus, plus_inplace);
148+
OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt64Type>().TestInplaceOperator(
149+
plus, plus_inplace);
150+
151+
OperatorTest<FloatingArray, FloatType, IntegerArray, UInt8Type>().TestInplaceOperator(
152+
plus, plus_inplace);
153+
OperatorTest<FloatingArray, FloatType, IntegerArray, UInt64Type>().TestInplaceOperator(
154+
plus, plus_inplace);
155+
OperatorTest<FloatingArray, FloatType, IntegerArray, Int8Type>().TestInplaceOperator(
156+
plus, plus_inplace);
157+
OperatorTest<FloatingArray, FloatType, IntegerArray, Int64Type>().TestInplaceOperator(
158+
plus, plus_inplace);
159+
160+
OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt16Type>().TestInplaceOperator(
161+
plus, plus_inplace);
162+
OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt64Type>().TestInplaceOperator(
163+
plus, plus_inplace);
164+
OperatorTest<FloatingArray, DoubleType, IntegerArray, Int16Type>().TestInplaceOperator(
165+
plus, plus_inplace);
166+
OperatorTest<FloatingArray, DoubleType, IntegerArray, Int64Type>().TestInplaceOperator(
167+
plus, plus_inplace);
168+
}
169+
170+
TEST(TestArrayOperators, Division) {
171+
auto divide = [](auto const& left, auto const& right) { return left / right; };
172+
auto divide_inplace = [](auto& left, auto const& right) { left /= right; };
173+
174+
OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt16Type>().TestOperator(divide);
175+
OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt8Type>().TestOperator(divide);
176+
OperatorTest<IntegerArray, Int8Type, IntegerArray, Int16Type>().TestOperator(divide);
177+
OperatorTest<IntegerArray, Int16Type, IntegerArray, Int8Type>().TestOperator(divide);
178+
OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt16Type>().TestOperator(divide);
179+
OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt32Type>().TestOperator(divide);
180+
OperatorTest<IntegerArray, Int32Type, IntegerArray, Int64Type>().TestOperator(divide);
181+
OperatorTest<IntegerArray, Int64Type, IntegerArray, Int32Type>().TestOperator(divide);
182+
183+
OperatorTest<IntegerArray, Int8Type, IntegerArray, Int8Type>().TestOperator(divide);
184+
OperatorTest<IntegerArray, Int16Type, IntegerArray, Int16Type>().TestOperator(divide);
185+
OperatorTest<IntegerArray, Int32Type, IntegerArray, Int32Type>().TestOperator(divide);
186+
OperatorTest<IntegerArray, Int64Type, IntegerArray, Int64Type>().TestOperator(divide);
187+
OperatorTest<IntegerArray, UInt8Type, IntegerArray, UInt8Type>().TestOperator(divide);
188+
OperatorTest<IntegerArray, UInt16Type, IntegerArray, UInt16Type>().TestOperator(divide);
189+
OperatorTest<IntegerArray, UInt32Type, IntegerArray, UInt32Type>().TestOperator(divide);
190+
OperatorTest<IntegerArray, UInt64Type, IntegerArray, UInt64Type>().TestOperator(divide);
191+
192+
OperatorTest<FloatingArray, FloatType, IntegerArray, UInt8Type>().TestInplaceOperator(
193+
divide, divide_inplace);
194+
OperatorTest<FloatingArray, FloatType, IntegerArray, UInt64Type>().TestInplaceOperator(
195+
divide, divide_inplace);
196+
OperatorTest<FloatingArray, FloatType, IntegerArray, Int8Type>().TestInplaceOperator(
197+
divide, divide_inplace);
198+
OperatorTest<FloatingArray, FloatType, IntegerArray, Int64Type>().TestInplaceOperator(
199+
divide, divide_inplace);
200+
OperatorTest<FloatingArray, FloatType, FloatingArray, FloatType>().TestInplaceOperator(
201+
divide, divide_inplace);
202+
OperatorTest<FloatingArray, FloatType, FloatingArray, DoubleType>().TestInplaceOperator(
203+
divide, divide_inplace);
204+
205+
OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt16Type>().TestInplaceOperator(
206+
divide, divide_inplace);
207+
OperatorTest<FloatingArray, DoubleType, IntegerArray, UInt64Type>().TestInplaceOperator(
208+
divide, divide_inplace);
209+
OperatorTest<FloatingArray, DoubleType, IntegerArray, Int16Type>().TestInplaceOperator(
210+
divide, divide_inplace);
211+
OperatorTest<FloatingArray, DoubleType, IntegerArray, Int64Type>().TestInplaceOperator(
212+
divide, divide_inplace);
213+
OperatorTest<FloatingArray, DoubleType, FloatingArray, FloatType>().TestInplaceOperator(
214+
divide, divide_inplace);
215+
OperatorTest<FloatingArray, DoubleType, FloatingArray, DoubleType>()
216+
.TestInplaceOperator(divide, divide_inplace);
217+
}
218+
48219
// ----------------------------------------------------------------------
49220
// Array view object
50221

src/pandas/array.cc

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,8 @@ namespace pandas {
1111
// ----------------------------------------------------------------------
1212
// Array
1313

14-
Array::Array(const std::shared_ptr<DataType>& type, int64_t length)
15-
: type_(type), length_(length) {}
16-
1714
Status Array::Copy(std::shared_ptr<Array>* out) const {
18-
return Copy(0, length_, out);
15+
return Copy(0, length(), out);
1916
}
2017

2118
// ----------------------------------------------------------------------

src/pandas/array.h

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,19 @@ class Array {
2020
public:
2121
virtual ~Array() {}
2222

23-
int64_t length() const { return length_; }
24-
std::shared_ptr<DataType> type() const { return type_; }
25-
DataType::TypeId type_id() const { return type_->type(); }
23+
virtual int64_t length() const = 0;
24+
// There are two methods to obtain the data type.
25+
// The signature without a shared_ptr allows sub-classes
26+
// to have a covariant return type, which eliminates the
27+
// need/danger of doing a static_cast when dealing with
28+
// a concrete sub-class. Ideally, the shared_ptr signature
29+
// would suffice, but the compiler cannot treat a shared_ptr
30+
// to a base class and a shared_ptr to a subclass as a
31+
// covariant return type.
32+
virtual TypePtr type() const = 0;
33+
virtual const DataType& type_reference() const = 0;
34+
35+
DataType::TypeId type_id() const { return type()->type(); }
2636

2737
// Copy a section of the array into a new output array
2838
virtual Status Copy(
@@ -42,13 +52,10 @@ class Array {
4252
virtual bool owns_data() const = 0;
4353

4454
protected:
45-
std::shared_ptr<DataType> type_;
46-
int64_t length_;
55+
Array() {}
4756

48-
Array(const std::shared_ptr<DataType>& type, int64_t length);
49-
50-
private:
51-
DISALLOW_COPY_AND_ASSIGN(Array);
57+
Array(const Array& other) = default;
58+
Array(Array&& other) = default;
5259
};
5360

5461
// An object that is a view on a section of another array (possibly the whole

src/pandas/type.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,15 +57,15 @@ class DataType {
5757

5858
virtual std::string ToString() const = 0;
5959

60-
virtual bool Equals(const DataType& other) { return type_ == other.type_; }
60+
virtual bool Equals(const DataType& other) const { return type_ == other.type_; }
6161

6262
TypeId type() const { return type_; }
6363

6464
private:
6565
TypeId type_;
6666
};
6767

68-
typedef std::shared_ptr<DataType> TypePtr;
68+
using TypePtr = std::shared_ptr<const DataType>;
6969

7070
class PANDAS_EXPORT TimestampType : public DataType {
7171
public:

src/pandas/types/category.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55

66
namespace pandas {
77

8+
CategoryArray::CategoryArray(ArrayView codes, const std::shared_ptr<CategoryType>& type)
9+
: codes_(codes), type_(type) {}
10+
811
std::string CategoryType::ToString() const {
912
std::stringstream s;
1013
s << "category<" << category_type()->ToString() << ">";

src/pandas/types/category.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ struct CategoryType : public DataType {
2020

2121
std::string ToString() const override;
2222

23-
std::shared_ptr<DataType> category_type() const { return categories_.data()->type(); }
23+
std::shared_ptr<const DataType> category_type() const {
24+
return categories_.data()->type();
25+
}
2426

2527
const ArrayView& categories() const { return categories_; }
2628

@@ -30,14 +32,15 @@ struct CategoryType : public DataType {
3032

3133
class CategoryArray : public Array {
3234
public:
35+
CategoryArray(ArrayView codes, const std::shared_ptr<CategoryType>& type);
36+
3337
const ArrayView& codes() const { return codes_; }
3438

35-
const ArrayView& categories() const {
36-
return static_cast<CategoryType*>(type_.get())->categories();
37-
}
39+
const ArrayView& categories() const { return type_->categories(); }
3840

3941
private:
4042
ArrayView codes_;
43+
std::shared_ptr<CategoryType> type_;
4144
};
4245

4346
} // namespace pandas

0 commit comments

Comments
 (0)