Skip to content

Commit 821e498

Browse files
committed
Make StructArray::field() thread-safe
Before this change it was possible for two threads calling `field()` with the same index at the same time to cause a race on the stored entry in `boxed_fields_`. I.e. if a second thread goes into the path that calls `MakeArray` before the first thread stored its own new array, the second thread would also write to the same shared_ptr and invalidate the shared_ptr from the first thread, thereby also invalidating the returned reference.
1 parent 934554d commit 821e498

File tree

2 files changed

+20
-13
lines changed

2 files changed

+20
-13
lines changed

cpp/src/arrow/array/array_nested.cc

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,20 +1076,27 @@ const ArrayVector& StructArray::fields() const {
10761076
return boxed_fields_;
10771077
}
10781078

1079-
const std::shared_ptr<Array>& StructArray::field(int i) const {
1079+
std::shared_ptr<Array> StructArray::field(int i) const {
10801080
std::shared_ptr<Array> result = std::atomic_load(&boxed_fields_[i]);
1081-
if (!result) {
1082-
std::shared_ptr<ArrayData> field_data;
1083-
if (data_->offset != 0 || data_->child_data[i]->length != data_->length) {
1084-
field_data = data_->child_data[i]->Slice(data_->offset, data_->length);
1085-
} else {
1086-
field_data = data_->child_data[i];
1087-
}
1088-
result = MakeArray(field_data);
1089-
std::atomic_store(&boxed_fields_[i], std::move(result));
1090-
return boxed_fields_[i];
1081+
if (result) {
1082+
return result;
10911083
}
1092-
return boxed_fields_[i];
1084+
std::shared_ptr<ArrayData> field_data;
1085+
if (data_->offset != 0 || data_->child_data[i]->length != data_->length) {
1086+
field_data = data_->child_data[i]->Slice(data_->offset, data_->length);
1087+
} else {
1088+
field_data = data_->child_data[i];
1089+
}
1090+
result = MakeArray(field_data);
1091+
// Check if some other thread inserted the array in the meantime and return
1092+
// that in that case.
1093+
std::shared_ptr<Array> expected = nullptr;
1094+
if (!std::atomic_compare_exchange_strong_explicit(&boxed_fields_[i], &expected, result,
1095+
std::memory_order_release,
1096+
std::memory_order_acquire)) {
1097+
result = std::move(expected);
1098+
}
1099+
return result;
10931100
}
10941101

10951102
std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) const {

cpp/src/arrow/array/array_nested.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,7 @@ class ARROW_EXPORT StructArray : public Array {
692692
// Return a shared pointer in case the requestor desires to share ownership
693693
// with this array. The returned array has its offset, length and null
694694
// count adjusted.
695-
const std::shared_ptr<Array>& field(int pos) const;
695+
std::shared_ptr<Array> field(int pos) const;
696696

697697
const ArrayVector& fields() const;
698698

0 commit comments

Comments
 (0)