Skip to content

Commit cb528ec

Browse files
author
Tacet
authored
[ASan][libc++] Turn on ASan annotations for short strings (#79049)
Originally merged here: #75882 Reverted here: #78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - #79065 - #79066 Problematic code from `UniqueFunctionBase` for example: ```cpp #ifndef NDEBUG // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); #endif ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: #72677 Requires to pass CI without fails: - #75845 - #75858 Annotating `std::basic_string` with default allocator is implemented in #72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since dd1b7b7. You can turn off annotations for a specific allocator based on changes from 2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
1 parent c416b2e commit cb528ec

File tree

5 files changed

+429
-34
lines changed

5 files changed

+429
-34
lines changed

libcxx/include/string

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,6 @@ _LIBCPP_PUSH_MACROS
659659
#else
660660
# define _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
661661
#endif
662-
#define _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED false
663662

664663
_LIBCPP_BEGIN_NAMESPACE_STD
665664

@@ -1896,38 +1895,33 @@ private:
18961895
#endif
18971896
}
18981897

1899-
// ASan: short string is poisoned if and only if this function returns true.
1900-
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __asan_short_string_is_annotated() const _NOEXCEPT {
1901-
return _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED && !__libcpp_is_constant_evaluated();
1902-
}
1903-
19041898
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_new(size_type __current_size) const _NOEXCEPT {
19051899
(void) __current_size;
19061900
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
1907-
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
1901+
if (!__libcpp_is_constant_evaluated())
19081902
__annotate_contiguous_container(data() + capacity() + 1, data() + __current_size + 1);
19091903
#endif
19101904
}
19111905

19121906
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_delete() const _NOEXCEPT {
19131907
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
1914-
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
1908+
if (!__libcpp_is_constant_evaluated())
19151909
__annotate_contiguous_container(data() + size() + 1, data() + capacity() + 1);
19161910
#endif
19171911
}
19181912

19191913
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_increase(size_type __n) const _NOEXCEPT {
19201914
(void) __n;
19211915
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
1922-
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
1916+
if (!__libcpp_is_constant_evaluated())
19231917
__annotate_contiguous_container(data() + size() + 1, data() + size() + 1 + __n);
19241918
#endif
19251919
}
19261920

19271921
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_shrink(size_type __old_size) const _NOEXCEPT {
19281922
(void) __old_size;
19291923
#if !defined(_LIBCPP_HAS_NO_ASAN) && defined(_LIBCPP_INSTRUMENTED_WITH_ASAN)
1930-
if (!__libcpp_is_constant_evaluated() && (__asan_short_string_is_annotated() || __is_long()))
1924+
if (!__libcpp_is_constant_evaluated())
19311925
__annotate_contiguous_container(data() + __old_size + 1, data() + size() + 1);
19321926
#endif
19331927
}
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: asan
10+
// UNSUPPORTED: c++03
11+
12+
#include <cassert>
13+
#include <string>
14+
#include <array>
15+
#include <deque>
16+
#include "test_macros.h"
17+
#include "asan_testing.h"
18+
#include "min_allocator.h"
19+
20+
// This tests exists to check if strings work well with deque, as those
21+
// may be partialy annotated, we cannot simply call
22+
// is_double_ended_contiguous_container_asan_correct, as it assumes that
23+
// object memory inside is not annotated, so we check everything in a more careful way.
24+
25+
template <typename D>
26+
void verify_inside(D const& d) {
27+
for (size_t i = 0; i < d.size(); ++i) {
28+
assert(is_string_asan_correct(d[i]));
29+
}
30+
}
31+
32+
template <typename S, size_t N>
33+
S get_s(char c) {
34+
S s;
35+
for (size_t i = 0; i < N; ++i)
36+
s.push_back(c);
37+
38+
return s;
39+
}
40+
41+
template <class C, class S>
42+
void test_string() {
43+
size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
44+
45+
{
46+
C d1a(1), d1b(N), d1c(N + 1), d1d(5 * N);
47+
verify_inside(d1a);
48+
verify_inside(d1b);
49+
verify_inside(d1c);
50+
verify_inside(d1d);
51+
}
52+
{
53+
C d2;
54+
for (size_t i = 0; i < 3 * N + 2; ++i) {
55+
d2.push_back(get_s<S, 1>(i % 10 + 'a'));
56+
verify_inside(d2);
57+
d2.push_back(get_s<S, 22>(i % 10 + 'b'));
58+
verify_inside(d2);
59+
60+
d2.pop_front();
61+
verify_inside(d2);
62+
}
63+
}
64+
{
65+
C d3;
66+
for (size_t i = 0; i < 3 * N + 2; ++i) {
67+
d3.push_front(get_s<S, 1>(i % 10 + 'a'));
68+
verify_inside(d3);
69+
d3.push_front(get_s<S, 28>(i % 10 + 'b'));
70+
verify_inside(d3);
71+
72+
d3.pop_back();
73+
verify_inside(d3);
74+
}
75+
}
76+
{
77+
C d4;
78+
for (size_t i = 0; i < 3 * N + 2; ++i) {
79+
// When there is no SSO, all elements inside should not be poisoned,
80+
// so we can verify deque poisoning.
81+
d4.push_front(get_s<S, 33>(i % 10 + 'a'));
82+
verify_inside(d4);
83+
assert(is_double_ended_contiguous_container_asan_correct(d4));
84+
d4.push_back(get_s<S, 28>(i % 10 + 'b'));
85+
verify_inside(d4);
86+
assert(is_double_ended_contiguous_container_asan_correct(d4));
87+
}
88+
}
89+
{
90+
C d5;
91+
for (size_t i = 0; i < 3 * N + 2; ++i) {
92+
// In d4 we never had poisoned memory inside deque.
93+
// Here we start with SSO, so part of the inside of the container,
94+
// will be poisoned.
95+
d5.push_front(S());
96+
verify_inside(d5);
97+
}
98+
for (size_t i = 0; i < d5.size(); ++i) {
99+
// We change the size to have long string.
100+
// Memory owne by deque should not be poisoned by string.
101+
d5[i].resize(100);
102+
verify_inside(d5);
103+
}
104+
105+
assert(is_double_ended_contiguous_container_asan_correct(d5));
106+
107+
d5.erase(d5.begin() + 2);
108+
verify_inside(d5);
109+
110+
d5.erase(d5.end() - 2);
111+
verify_inside(d5);
112+
113+
assert(is_double_ended_contiguous_container_asan_correct(d5));
114+
}
115+
{
116+
C d6a;
117+
assert(is_double_ended_contiguous_container_asan_correct(d6a));
118+
119+
C d6b(N + 2, get_s<S, 100>('a'));
120+
d6b.push_front(get_s<S, 101>('b'));
121+
while (!d6b.empty()) {
122+
d6b.pop_back();
123+
assert(is_double_ended_contiguous_container_asan_correct(d6b));
124+
}
125+
126+
C d6c(N + 2, get_s<S, 102>('c'));
127+
while (!d6c.empty()) {
128+
d6c.pop_back();
129+
assert(is_double_ended_contiguous_container_asan_correct(d6c));
130+
}
131+
}
132+
{
133+
C d7(9 * N + 2);
134+
135+
d7.insert(d7.begin() + 1, S());
136+
verify_inside(d7);
137+
138+
d7.insert(d7.end() - 3, S());
139+
verify_inside(d7);
140+
141+
d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
142+
verify_inside(d7);
143+
144+
d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
145+
verify_inside(d7);
146+
147+
d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
148+
verify_inside(d7);
149+
150+
// It may not be short for big element types, but it will be checked correctly:
151+
d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
152+
verify_inside(d7);
153+
154+
d7.erase(d7.begin() + 2);
155+
verify_inside(d7);
156+
157+
d7.erase(d7.end() - 2);
158+
verify_inside(d7);
159+
}
160+
}
161+
162+
template <class S>
163+
void test_container() {
164+
test_string<std::deque<S, std::allocator<S>>, S>();
165+
test_string<std::deque<S, min_allocator<S>>, S>();
166+
test_string<std::deque<S, safe_allocator<S>>, S>();
167+
}
168+
169+
int main(int, char**) {
170+
// Those tests support only types based on std::basic_string.
171+
test_container<std::string>();
172+
test_container<std::wstring>();
173+
#if TEST_STD_VER >= 11
174+
test_container<std::u16string>();
175+
test_container<std::u32string>();
176+
#endif
177+
#if TEST_STD_VER >= 20
178+
test_container<std::u8string>();
179+
#endif
180+
181+
return 0;
182+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: asan
10+
// UNSUPPORTED: c++03
11+
12+
// <string>
13+
14+
// Basic test if ASan annotations work for short strings.
15+
16+
#include <string>
17+
#include <cassert>
18+
#include <cstdlib>
19+
20+
#include "asan_testing.h"
21+
#include "min_allocator.h"
22+
#include "test_iterators.h"
23+
#include "test_macros.h"
24+
25+
extern "C" void __sanitizer_set_death_callback(void (*callback)(void));
26+
27+
void do_exit() { exit(0); }
28+
29+
int main(int, char**) {
30+
{
31+
typedef cpp17_input_iterator<char*> MyInputIter;
32+
// Should not trigger ASan.
33+
std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
34+
char i[] = {'a', 'b', 'c', 'd'};
35+
36+
v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4));
37+
assert(v[0] == 'a');
38+
assert(is_string_asan_correct(v));
39+
}
40+
41+
__sanitizer_set_death_callback(do_exit);
42+
{
43+
using T = char;
44+
using C = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
45+
const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
46+
C c(std::begin(t), std::end(t));
47+
assert(is_string_asan_correct(c));
48+
assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
49+
0);
50+
volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
51+
assert(false); // if we got here, ASAN didn't trigger
52+
((void)foo);
53+
}
54+
55+
return 0;
56+
}

0 commit comments

Comments
 (0)