Use the new absl::StringResizeAndOverwrite() in string escaping utilities

PiperOrigin-RevId: 821767972
Change-Id: Ia4d38b49bc97a485dec3e352dcb056ca1c4465d6
This commit is contained in:
Derek Mauro
2025-10-20 13:01:32 -07:00
committed by Copybara-Service
parent 1f659deb71
commit c9ffd888c1
4 changed files with 117 additions and 84 deletions

View File

@@ -135,6 +135,7 @@ cc_library(
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [
":resize_and_overwrite",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",

View File

@@ -139,6 +139,7 @@ absl_cc_library(
absl::core_headers
absl::endian
absl::raw_logging_internal
absl::strings_resize_and_overwrite
absl::type_traits
)

View File

@@ -28,6 +28,7 @@
#include "absl/base/internal/endian.h"
#include "absl/base/internal/raw_logging.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/macros.h"
#include "absl/base/nullability.h"
#include "absl/strings/ascii.h"
#include "absl/strings/charset.h"
@@ -35,6 +36,7 @@
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/internal/utf8.h"
#include "absl/strings/numbers.h"
#include "absl/strings/resize_and_overwrite.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/string_view.h"
@@ -76,30 +78,29 @@ inline bool IsSurrogate(char32_t c, absl::string_view src,
//
// Unescapes C escape sequences and is the reverse of CEscape().
//
// If `src` is valid, stores the unescaped string `dst`, and returns
// true. Otherwise returns false and optionally stores the error
// description in `error`. Set `error` to nullptr to disable error
// reporting.
// If `src` is valid, stores the unescaped string in `dst` and the length of
// unescaped string in `dst_size`, and returns true. Otherwise returns false
// and optionally stores the error description in `error`. Set `error` to
// nullptr to disable error reporting.
//
// `src` and `dst` may use the same underlying buffer.
// `dst` must have at least as much space as `src`.
// ----------------------------------------------------------------------
bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
std::string* absl_nonnull dst,
char* absl_nonnull dst, size_t* absl_nonnull dst_size,
std::string* absl_nullable error) {
strings_internal::STLStringResizeUninitialized(dst, src.size());
absl::string_view::size_type p = 0; // Current src position.
std::string::size_type d = 0; // Current dst position.
size_t d = 0; // Current dst position.
// When unescaping in-place, skip any prefix that does not have escaping.
if (src.data() == dst->data()) {
if (src.data() == dst) {
while (p < src.size() && src[p] != '\\') p++, d++;
}
while (p < src.size()) {
if (src[p] != '\\') {
(*dst)[d++] = src[p++];
dst[d++] = src[p++];
} else {
if (++p >= src.size()) { // skip past the '\\'
if (error != nullptr) {
@@ -108,17 +109,19 @@ bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
return false;
}
switch (src[p]) {
case 'a': (*dst)[d++] = '\a'; break;
case 'b': (*dst)[d++] = '\b'; break;
case 'f': (*dst)[d++] = '\f'; break;
case 'n': (*dst)[d++] = '\n'; break;
case 'r': (*dst)[d++] = '\r'; break;
case 't': (*dst)[d++] = '\t'; break;
case 'v': (*dst)[d++] = '\v'; break;
case '\\': (*dst)[d++] = '\\'; break;
case '?': (*dst)[d++] = '\?'; break;
case '\'': (*dst)[d++] = '\''; break;
case '"': (*dst)[d++] = '\"'; break;
// clang-format off
case 'a': dst[d++] = '\a'; break;
case 'b': dst[d++] = '\b'; break;
case 'f': dst[d++] = '\f'; break;
case 'n': dst[d++] = '\n'; break;
case 'r': dst[d++] = '\r'; break;
case 't': dst[d++] = '\t'; break;
case 'v': dst[d++] = '\v'; break;
case '\\': dst[d++] = '\\'; break;
case '?': dst[d++] = '\?'; break;
case '\'': dst[d++] = '\''; break;
case '"': dst[d++] = '\"'; break;
// clang-format on
case '0':
case '1':
case '2':
@@ -145,13 +148,13 @@ bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
}
if ((ch == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
(*dst)[d++] = '\\';
dst[d++] = '\\';
while (octal_start <= p) {
(*dst)[d++] = src[octal_start++];
dst[d++] = src[octal_start++];
}
break;
}
(*dst)[d++] = static_cast<char>(ch);
dst[d++] = static_cast<char>(ch);
break;
}
case 'x':
@@ -185,13 +188,13 @@ bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
}
if ((ch == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
(*dst)[d++] = '\\';
dst[d++] = '\\';
while (hex_start <= p) {
(*dst)[d++] = src[hex_start++];
dst[d++] = src[hex_start++];
}
break;
}
(*dst)[d++] = static_cast<char>(ch);
dst[d++] = static_cast<char>(ch);
break;
}
case 'u': {
@@ -218,16 +221,16 @@ bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
}
if ((rune == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
(*dst)[d++] = '\\';
dst[d++] = '\\';
while (hex_start <= p) {
(*dst)[d++] = src[hex_start++];
dst[d++] = src[hex_start++];
}
break;
}
if (IsSurrogate(rune, src.substr(hex_start, 5), error)) {
return false;
}
d += strings_internal::EncodeUTF8Char(dst->data() + d, rune);
d += strings_internal::EncodeUTF8Char(dst + d, rune);
break;
}
case 'U': {
@@ -267,17 +270,17 @@ bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
}
if ((rune == 0) && leave_nulls_escaped) {
// Copy the escape sequence for the null character
(*dst)[d++] = '\\';
dst[d++] = '\\';
// U00000000
while (hex_start <= p) {
(*dst)[d++] = src[hex_start++];
dst[d++] = src[hex_start++];
}
break;
}
if (IsSurrogate(rune, src.substr(hex_start, 9), error)) {
return false;
}
d += strings_internal::EncodeUTF8Char(dst->data() + d, rune);
d += strings_internal::EncodeUTF8Char(dst + d, rune);
break;
}
default: {
@@ -291,7 +294,7 @@ bool CUnescapeInternal(absl::string_view src, bool leave_nulls_escaped,
}
}
dst->erase(d);
*dst_size = d;
return true;
}
@@ -807,23 +810,18 @@ bool Base64UnescapeInternal(const char* absl_nullable src, size_t slen,
// 4 characters. Any leftover chars are added directly for good measure.
const size_t dest_len = 3 * (slen / 4) + (slen % 4);
strings_internal::STLStringResizeUninitialized(dest, dest_len);
// We are getting the destination buffer by getting the beginning of the
// string and converting it into a char *.
size_t len;
const bool ok =
Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
if (!ok) {
dest->clear();
return false;
}
// could be shorter if there was padding
assert(len <= dest_len);
dest->erase(len);
return true;
bool ok;
StringResizeAndOverwrite(
*dest, dest_len, [src, slen, unbase64, &ok](char* buf, size_t buf_size) {
size_t len;
ok = Base64UnescapeInternal(src, slen, buf, buf_size, unbase64, &len);
if (!ok) {
len = 0;
}
assert(len <= buf_size); // Could be shorter if there was padding.
return len;
});
return ok;
}
/* clang-format off */
@@ -878,15 +876,11 @@ void HexStringToBytesInternal(const char* absl_nullable from, T to,
}
}
// This is a templated function so that T can be either a char* or a
// std::string.
template <typename T>
void BytesToHexStringInternal(const unsigned char* absl_nullable src, T dest,
size_t num) {
auto dest_ptr = &dest[0];
for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
void BytesToHexStringInternal(const unsigned char* absl_nullable src,
char* dest, size_t num) {
for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest += 2) {
const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
std::copy(hex_p, hex_p + 2, dest_ptr);
std::copy(hex_p, hex_p + 2, dest);
}
}
@@ -899,7 +893,32 @@ void BytesToHexStringInternal(const unsigned char* absl_nullable src, T dest,
// ----------------------------------------------------------------------
bool CUnescape(absl::string_view source, std::string* absl_nonnull dest,
std::string* absl_nullable error) {
return CUnescapeInternal(source, kUnescapeNulls, dest, error);
bool success;
// `CUnescape()` allows for in-place unescaping, which means `source` may
// alias `*dest`. However, absl::StringResizeAndOverwrite() invalidates
// invalidates all iterators, pointers, and references into the string,
// regardless whether reallocation occurs. Therefore we need to avoid calling
// absl::StringResizeAndOverwrite() when `source.data() ==
// dest->data()`. Comparing the sizes is sufficient to cover this case.
if (dest->size() >= source.size()) {
size_t dest_size = 0;
success = CUnescapeInternal(source, kUnescapeNulls, dest->data(),
&dest_size, error);
ABSL_ASSERT(dest_size <= dest->size());
dest->erase(dest_size);
} else {
StringResizeAndOverwrite(
*dest, source.size(),
[source, error, &success](char* buf, size_t buf_size) {
size_t dest_size = 0;
success =
CUnescapeInternal(source, kUnescapeNulls, buf, &dest_size, error);
ABSL_ASSERT(dest_size <= buf_size);
return dest_size;
});
}
return success;
}
std::string CEscape(absl::string_view src) {
@@ -966,19 +985,23 @@ bool HexStringToBytes(absl::string_view hex, std::string* absl_nonnull bytes) {
return false;
}
absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes);
auto hex_p = hex.cbegin();
for (std::string::iterator bin_p = output.begin(); bin_p != output.end();
++bin_p) {
int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
if (h1 == -1 || h2 == -1) {
output.resize(static_cast<size_t>(bin_p - output.begin()));
return false;
}
*bin_p = static_cast<char>((h1 << 4) + h2);
}
StringResizeAndOverwrite(
output, num_bytes, [hex](char* buf, size_t buf_size) {
auto hex_p = hex.cbegin();
for (size_t i = 0; i < buf_size; ++i) {
int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
if (h1 == -1 || h2 == -1) {
return size_t{0};
}
buf[i] = static_cast<char>((h1 << 4) + h2);
}
return buf_size;
});
if (output.size() != num_bytes) {
return false;
}
*bytes = std::move(output);
return true;
}
@@ -986,16 +1009,22 @@ bool HexStringToBytes(absl::string_view hex, std::string* absl_nonnull bytes) {
std::string HexStringToBytes(absl::string_view from) {
std::string result;
const auto num = from.size() / 2;
strings_internal::STLStringResizeUninitialized(&result, num);
absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
StringResizeAndOverwrite(result, num, [from](char* buf, size_t buf_size) {
absl::HexStringToBytesInternal<char*>(from.data(), buf, buf_size);
return buf_size;
});
return result;
}
std::string BytesToHexString(absl::string_view from) {
std::string result;
strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
absl::BytesToHexStringInternal<std::string&>(
reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
StringResizeAndOverwrite(
result, 2 * from.size(), [from](char* buf, size_t buf_size) {
absl::BytesToHexStringInternal(
reinterpret_cast<const unsigned char*>(from.data()), buf,
from.size());
return buf_size;
});
return result;
}

View File

@@ -17,7 +17,7 @@
#include <cassert>
#include "absl/strings/internal/resize_uninitialized.h"
#include "absl/strings/resize_and_overwrite.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
@@ -42,12 +42,14 @@ void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
bool do_padding, const char* base64_chars) {
const size_t calc_escaped_size =
CalculateBase64EscapedLenInternal(szsrc, do_padding);
STLStringResizeUninitialized(dest, calc_escaped_size);
const size_t escaped_len = Base64EscapeInternal(
src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
assert(calc_escaped_size == escaped_len);
dest->erase(escaped_len);
StringResizeAndOverwrite(
*dest, calc_escaped_size,
[src, szsrc, base64_chars, do_padding](char* buf, size_t buf_size) {
const size_t escaped_len = Base64EscapeInternal(
src, szsrc, buf, buf_size, base64_chars, do_padding);
assert(escaped_len == buf_size);
return escaped_len;
});
}
} // namespace strings_internal