json: add function to write UTF-16LE strings
spdk_json_write_string_utf16le() writes a UTF-16LE string to a JSON write context.

Change-Id: I413ffb8a3dee6e1b44ec96ce2415fd1b9c36320f
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-on: https://review.gerrithub.io/368625
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
parent
229f64942d
commit
429672d31b
@ -198,6 +198,27 @@ int spdk_json_write_int64(struct spdk_json_write_ctx *w, int64_t val);
|
||||
int spdk_json_write_uint64(struct spdk_json_write_ctx *w, uint64_t val);
|
||||
int spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val);
|
||||
int spdk_json_write_string_raw(struct spdk_json_write_ctx *w, const char *val, size_t len);
|
||||
|
||||
/**
 * Write a null-terminated UTF-16LE string as a JSON string value.
 *
 * \param w JSON write context.
 * \param val UTF-16LE string; must end with a 16-bit null code unit, which is not written.
 * \return 0 on success or negative on failure.
 */
int spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val);
|
||||
|
||||
/**
 * Write a UTF-16LE string of explicit length as a JSON string value.
 *
 * \param w JSON write context.
 * \param val UTF-16LE string; embedded null characters are allowed.
 * \param len Number of 16-bit code units in val (the size of the string in bytes divided by 2).
 * \return 0 on success or negative on failure.
 */
int spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len);
|
||||
|
||||
int spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt,
|
||||
...) __attribute__((__format__(__printf__, 2, 3)));
|
||||
int spdk_json_write_array_begin(struct spdk_json_write_ctx *w);
|
||||
|
@ -36,6 +36,7 @@
|
||||
|
||||
#include "spdk/stdinc.h"
|
||||
|
||||
#include "spdk/endian.h"
|
||||
#include "spdk/json.h"
|
||||
#include "spdk/likely.h"
|
||||
#include "spdk/string.h"
|
||||
@ -251,6 +252,49 @@ utf16_valid_surrogate_low(uint32_t val)
|
||||
return val >= 0xDC00 && val <= 0xDFFF;
|
||||
}
|
||||
|
||||
/*
 * Validate the UTF-16LE encoding of the single codepoint at the front of [start, end).
 *
 * \return Number of 16-bit code units making up the codepoint (1 or 2), 0 if the range is
 * empty, or negative if the encoding is invalid.
 */
static inline int
utf16le_valid(const uint16_t *start, const uint16_t *end)
{
	uint16_t lead, trail;

	if (start == end) {
		return 0;
	}

	lead = from_le16(start);

	/* Anything outside the surrogate range D800..DFFF is a complete BMP codepoint. */
	if (lead < 0xD800 || lead > 0xDFFF) {
		return 1;
	}

	/* A sequence may not begin with a low surrogate (DC00..DFFF). */
	if (lead > 0xDBFF) {
		return -1;
	}

	assert(utf16_valid_surrogate_high(lead));

	/* A high surrogate must be followed by a second code unit. */
	if (start + 1 == end) {
		return -1;
	}

	trail = from_le16(start + 1);

	/* Only a low surrogate completes the pair. */
	return utf16_valid_surrogate_low(trail) ? 2 : -1;
}
|
||||
|
||||
static inline uint32_t
|
||||
utf16_decode_surrogate_pair(uint32_t high, uint32_t low)
|
||||
{
|
||||
|
@ -275,11 +275,9 @@ write_hex_4(void *dest, uint16_t val)
|
||||
p[3] = hex[val & 0xF];
|
||||
}
|
||||
|
||||
static int
|
||||
write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
|
||||
static inline int
|
||||
write_codepoint(struct spdk_json_write_ctx *w, uint32_t codepoint)
|
||||
{
|
||||
const uint8_t *p = val;
|
||||
const uint8_t *end = val + len;
|
||||
static const uint8_t escapes[] = {
|
||||
['\b'] = 'b',
|
||||
['\f'] = 'f',
|
||||
@ -293,34 +291,10 @@ write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
|
||||
* (it is valid unescaped).
|
||||
*/
|
||||
};
|
||||
|
||||
if (emit(w, "\"", 1)) return fail(w);
|
||||
|
||||
while (p != end) {
|
||||
int codepoint_len;
|
||||
uint32_t codepoint;
|
||||
uint16_t high, low;
|
||||
char out[13];
|
||||
size_t out_len;
|
||||
|
||||
codepoint_len = utf8_valid(p, end);
|
||||
switch (codepoint_len) {
|
||||
case 1:
|
||||
codepoint = utf8_decode_unsafe_1(p);
|
||||
break;
|
||||
case 2:
|
||||
codepoint = utf8_decode_unsafe_2(p);
|
||||
break;
|
||||
case 3:
|
||||
codepoint = utf8_decode_unsafe_3(p);
|
||||
break;
|
||||
case 4:
|
||||
codepoint = utf8_decode_unsafe_4(p);
|
||||
break;
|
||||
default:
|
||||
return fail(w);
|
||||
}
|
||||
|
||||
if (codepoint < sizeof(escapes) && escapes[codepoint]) {
|
||||
out[0] = '\\';
|
||||
out[1] = escapes[codepoint];
|
||||
@ -348,7 +322,71 @@ write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
|
||||
out_len = 12;
|
||||
}
|
||||
|
||||
if (emit(w, out, out_len)) return fail(w);
|
||||
return emit(w, out, out_len);
|
||||
}
|
||||
|
||||
/*
 * Emit a UTF-8 byte sequence as a double-quoted JSON string (or object name).
 *
 * Each codepoint is validated with utf8_valid(), decoded, and handed to write_codepoint()
 * for output.
 *
 * \param w JSON write context.
 * \param val UTF-8 bytes; only the first len bytes are read, so no terminator is required.
 * \param len Length of val in bytes.
 * \return Result of emitting the closing quote on success, or the result of fail(w) if the
 * input fails validation or an output step reports an error.
 */
static int
write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
{
	/*
	 * char and uint8_t are distinct types, so convert explicitly instead of relying on an
	 * implicit conversion between incompatible pointer types.
	 */
	const uint8_t *p = (const uint8_t *)val;
	const uint8_t *end = p + len;

	if (emit(w, "\"", 1)) {
		return fail(w);
	}

	while (p != end) {
		int codepoint_len;
		uint32_t codepoint;

		codepoint_len = utf8_valid(p, end);
		switch (codepoint_len) {
		case 1:
			codepoint = utf8_decode_unsafe_1(p);
			break;
		case 2:
			codepoint = utf8_decode_unsafe_2(p);
			break;
		case 3:
			codepoint = utf8_decode_unsafe_3(p);
			break;
		case 4:
			codepoint = utf8_decode_unsafe_4(p);
			break;
		default:
			/* Any other length is treated as invalid input. */
			return fail(w);
		}

		if (write_codepoint(w, codepoint)) {
			return fail(w);
		}

		p += codepoint_len;
	}

	return emit(w, "\"", 1);
}
|
||||
|
||||
static int
|
||||
write_string_or_name_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len)
|
||||
{
|
||||
const uint16_t *p = val;
|
||||
const uint16_t *end = val + len;
|
||||
|
||||
if (emit(w, "\"", 1)) return fail(w);
|
||||
|
||||
while (p != end) {
|
||||
int codepoint_len;
|
||||
uint32_t codepoint;
|
||||
|
||||
codepoint_len = utf16le_valid(p, end);
|
||||
switch (codepoint_len) {
|
||||
case 1:
|
||||
codepoint = from_le16(&p[0]);
|
||||
break;
|
||||
case 2:
|
||||
codepoint = utf16_decode_surrogate_pair(from_le16(&p[0]), from_le16(&p[1]));
|
||||
break;
|
||||
default:
|
||||
return fail(w);
|
||||
}
|
||||
|
||||
if (write_codepoint(w, codepoint)) return fail(w);
|
||||
p += codepoint_len;
|
||||
}
|
||||
|
||||
@ -368,6 +406,26 @@ spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val)
|
||||
return spdk_json_write_string_raw(w, val, strlen(val));
|
||||
}
|
||||
|
||||
/*
 * Begin a new JSON value, then write len UTF-16LE code units from val as a quoted string.
 * Returns fail(w) if the value cannot be started; otherwise returns the result of the
 * string writer.
 */
int
spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len)
{
	if (begin_value(w) != 0) {
		return fail(w);
	}

	return write_string_or_name_utf16le(w, val, len);
}
|
||||
|
||||
/*
 * Write a null-terminated UTF-16LE string: measure it, then defer to the explicit-length
 * variant.
 */
int
spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val)
{
	size_t units = 0;

	/* Count code units up to, but not including, the terminating zero code unit. */
	while (val[units] != 0) {
		units++;
	}

	return spdk_json_write_string_utf16le_raw(w, val, units);
}
|
||||
|
||||
int
|
||||
spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt, ...)
|
||||
{
|
||||
|
@ -86,6 +86,18 @@ write_cb(void *cb_ctx, const void *data, size_t size)
|
||||
#define STR_FAIL(in) \
|
||||
BEGIN(); VAL_STRING_FAIL(in); END_FAIL()
|
||||
|
||||
/*
 * Write the array str as a UTF-16LE string value and expect success.
 * The length passed excludes the final 16-bit code unit of str (the terminator).
 */
#define VAL_STRING_UTF16LE(str) \
	CU_ASSERT(spdk_json_write_string_utf16le_raw(w, (const uint16_t *)(str), sizeof(str) / sizeof(uint16_t) - 1) == 0)

/* Same as VAL_STRING_UTF16LE, but expect the write to be rejected with a negative return. */
#define VAL_STRING_UTF16LE_FAIL(str) \
	CU_ASSERT(spdk_json_write_string_utf16le_raw(w, (const uint16_t *)(str), sizeof(str) / sizeof(uint16_t) - 1) < 0)

/* Full round trip: writing in must succeed and produce out wrapped in double quotes. */
#define STR_UTF16LE_PASS(in, out) \
	BEGIN(); VAL_STRING_UTF16LE(in); END("\"" out "\"")

/* Full round trip for input that must be rejected. */
#define STR_UTF16LE_FAIL(in) \
	BEGIN(); VAL_STRING_UTF16LE_FAIL(in); END_FAIL()
|
||||
|
||||
#define VAL_NAME(name) \
|
||||
CU_ASSERT(spdk_json_write_name_raw(w, name, sizeof(name) - 1) == 0)
|
||||
|
||||
@ -248,6 +260,37 @@ test_write_string_escapes(void)
|
||||
STR_FAIL("\xED\xA1\x8C\xED\xBE\xB4"); /* U+233B4 (invalid surrogate pair encoding) */
|
||||
}
|
||||
|
||||
/* Exercise the UTF-16LE string writer with valid BMP and surrogate-pair input and with malformed surrogates. */
static void
test_write_string_utf16le(void)
{
	struct spdk_json_write_ctx *w;

	/* Every code unit is a BMP codepoint; U+FE15 is expected as a \u escape */
	STR_UTF16LE_PASS(((uint8_t[]) {
		'H', 0,
		'e', 0,
		'l', 0,
		'l', 0,
		'o', 0,
		0x15, 0xFE,
		0, 0
	}), "Hello\\uFE15");

	/* D834 DD1E is a well-formed surrogate pair */
	STR_UTF16LE_PASS(((uint8_t[]) {
		'H', 0,
		'i', 0,
		0x34, 0xD8,
		0x1E, 0xDD,
		'!', 0,
		0, 0
	}), "Hi\\uD834\\uDD1E!");

	/* Lone high surrogate U+D800 with nothing after it */
	STR_UTF16LE_FAIL(((uint8_t[]) {
		0x00, 0xD8,
		0, 0
	}));

	/* Low surrogate in leading position: U+DC00 U+DC00 */
	STR_UTF16LE_FAIL(((uint8_t[]) {
		0x00, 0xDC,
		0x00, 0xDC,
		0, 0
	}));

	/* High surrogate followed by a second high surrogate: U+D800 U+D800 */
	STR_UTF16LE_FAIL(((uint8_t[]) {
		0x00, 0xD8,
		0x00, 0xD8,
		0, 0
	}));
}
|
||||
|
||||
static void
|
||||
test_write_number_int32(void)
|
||||
{
|
||||
@ -618,6 +661,7 @@ int main(int argc, char **argv)
|
||||
CU_add_test(suite, "write_literal", test_write_literal) == NULL ||
|
||||
CU_add_test(suite, "write_string_simple", test_write_string_simple) == NULL ||
|
||||
CU_add_test(suite, "write_string_escapes", test_write_string_escapes) == NULL ||
|
||||
CU_add_test(suite, "write_string_utf16le", test_write_string_utf16le) == NULL ||
|
||||
CU_add_test(suite, "write_number_int32", test_write_number_int32) == NULL ||
|
||||
CU_add_test(suite, "write_number_uint32", test_write_number_uint32) == NULL ||
|
||||
CU_add_test(suite, "write_array", test_write_array) == NULL ||
|
||||
|
Loading…
Reference in New Issue
Block a user