json: add function to write UTF-16LE strings

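spdk_json_write_string_utf16le() writes a null-terminated UTF-16LE string to a
JSON write context; spdk_json_write_string_utf16le_raw() does the same for a
string whose length is given explicitly in 16-bit code units.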

Change-Id: I413ffb8a3dee6e1b44ec96ce2415fd1b9c36320f
Signed-off-by: Daniel Verkamp <daniel.verkamp@intel.com>
Reviewed-on: https://review.gerrithub.io/368625
Tested-by: SPDK Automated Test System <sys_sgsw@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Daniel Verkamp 2017-07-07 15:01:12 -07:00 committed by Jim Harris
parent 229f64942d
commit 429672d31b
4 changed files with 200 additions and 33 deletions

View File

@ -198,6 +198,27 @@ int spdk_json_write_int64(struct spdk_json_write_ctx *w, int64_t val);
int spdk_json_write_uint64(struct spdk_json_write_ctx *w, uint64_t val);
int spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val);
int spdk_json_write_string_raw(struct spdk_json_write_ctx *w, const char *val, size_t len);
/**
 * Write a null-terminated UTF-16LE string as a JSON string value.
 *
 * The length is determined by scanning for the first zero 16-bit code unit; the
 * string is then written exactly as by spdk_json_write_string_utf16le_raw().
 *
 * \param w JSON write context.
 * \param val UTF-16LE string; must be terminated by a zero 16-bit code unit.
 * \return 0 on success or negative on failure. Input that is not well-formed
 * UTF-16 (for example an unpaired surrogate) is treated as a failure.
 */
int spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val);
/**
 * Write a UTF-16LE string of explicit length as a JSON string value.
 *
 * Each codepoint is validated before it is written. A high surrogate must be
 * immediately followed by a low surrogate, and a low surrogate may not appear
 * on its own.
 *
 * \param w JSON write context.
 * \param val UTF-16LE string; may contain embedded null characters.
 * \param len Length of val in 16-bit code units (i.e. size of string in bytes divided by 2).
 * \return 0 on success or negative on failure. Input that is not well-formed
 * UTF-16 is treated as a failure.
 */
int spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val,
size_t len);
int spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt,
...) __attribute__((__format__(__printf__, 2, 3)));
int spdk_json_write_array_begin(struct spdk_json_write_ctx *w);

View File

@ -36,6 +36,7 @@
#include "spdk/stdinc.h"
#include "spdk/endian.h"
#include "spdk/json.h"
#include "spdk/likely.h"
#include "spdk/string.h"
@ -251,6 +252,49 @@ utf16_valid_surrogate_low(uint32_t val)
return val >= 0xDC00 && val <= 0xDFFF;
}
/*
 * Validate the UTF-16LE encoding of the single codepoint at the front of
 * the range [start, end).
 *
 * \param start First 16-bit code unit to examine (stored little-endian).
 * \param end One past the last available code unit.
 * \return Number of 16-bit code units the codepoint occupies (1 for a BMP
 * codepoint, 2 for a surrogate pair), 0 if the range is empty, or negative
 * if the sequence is invalid.
 */
static inline int
utf16le_valid(const uint16_t *start, const uint16_t *end)
{
	uint16_t first, second;

	if (start == end) {
		/* Nothing to look at */
		return 0;
	}

	first = from_le16(start);
	if (first < 0xD800 || first > 0xDFFF) {
		/* Outside the surrogate range: a complete BMP codepoint */
		return 1;
	}

	if (first >= 0xDC00) {
		/* A low surrogate may not start a sequence */
		return -1;
	}

	/* Only 0xD800..0xDBFF can reach this point */
	assert(utf16_valid_surrogate_high(first));

	if (start + 1 == end) {
		/* High surrogate with no code unit following it */
		return -1;
	}

	second = from_le16(start + 1);

	/* A high surrogate is only valid when a low surrogate follows it */
	return utf16_valid_surrogate_low(second) ? 2 : -1;
}
static inline uint32_t
utf16_decode_surrogate_pair(uint32_t high, uint32_t low)
{

View File

@ -275,11 +275,9 @@ write_hex_4(void *dest, uint16_t val)
p[3] = hex[val & 0xF];
}
static int
write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
static inline int
write_codepoint(struct spdk_json_write_ctx *w, uint32_t codepoint)
{
const uint8_t *p = val;
const uint8_t *end = val + len;
static const uint8_t escapes[] = {
['\b'] = 'b',
['\f'] = 'f',
@ -293,34 +291,10 @@ write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
* (it is valid unescaped).
*/
};
if (emit(w, "\"", 1)) return fail(w);
while (p != end) {
int codepoint_len;
uint32_t codepoint;
uint16_t high, low;
char out[13];
size_t out_len;
codepoint_len = utf8_valid(p, end);
switch (codepoint_len) {
case 1:
codepoint = utf8_decode_unsafe_1(p);
break;
case 2:
codepoint = utf8_decode_unsafe_2(p);
break;
case 3:
codepoint = utf8_decode_unsafe_3(p);
break;
case 4:
codepoint = utf8_decode_unsafe_4(p);
break;
default:
return fail(w);
}
if (codepoint < sizeof(escapes) && escapes[codepoint]) {
out[0] = '\\';
out[1] = escapes[codepoint];
@ -348,7 +322,71 @@ write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
out_len = 12;
}
if (emit(w, out, out_len)) return fail(w);
return emit(w, out, out_len);
}
/*
 * Emit a UTF-8 buffer as a double-quoted JSON string.
 *
 * The input is consumed one codepoint at a time: each codepoint is validated
 * with utf8_valid(), decoded, and handed to write_codepoint().
 *
 * \param w JSON write context.
 * \param val UTF-8 bytes to write; does not need to be null terminated.
 * \param len Number of bytes in val.
 * \return Result of emitting the closing quote on success; fail(w) if the
 * input is not valid UTF-8 or if any output step reports an error.
 */
static int
write_string_or_name(struct spdk_json_write_ctx *w, const char *val, size_t len)
{
	const uint8_t *cur = (const uint8_t *)val;
	const uint8_t *const limit = cur + len;

	/* Opening quote */
	if (emit(w, "\"", 1)) {
		return fail(w);
	}

	while (cur != limit) {
		uint32_t cp;
		int nbytes = utf8_valid(cur, limit);

		/* utf8_valid() reports the encoded length; anything outside 1..4 is rejected */
		if (nbytes == 1) {
			cp = utf8_decode_unsafe_1(cur);
		} else if (nbytes == 2) {
			cp = utf8_decode_unsafe_2(cur);
		} else if (nbytes == 3) {
			cp = utf8_decode_unsafe_3(cur);
		} else if (nbytes == 4) {
			cp = utf8_decode_unsafe_4(cur);
		} else {
			return fail(w);
		}

		if (write_codepoint(w, cp)) {
			return fail(w);
		}

		cur += nbytes;
	}

	/* Closing quote */
	return emit(w, "\"", 1);
}
static int
write_string_or_name_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len)
{
const uint16_t *p = val;
const uint16_t *end = val + len;
if (emit(w, "\"", 1)) return fail(w);
while (p != end) {
int codepoint_len;
uint32_t codepoint;
codepoint_len = utf16le_valid(p, end);
switch (codepoint_len) {
case 1:
codepoint = from_le16(&p[0]);
break;
case 2:
codepoint = utf16_decode_surrogate_pair(from_le16(&p[0]), from_le16(&p[1]));
break;
default:
return fail(w);
}
if (write_codepoint(w, codepoint)) return fail(w);
p += codepoint_len;
}
@ -368,6 +406,26 @@ spdk_json_write_string(struct spdk_json_write_ctx *w, const char *val)
return spdk_json_write_string_raw(w, val, strlen(val));
}
/*
 * Write len 16-bit code units of UTF-16LE text from val as a JSON string value.
 *
 * begin_value() is called first; if it reports an error the context is failed
 * and nothing is written.
 */
int
spdk_json_write_string_utf16le_raw(struct spdk_json_write_ctx *w, const uint16_t *val, size_t len)
{
	if (begin_value(w) != 0) {
		return fail(w);
	}

	return write_string_or_name_utf16le(w, val, len);
}
/*
 * Write a null-terminated UTF-16LE string as a JSON string value.
 *
 * Counts code units up to (not including) the first zero code unit and
 * forwards to spdk_json_write_string_utf16le_raw().
 */
int
spdk_json_write_string_utf16le(struct spdk_json_write_ctx *w, const uint16_t *val)
{
	size_t units = 0;

	/* A zero code unit is zero in either byte order, so no byte swap is needed here */
	while (val[units] != 0) {
		units++;
	}

	return spdk_json_write_string_utf16le_raw(w, val, units);
}
int
spdk_json_write_string_fmt(struct spdk_json_write_ctx *w, const char *fmt, ...)
{

View File

@ -86,6 +86,18 @@ write_cb(void *cb_ctx, const void *data, size_t size)
#define STR_FAIL(in) \
BEGIN(); VAL_STRING_FAIL(in); END_FAIL()
/*
 * Assert that writing the UTF-16LE buffer `str` as a string value succeeds.
 *
 * `str` must be an array (not a pointer) whose last 16-bit code unit is a null
 * terminator: the length passed to the writer is derived with sizeof and
 * excludes that terminator. A write context named `w` must be in scope.
 */
#define VAL_STRING_UTF16LE(str) \
	CU_ASSERT(spdk_json_write_string_utf16le_raw(w, (const uint16_t *)(str), sizeof(str) / sizeof(uint16_t) - 1) == 0)

/* Same as VAL_STRING_UTF16LE, but asserts that the write fails (negative return). */
#define VAL_STRING_UTF16LE_FAIL(str) \
	CU_ASSERT(spdk_json_write_string_utf16le_raw(w, (const uint16_t *)(str), sizeof(str) / sizeof(uint16_t) - 1) < 0)
/*
 * Complete test case: write the UTF-16LE array `in` and expect the output to be
 * `out` wrapped in double quotes. `out` must be a string literal because it is
 * pasted between the two quote literals by string concatenation.
 * NOTE(review): BEGIN()/END() are defined elsewhere in this file; presumably
 * they set up the write context and compare the captured output - confirm.
 */
#define STR_UTF16LE_PASS(in, out) \
BEGIN(); VAL_STRING_UTF16LE(in); END("\"" out "\"")
/*
 * Complete test case: write the UTF-16LE array `in` and expect the write to fail.
 * NOTE(review): END_FAIL() is defined elsewhere in this file - confirm what it checks.
 */
#define STR_UTF16LE_FAIL(in) \
BEGIN(); VAL_STRING_UTF16LE_FAIL(in); END_FAIL()
#define VAL_NAME(name) \
CU_ASSERT(spdk_json_write_name_raw(w, name, sizeof(name) - 1) == 0)
@ -248,6 +260,37 @@ test_write_string_escapes(void)
STR_FAIL("\xED\xA1\x8C\xED\xBE\xB4"); /* U+233B4 (invalid surrogate pair encoding) */
}
/*
 * Unit test for writing UTF-16LE strings.
 *
 * Each input is a byte array holding little-endian 16-bit code units followed
 * by a two-byte null terminator (the test macros drop the terminator when
 * computing the length). The compound literals are wrapped in an extra pair of
 * parentheses so that the commas inside the initializer are not treated as
 * macro argument separators.
 */
static void
test_write_string_utf16le(void)
{
struct spdk_json_write_ctx *w;
/* All characters in BMP: ASCII passes through, U+FE15 is expected as a \uFE15 escape */
STR_UTF16LE_PASS(((uint8_t[]) {
'H', 0, 'e', 0, 'l', 0, 'l', 0, 'o', 0, 0x15, 0xFE, 0, 0
}), "Hello\\uFE15");
/* Surrogate pair D834 DD1E (U+1D11E): expected as two \u escapes, one per surrogate */
STR_UTF16LE_PASS(((uint8_t[]) {
'H', 0, 'i', 0, 0x34, 0xD8, 0x1E, 0xDD, '!', 0, 0, 0
}), "Hi\\uD834\\uDD1E!");
/* Valid high surrogate at the end of the input with no low surrogate after it */
STR_UTF16LE_FAIL(((uint8_t[]) {
0x00, 0xD8, 0, 0 /* U+D800 */
}));
/* Low surrogate as the first code unit of a sequence is invalid */
STR_UTF16LE_FAIL(((uint8_t[]) {
0x00, 0xDC, 0x00, 0xDC, 0, 0 /* U+DC00 U+DC00 */
}));
/* Valid high surrogate followed by another high surrogate (invalid) */
STR_UTF16LE_FAIL(((uint8_t[]) {
0x00, 0xD8, 0x00, 0xD8, 0, 0 /* U+D800 U+D800 */
}));
}
static void
test_write_number_int32(void)
{
@ -618,6 +661,7 @@ int main(int argc, char **argv)
CU_add_test(suite, "write_literal", test_write_literal) == NULL ||
CU_add_test(suite, "write_string_simple", test_write_string_simple) == NULL ||
CU_add_test(suite, "write_string_escapes", test_write_string_escapes) == NULL ||
CU_add_test(suite, "write_string_utf16le", test_write_string_utf16le) == NULL ||
CU_add_test(suite, "write_number_int32", test_write_number_int32) == NULL ||
CU_add_test(suite, "write_number_uint32", test_write_number_uint32) == NULL ||
CU_add_test(suite, "write_array", test_write_array) == NULL ||