[v2,1/5] telemetry: escape special char when tel string
Checks
Commit Message
This patch supports escape special characters (including: \",\\,/,\b,
/f,/n,/r,/t) when telemetry string.
This patch is used to support telemetry xxx-dump commands which the
string may include special characters.
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
---
lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
1 file changed, 93 insertions(+), 3 deletions(-)
Comments
> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> Sent: Friday, 17 June 2022 11.46
>
> This patch supports escape special characters (including: \",\\,/,\b,
> /f,/n,/r,/t) when telemetry string.
> This patch is used to support telemetry xxx-dump commands which the
> string may include special characters.
>
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> ---
> lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
> 1 file changed, 93 insertions(+), 3 deletions(-)
>
> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> index c6fd03a5ab..0f762f633e 100644
> --- a/lib/telemetry/telemetry.c
> +++ b/lib/telemetry/telemetry.c
> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
> char *out_buf, size_t buf_len)
> return used;
> }
>
> +static bool
> +json_is_special_char(char ch)
> +{
> + static unsigned char is_spec[256] = { 0 };
> + static bool init_once;
> +
> + if (!init_once) {
> + is_spec['\"'] = 1;
> + is_spec['\\'] = 1;
> + is_spec['/'] = 1;
> + is_spec['\b'] = 1;
> + is_spec['\f'] = 1;
> + is_spec['\n'] = 1;
> + is_spec['\r'] = 1;
> + is_spec['\t'] = 1;
> + init_once = true;
> + }
> +
> + return (bool)is_spec[(unsigned char)ch];
> +}
Here's a suggestion for simplifying the code:
Remove json_is_special_char(), and update json_escape_special_char() and json_format_string() as follows:
> +
> +static size_t
> +json_escape_special_char(char *buf, const char ch)
Consider making this function inline.
> +{
> + size_t used = 0;
> +
> + switch (ch) {
> + case '\"':
> + buf[used++] = '\\';
> + buf[used++] = '\"';
> + break;
> + case '\\':
> + buf[used++] = '\\';
> + buf[used++] = '\\';
> + break;
> + case '/':
> + buf[used++] = '\\';
> + buf[used++] = '/';
> + break;
> + case '\b':
> + buf[used++] = '\\';
> + buf[used++] = 'b';
> + break;
> + case '\f':
> + buf[used++] = '\\';
> + buf[used++] = 'f';
> + break;
> + case '\n':
> + buf[used++] = '\\';
> + buf[used++] = 'n';
> + break;
> + case '\r':
> + buf[used++] = '\\';
> + buf[used++] = 'r';
> + break;
> + case '\t':
> + buf[used++] = '\\';
> + buf[used++] = 't';
> + break;
> + default:
Handle non-escaped characters in the default case here instead:
+ buf[used++] = ch;
> + break;
> + }
> +
> + return used;
> +}
> +
> +static size_t
> +json_format_string(char *buf, size_t len, const char *str)
> +{
> + size_t used = 0;
> +
> + while (*str) {
> + if (unlikely(len < used + 2)) {
> + TMTY_LOG(WARNING, "Insufficient buffer when json
> format string\n");
> + break;
> + }
> +
-- replace:
> + if (json_is_special_char(*str))
> + used += json_escape_special_char(buf + used, *str);
> + else
> + buf[used++] = *str;
> +
> + str++;
-- by:
+ used += json_escape_special_char(buf + used, *str++);
--
End of suggestion. Feel free to use it or not. :-)
> + }
> +
> + return used;
> +}
> +
> static void
> output_json(const char *cmd, const struct rte_tel_data *d, int s)
> {
> @@ -232,9 +320,11 @@ output_json(const char *cmd, const struct
> rte_tel_data *d, int s)
> MAX_CMD_LEN, cmd ? cmd : "none");
> break;
> case RTE_TEL_STRING:
> - used = snprintf(out_buf, sizeof(out_buf),
> "{\"%.*s\":\"%.*s\"}",
> - MAX_CMD_LEN, cmd,
> - RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str);
> + used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"",
> + MAX_CMD_LEN, cmd);
> + used += json_format_string(out_buf + used,
> + sizeof(out_buf) - used - 3, d->data.str);
> + used += snprintf(out_buf + used, sizeof(out_buf) - used,
> "\"}");
I looked for missing 0-termination in json_format_string(), but that is not required due to the immediately following snprintf().
> break;
> case RTE_TEL_DICT:
> prefix_used = snprintf(out_buf, sizeof(out_buf),
> "{\"%.*s\":",
> --
> 2.33.0
>
If you want to make it generic, these four cases in output_json() also need to JSON encode the strings:
case RTE_TEL_DICT:
case RTE_TEL_STRING_VAL: ...
case RTE_TEL_CONTAINER: ... (strings only)
case RTE_TEL_ARRAY_STRING:
if (d->type == RTE_TEL_ARRAY_STRING) ...
else if (d->type == RTE_TEL_ARRAY_CONTAINER) ... (strings only)
However, JSON encoding of strings inside arrays and containers is not required for the dump purposes addressed by this patch series, so I consider this patch complete without it. No need to add "feature creep" to this series.
Reviewed-by: Morten Brørup <mb@smartsharesystems.com>
On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > Sent: Friday, 17 June 2022 11.46
> >
> > This patch supports escape special characters (including: \",\\,/,\b,
> > /f,/n,/r,/t) when telemetry string.
> > This patch is used to support telemetry xxx-dump commands which the
> > string may include special characters.
> >
> > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > ---
> > lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 93 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> > index c6fd03a5ab..0f762f633e 100644
> > --- a/lib/telemetry/telemetry.c
> > +++ b/lib/telemetry/telemetry.c
> > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
> > char *out_buf, size_t buf_len)
> > return used;
> > }
> >
> > +static bool
> > +json_is_special_char(char ch)
> > +{
> > + static unsigned char is_spec[256] = { 0 };
> > + static bool init_once;
> > +
> > + if (!init_once) {
> > + is_spec['\"'] = 1;
> > + is_spec['\\'] = 1;
> > + is_spec['/'] = 1;
> > + is_spec['\b'] = 1;
> > + is_spec['\f'] = 1;
> > + is_spec['\n'] = 1;
> > + is_spec['\r'] = 1;
> > + is_spec['\t'] = 1;
> > + init_once = true;
> > + }
> > +
> > + return (bool)is_spec[(unsigned char)ch];
> > +}
According to the json spec at [1], the characters that need to be escaped
are:
a) any characters <0x20
b) inverted commas/quote character \"
c) the "reverse solidus character", better known to you and I as the
back-slash.
Therefore, I think this table generation could be simplified, but also
expanded using this. For completeness we should also see about handling all
control characters if they are encountered.
[1] https://www.rfc-editor.org/rfc/rfc8259.txt
/Bruce
On Fri, Jun 17, 2022 at 05:46:20PM +0800, Chengwen Feng wrote:
> This patch supports escape special characters (including: \",\\,/,\b,
> /f,/n,/r,/t) when telemetry string.
> This patch is used to support telemetry xxx-dump commands which the
> string may include special characters.
>
> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> ---
> lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
> 1 file changed, 93 insertions(+), 3 deletions(-)
>
> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> index c6fd03a5ab..0f762f633e 100644
> --- a/lib/telemetry/telemetry.c
> +++ b/lib/telemetry/telemetry.c
> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, char *out_buf, size_t buf_len)
> return used;
> }
>
> +static bool
> +json_is_special_char(char ch)
> +{
> + static unsigned char is_spec[256] = { 0 };
> + static bool init_once;
> +
> + if (!init_once) {
> + is_spec['\"'] = 1;
> + is_spec['\\'] = 1;
> + is_spec['/'] = 1;
> + is_spec['\b'] = 1;
> + is_spec['\f'] = 1;
> + is_spec['\n'] = 1;
> + is_spec['\r'] = 1;
> + is_spec['\t'] = 1;
> + init_once = true;
> + }
> +
> + return (bool)is_spec[(unsigned char)ch];
> +}
> +
> +static size_t
> +json_escape_special_char(char *buf, const char ch)
> +{
> + size_t used = 0;
> +
> + switch (ch) {
> + case '\"':
> + buf[used++] = '\\';
> + buf[used++] = '\"';
> + break;
> + case '\\':
> + buf[used++] = '\\';
> + buf[used++] = '\\';
> + break;
> + case '/':
> + buf[used++] = '\\';
> + buf[used++] = '/';
> + break;
> + case '\b':
> + buf[used++] = '\\';
> + buf[used++] = 'b';
> + break;
> + case '\f':
> + buf[used++] = '\\';
> + buf[used++] = 'f';
> + break;
> + case '\n':
> + buf[used++] = '\\';
> + buf[used++] = 'n';
> + break;
> + case '\r':
> + buf[used++] = '\\';
> + buf[used++] = 'r';
> + break;
> + case '\t':
> + buf[used++] = '\\';
> + buf[used++] = 't';
> + break;
> + default:
> + break;
> + }
> +
> + return used;
> +}
> +
> +static size_t
> +json_format_string(char *buf, size_t len, const char *str)
> +{
> + size_t used = 0;
> +
> + while (*str) {
> + if (unlikely(len < used + 2)) {
> + TMTY_LOG(WARNING, "Insufficient buffer when json format string\n");
> + break;
> + }
> +
> + if (json_is_special_char(*str))
> + used += json_escape_special_char(buf + used, *str);
> + else
> + buf[used++] = *str;
> +
> + str++;
> + }
> +
> + return used;
> +}
> +
> static void
> output_json(const char *cmd, const struct rte_tel_data *d, int s)
> {
> @@ -232,9 +320,11 @@ output_json(const char *cmd, const struct rte_tel_data *d, int s)
> MAX_CMD_LEN, cmd ? cmd : "none");
> break;
> case RTE_TEL_STRING:
> - used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"%.*s\"}",
> - MAX_CMD_LEN, cmd,
> - RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str);
> + used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"",
> + MAX_CMD_LEN, cmd);
> + used += json_format_string(out_buf + used,
> + sizeof(out_buf) - used - 3, d->data.str);
> + used += snprintf(out_buf + used, sizeof(out_buf) - used, "\"}");
> break;
> case RTE_TEL_DICT:
> prefix_used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":",
> --
I think it might be worthwhile to write a general json_str_printf function
to do the snprintf and the escaping in one go. Then that might be more able
to be used in other places where we output strings.
/Bruce
On Fri, 17 Jun 2022 12:25:04 +0100
Bruce Richardson <bruce.richardson@intel.com> wrote:
> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > > From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > > Sent: Friday, 17 June 2022 11.46
> > >
> > > This patch supports escape special characters (including: \",\\,/,\b,
> > > /f,/n,/r,/t) when telemetry string.
> > > This patch is used to support telemetry xxx-dump commands which the
> > > string may include special characters.
> > >
> > > Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > ---
> > > lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
> > > 1 file changed, 93 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> > > index c6fd03a5ab..0f762f633e 100644
> > > --- a/lib/telemetry/telemetry.c
> > > +++ b/lib/telemetry/telemetry.c
> > > @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
> > > char *out_buf, size_t buf_len)
> > > return used;
> > > }
> > >
> > > +static bool
> > > +json_is_special_char(char ch)
> > > +{
> > > + static unsigned char is_spec[256] = { 0 };
> > > + static bool init_once;
> > > +
> > > + if (!init_once) {
> > > + is_spec['\"'] = 1;
> > > + is_spec['\\'] = 1;
> > > + is_spec['/'] = 1;
> > > + is_spec['\b'] = 1;
> > > + is_spec['\f'] = 1;
> > > + is_spec['\n'] = 1;
> > > + is_spec['\r'] = 1;
> > > + is_spec['\t'] = 1;
> > > + init_once = true;
> > > + }
> > > +
> > > + return (bool)is_spec[(unsigned char)ch];
> > > +}
>
> According to the json spec at [1], the characters that need to be escaped
> are:
> a) any characters <0x20
> b) inverted commas/quote character \"
> c) the "reverse solidus character", better known to you and I as the
> back-slash.
>
> Therefore, I think this table generation could be simplified, but also
> expanded using this. For completeness we should also see about handling all
> control characters if they are encountered.
>
> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
>
> /Bruce
Since it is trivial could be initializer?
static const uint8_t is_spec[256] = {
[0 ... 0x20] = 1,
['\"' ] = 1,
['\\' ] = 1,
['/'] = 1,
etc
Or we could change the telemetry API to disallow control characters?
On 2022/6/18 1:05, Stephen Hemminger wrote:
> On Fri, 17 Jun 2022 12:25:04 +0100
> Bruce Richardson <bruce.richardson@intel.com> wrote:
>
>> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
>>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
>>>> Sent: Friday, 17 June 2022 11.46
>>>>
>>>> This patch supports escape special characters (including: \",\\,/,\b,
>>>> /f,/n,/r,/t) when telemetry string.
>>>> This patch is used to support telemetry xxx-dump commands which the
>>>> string may include special characters.
>>>>
>>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
>>>> ---
>>>> lib/telemetry/telemetry.c | 96 +++++++++++++++++++++++++++++++++++++--
>>>> 1 file changed, 93 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
>>>> index c6fd03a5ab..0f762f633e 100644
>>>> --- a/lib/telemetry/telemetry.c
>>>> +++ b/lib/telemetry/telemetry.c
>>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d,
>>>> char *out_buf, size_t buf_len)
>>>> return used;
>>>> }
>>>>
>>>> +static bool
>>>> +json_is_special_char(char ch)
>>>> +{
>>>> + static unsigned char is_spec[256] = { 0 };
>>>> + static bool init_once;
>>>> +
>>>> + if (!init_once) {
>>>> + is_spec['\"'] = 1;
>>>> + is_spec['\\'] = 1;
>>>> + is_spec['/'] = 1;
>>>> + is_spec['\b'] = 1;
>>>> + is_spec['\f'] = 1;
>>>> + is_spec['\n'] = 1;
>>>> + is_spec['\r'] = 1;
>>>> + is_spec['\t'] = 1;
>>>> + init_once = true;
>>>> + }
>>>> +
>>>> + return (bool)is_spec[(unsigned char)ch];
>>>> +}
>>
>> According to the json spec at [1], the characters that need to be escaped
>> are:
>> a) any characters <0x20
>> b) inverted commas/quote character \"
>> c) the "reverse solidus character", better known to you and I as the
>> back-slash.
>>
>> Therefore, I think this table generation could be simplified, but also
>> expanded using this. For completeness we should also see about handling all
>> control characters if they are encountered.
>>
>> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
>>
>> /Bruce
>
> Since it is trivial could be initializer?
>
> static const uint8_t is_spec[256] = {
> [0 ... 0x20] = 1,
> ['\"' ] = 1,
> ['\\' ] = 1,
> ['/'] = 1,
>
> etc
>
> Or we could change the telemetry API to disallow control characters?
I was thinking about converting 0~0x20, but I don't think there's a scenario.
I prefer change the telemetry API to disallow control characters, and this may not
be a violation of the ABI, if yes, the dpdk-telemetry.py will returns an error.
So I think we could add declaring and checking functions to make sure telemetry string
do not allow control characters.
>
>
> .
>
+CC: Ciara Power, Telemetry library maintainer
> From: fengchengwen [mailto:fengchengwen@huawei.com]
> Sent: Saturday, 18 June 2022 05.52
>
> On 2022/6/18 1:05, Stephen Hemminger wrote:
> > On Fri, 17 Jun 2022 12:25:04 +0100
> > Bruce Richardson <bruce.richardson@intel.com> wrote:
> >
> >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> >>>> Sent: Friday, 17 June 2022 11.46
> >>>>
> >>>> This patch supports escape special characters (including:
> \",\\,/,\b,
> >>>> /f,/n,/r,/t) when telemetry string.
> >>>> This patch is used to support telemetry xxx-dump commands which
> the
> >>>> string may include special characters.
> >>>>
> >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> >>>> ---
> >>>> lib/telemetry/telemetry.c | 96
> +++++++++++++++++++++++++++++++++++++--
> >>>> 1 file changed, 93 insertions(+), 3 deletions(-)
> >>>>
> >>>> diff --git a/lib/telemetry/telemetry.c b/lib/telemetry/telemetry.c
> >>>> index c6fd03a5ab..0f762f633e 100644
> >>>> --- a/lib/telemetry/telemetry.c
> >>>> +++ b/lib/telemetry/telemetry.c
> >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> *d,
> >>>> char *out_buf, size_t buf_len)
> >>>> return used;
> >>>> }
> >>>>
> >>>> +static bool
> >>>> +json_is_special_char(char ch)
> >>>> +{
> >>>> + static unsigned char is_spec[256] = { 0 };
> >>>> + static bool init_once;
> >>>> +
> >>>> + if (!init_once) {
> >>>> + is_spec['\"'] = 1;
> >>>> + is_spec['\\'] = 1;
> >>>> + is_spec['/'] = 1;
> >>>> + is_spec['\b'] = 1;
> >>>> + is_spec['\f'] = 1;
> >>>> + is_spec['\n'] = 1;
> >>>> + is_spec['\r'] = 1;
> >>>> + is_spec['\t'] = 1;
> >>>> + init_once = true;
> >>>> + }
> >>>> +
> >>>> + return (bool)is_spec[(unsigned char)ch];
> >>>> +}
> >>
> >> According to the json spec at [1], the characters that need to be
> escaped
> >> are:
> >> a) any characters <0x20
> >> b) inverted commas/quote character \"
> >> c) the "reverse solidus character", better known to you and I as the
> >> back-slash.
> >>
> >> Therefore, I think this table generation could be simplified, but
> also
> >> expanded using this. For completeness we should also see about
> handling all
> >> control characters if they are encountered.
> >>
> >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> >>
> >> /Bruce
> >
> > Since it is trivial could be initializer?
> >
> > static const uint8_t is_spec[256] = {
> > [0 ... 0x20] = 1,
> > ['\"' ] = 1,
> > ['\\' ] = 1,
> > ['/'] = 1,
> >
> > etc
> >
> > Or we could change the telemetry API to disallow control characters?
>
> I was thinking about converting 0~0x20, but I don't think there's a
> scenario.
>
> I prefer change the telemetry API to disallow control characters, and
> this may not
> be a violation of the ABI, if yes, the dpdk-telemetry.py will returns
> an error.
I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.
So we need to define exactly what the STRING type contains.
I hope we can all agree that control characters should be disallowed.
The more complicated question is: Do we want to use the ASCII character set only, or do we want to use UTF-8 encoded Unicode?
Personally, think UTF-8 encoded Unicode is more future proof, and would vote for that.
But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce another data type for UTF-8 strings.
UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many SNMP MIBs.
>
> So I think we could add declaring and checking functions to make sure
> telemetry string
> do not allow control characters.
Input validation (when storing a string in the telemetry database) has a performance cost, so it could be a compile time debug option, like the memory cookies and mbuf integrity checks. Just a thought.
Hi folks,
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Saturday 18 June 2022 10:59
> To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger
> <stephen@networkplumber.org>; Richardson, Bruce
> <bruce.richardson@intel.com>
> Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin
> <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru;
> jerinj@marvell.com; sachin.saxena@oss.nxp.com;
> hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara
> <ciara.power@intel.com>
> Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string
>
> +CC: Ciara Power, Telemetry library maintainer
>
> > From: fengchengwen [mailto:fengchengwen@huawei.com]
> > Sent: Saturday, 18 June 2022 05.52
> >
> > On 2022/6/18 1:05, Stephen Hemminger wrote:
> > > On Fri, 17 Jun 2022 12:25:04 +0100
> > > Bruce Richardson <bruce.richardson@intel.com> wrote:
> > >
> > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > >>>> Sent: Friday, 17 June 2022 11.46
> > >>>>
> > >>>> This patch supports escape special characters (including:
> > \",\\,/,\b,
> > >>>> /f,/n,/r,/t) when telemetry string.
> > >>>> This patch is used to support telemetry xxx-dump commands which
> > the
> > >>>> string may include special characters.
> > >>>>
> > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > >>>> ---
> > >>>> lib/telemetry/telemetry.c | 96
> > +++++++++++++++++++++++++++++++++++++--
> > >>>> 1 file changed, 93 insertions(+), 3 deletions(-)
> > >>>>
> > >>>> diff --git a/lib/telemetry/telemetry.c
> > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644
> > >>>> --- a/lib/telemetry/telemetry.c
> > >>>> +++ b/lib/telemetry/telemetry.c
> > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> > *d,
> > >>>> char *out_buf, size_t buf_len)
> > >>>> return used;
> > >>>> }
> > >>>>
> > >>>> +static bool
> > >>>> +json_is_special_char(char ch)
> > >>>> +{
> > >>>> + static unsigned char is_spec[256] = { 0 };
> > >>>> + static bool init_once;
> > >>>> +
> > >>>> + if (!init_once) {
> > >>>> + is_spec['\"'] = 1;
> > >>>> + is_spec['\\'] = 1;
> > >>>> + is_spec['/'] = 1;
> > >>>> + is_spec['\b'] = 1;
> > >>>> + is_spec['\f'] = 1;
> > >>>> + is_spec['\n'] = 1;
> > >>>> + is_spec['\r'] = 1;
> > >>>> + is_spec['\t'] = 1;
> > >>>> + init_once = true;
> > >>>> + }
> > >>>> +
> > >>>> + return (bool)is_spec[(unsigned char)ch]; }
> > >>
> > >> According to the json spec at [1], the characters that need to be
> > escaped
> > >> are:
> > >> a) any characters <0x20
> > >> b) inverted commas/quote character \"
> > >> c) the "reverse solidus character", better known to you and I as
> > >> the back-slash.
> > >>
> > >> Therefore, I think this table generation could be simplified, but
> > also
> > >> expanded using this. For completeness we should also see about
> > handling all
> > >> control characters if they are encountered.
> > >>
> > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> > >>
> > >> /Bruce
> > >
> > > Since it is trivial could be initializer?
> > >
> > > static const uint8_t is_spec[256] = {
> > > [0 ... 0x20] = 1,
> > > ['\"' ] = 1,
> > > ['\\' ] = 1,
> > > ['/'] = 1,
> > >
> > > etc
> > >
> > > Or we could change the telemetry API to disallow control characters?
> >
> > I was thinking about converting 0~0x20, but I don't think there's a
> > scenario.
> >
> > I prefer change the telemetry API to disallow control characters, and
> > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py
> > will returns an error.
>
> I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.
>
> So we need to define exactly what the STRING type contains.
>
> I hope we can all agree that control characters should be disallowed.
>
> The more complicated question is: Do we want to use the ASCII character set
> only, or do we want to use UTF-8 encoded Unicode?
>
> Personally, think UTF-8 encoded Unicode is more future proof, and would
> vote for that.
>
> But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce
> another data type for UTF-8 strings.
>
> UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many
> SNMP MIBs.
>
[CP]
Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1).
[1] https://www.rfc-editor.org/rfc/rfc8259.txt
> >
> > So I think we could add declaring and checking functions to make sure
> > telemetry string do not allow control characters.
[CP]
I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example.
<snip>
In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in.
Thanks,
Ciara
On Wed, Jun 22, 2022 at 08:57:43AM +0100, Power, Ciara wrote:
> Hi folks,
>
> > -----Original Message-----
> > From: Morten Brørup <mb@smartsharesystems.com>
> > Sent: Saturday 18 June 2022 10:59
> > To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger
> > <stephen@networkplumber.org>; Richardson, Bruce
> > <bruce.richardson@intel.com>
> > Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin
> > <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru;
> > jerinj@marvell.com; sachin.saxena@oss.nxp.com;
> > hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara
> > <ciara.power@intel.com>
> > Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string
> >
> > +CC: Ciara Power, Telemetry library maintainer
> >
> > > From: fengchengwen [mailto:fengchengwen@huawei.com]
> > > Sent: Saturday, 18 June 2022 05.52
> > >
> > > On 2022/6/18 1:05, Stephen Hemminger wrote:
> > > > On Fri, 17 Jun 2022 12:25:04 +0100
> > > > Bruce Richardson <bruce.richardson@intel.com> wrote:
> > > >
> > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > > >>>> Sent: Friday, 17 June 2022 11.46
> > > >>>>
> > > >>>> This patch supports escape special characters (including:
> > > \",\\,/,\b,
> > > >>>> /f,/n,/r,/t) when telemetry string.
> > > >>>> This patch is used to support telemetry xxx-dump commands which
> > > the
> > > >>>> string may include special characters.
> > > >>>>
> > > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > >>>> ---
> > > >>>> lib/telemetry/telemetry.c | 96
> > > +++++++++++++++++++++++++++++++++++++--
> > > >>>> 1 file changed, 93 insertions(+), 3 deletions(-)
> > > >>>>
> > > >>>> diff --git a/lib/telemetry/telemetry.c
> > > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644
> > > >>>> --- a/lib/telemetry/telemetry.c
> > > >>>> +++ b/lib/telemetry/telemetry.c
> > > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> > > *d,
> > > >>>> char *out_buf, size_t buf_len)
> > > >>>> return used;
> > > >>>> }
> > > >>>>
> > > >>>> +static bool
> > > >>>> +json_is_special_char(char ch)
> > > >>>> +{
> > > >>>> + static unsigned char is_spec[256] = { 0 };
> > > >>>> + static bool init_once;
> > > >>>> +
> > > >>>> + if (!init_once) {
> > > >>>> + is_spec['\"'] = 1;
> > > >>>> + is_spec['\\'] = 1;
> > > >>>> + is_spec['/'] = 1;
> > > >>>> + is_spec['\b'] = 1;
> > > >>>> + is_spec['\f'] = 1;
> > > >>>> + is_spec['\n'] = 1;
> > > >>>> + is_spec['\r'] = 1;
> > > >>>> + is_spec['\t'] = 1;
> > > >>>> + init_once = true;
> > > >>>> + }
> > > >>>> +
> > > >>>> + return (bool)is_spec[(unsigned char)ch]; }
> > > >>
> > > >> According to the json spec at [1], the characters that need to be
> > > escaped
> > > >> are:
> > > >> a) any characters <0x20
> > > >> b) inverted commas/quote character \"
> > > >> c) the "reverse solidus character", better known to you and I as
> > > >> the back-slash.
> > > >>
> > > >> Therefore, I think this table generation could be simplified, but
> > > also
> > > >> expanded using this. For completeness we should also see about
> > > handling all
> > > >> control characters if they are encountered.
> > > >>
> > > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> > > >>
> > > >> /Bruce
> > > >
> > > > Since it is trivial could be initializer?
> > > >
> > > > static const uint8_t is_spec[256] = {
> > > > [0 ... 0x20] = 1,
> > > > ['\"' ] = 1,
> > > > ['\\' ] = 1,
> > > > ['/'] = 1,
> > > >
> > > > etc
> > > >
> > > > Or we could change the telemetry API to disallow control characters?
> > >
> > > I was thinking about converting 0~0x20, but I don't think there's a
> > > scenario.
> > >
> > > I prefer change the telemetry API to disallow control characters, and
> > > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py
> > > will returns an error.
> >
> > I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.
> >
> > So we need to define exactly what the STRING type contains.
> >
> > I hope we can all agree that control characters should be disallowed.
> >
> > The more complicated question is: Do we want to use the ASCII character set
> > only, or do we want to use UTF-8 encoded Unicode?
> >
> > Personally, think UTF-8 encoded Unicode is more future proof, and would
> > vote for that.
> >
> > But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce
> > another data type for UTF-8 strings.
> >
> > UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many
> > SNMP MIBs.
> >
> [CP]
>
> Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1).
>
> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
>
> > >
> > > So I think we could add declaring and checking functions to make sure
> > > telemetry string do not allow control characters.
> [CP]
>
> I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example.
>
> <snip>
>
> In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in.
>
I'm hoping to have some time to try and prototype this myself soon, and
send out a draft patch to this mailing list for consideration.
/Bruce
On Wed, Jun 22, 2022 at 10:19:48AM +0100, Bruce Richardson wrote:
> On Wed, Jun 22, 2022 at 08:57:43AM +0100, Power, Ciara wrote:
> > Hi folks,
> >
> > > -----Original Message-----
> > > From: Morten Brørup <mb@smartsharesystems.com>
> > > Sent: Saturday 18 June 2022 10:59
> > > To: fengchengwen <fengchengwen@huawei.com>; Stephen Hemminger
> > > <stephen@networkplumber.org>; Richardson, Bruce
> > > <bruce.richardson@intel.com>
> > > Cc: thomas@monjalon.net; ferruh.yigit@xilinx.com; Laatz, Kevin
> > > <kevin.laatz@intel.com>; andrew.rybchenko@oktetlabs.ru;
> > > jerinj@marvell.com; sachin.saxena@oss.nxp.com;
> > > hemant.agrawal@nxp.com; dev@dpdk.org; Power, Ciara
> > > <ciara.power@intel.com>
> > > Subject: RE: [PATCH v2 1/5] telemetry: escape special char when tel string
> > >
> > > +CC: Ciara Power, Telemetry library maintainer
> > >
> > > > From: fengchengwen [mailto:fengchengwen@huawei.com]
> > > > Sent: Saturday, 18 June 2022 05.52
> > > >
> > > > On 2022/6/18 1:05, Stephen Hemminger wrote:
> > > > > On Fri, 17 Jun 2022 12:25:04 +0100
> > > > > Bruce Richardson <bruce.richardson@intel.com> wrote:
> > > > >
> > > > >> On Fri, Jun 17, 2022 at 01:16:08PM +0200, Morten Brørup wrote:
> > > > >>>> From: Chengwen Feng [mailto:fengchengwen@huawei.com]
> > > > >>>> Sent: Friday, 17 June 2022 11.46
> > > > >>>>
> > > > >>>> This patch supports escape special characters (including:
> > > > \",\\,/,\b,
> > > > >>>> /f,/n,/r,/t) when telemetry string.
> > > > >>>> This patch is used to support telemetry xxx-dump commands which
> > > > the
> > > > >>>> string may include special characters.
> > > > >>>>
> > > > >>>> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
> > > > >>>> ---
> > > > >>>> lib/telemetry/telemetry.c | 96
> > > > +++++++++++++++++++++++++++++++++++++--
> > > > >>>> 1 file changed, 93 insertions(+), 3 deletions(-)
> > > > >>>>
> > > > >>>> diff --git a/lib/telemetry/telemetry.c
> > > > >>>> b/lib/telemetry/telemetry.c index c6fd03a5ab..0f762f633e 100644
> > > > >>>> --- a/lib/telemetry/telemetry.c
> > > > >>>> +++ b/lib/telemetry/telemetry.c
> > > > >>>> @@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data
> > > > *d,
> > > > >>>> char *out_buf, size_t buf_len)
> > > > >>>> return used;
> > > > >>>> }
> > > > >>>>
> > > > >>>> +static bool
> > > > >>>> +json_is_special_char(char ch)
> > > > >>>> +{
> > > > >>>> + static unsigned char is_spec[256] = { 0 };
> > > > >>>> + static bool init_once;
> > > > >>>> +
> > > > >>>> + if (!init_once) {
> > > > >>>> + is_spec['\"'] = 1;
> > > > >>>> + is_spec['\\'] = 1;
> > > > >>>> + is_spec['/'] = 1;
> > > > >>>> + is_spec['\b'] = 1;
> > > > >>>> + is_spec['\f'] = 1;
> > > > >>>> + is_spec['\n'] = 1;
> > > > >>>> + is_spec['\r'] = 1;
> > > > >>>> + is_spec['\t'] = 1;
> > > > >>>> + init_once = true;
> > > > >>>> + }
> > > > >>>> +
> > > > >>>> + return (bool)is_spec[(unsigned char)ch]; }
> > > > >>
> > > > >> According to the json spec at [1], the characters that need to be
> > > > escaped
> > > > >> are:
> > > > >> a) any characters <0x20
> > > > >> b) inverted commas/quote character \"
> > > > >> c) the "reverse solidus character", better known to you and I as
> > > > >> the back-slash.
> > > > >>
> > > > >> Therefore, I think this table generation could be simplified, but
> > > > also
> > > > >> expanded using this. For completeness we should also see about
> > > > handling all
> > > > >> control characters if they are encountered.
> > > > >>
> > > > >> [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> > > > >>
> > > > >> /Bruce
> > > > >
> > > > > Since it is trivial could be initializer?
> > > > >
> > > > > static const uint8_t is_spec[256] = {
> > > > > [0 ... 0x20] = 1,
> > > > > ['\"' ] = 1,
> > > > > ['\\' ] = 1,
> > > > > ['/'] = 1,
> > > > >
> > > > > etc
> > > > >
> > > > > Or we could change the telemetry API to disallow control characters?
> > > >
> > > > I was thinking about converting 0~0x20, but I don't think there's a
> > > > scenario.
> > > >
> > > > I prefer change the telemetry API to disallow control characters, and
> > > > this may not be a violation of the ABI, if yes, the dpdk-telemetry.py
> > > > will returns an error.
> > >
> > > I agree with Chengwen Feng. The telemetry data type is STRING, not BLOB.
> > >
> > > So we need to define exactly what the STRING type contains.
> > >
> > > I hope we can all agree that control characters should be disallowed.
> > >
> > > The more complicated question is: Do we want to use the ASCII character set
> > > only, or do we want to use UTF-8 encoded Unicode?
> > >
> > > Personally, think UTF-8 encoded Unicode is more future proof, and would
> > > vote for that.
> > >
> > > But I wouldn't reject limiting it to ASCII, and perhaps in the future introduce
> > > another data type for UTF-8 strings.
> > >
> > > UTF-8 is the modern choice, but it is incompatible with old stuff, e.g. many
> > > SNMP MIBs.
> > >
> > [CP]
> >
> > Just from looking at the spec [1] , I would say UTF-8, as it seems to suggest its use for JSON (section 8.1).
> >
> > [1] https://www.rfc-editor.org/rfc/rfc8259.txt
> >
> > > >
> > > > So I think we could add declaring and checking functions to make sure
> > > > telemetry string do not allow control characters.
> > [CP]
> >
> > I am not sure why we don't want these at all - I thought we do want some of them, like tab (\u0009) for example.
> >
> > <snip>
> >
> > In general, I think Bruce's suggestion of using a customised printf function instead of snprintf would be a good way forward, to scan the chars as they are being copied in.
> >
>
> I'm hoping to have some time to try and prototype this myself soon, and
> send out a draft patch to this mailing list for consideration.
>
Here is an RFC of my current prototype of this:
http://patches.dpdk.org/project/dpdk/list/?series=23739
Feedback welcome.
Regards,
/Bruce
@@ -215,6 +215,94 @@ container_to_json(const struct rte_tel_data *d, char *out_buf, size_t buf_len)
return used;
}
+static bool
+json_is_special_char(char ch)
+{
+ static unsigned char is_spec[256] = { 0 };
+ static bool init_once;
+
+ if (!init_once) {
+ is_spec['\"'] = 1;
+ is_spec['\\'] = 1;
+ is_spec['/'] = 1;
+ is_spec['\b'] = 1;
+ is_spec['\f'] = 1;
+ is_spec['\n'] = 1;
+ is_spec['\r'] = 1;
+ is_spec['\t'] = 1;
+ init_once = true;
+ }
+
+ return (bool)is_spec[(unsigned char)ch];
+}
+
+static size_t
+json_escape_special_char(char *buf, const char ch)
+{
+ size_t used = 0;
+
+ switch (ch) {
+ case '\"':
+ buf[used++] = '\\';
+ buf[used++] = '\"';
+ break;
+ case '\\':
+ buf[used++] = '\\';
+ buf[used++] = '\\';
+ break;
+ case '/':
+ buf[used++] = '\\';
+ buf[used++] = '/';
+ break;
+ case '\b':
+ buf[used++] = '\\';
+ buf[used++] = 'b';
+ break;
+ case '\f':
+ buf[used++] = '\\';
+ buf[used++] = 'f';
+ break;
+ case '\n':
+ buf[used++] = '\\';
+ buf[used++] = 'n';
+ break;
+ case '\r':
+ buf[used++] = '\\';
+ buf[used++] = 'r';
+ break;
+ case '\t':
+ buf[used++] = '\\';
+ buf[used++] = 't';
+ break;
+ default:
+ break;
+ }
+
+ return used;
+}
+
+static size_t
+json_format_string(char *buf, size_t len, const char *str)
+{
+ size_t used = 0;
+
+ while (*str) {
+ if (unlikely(len < used + 2)) {
+ TMTY_LOG(WARNING, "Insufficient buffer when json format string\n");
+ break;
+ }
+
+ if (json_is_special_char(*str))
+ used += json_escape_special_char(buf + used, *str);
+ else
+ buf[used++] = *str;
+
+ str++;
+ }
+
+ return used;
+}
+
static void
output_json(const char *cmd, const struct rte_tel_data *d, int s)
{
@@ -232,9 +320,11 @@ output_json(const char *cmd, const struct rte_tel_data *d, int s)
MAX_CMD_LEN, cmd ? cmd : "none");
break;
case RTE_TEL_STRING:
- used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"%.*s\"}",
- MAX_CMD_LEN, cmd,
- RTE_TEL_MAX_SINGLE_STRING_LEN, d->data.str);
+ used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":\"",
+ MAX_CMD_LEN, cmd);
+ used += json_format_string(out_buf + used,
+ sizeof(out_buf) - used - 3, d->data.str);
+ used += snprintf(out_buf + used, sizeof(out_buf) - used, "\"}");
break;
case RTE_TEL_DICT:
prefix_used = snprintf(out_buf, sizeof(out_buf), "{\"%.*s\":",