0b0404bf84
These comments are used by static code analysis tools and in code reviews to avoid false warnings because of missing break statements. The case statements handled here were reported by coverity. Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Stefan Weil <sw@weilnetz.de> Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
374 lines
9.3 KiB
C
374 lines
9.3 KiB
C
/*
 * JSON lexer
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */
|
|
|
|
#include "qstring.h"
|
|
#include "qlist.h"
|
|
#include "qdict.h"
|
|
#include "qint.h"
|
|
#include "qemu-common.h"
|
|
#include "json-lexer.h"
|
|
|
|
/* Upper bound on the length of a single token: 64 * 2^20.  The value is
   an unsigned long long constant. */
#define MAX_TOKEN_SIZE (64ULL * 1024 * 1024)
|
|
|
|
/*
 * \"([^\\\"]|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*\"
 * '([^\\']|(\\\"\\'\\\\\\/\\b\\f\\n\\r\\t\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]))*'
 * 0|([1-9][0-9]*(.[0-9]+)?([eE]([-+])?[0-9]+))
 * [{}\[\],:]
 * [a-z]+
 *
 */
|
|
|
|
/*
 * Internal states of the lexer's state machine.  They are used as row
 * indices of the transition table, so their order and values matter.
 *
 * IN_ERROR must stay 0: table entries that are not explicitly
 * initialized are zero and therefore mean "error".
 */
enum json_lexer_state {
    IN_ERROR = 0,

    /* inside a "double quoted" string */
    IN_DQ_UCODE3,           /* three hex digits of \uXXXX seen */
    IN_DQ_UCODE2,           /* two hex digits seen */
    IN_DQ_UCODE1,           /* one hex digit seen */
    IN_DQ_UCODE0,           /* "\u" seen, no hex digit yet */
    IN_DQ_STRING_ESCAPE,    /* backslash seen */
    IN_DQ_STRING,

    /* inside a 'single quoted' string */
    IN_SQ_UCODE3,
    IN_SQ_UCODE2,
    IN_SQ_UCODE1,
    IN_SQ_UCODE0,
    IN_SQ_STRING_ESCAPE,
    IN_SQ_STRING,

    /* numbers */
    IN_ZERO,
    IN_DIGITS,
    IN_DIGIT,
    IN_EXP_E,
    IN_MANTISSA,
    IN_MANTISSA_DIGITS,
    IN_NONZERO_NUMBER,
    IN_NEG_NONZERO_NUMBER,

    /* keywords: runs of 'a'..'z' */
    IN_KEYWORD,

    /* '%' escapes: %d %i %p %s %f %ld %lld %I64d */
    IN_ESCAPE,
    IN_ESCAPE_L,
    IN_ESCAPE_LL,
    IN_ESCAPE_I,
    IN_ESCAPE_I6,
    IN_ESCAPE_I64,

    IN_WHITESPACE,

    /* between tokens */
    IN_START,
};
|
|
|
|
/* Row initializer that maps every input byte in 0x00..0x7F to STATE.
   Rows that use it list it first and then override individual bytes
   with later designated initializers.  Relies on the range-designator
   extension ([a ... b]). */
#define TERMINAL(state) [0 ... 0x7F] = (state)

/* Return whether TERMINAL is a terminal state and the transition to it
   from OLD_STATE required lookahead.  This happens whenever the table
   below uses the TERMINAL macro: such rows have TERMINAL's argument in
   column 0, which is what this macro tests.  */
#define TERMINAL_NEEDED_LOOKAHEAD(old_state, terminal) \
            (json_lexer[(old_state)][0] == (terminal))
|
|
|
|
static const uint8_t json_lexer[][256] = {
|
|
/* double quote string */
|
|
[IN_DQ_UCODE3] = {
|
|
['0' ... '9'] = IN_DQ_STRING,
|
|
['a' ... 'f'] = IN_DQ_STRING,
|
|
['A' ... 'F'] = IN_DQ_STRING,
|
|
},
|
|
[IN_DQ_UCODE2] = {
|
|
['0' ... '9'] = IN_DQ_UCODE3,
|
|
['a' ... 'f'] = IN_DQ_UCODE3,
|
|
['A' ... 'F'] = IN_DQ_UCODE3,
|
|
},
|
|
[IN_DQ_UCODE1] = {
|
|
['0' ... '9'] = IN_DQ_UCODE2,
|
|
['a' ... 'f'] = IN_DQ_UCODE2,
|
|
['A' ... 'F'] = IN_DQ_UCODE2,
|
|
},
|
|
[IN_DQ_UCODE0] = {
|
|
['0' ... '9'] = IN_DQ_UCODE1,
|
|
['a' ... 'f'] = IN_DQ_UCODE1,
|
|
['A' ... 'F'] = IN_DQ_UCODE1,
|
|
},
|
|
[IN_DQ_STRING_ESCAPE] = {
|
|
['b'] = IN_DQ_STRING,
|
|
['f'] = IN_DQ_STRING,
|
|
['n'] = IN_DQ_STRING,
|
|
['r'] = IN_DQ_STRING,
|
|
['t'] = IN_DQ_STRING,
|
|
['/'] = IN_DQ_STRING,
|
|
['\\'] = IN_DQ_STRING,
|
|
['\''] = IN_DQ_STRING,
|
|
['\"'] = IN_DQ_STRING,
|
|
['u'] = IN_DQ_UCODE0,
|
|
},
|
|
[IN_DQ_STRING] = {
|
|
[1 ... 0xBF] = IN_DQ_STRING,
|
|
[0xC2 ... 0xF4] = IN_DQ_STRING,
|
|
['\\'] = IN_DQ_STRING_ESCAPE,
|
|
['"'] = JSON_STRING,
|
|
},
|
|
|
|
/* single quote string */
|
|
[IN_SQ_UCODE3] = {
|
|
['0' ... '9'] = IN_SQ_STRING,
|
|
['a' ... 'f'] = IN_SQ_STRING,
|
|
['A' ... 'F'] = IN_SQ_STRING,
|
|
},
|
|
[IN_SQ_UCODE2] = {
|
|
['0' ... '9'] = IN_SQ_UCODE3,
|
|
['a' ... 'f'] = IN_SQ_UCODE3,
|
|
['A' ... 'F'] = IN_SQ_UCODE3,
|
|
},
|
|
[IN_SQ_UCODE1] = {
|
|
['0' ... '9'] = IN_SQ_UCODE2,
|
|
['a' ... 'f'] = IN_SQ_UCODE2,
|
|
['A' ... 'F'] = IN_SQ_UCODE2,
|
|
},
|
|
[IN_SQ_UCODE0] = {
|
|
['0' ... '9'] = IN_SQ_UCODE1,
|
|
['a' ... 'f'] = IN_SQ_UCODE1,
|
|
['A' ... 'F'] = IN_SQ_UCODE1,
|
|
},
|
|
[IN_SQ_STRING_ESCAPE] = {
|
|
['b'] = IN_SQ_STRING,
|
|
['f'] = IN_SQ_STRING,
|
|
['n'] = IN_SQ_STRING,
|
|
['r'] = IN_SQ_STRING,
|
|
['t'] = IN_SQ_STRING,
|
|
['/'] = IN_DQ_STRING,
|
|
['\\'] = IN_DQ_STRING,
|
|
['\''] = IN_SQ_STRING,
|
|
['\"'] = IN_SQ_STRING,
|
|
['u'] = IN_SQ_UCODE0,
|
|
},
|
|
[IN_SQ_STRING] = {
|
|
[1 ... 0xBF] = IN_SQ_STRING,
|
|
[0xC2 ... 0xF4] = IN_SQ_STRING,
|
|
['\\'] = IN_SQ_STRING_ESCAPE,
|
|
['\''] = JSON_STRING,
|
|
},
|
|
|
|
/* Zero */
|
|
[IN_ZERO] = {
|
|
TERMINAL(JSON_INTEGER),
|
|
['0' ... '9'] = IN_ERROR,
|
|
['.'] = IN_MANTISSA,
|
|
},
|
|
|
|
/* Float */
|
|
[IN_DIGITS] = {
|
|
TERMINAL(JSON_FLOAT),
|
|
['0' ... '9'] = IN_DIGITS,
|
|
},
|
|
|
|
[IN_DIGIT] = {
|
|
['0' ... '9'] = IN_DIGITS,
|
|
},
|
|
|
|
[IN_EXP_E] = {
|
|
['-'] = IN_DIGIT,
|
|
['+'] = IN_DIGIT,
|
|
['0' ... '9'] = IN_DIGITS,
|
|
},
|
|
|
|
[IN_MANTISSA_DIGITS] = {
|
|
TERMINAL(JSON_FLOAT),
|
|
['0' ... '9'] = IN_MANTISSA_DIGITS,
|
|
['e'] = IN_EXP_E,
|
|
['E'] = IN_EXP_E,
|
|
},
|
|
|
|
[IN_MANTISSA] = {
|
|
['0' ... '9'] = IN_MANTISSA_DIGITS,
|
|
},
|
|
|
|
/* Number */
|
|
[IN_NONZERO_NUMBER] = {
|
|
TERMINAL(JSON_INTEGER),
|
|
['0' ... '9'] = IN_NONZERO_NUMBER,
|
|
['e'] = IN_EXP_E,
|
|
['E'] = IN_EXP_E,
|
|
['.'] = IN_MANTISSA,
|
|
},
|
|
|
|
[IN_NEG_NONZERO_NUMBER] = {
|
|
['0'] = IN_ZERO,
|
|
['1' ... '9'] = IN_NONZERO_NUMBER,
|
|
},
|
|
|
|
/* keywords */
|
|
[IN_KEYWORD] = {
|
|
TERMINAL(JSON_KEYWORD),
|
|
['a' ... 'z'] = IN_KEYWORD,
|
|
},
|
|
|
|
/* whitespace */
|
|
[IN_WHITESPACE] = {
|
|
TERMINAL(JSON_SKIP),
|
|
[' '] = IN_WHITESPACE,
|
|
['\t'] = IN_WHITESPACE,
|
|
['\r'] = IN_WHITESPACE,
|
|
['\n'] = IN_WHITESPACE,
|
|
},
|
|
|
|
/* escape */
|
|
[IN_ESCAPE_LL] = {
|
|
['d'] = JSON_ESCAPE,
|
|
},
|
|
|
|
[IN_ESCAPE_L] = {
|
|
['d'] = JSON_ESCAPE,
|
|
['l'] = IN_ESCAPE_LL,
|
|
},
|
|
|
|
[IN_ESCAPE_I64] = {
|
|
['d'] = JSON_ESCAPE,
|
|
},
|
|
|
|
[IN_ESCAPE_I6] = {
|
|
['4'] = IN_ESCAPE_I64,
|
|
},
|
|
|
|
[IN_ESCAPE_I] = {
|
|
['6'] = IN_ESCAPE_I6,
|
|
},
|
|
|
|
[IN_ESCAPE] = {
|
|
['d'] = JSON_ESCAPE,
|
|
['i'] = JSON_ESCAPE,
|
|
['p'] = JSON_ESCAPE,
|
|
['s'] = JSON_ESCAPE,
|
|
['f'] = JSON_ESCAPE,
|
|
['l'] = IN_ESCAPE_L,
|
|
['I'] = IN_ESCAPE_I,
|
|
},
|
|
|
|
/* top level rule */
|
|
[IN_START] = {
|
|
['"'] = IN_DQ_STRING,
|
|
['\''] = IN_SQ_STRING,
|
|
['0'] = IN_ZERO,
|
|
['1' ... '9'] = IN_NONZERO_NUMBER,
|
|
['-'] = IN_NEG_NONZERO_NUMBER,
|
|
['{'] = JSON_OPERATOR,
|
|
['}'] = JSON_OPERATOR,
|
|
['['] = JSON_OPERATOR,
|
|
[']'] = JSON_OPERATOR,
|
|
[','] = JSON_OPERATOR,
|
|
[':'] = JSON_OPERATOR,
|
|
['a' ... 'z'] = IN_KEYWORD,
|
|
['%'] = IN_ESCAPE,
|
|
[' '] = IN_WHITESPACE,
|
|
['\t'] = IN_WHITESPACE,
|
|
['\r'] = IN_WHITESPACE,
|
|
['\n'] = IN_WHITESPACE,
|
|
},
|
|
};
|
|
|
|
/*
 * Initialize LEXER.
 *
 * @lexer: lexer to set up; its previous contents are overwritten
 * @func:  callback stored in lexer->emit and invoked for each token or
 *         error the lexer produces
 *
 * The lexer starts in IN_START with a fresh, empty token string and
 * its position counters x and y at 0.  The token string allocated here
 * is released by json_lexer_destroy().
 */
void json_lexer_init(JSONLexer *lexer, JSONLexerEmitter func)
{
    lexer->emit = func;
    lexer->state = IN_START;
    lexer->token = qstring_new();
    lexer->x = lexer->y = 0;
}
|
|
|
|
/*
 * Advance the lexer by one input byte.
 *
 * @lexer: lexer whose state, pending token and position are updated
 * @ch:    the input byte
 * @flush: when true, the transition loop below runs exactly once even
 *         if @ch was not consumed (used by json_lexer_flush(), which
 *         feeds a 0 byte to terminate a pending token)
 *
 * The position is updated first: x is incremented for every byte, and
 * a '\n' resets x to 0 and increments y.
 *
 * A byte that completes a token by lookahead (see
 * TERMINAL_NEEDED_LOOKAHEAD) is not part of that token: the token is
 * emitted without it and the same byte is then run through the table
 * again from IN_START.
 *
 * Always returns 0.
 */
static int json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
{
    int char_consumed, new_state;

    lexer->x++;
    if (ch == '\n') {
        lexer->x = 0;
        lexer->y++;
    }

    do {
        /* Index with an unsigned byte; plain char may be negative. */
        new_state = json_lexer[lexer->state][(uint8_t)ch];
        char_consumed = !TERMINAL_NEEDED_LOOKAHEAD(lexer->state, new_state);
        if (char_consumed) {
            qstring_append_chr(lexer->token, ch);
        }

        switch (new_state) {
        case JSON_OPERATOR:
        case JSON_ESCAPE:
        case JSON_INTEGER:
        case JSON_FLOAT:
        case JSON_KEYWORD:
        case JSON_STRING:
            lexer->emit(lexer, lexer->token, new_state, lexer->x, lexer->y);
            /* fall through */
        case JSON_SKIP:
            /* Token done (or skipped): start over with an empty one. */
            QDECREF(lexer->token);
            lexer->token = qstring_new();
            new_state = IN_START;
            break;
        case IN_ERROR:
            /* XXX: To avoid having previous bad input leaving the parser in an
             * unresponsive state where we consume unpredictable amounts of
             * subsequent "good" input, percolate this error state up to the
             * tokenizer/parser by forcing a NULL object to be emitted, then
             * reset state.
             *
             * Also note that this handling is required for reliable channel
             * negotiation between QMP and the guest agent, since chr(0xFF)
             * is placed at the beginning of certain events to ensure proper
             * delivery when the channel is in an unknown state. chr(0xFF) is
             * never a valid ASCII/UTF-8 sequence, so this should reliably
             * induce an error/flush state.
             */
            lexer->emit(lexer, lexer->token, JSON_ERROR, lexer->x, lexer->y);
            QDECREF(lexer->token);
            lexer->token = qstring_new();
            new_state = IN_START;
            lexer->state = new_state;
            return 0;
        default:
            break;
        }
        lexer->state = new_state;
    } while (!char_consumed && !flush);

    /* Do not let a single token grow to an arbitrarily large size,
     * this is a security consideration.
     *
     * NOTE(review): the oversized token is emitted with the lexer's
     * current internal state as its type rather than a JSON_* token
     * type; presumably the consumer copes with that -- confirm.
     */
    if (lexer->token->length > MAX_TOKEN_SIZE) {
        lexer->emit(lexer, lexer->token, lexer->state, lexer->x, lexer->y);
        QDECREF(lexer->token);
        lexer->token = qstring_new();
        lexer->state = IN_START;
    }

    return 0;
}
|
|
|
|
/*
 * Feed SIZE bytes from BUFFER to the lexer, one byte at a time and in
 * order.
 *
 * @lexer:  lexer to advance
 * @buffer: input bytes; no terminating NUL is needed, since exactly
 *          @size bytes are read
 * @size:   number of bytes in @buffer
 *
 * Returns 0 when all bytes were fed, or the first negative value
 * returned by json_lexer_feed_char(), in which case the remaining
 * bytes are not fed.
 */
int json_lexer_feed(JSONLexer *lexer, const char *buffer, size_t size)
{
    size_t i;

    for (i = 0; i < size; i++) {
        int err;

        err = json_lexer_feed_char(lexer, buffer[i], false);
        if (err < 0) {
            return err;
        }
    }

    return 0;
}
|
|
|
|
/*
 * Terminate whatever token is currently being accumulated.
 *
 * If the lexer is already in IN_START there is nothing pending and 0 is
 * returned.  Otherwise a single 0 byte is fed with flush set, which
 * makes json_lexer_feed_char() run its transition loop once, and that
 * call's result is returned.
 */
int json_lexer_flush(JSONLexer *lexer)
{
    return lexer->state == IN_START ? 0 : json_lexer_feed_char(lexer, 0, true);
}
|
|
|
|
/*
 * Release the lexer's reference to its token string.
 *
 * The JSONLexer structure itself is not freed.  The token pointer is
 * cleared afterwards so the lexer does not keep a dangling pointer.
 */
void json_lexer_destroy(JSONLexer *lexer)
{
    QDECREF(lexer->token);
    lexer->token = NULL;
}
|