In the json_tokener_state_number case, explicitly adjust what "number" characters are allowed based on the exact micro-state that we're in, and check for invalid following characters in a different way, to allow a valid json_type_number object to be returned at the top level.

This causes previously failing strings like "123-456" to return a valid json_object with the appropriate value. If you care about the trailing content, call json_tokener_parse_ex() and check the parse end point with json_tokener_get_parse_end().
2020-06-29 02:14:26 +00:00
parent 6eac6986c9
commit f23486a321
6 changed files with 92 additions and 50 deletions
--- a/json_tokener.c
+++ b/json_tokener.c
@@ -193,7 +193,17 @@ struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokene
 		return NULL;
 	obj = json_tokener_parse_ex(tok, str, -1);
 	*error = tok->err;
-	if (tok->err != json_tokener_success)
+	if (tok->err != json_tokener_success
+#if 0
+		/* This would be a more sensible default, and cause parsing
+		 * things like "null123" to fail when the caller can't know
+		 * where the parsing left off, but starting to fail would
+		 * be a notable behaviour change.  Save for a 1.0 release.
+		 */
+	    || json_tokener_get_parse_end(tok) != strlen(str)
+#endif
+	   )
+
 	{
 		if (obj != NULL)
 			json_object_put(obj);
@@ -838,7 +848,8 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
 			const char *case_start = str;
 			int case_len = 0;
 			int is_exponent = 0;
-			int negativesign_next_possible_location = 1;
+			int neg_sign_ok = 1;
+			int pos_sign_ok = 0;
 			if (printbuf_length(tok->pb) > 0)
 			{
 				/* We don't save all state from the previous incremental parse
@@ -852,14 +863,26 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
 					char *last_saved_char =
 					    &tok->pb->buf[printbuf_length(tok->pb) - 1];
 					is_exponent = 1;
+					pos_sign_ok = neg_sign_ok = 1;
 					/* If the "e" isn't at the end, we can't start with a '-' */
 					if (e_loc != last_saved_char)
-						negativesign_next_possible_location = -1;
+					{
+						neg_sign_ok = 0;
+						pos_sign_ok = 0;
+					}
 					// else leave it set to 1, i.e. start of the new input
 				}
 			}
-			while (c && strchr(json_number_chars, c))
+
+			while (c &&
+			    ((c >= '0' && c <= '9') ||
+				 (!is_exponent && (c=='e' || c=='E')) ||
+				 (neg_sign_ok && c=='-') ||
+				 (pos_sign_ok && c=='+') ||
+				 (!tok->is_double && c=='.')
+			 ))
 			{
+				pos_sign_ok = neg_sign_ok = 0;
 				++case_len;

 				/* non-digit characters checks */
@@ -871,40 +894,16 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
 				switch (c)
 				{
 				case '.':
-					if (tok->is_double != 0)
-					{
-						/* '.' can only be found once, and out of the exponent part.
-						 * Thus, if the input is already flagged as double, it
-						 * is invalid.
-						 */
-						tok->err = json_tokener_error_parse_number;
-						goto out;
-					}
 					tok->is_double = 1;
+					pos_sign_ok = 1;
+					neg_sign_ok = 1;
 					break;
 				case 'e': /* FALLTHRU */
 				case 'E':
-					if (is_exponent != 0)
-					{
-						/* only one exponent possible */
-						tok->err = json_tokener_error_parse_number;
-						goto out;
-					}
 					is_exponent = 1;
 					tok->is_double = 1;
 					/* the exponent part can begin with a negative sign */
-					negativesign_next_possible_location = case_len + 1;
-					break;
-				case '-':
-					if (case_len != negativesign_next_possible_location)
-					{
-						/* If the negative sign is not where expected (ie
-						 * start of input or start of exponent part), the
-						 * input is invalid.
-						 */
-						tok->err = json_tokener_error_parse_number;
-						goto out;
-					}
+					pos_sign_ok = neg_sign_ok = 1;
 					break;
 				default: break;
 				}
@@ -915,6 +914,22 @@ struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *
 					goto out;
 				}
 			}
+			/*
+				Now we know c isn't a valid number char, but check whether
+				it might have been intended to be, and return a potentially
+				more understandable error right away.
+				However, if we're at the top-level, use the number as-is
+			    because c can be part of a new object to parse on the
+				next call to json_tokener_parse().
+			 */
+			if (tok->depth > 0 &&
+			    c != ',' && c != ']' && c != '}' && c != '/' &&
+				c != 'I' && c != 'i' &&
+			    !isspace((unsigned char)c))
+			{
+				tok->err = json_tokener_error_parse_number;
+				goto out;
+			}
 			if (case_len > 0)
 				printbuf_memappend_fast(tok->pb, case_start, case_len);