243 lines
9.7 KiB
C
243 lines
9.7 KiB
C
|
/*
|
||
|
* DO NOT MODIFY THE CONTENTS OF THIS FILE.
|
||
|
* IT WILL BE REPLACED DURING GRADING
|
||
|
*/
|
||
|
#ifndef ARGO_H
|
||
|
#define ARGO_H
|
||
|
|
||
|
/*
|
||
|
* Definitions for "Argo" (aka JSON).
|
||
|
*/
|
||
|
|
||
|
/*
 * USAGE(program_name, retcode): print the command-line help message to
 * stderr and then terminate the process with exit status "retcode".
 * It is meant to be called from main(); control does not return to the caller.
 *
 *   program_name - string substituted for the first "%s" of the message.
 *   retcode      - value passed unchanged to exit().
 *
 * The body is wrapped in do { ... } while(0) so that "USAGE(...);" acts as a
 * single statement, e.g. as the branch of an unbraced if/else.
 * This header contains no #include lines, so the file that uses the macro
 * must have fprintf, stderr and exit in scope (<stdio.h> and <stdlib.h>).
 */
#define USAGE(program_name, retcode) do { \
    fprintf(stderr, "USAGE: %s %s\n", program_name, \
        "[-h] [-c|-v] [-p INDENT]\n" \
        " -h Help: displays this help menu.\n" \
        " -v Validate: the program reads from standard input and checks whether\n" \
        " it is syntactically correct JSON. If there is any error, then a message\n" \
        " describing the error is printed to standard error before termination.\n" \
        " No other output is produced.\n" \
        " -c Canonicalize: once the input has been read and validated, it is\n" \
        " re-emitted to standard output in 'canonical form'. Unless -p has been\n" \
        " specified, the canonicalized output contains no whitespace (except within\n" \
        " strings that contain whitespace characters).\n" \
        " -p Pretty-print: This option is only permissible if -c has also been specified.\n" \
        " In that case, newlines and spaces are used to format the canonical output\n" \
        " in a more human-friendly way. For the precise requirements on where this\n" \
        " whitespace must appear, see the assignment handout.\n" \
        " The INDENT is an optional nonnegative integer argument that specifies the\n" \
        " number of additional spaces to be output at the beginning of a line for each\n" \
        " increase in indentation level. If no value is specified, then a\n" \
        " default value of 4 is used.\n" \
    ); \
    exit(retcode); \
} while(0)
|
||
|
|
||
|
/*
 * ARGO_CHAR: the type used to represent a single input character.
 *
 * A value of this type is intended to hold one Unicode code point
 * (4 bytes max), which is why the C type "char" is not used.
 * The type is signed, so that the out-of-band value EOF (-1) can also
 * be represented as a value of this type.
 */
typedef signed int ARGO_CHAR;
|
||
|
|
||
|
/*
 * Type codes for Argo values.
 * The codes are consecutive integers, starting at 0 for ARGO_NO_TYPE and
 * ending at 5 for ARGO_ARRAY_TYPE.
 */
typedef enum {
    ARGO_NO_TYPE = 0,    // 0
    ARGO_BASIC_TYPE,     // 1
    ARGO_NUMBER_TYPE,    // 2
    ARGO_STRING_TYPE,    // 3
    ARGO_OBJECT_TYPE,    // 4
    ARGO_ARRAY_TYPE      // 5
} ARGO_VALUE_TYPE;
|
||
|
|
||
|
/*
 * Basic Argo values, i.e. those represented by the (unquoted) tokens
 * "true", "false", or "null" in Argo code.
 *
 * Note that these are not C truth values: ARGO_NULL is 0 and ARGO_FALSE is 2.
 */
typedef enum {
    ARGO_NULL  = 0,
    ARGO_TRUE  = 1,
    ARGO_FALSE = 2
} ARGO_BASIC;
|
||
|
|
||
|
/*
|
||
|
* Structure used to hold a string value.
|
||
|
* The content field is maintained as an array of char, which is not null-terminated
|
||
|
* and which might contain '\0' characters. This data is interpreted as Unicode text,
|
||
|
* represented as an array of ARGO_CHAR values, each of which represents a single
|
||
|
* Unicode code point. The length field gives the length in bytes of the data.
|
||
|
* The capacity field records the actual size of the data area. This is included so
|
||
|
* that the size can be dynamically increased while the string is being read.
|
||
|
*/
|
||
|
typedef struct argo_string {
|
||
|
size_t capacity; // Current total size of space in the content.
|
||
|
size_t length; // Current length of the content.
|
||
|
ARGO_CHAR *content; // Unicode code points (not null terminated).
|
||
|
} ARGO_STRING;
|
||
|
|
||
|
/*
|
||
|
* Structure used to hold a number.
|
||
|
* The "text_value" field holds a printable/parseable representation of the number
|
||
|
* as Unicode text, conforming to the Argo standard.
|
||
|
* The "int_value" field holds the value of the number in integer format, if the
|
||
|
* number can be exactly represented as such.
|
||
|
* The "float_value" field holds the value of the number in floating-point format.
|
||
|
* The "valid_text" field is nonzero if the "text_valid" field contains a valid
|
||
|
* representation of the value.
|
||
|
* The "valid_int" field is nonzero if the "int_value" field contains a valid
|
||
|
* representation of the value.
|
||
|
* The "valid_float" field is nonzero if the "float_value" field contains a valid
|
||
|
* representation of the value.
|
||
|
*
|
||
|
* If multiple representations of the value of the number are present, they should
|
||
|
* agree with each other.
|
||
|
* It is up to an application to determine which representation is the appropriate
|
||
|
* one to use, based on the semantics of the data being represented.
|
||
|
*/
|
||
|
typedef struct argo_number {
|
||
|
struct argo_string string_value; // Value represented in textual format.
|
||
|
long int_value; // Value represented in integer format.
|
||
|
double float_value; // Value represented in floating-point format.
|
||
|
char valid_string; // Nonzero if string representation is valid.
|
||
|
char valid_int; // Nonzero if integer representation is valid.
|
||
|
char valid_float; // Nonzero if floating point representation is valid.
|
||
|
} ARGO_NUMBER;
|
||
|
|
||
|
/*
 * An "object" has a list of members, each of which has a name and a value.
 * The members are stored in a circular, doubly linked list.  The links are the
 * "next" and "prev" fields of the ARGO_VALUE structure, and the member name is
 * kept in the "name" field of the ARGO_VALUE structure.
 *
 * The ARGO_VALUE referred to by the "member_list" field serves as the sentinel
 * at the head of the list.  The sentinel does not represent one of the members;
 * rather, its "next" field points to the first member and its "prev" field
 * points to the last member.  An empty list of members is represented by the
 * situation in which both the "next" and "prev" fields point back to the
 * sentinel itself.
 *
 * Note that the collection of members of an object is supposed to be regarded
 * as unordered, which would permit it to be represented using a hash map or
 * similar data structure, which we are not doing here.
 */
struct argo_object {
    struct argo_value *member_list;    // Sentinel heading the list of members.
};
typedef struct argo_object ARGO_OBJECT;
|
||
|
|
||
|
/*
 * An "array" has an ordered sequence of elements, each of which is just a value.
 * The elements are represented as a circular, doubly linked list, in the same
 * way as for the members of an object.  The ARGO_VALUE referred to by the
 * "element_list" field serves as the sentinel at the head of the list.
 *
 * Note that elements of an array do not have any name, so the "name" field of
 * each element is not used.
 * NOTE(review): "name" is an embedded ARGO_STRING rather than a pointer, so it
 * cannot itself be NULL; presumably its content pointer is left NULL -- confirm.
 *
 * Arrays could be represented as actual arrays, but we are not doing that here.
 */
struct argo_array {
    struct argo_value *element_list;    // Sentinel heading the list of elements.
};
typedef struct argo_array ARGO_ARRAY;
|
||
|
|
||
|
/*
|
||
|
* The ARGO_VALUE structure is used to represent all kinds of Argo values.
|
||
|
* The "type" field tells what type of value it represents.
|
||
|
* It has "next" and "prev" fields so that it can be linked into "members"
|
||
|
* or "elements" lists. It has a "name" field which will hold the name in case
|
||
|
* it is a member of an object. The "content" field is the union of the structures
|
||
|
* that represent the various Argo types. Depending on the value of the "type" field,
|
||
|
* one of the "object", "array", or "string", "number", or "basic" variants of this union
|
||
|
* will be valid.
|
||
|
*/
|
||
|
typedef struct argo_value {
|
||
|
ARGO_VALUE_TYPE type;
|
||
|
struct argo_value *next; // Next value in list of members or elements.
|
||
|
struct argo_value *prev; // Previous value in list of members or element.
|
||
|
struct argo_string name; // NULL unless value is an object member.
|
||
|
union {
|
||
|
struct argo_object object;
|
||
|
struct argo_array array;
|
||
|
struct argo_string string;
|
||
|
struct argo_number number;
|
||
|
ARGO_BASIC basic;
|
||
|
} content;
|
||
|
} ARGO_VALUE;
|
||
|
|
||
|
/*
|
||
|
* The following value is the maximum number of digits that will be printed
|
||
|
* for a floating point value.
|
||
|
*/
|
||
|
#define ARGO_PRECISION 15    // Upper limit on digits printed for a floating point value.
|
||
|
|
||
|
/*
 * Macros that define particular character values mentioned in the Argo standard.
 * You should use these macros where reference to these character values is
 * required, rather than "hard-coding" the values as C character constants.
 *
 * Each macro expands to a plain C character constant.
 */

/* Punctuation and sign characters. */
#define ARGO_COLON      ':'
#define ARGO_LBRACE     '{'
#define ARGO_RBRACE     '}'
#define ARGO_LBRACK     '['
#define ARGO_RBRACK     ']'
#define ARGO_QUOTE      '"'
#define ARGO_BSLASH     '\\'
#define ARGO_FSLASH     '/'
#define ARGO_COMMA      ','
#define ARGO_PERIOD     '.'
#define ARGO_PLUS       '+'
#define ARGO_MINUS      '-'

/* The digit zero. */
#define ARGO_DIGIT0     '0'

/*
 * Lowercase letters.
 * NOTE(review): presumably the letters that may follow a backslash in an
 * escape sequence, plus 'e' for exponents -- confirm against the Argo standard.
 */
#define ARGO_B          'b'
#define ARGO_E          'e'
#define ARGO_F          'f'
#define ARGO_N          'n'
#define ARGO_R          'r'
#define ARGO_T          't'
#define ARGO_U          'u'

/* Backspace, form feed, line feed, carriage return, horizontal tab, space. */
#define ARGO_BS         '\b'
#define ARGO_FF         '\f'
#define ARGO_LF         '\n'
#define ARGO_CR         '\r'
#define ARGO_HT         '\t'
#define ARGO_SPACE      ' '
|
||
|
|
||
|
/*
 * Macros that define particular classes of characters mentioned in the Argo
 * standard.  You should use these macros when it is necessary to test whether
 * a character belongs to a particular class, rather than "hard-coding"
 * expressions involving C character constants.
 *
 * Each macro yields a nonzero value if "c" belongs to the class and 0 otherwise.
 * Every use of the argument is parenthesized, so any expression may be passed.
 * The argument may be evaluated more than once, so it should not have side
 * effects (e.g. do not pass "x++" or a function call that reads input).
 */

/* Space, line feed, carriage return, or horizontal tab. */
#define argo_is_whitespace(c) ((c) == ' ' || (c) == '\n' || (c) == '\r' || (c) == '\t')

/* 'e' or 'E'. */
#define argo_is_exponent(c) ((c) == 'e' || (c) == 'E')

/* Decimal digit '0' through '9'. */
#define argo_is_digit(c) ((c) >= '0' && (c) <= '9')

/* Decimal digit, or a letter 'A'-'F' / 'a'-'f'. */
#define argo_is_hex(c) (argo_is_digit(c) || ((c) >= 'A' && (c) <= 'F') || ((c) >= 'a' && (c) <= 'f'))

/* Value in the range 0 up to, but not including, ' '; negative values are excluded. */
#define argo_is_control(c) ((c) >= 0 && (c) < ' ')
|
||
|
|
||
|
/*
 * Macros that define the tokens used to represent the basic values
 * "true", "false", and "null", as defined by the Argo standard.
 * Use these macros rather than "hard-coding" C string literals into
 * your program.  Each one expands to a C string literal.
 */
#define ARGO_TRUE_TOKEN     "true"
#define ARGO_FALSE_TOKEN    "false"
#define ARGO_NULL_TOKEN     "null"
|
||
|
|
||
|
/*
 * Variable that is reset to zero at the beginning of each line and is
 * incremented each time a character is read by function argo_read_char().
 * It is intended to be used for error messages and debugging.  It can be
 * assigned to if it is necessary to reset the value for some reason,
 * such as if reading from multiple sources is done.
 *
 * NOTE(review): no variable declaration follows this comment in this header,
 * and argo_read_char() is not declared here either.  The declaration that
 * this comment describes appears to be missing -- confirm where it lives.
 */
|
||
|
|
||
|
/*
|
||
|
* The following function is used to append a character to a string.
|
||
|
* An implementation has been provided for you.
|
||
|
*/
|
||
|
int argo_append_char(ARGO_STRING *, ARGO_CHAR);
|
||
|
|
||
|
#endif
|