CSE320/hw1/include/argo.h
2022-01-15 16:47:59 -05:00

243 lines
9.7 KiB
C

/*
* DO NOT MODIFY THE CONTENTS OF THIS FILE.
* IT WILL BE REPLACED DURING GRADING
*/
#ifndef ARGO_H
#define ARGO_H
/*
* Definitions for "Argo" (aka JSON).
*/
/*
 * USAGE macro to be called from main() to print a help message and exit
 * with a specified exit status.
 *
 *   program_name  string substituted for the first %s, right after "USAGE: "
 *   retcode       value passed to exit()
 *
 * The whole message is written to stderr by a single fprintf() call, after
 * which exit(retcode) is called, so control does not return to the caller.
 * No #include appears in this header, so <stdio.h> (fprintf, stderr) and
 * <stdlib.h> (exit) must already be included wherever the macro is expanded.
 * The do { ... } while(0) wrapper makes the expansion behave as one statement.
 */
#define USAGE(program_name, retcode) do { \
    fprintf(stderr, "USAGE: %s %s\n", program_name, \
        "[-h] [-c|-v] [-p INDENT]\n" \
        " -h Help: displays this help menu.\n" \
        " -v Validate: the program reads from standard input and checks whether\n" \
        " it is syntactically correct JSON. If there is any error, then a message\n" \
        " describing the error is printed to standard error before termination.\n" \
        " No other output is produced.\n" \
        " -c Canonicalize: once the input has been read and validated, it is\n" \
        " re-emitted to standard output in 'canonical form'. Unless -p has been\n" \
        " specified, the canonicalized output contains no whitespace (except within\n" \
        " strings that contain whitespace characters).\n" \
        " -p Pretty-print: This option is only permissible if -c has also been specified.\n" \
        " In that case, newlines and spaces are used to format the canonical output\n" \
        " in a more human-friendly way. For the precise requirements on where this\n" \
        " whitespace must appear, see the assignment handout.\n" \
        " The INDENT is an optional nonnegative integer argument that specifies the\n" \
        " number of additional spaces to be output at the beginning of a line for each\n" \
        " increase in indentation level. If no value is specified, then a\n" \
        " default value of 4 is used.\n" \
    ); \
    exit(retcode); \
} while(0)
/*
 * Type used to represent an input character. It is intended to
 * represent a Unicode code point (4 bytes max), so the C type
 * "char" is not used. It is signed, so that we can represent
 * the out-of-band value EOF (-1) as a value of this type.
 *
 * NOTE(review): the C standard only guarantees that int has at least
 * 16 bits; holding every Unicode code point assumes a platform whose
 * int is at least 32 bits wide -- confirm for the target platform.
 */
typedef int ARGO_CHAR;
/*
* Type codes for Argo values.
*/
typedef enum {
    ARGO_NO_TYPE     = 0,   /* Zero, so a zero-filled ARGO_VALUE has this type. */
    ARGO_BASIC_TYPE  = 1,   /* One of the basic values "true", "false", "null". */
    ARGO_NUMBER_TYPE = 2,   /* A number. */
    ARGO_STRING_TYPE = 3,   /* A string. */
    ARGO_OBJECT_TYPE = 4,   /* An object (list of named members). */
    ARGO_ARRAY_TYPE  = 5    /* An array (ordered sequence of elements). */
} ARGO_VALUE_TYPE;
/*
* Basic Argo values, represented by the (unquoted) tokens
* "true", "false", or "null" in Argo code.
*/
typedef enum {
    ARGO_NULL  = 0,   /* The token "null". */
    ARGO_TRUE  = 1,   /* The token "true". */
    ARGO_FALSE = 2    /* The token "false"; note that this value is nonzero. */
} ARGO_BASIC;
/*
 * Structure used to hold a string value.
 * The content field points to an array of ARGO_CHAR values, each of which represents
 * a single Unicode code point. The array is not null-terminated and might itself
 * contain '\0' characters, so the data is delimited by the length field rather than
 * by a terminator.
 * The length field gives the current length of the data.
 * The capacity field records the actual size of the data area. This is included so
 * that the size can be dynamically increased while the string is being read.
 *
 * NOTE(review): the length was previously described as being "in bytes", although
 * the content is an array of ARGO_CHAR. Whether length and capacity count bytes or
 * ARGO_CHAR elements cannot be told from this header -- confirm against the
 * implementation of argo_append_char().
 */
typedef struct argo_string {
size_t capacity; // Current total size of the space that content points to.
size_t length; // Current length of the content.
ARGO_CHAR *content; // Unicode code points (not null terminated).
} ARGO_STRING;
/*
 * Structure used to hold a number.
 * The "string_value" field holds a printable/parseable representation of the number
 * as Unicode text, conforming to the Argo standard.
 * The "int_value" field holds the value of the number in integer format, if the
 * number can be exactly represented as such.
 * The "float_value" field holds the value of the number in floating-point format.
 * The "valid_string" field is nonzero if the "string_value" field contains a valid
 * representation of the value.
 * The "valid_int" field is nonzero if the "int_value" field contains a valid
 * representation of the value.
 * The "valid_float" field is nonzero if the "float_value" field contains a valid
 * representation of the value.
 *
 * If multiple representations of the value of the number are present, they should
 * agree with each other.
 * It is up to an application to determine which representation is the appropriate
 * one to use, based on the semantics of the data being represented.
 */
typedef struct argo_number {
struct argo_string string_value; // Value represented in textual format.
long int_value; // Value represented in integer format.
double float_value; // Value represented in floating-point format.
char valid_string; // Nonzero if string_value holds a valid representation.
char valid_int; // Nonzero if int_value holds a valid representation.
char valid_float; // Nonzero if float_value holds a valid representation.
} ARGO_NUMBER;
/*
* An "object" has a list of members, each of which has a name and a value.
* To store the members, we use a circular, doubly linked list, with the next and
* previous pointers stored in the "next" and "prev" fields of the ARGO_VALUE structure
* and the member name stored in the "name" field of the ARGO_VALUE structure.
* The "member_list" field of the ARGO_OBJECT structure serves as the sentinel at
* the head of the list. This element does not represent one of the members;
* rather, its "next" field points to the first member and its "prev" field points
* to the last member. An empty list of members is represented by the situation in
* which both the "next" and "prev" fields point back to the sentinel object itself.
*
* Note that the collection of members of an object is supposed to be regarded as unordered,
* which would permit it to be represented using a hash map or similar data structure,
* which we are not doing here.
*/
typedef struct argo_object {
struct argo_value *member_list; // Sentinel heading the circular, doubly linked list of members; not itself a member.
} ARGO_OBJECT;
/*
 * An "array" has an ordered sequence of elements, each of which is just a value.
 * Here we represent the elements as a circular, doubly linked list, in the same
 * way as for the members of an object. The "element_list" field in the ARGO_ARRAY
 * structure serves as the sentinel at the head of the list.
 *
 * Note that elements of an array do not have any name, so the "name" field in each
 * of the elements carries no name. Arrays could be represented as actual arrays,
 * but we are not doing that here.
 *
 * NOTE(review): the "name" field is an embedded struct argo_string, so it cannot
 * itself be NULL; presumably its "content" pointer is NULL for array elements --
 * confirm against the implementation.
 */
typedef struct argo_array {
struct argo_value *element_list; // Sentinel heading the circular, doubly linked list of elements.
} ARGO_ARRAY;
/*
 * The ARGO_VALUE structure is used to represent all kinds of Argo values.
 * The "type" field tells what type of value it represents.
 * It has "next" and "prev" fields so that it can be linked into "members"
 * or "elements" lists. It has a "name" field which will hold the name in case
 * it is a member of an object. The "content" field is the union of the structures
 * that represent the various Argo types. The value of the "type" field determines
 * which of the "object", "array", "string", "number", or "basic" variants of this
 * union is valid.
 */
typedef struct argo_value {
ARGO_VALUE_TYPE type; // Selects the valid variant of the content union.
struct argo_value *next; // Next value in list of members or elements.
struct argo_value *prev; // Previous value in list of members or elements.
struct argo_string name; // Member name; an embedded struct, so never NULL itself. Presumably name.content is NULL unless the value is an object member (TODO confirm).
union {
struct argo_object object;
struct argo_array array;
struct argo_string string;
struct argo_number number;
ARGO_BASIC basic;
} content;
} ARGO_VALUE;
/*
 * The following value is the maximum number of digits that will be printed
 * for a floating point value.
 *
 * NOTE(review): 15 equals DBL_DIG for IEEE 754 double precision, which is
 * presumably why it was chosen -- confirm against the assignment handout.
 */
#define ARGO_PRECISION 15
/*
* Macros that define particular character values mentioned in the Argo standard.
* You should use these macros where reference to these character values is required,
* rather than "hard-coding" the values as C character constants.
*/
/* Punctuation characters. */
#define ARGO_COLON ':'
#define ARGO_LBRACE '{'
#define ARGO_RBRACE '}'
#define ARGO_LBRACK '['
#define ARGO_RBRACK ']'
#define ARGO_QUOTE '"'
#define ARGO_BSLASH '\\'
#define ARGO_FSLASH '/'
#define ARGO_COMMA ','
/* Characters used in numeric literals. */
#define ARGO_PERIOD '.'
#define ARGO_PLUS '+'
#define ARGO_MINUS '-'
#define ARGO_DIGIT0 '0'
/*
 * Lowercase letters.
 * NOTE(review): presumably the letters that follow a backslash in string escapes
 * (b, f, n, r, t, u) together with the exponent marker 'e' -- confirm against the
 * Argo specification.
 */
#define ARGO_B 'b'
#define ARGO_E 'e'
#define ARGO_F 'f'
#define ARGO_N 'n'
#define ARGO_R 'r'
#define ARGO_T 't'
#define ARGO_U 'u'
/* Backspace, form feed, line feed, carriage return, horizontal tab, and space. */
#define ARGO_BS '\b'
#define ARGO_FF '\f'
#define ARGO_LF '\n'
#define ARGO_CR '\r'
#define ARGO_HT '\t'
#define ARGO_SPACE ' '
/*
* Macros that define particular classes of characters mentioned in the Argo standard.
* You should use these macros when it is necessary to test whether a character belongs
* to a particular class, rather than "hard-coding" expressions involving C character
* constants.
*/
/*
 * Nonzero if c is a space, line feed, carriage return, or horizontal tab.
 * Every use of the argument is parenthesized, so an argument containing
 * low-precedence operators (for example a | b, or x ? y : z) is classified as a
 * whole. The argument may be evaluated up to four times, so it must not have
 * side effects.
 */
#define argo_is_whitespace(c) ((c) == ' ' || (c) == '\n' || (c) == '\r' || (c) == '\t')
/* Nonzero if c is the exponent marker of a number, in either letter case. */
#define argo_is_exponent(c) ('e' == (c) || 'E' == (c))
/* Nonzero if c is one of the decimal digits '0' through '9'. */
#define argo_is_digit(c) ('0' <= (c) && '9' >= (c))
/* Nonzero if c is a hexadecimal digit: a decimal digit, 'A'-'F', or 'a'-'f'. */
#define argo_is_hex(c) (argo_is_digit(c) || ('A' <= (c) && 'F' >= (c)) || ('a' <= (c) && 'f' >= (c)))
/* Nonzero if c is nonnegative and below the space character. */
#define argo_is_control(c) (0 <= (c) && ' ' > (c))
/*
* Macros that define the tokens used to represent the basic values
* "true", "false", and "null", defined by the Argo standard.
* You should use these macros rather than "hard-coding" C string literals
* into your program.
*/
#define ARGO_TRUE_TOKEN "true" // Token for the basic value "true".
#define ARGO_FALSE_TOKEN "false" // Token for the basic value "false".
#define ARGO_NULL_TOKEN "null" // Token for the basic value "null".
/*
 * Variable that is reset to zero at the beginning of each line and is
 * incremented each time a character is read by function argo_read_char().
 * It is intended to be used for error messages and debugging. It can be
 * assigned to if it is necessary to reset the value for some reason,
 * such as if reading from multiple sources is done.
 *
 * NOTE: no declaration of this variable follows in this header; it is
 * presumably declared in another header (TODO confirm).
 */
/*
 * Appends a single character to a string.
 * An implementation has been provided for you.
 *
 *   s  the string to which the character is appended
 *   c  the character to append
 *
 * NOTE(review): the meaning of the int return value is not visible in this
 * header -- check the provided implementation before relying on it.
 */
int argo_append_char(ARGO_STRING *s, ARGO_CHAR c);
#endif