/* CSE320/hw1/include/argo.h */

/*
 * DO NOT MODIFY THE CONTENTS OF THIS FILE.
 * IT WILL BE REPLACED DURING GRADING
 */
#ifndef ARGO_H
#define ARGO_H

/*
 * Definitions for "Argo" (aka JSON).
 */

/*
 * USAGE macro to be called from main() to print a help message and exit
 * with a specified exit status.
 *
 *   program_name - string substituted for the first "%s" of the message
 *                  (presumably argv[0] -- confirm against the caller).
 *   retcode      - value passed to exit().
 *
 * The message is written to stderr with fprintf() and the process is then
 * terminated with exit(), so control does not continue past the macro.
 * The expansion is wrapped in do { ... } while(0) so that it can be used
 * like a single statement followed by a semicolon.
 *
 * No #include appears before this macro in this header, so the including
 * file must already have declarations of fprintf, stderr and exit in scope
 * (<stdio.h> and <stdlib.h>).
 *
 * NOTE(review): in the -p help text the words "for each" occur twice in a
 * row (at the end of one string literal and the start of the next).
 */
#define USAGE(program_name, retcode) do { \
fprintf(stderr, "USAGE: %s %s\n", program_name, \
"[-h] [-c|-v] [-p INDENT]\n" \
"   -h       Help: displays this help menu.\n" \
"   -v       Validate: the program reads from standard input and checks whether\n" \
"            it is syntactically correct JSON.  If there is any error, then a message\n" \
"            describing the error is printed to standard error before termination.\n" \
"            No other output is produced.\n" \
"   -c       Canonicalize: once the input has been read and validated, it is\n" \
"            re-emitted to standard output in 'canonical form'.  Unless -p has been\n" \
"            specified, the canonicalized output contains no whitespace (except within\n" \
"            strings that contain whitespace characters).\n" \
"   -p       Pretty-print:  This option is only permissible if -c has also been specified.\n" \
"            In that case, newlines and spaces are used to format the canonical output\n" \
"            in a more human-friendly way.  For the precise requirements on where this\n" \
"            whitespace must appear, see the assignment handout.\n" \
"            The INDENT is an optional nonnegative integer argument that specifies the\n" \
"            number of additional spaces to be output at the beginning of a line for each\n" \
"            for each increase in indentation level.  If no value is specified, then a\n" \
"            default value of 4 is used.\n" \
); \
exit(retcode); \
} while(0)

/*
 * Type used to represent an input character.  It is intended to
 * represent a Unicode code point (4 bytes max), so the C type
 * "char" is not used.  It is signed, so that we can represent
 * the out-of-band value EOF (-1) as a value of this type.
 *
 * NOTE(review): holding a full code point assumes that "int" is at least
 * 32 bits wide on the target platform -- confirm for unusual targets.
 */
typedef int ARGO_CHAR;

/*
 * Type codes for Argo values.
 * The codes are consecutive integers: numbering starts at zero with
 * ARGO_NO_TYPE and each following enumerator is one greater than the last.
 */
typedef enum {
    ARGO_NO_TYPE = 0,      // 0
    ARGO_BASIC_TYPE,       // 1
    ARGO_NUMBER_TYPE,      // 2
    ARGO_STRING_TYPE,      // 3
    ARGO_OBJECT_TYPE,      // 4
    ARGO_ARRAY_TYPE        // 5
} ARGO_VALUE_TYPE;

/*
 * Basic Argo values, i.e. the ones written as the (unquoted) tokens
 * "null", "true", or "false" in Argo code.
 * Note that ARGO_FALSE is nonzero, so these constants must be compared
 * explicitly rather than tested as C truth values.
 */
typedef enum {
    ARGO_NULL = 0,
    ARGO_TRUE = 1,
    ARGO_FALSE = 2
} ARGO_BASIC;

/*
 * Structure used to hold a string value.
 * The content field points to an array of ARGO_CHAR values, which is not
 * null-terminated and which might contain '\0' characters.  This data is
 * interpreted as Unicode text, with each ARGO_CHAR representing a single
 * Unicode code point.  The length field is documented as giving the length
 * in bytes of the data.
 * The capacity field records the actual size of the data area.  This is included so
 * that the size can be dynamically increased while the string is being read.
 *
 * NOTE(review): since content is an array of ARGO_CHAR rather than char,
 * confirm against the implementation whether length and capacity are counted
 * in bytes or in ARGO_CHAR elements.
 *
 * The size_t type is not defined by this header; the including file must
 * already have it in scope (for example via <stddef.h>).
 */
typedef struct argo_string {
    size_t capacity;                  // Current total size of space in the content.
    size_t length;                    // Current length of the content.
    ARGO_CHAR *content;              // Unicode code points (not null terminated).
} ARGO_STRING;

/*
 * Structure used to hold a number.
 * The "string_value" field holds a printable/parseable representation of the number
 * as Unicode text, conforming to the Argo standard.
 * The "int_value" field holds the value of the number in integer format, if the
 * number can be exactly represented as such.
 * The "float_value" field holds the value of the number in floating-point format.
 * The "valid_string" field is nonzero if the "string_value" field contains a valid
 * representation of the value.
 * The "valid_int" field is nonzero if the "int_value" field contains a valid
 * representation of the value.
 * The "valid_float" field is nonzero if the "float_value" field contains a valid
 * representation of the value.
 *
 * If multiple representations of the value of the number are present, they should
 * agree with each other.
 * It is up to an application to determine which representation is the appropriate
 * one to use, based on the semantics of the data being represented.
 */
typedef struct argo_number {
    struct argo_string string_value;   // Value represented in textual format.
    long int_value;                    // Value represented in integer format.
    double float_value;                // Value represented in floating-point format.
    char valid_string;		       // Nonzero if string representation is valid.
    char valid_int;		       // Nonzero if integer representation is valid.
    char valid_float;		       // Nonzero if floating point representation is valid.
} ARGO_NUMBER;

/*
 * An "object" has a list of members, each of which has a name and a value.
 * To store the members, we use a circular, doubly linked list, with the next and
 * previous pointers stored in the "next" and "prev" fields of the ARGO_VALUE structure
 * and the member name stored in the "name" field of the ARGO_VALUE structure.
 * The "member_list" field of the ARGO_OBJECT structure is a pointer; the ARGO_VALUE
 * it points to serves as the sentinel at the head of the list.  This element does
 * not represent one of the members; rather, its "next" field points to the first
 * member and its "prev" field points to the last member.  An empty list of members
 * is represented by the situation in which both the "next" and "prev" fields point
 * back to the sentinel object itself.
 *
 * Note that the collection of members of an object is supposed to be regarded as unordered,
 * which would permit it to be represented using a hash map or similar data structure,
 * which we are not doing here.
 */
typedef struct argo_object {
    struct argo_value *member_list;    // Points to the sentinel of the member list.
} ARGO_OBJECT;

/*
 * An "array" has an ordered sequence of elements, each of which is just a value.
 * Here we represent the elements as a circular, doubly linked list, in the same
 * way as for the members of an object.  The "element_list" field in the ARGO_ARRAY
 * structure is a pointer; the ARGO_VALUE it points to serves as the sentinel at
 * the head of the list.
 *
 * Note that elements of an array do not have any name, so the "name" field in each
 * of the elements will be NULL.  Arrays could be represented as actual arrays,
 * but we are not doing that here.
 *
 * NOTE(review): the "name" field of ARGO_VALUE is an embedded struct argo_string,
 * not a pointer, so "NULL" presumably refers to its content pointer (or a zero
 * length) -- confirm against the implementation.
 */
typedef struct argo_array {
    struct argo_value *element_list;   // Points to the sentinel of the element list.
} ARGO_ARRAY;

/*
 * The ARGO_VALUE structure is used to represent all kinds of Argo values.
 * The "type" field tells what type of value it represents.
 * It has "next" and "prev" fields so that it can be linked into "members"
 * or "elements" lists.  It has a "name" field which will hold the name in case
 * it is a member of an object.  The "content" field is the union of the structures
 * that represent the various Argo types.  Depending on the value of the "type" field,
 * one of the "object", "array", "string", "number", or "basic" variants of this union
 * will be valid (presumably the variant matching the ARGO_*_TYPE code of the same
 * name -- confirm against the implementation).
 *
 * Because "content" is a union, its variants share storage; only the variant
 * selected by "type" should be read.
 */
typedef struct argo_value {
    ARGO_VALUE_TYPE type;
    struct argo_value *next;           // Next value in list of members or elements.
    struct argo_value *prev;           // Previous value in list of members or elements.
    struct argo_string name;           // Member name; embedded by value (not a pointer), so
                                       // "no name" is presumably an empty/NULL content -- TODO confirm.
    union {
	struct argo_object object;
	struct argo_array array;
	struct argo_string string;
	struct argo_number number;
	ARGO_BASIC basic;
    } content;
} ARGO_VALUE;

/*
 * The following value is the maximum number of digits that will be printed
 * for a floating point value.
 *
 * NOTE(review): 15 is presumably chosen to match the decimal precision of an
 * IEEE-754 double (DBL_DIG) -- confirm before changing it.
 */
#define ARGO_PRECISION 15

/*
 * Macros that define particular character values mentioned in the Argo standard.
 * You should use these macros where reference to these character values is required,
 * rather than "hard-coding" the values as C character constants.
 *
 * Each macro expands to a C character constant, which has type int.
 */

/* Punctuation characters. */
#define ARGO_COLON ':'
#define ARGO_LBRACE '{'
#define ARGO_RBRACE '}'
#define ARGO_LBRACK '['
#define ARGO_RBRACK ']'
#define ARGO_QUOTE '"'
#define ARGO_BSLASH '\\'
#define ARGO_FSLASH '/'
#define ARGO_COMMA ','
#define ARGO_PERIOD '.'
#define ARGO_PLUS '+'
#define ARGO_MINUS '-'

/* The digit zero. */
#define ARGO_DIGIT0 '0'

/* Lower-case letters (presumably those that may follow a backslash in an
 * escape sequence, plus the exponent letter -- confirm against the standard). */
#define ARGO_B 'b'
#define ARGO_E 'e'
#define ARGO_F 'f'
#define ARGO_N 'n'
#define ARGO_R 'r'
#define ARGO_T 't'
#define ARGO_U 'u'

/* Control characters: backspace, form feed, line feed, carriage return,
 * horizontal tab -- followed by the space character. */
#define ARGO_BS '\b'
#define ARGO_FF '\f'
#define ARGO_LF '\n'
#define ARGO_CR '\r'
#define ARGO_HT '\t'
#define ARGO_SPACE ' '

/*
 * Macros that define particular classes of characters mentioned in the Argo standard.
 * You should use these macros when it is necessary to test whether a character belongs
 * to a particular class, rather than "hard-coding" expressions involving C character
 * constants.
 */
/*
 * Nonzero if c is a space, line feed, carriage return, or horizontal tab.
 * Every use of the argument is parenthesized so that an argument containing
 * operators of lower precedence than == (for example "x & 0xff") is compared
 * as a whole.  The argument may be evaluated up to four times, so it must not
 * have side effects.
 */
#define argo_is_whitespace(c) ((c) == ' ' || (c) == '\n' || (c) == '\r' || (c) == '\t')
/* Nonzero if c is the exponent marker in either letter case. */
#define argo_is_exponent(c) ('e' == (c) || 'E' == (c))
/* Nonzero if c lies in the range '0'..'9'. */
#define argo_is_digit(c) ('0' <= (c) && (c) <= '9')
/* Nonzero if c is a digit or lies in 'A'..'F' or 'a'..'f'. */
#define argo_is_hex(c) (argo_is_digit(c) || ('A' <= (c) && (c) <= 'F') || ('a' <= (c) && (c) <= 'f'))
/* Nonzero if c is nonnegative and below the space character. */
#define argo_is_control(c) (0 <= (c) && (c) < ' ')

/*
 * Macros that define the tokens used to represent the basic values
 * "true", "false", and "null", defined by the Argo standard.
 * You should use these macros rather than "hard-coding" C string literals
 * into your program.
 *
 * Unlike the single-character ARGO_* constants, these expand to C string
 * literals (arrays of char with a terminating '\0').
 */
#define ARGO_TRUE_TOKEN "true"
#define ARGO_FALSE_TOKEN "false"
#define ARGO_NULL_TOKEN "null"

/*
 * Variable that is reset to zero at the beginning of each line and is
 * incremented each time a character is read by function argo_read_char().
 * It is intended to be used for error messages and debugging.  It can be
 * assigned to if it is necessary to reset the value for some reason,
 * such as if reading from multiple sources is done.
 *
 * NOTE: no such variable is declared in this header; its declaration is
 * presumably provided elsewhere in the project.
 */

/*
 * The following function is used to append a character to a string.
 * An implementation has been provided for you.
 *
 *   s - the string to which the character is appended.
 *   c - the character to append.
 *
 * NOTE(review): the return-value convention (presumably zero on success and
 * nonzero on failure) is not visible in this header -- confirm against the
 * provided implementation.
 */
int argo_append_char(ARGO_STRING *s, ARGO_CHAR c);

#endif