commit c4ef05ced64f763a9f23e30922fc78f5e2859bc1 Author: Gene Stark Date: Fri Feb 18 17:27:51 2022 -0500 Bring in basecode from the development repo. diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..d2726d9 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,30 @@ +image: hwrunner:latest +variables: + GIT_SSL_NO_VERIFY: "true" + EXEC: par + HW_DIR: hw2 + CPU_LIMIT: 60 + FILE_LIMIT: 1000000 +before_script: + - make clean all -C ${HW_DIR} +stages: + - build + - run + - test +build: + stage: build + script: + - echo "Build done" +run: + stage: run + script: + - ulimit -t ${CPU_LIMIT} + - ulimit -f ${FILE_LIMIT} + - cd ${HW_DIR} && bin/${EXEC} --version + +test: + stage: test + script: + - ulimit -t ${CPU_LIMIT} + - ulimit -f ${FILE_LIMIT} + - cd ${HW_DIR} && bin/${EXEC}_tests -S --verbose=0 -j1 --timeout 30 diff --git a/hw2/.gitignore b/hw2/.gitignore new file mode 100644 index 0000000..665cbf9 --- /dev/null +++ b/hw2/.gitignore @@ -0,0 +1,6 @@ +bin/ +build/ +test_output/ +*~ +*.out +*.bak diff --git a/hw2/Makefile b/hw2/Makefile new file mode 100644 index 0000000..815e7d7 --- /dev/null +++ b/hw2/Makefile @@ -0,0 +1,59 @@ +CC := gcc +LEX := flex +SRCD := src +TSTD := tests +BLDD := build +BIND := bin +INCD := include + +MAIN := $(BLDD)/main.o + +ALL_SRCF := $(shell find $(SRCD) -type f -name *.c) +ALL_OBJF := $(patsubst $(SRCD)/%,$(BLDD)/%,$(ALL_SRCF:.c=.o)) +ALL_FUNCF := $(filter-out $(MAIN) $(AUX), $(ALL_OBJF)) + +TEST_SRCF := $(shell find $(TSTD) -type f -name *.c) + +INC := -I $(INCD) + +CFLAGS := -Wall -Werror -Wno-unused-variable -Wno-unused-function $(NO_MAXLINE_FLAG) -MMD +COLORF := -DCOLOR +DFLAGS := -g -DDEBUG -DCOLOR +PRINT_STAMENTS := -DERROR -DSUCCESS -DWARN -DINFO + +STD := -std=c99 -D_DEFAULT_SOURCE +TEST_LIB := -lcriterion +LIBS := + +CFLAGS += $(STD) + +EXEC := par +TEST_EXEC := $(EXEC)_tests + +.PHONY: clean all setup debug + +all: setup $(BIND)/$(EXEC) $(BIND)/$(TEST_EXEC) + +debug: CFLAGS += $(DFLAGS) $(PRINT_STAMENTS) 
$(COLORF) +debug: all + +setup: $(BIND) $(BLDD) +$(BIND): + mkdir -p $(BIND) +$(BLDD): + mkdir -p $(BLDD) + +$(BIND)/$(EXEC): $(ALL_OBJF) + $(CC) $^ -o $@ $(CURSES_LIBS) $(LIBS) + +$(BIND)/$(TEST_EXEC): $(ALL_FUNCF) $(TEST_SRCF) + $(CC) $(CFLAGS) $(INC) $(ALL_FUNCF) $(TEST_SRCF) $(TEST_LIB) $(LIBS) -o $@ + +$(BLDD)/%.o: $(SRCD)/%.c + $(CC) $(CFLAGS) $(INC) -c -o $@ $< + +clean: + rm -rf $(BLDD) $(BIND) + +.PRECIOUS: $(BLDD)/*.d +-include $(BLDD)/*.d diff --git a/hw2/doc/par.1 b/hw2/doc/par.1 new file mode 100644 index 0000000..23abe0b --- /dev/null +++ b/hw2/doc/par.1 @@ -0,0 +1,628 @@ +.\"********************* +.\"* par.1 * +.\"* for Par 3.20 * +.\"* Copyright 1993 by * +.\"* Adam M. Costello * +.\"********************* +.\" +.\" This is nroff -man (or troff -man) code. +.\" +.TH par 1 "1993" "Par 3.20" "USER COMMANDS" +.SH NAME +par \- filter for reformatting paragraphs +.SH SYNOPSIS +.ds O \fR[\fP +.ds C \fR]\fP +.de OP +.BI \*O\ \\$1 \\$2\ \*C +.. +.HP +.na +.B par +.OP w width +.OP p prefix +.OP s suffix +.OP h \*Ohang\*C +.OP l \*Olast\*C +.OP m \*Omin\*C +.OP version +.ad +.LP 0.5i +Any option may be immediately +preceeded by a minus sign (\-). +.ie t .ds Q `` +.el .ds Q "" +.ie t .ds U '' +.el .ds U "" +.SH DESCRIPTION +.de IT +.LP +\h'-\w"\\$1\ "u'\\$1\ \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9 +.. +.LP +.B par +is a filter which copies its input to +its output, reformatting each paragraph. +Paragraphs are delimited by blank lines. +.LP +Each output paragraph is generated from +the corresponding input lines as follows: +.RS +.LP +.IT 1. An optional prefix and/or suffix +is removed from each input line. +.IT 2. The remainder is divided into +words (delimited by white space). +.IT 3. The words are joined into lines +to make an eye-pleasing paragraph. +.IT 4. The prefixes and suffixes are reattached. +.SH OPTIONS +.LP +All options except +.B version +are used to set values of variables. Values set by +command line options hold for all paragraphs in the +input. 
Unset variables are given default values +which are recomputed separately for each paragraph. +.LP +The approximate role of each +variable is described here. See the +.SM DETAILS +section for a much more complete and precise description. +.TP 1i +.BI w width +Sets the value of +.IR width, +the maximum width of the output paragraph, in characters, +not including the trailing newline characters. +Must be an unsigned decimal integer greater than +.I prefix +(see below). Defaults to 72. If +.I width +is 9 or more, the +.B w +is not needed. +.TP +.BI p prefix +Sets the value of +.IR prefix , +the length of the prefix, in characters. Must be an unsigned +decimal integer. Defaults to 0 if there are no more than +.I hang ++ 1 lines in the input paragraph (see the +.B h +option). Otherwise defaults to the length +of the longest common prefix of all lines +in the input paragraph except the first +.I hang +of them. The first +.I prefix +characters of each output line are copied from the first +.I prefix +characters of the corresponding input line. If +.I prefix +is 8 or less, the +.B p +is not needed. +.TP +.BI s suffix +Sets the value of +.IR suffix , +the length of the suffix, in characters. Must be an unsigned +decimal integer. Defaults to 0 if there is no more than +1 line in the input paragraph. Otherwise defaults to the +length of the longest common suffix of all lines in the +input paragraph, after this common suffix has been stripped +of all initial white characters save the last. The last +.I suffix +characters of each output line are copied from the last +.I suffix +characters of the corresponding input line. +.TP +.BI h\fR[ hang\fR] +Sets the value of +.IR hang . +Must be an unsigned decimal integer. Defaults +to 0. Mainly affects the default value of +.I prefix +(see the +.B p +option). If the +.B h +option is given without a number, the value 1 is assumed. +.TP +.BI l\fR[ last\fR] +Sets the value of +.IR last . +Must be 0 or 1. Defaults to 0. 
If +.I last +is 1, +.B par +tries to make the last line of the output paragraph +about the same length as the others. If the l option +is given without a number, the value 1 is assumed. +.TP +.BI m\fR[ min\fR] +Sets the value of +.IR min . +Must be 0 or 1. Defaults to +.IR last . +If +.I min +is 1, +.B par +will try to make the paragraph narrower +without shortening the shortest line. If the +.B m +option is given without a number, the value 1 is assumed. +.TP +.B version +Causes all other options to be ignored. No input is +read. \*Qpar 3.20\*U is printed on the output. Of +course, this will change in future releases of Par. +.LP 0.5i +If the value of any variable is set more than +once, the last value is used. For each paragraph, +default values for any variables not set by command +line options are computed in the following order: +.RS +.LP +.I width hang last min prefix suffix +.RE +.LP +No integer appearing in an option may exceed 9999. +.LP +It is an error if +.I width +<= +.I prefix ++ +.IR suffix . +.SH ENVIRONMENT +.LP +If the environment variable +.SM PARINIT +is set, +.B par +will read command line options from it +before it reads them from the command line. +.SH DETAILS +.LP +The white characters are the space, formfeed, +newline, carriage return, tab, and vertical tab. +.LP +Lines are terminated by newline characters, but the +newlines are not considered to be included in the lines. +If the last character of the input is a non-newline, +then a newline will be inferred immediately after +it (but if the input is empty, no newline will be +inferred; the number of input lines will be 0). Thus, +the input can always be viewed as a sequence of lines. +.LP +A line is called +.I blank +if and only if it contains no non-white characters. +A subsequence of non-blank lines is called +.I maximal +if and only if there is no non-blank +line immediately before or after it. 
+.LP +The process described in the remainder of this section +is applied independently to each maximal subsequence of +non-blank input lines. (Each blank line of the input +is transformed into an empty line on the output). +.LP +After the values of the variables are determined (see the +.SM OPTIONS +section), the first +.I prefix +characters and the last +.I suffix +characters of each input line are removed and remembered. +It is an error for any line to contain fewer than +.I prefix ++ +.I suffix +characters. +.LP +The remaining text is treated as a sequence of +characters, not lines. The text is broken into words, +which are delimited by white characters. That is, a +.I word +is a maximal sub-sequence of non-white characters. If there +is at least one word, and the first word is preceded only +by spaces (strictly spaces, not other white characters), +then the first word is expanded to include those spaces. +.LP +Let +.I L += +.I width +\- +.I prefix +\- +.IR suffix . +.LP +Every word which contains more than +.I L +characters is broken, after each +.IR L th +character, into multiple words. +.LP +These words are reassembled, preserving their +order, into lines. Adjacent words within +a line are separated by a single space. +.LP +If all the words fit on a single line of no more than +.I L +characters, then no line breaks are inserted. Otherwise, +line breaks are placed in such a way that the +resulting paragraph satisfies certain properties. If +.I min +is 1, those properties are: +.RS +.LP +.IT 1. No line contains more than +.I L +characters. +.IT 2. The shortest line is as +long as possible, subject to 1. +.IT 3. The longest line is as short as possible, +subject to properties 1 and 2. Call its length +.IR newL . +.IT 4. The sum of the squares +of the differences between +.I newL +and the lengths of the lines is as small as +possible, subject to properties 1, 2, and 3.
+.RE +.LP +If +.I last +is 0, then the last line does not count as a line +for the purposes of properties 2 and 4 above. +.LP +If +.I min +is 0, then property 3 is disregarded, and +.I newL +is set equal to +.IR L . +.LP +If the number of lines in the +resultant paragraph is less than +.IR hang , +then empty lines are added at the end +to bring the number of lines up to +.IR hang . +.LP +If +.I suffix +is not 0, then each line is padded at the +end with spaces to bring its length up to +.IR newL . +.LP +To each line is prepended +.I prefix +characters. Let +.I n +be the number of input lines. The +characters which are prepended to the +.IR i th +line are chosen as follows: +.RS +.LP +.IT 1. If +.I i +<= +.IR n , +then the characters are copied from the ones +that were removed from the beginning of the +.IR i th +input line. +.IT 2. If +.I i +> +.I n +> +.IR hang , +then the characters are copied from the ones that were +removed from the beginning of the last input line. +.IT 3. If +.I i +> +.I n +and +.I n +<= +.IR hang , +then the characters are all spaces. +.RE +.LP +Then to each line is appended +.I suffix +characters. The characters which are appended to the +.IR i th +line are chosen as follows: +.RS +.LP +.IT 1. If +.I i +<= +.IR n , +then the characters are copied from the +ones that were removed from the end of the +.IR i th +input line. +.IT 2. If +.I i +> +.I n +> 0, then the characters are copied from the ones that +were removed from the end of the last input line. +.IT 3. If +.I n += 0, then the characters are all spaces. +.RE +.LP +Finally, the lines are printed to the output. +.SH DIAGNOSTICS +.LP +If there are no errors, +.B par +returns +.SM EXIT_SUCCESS +(see +.BR <stdlib.h> ). +.LP +If there is an error, then an error +message will be printed to the output, and +.B par +will return +.SM EXIT_FAILURE\s0\. +If the error is local to a single paragraph, then the +preceding paragraphs will have been output before the +error was detected.
Line numbers in error messages are +local to the input paragraph in which the error occurred. +.LP +Of course, trying to print an error message would be +futile if an error resulted from an output function, so +.B par +doesn't bother doing any error checking on output functions. +.SH EXAMPLES +.de VS +.RS -0.5i +.LP +.nf +.ps -1 +.cs R 20 +.. +.de VE +.cs R +.ps +.fi +.RE +.. +.de CM +\&\*Q\fB\\$1\fP\\*U: +.. +.LP +The superiority of +.BR par 's +dynamic programming algorithm over a +greedy algorithm (such as the one used by +.BR fmt ) +can be seen in the following example: +.LP +Original paragraph (note that +each line begins with 8 spaces): +.VS + We hold these truths to be self evident, + that all men are created equal, + that they are endowed by their creator + with certain unalienable rights, + that among these are + life, liberty, and the + pursuit of happiness. +.VE +.LP +After a greedy algorithm with width = 61: +.VS + We hold these truths to be self evident, that all men + are created equal, that they are endowed by their + creator with certain unalienable rights, that among + these are life, liberty, and the pursuit of + happiness. +.VE +.LP +After +.CM "par 61" +.VS + We hold these truths to be self evident, that all + men are created equal, that they are endowed by + their creator with certain unalienable rights, that + among these are life, liberty, and the pursuit of + happiness. +.VE +.LP +The line breaks chosen by +.B par +are clearly more pleasing. +.LP +I use +.B par +in conjunction with the !} command of the +.B vi +editor. Other editors probably provide +a similar feature for filtering text. +.LP +The rest of this section is a series of +before-and-after pictures showing some typical uses of +.BR par . +.LP +Before: +.VS + Four score and seven years ago, our fathers brought + forth on this continent + a new nation. +.VE +.LP +After +.CM "par 42" +.VS + Four score and seven years ago, + our fathers brought forth on this + continent a new nation. 
+.VE +.LP +Before: +.VS + /* Four score and seven years */ + /* ago, our */ + /* fathers brought forth on this continent */ + /* a new nation. */ +.VE +.LP +After +.CM "par 42" +.VS + /* Four score and seven years */ + /* ago, our fathers brought */ + /* forth on this continent a */ + /* new nation. */ +.VE +.LP +Or after +.CM "par l 42" +.VS + /* Four score and seven */ + /* years ago, our fathers */ + /* brought forth on this */ + /* continent a new nation. */ +.VE +.LP +Or after +.CM "par l 42 m0" +.VS + /* Four score and seven */ + /* years ago, our fathers */ + /* brought forth on this */ + /* continent a new nation. */ +.VE +.LP +Before: +.VS + Gettysburg Address: Four score + and seven years ago, + our fathers brought forth on + this continent + a new nation. +.VE +.LP +After +.CM "par h 56" +.VS + Gettysburg Address: Four score and seven years + ago, our fathers brought + forth on this continent a + new nation. +.VE +.LP +Before: +.VS + 1 Four score and + 2 seven years ago, + 3 our fathers brought + 4 forth on this continent + 5 a new nation. +.VE +.LP +After +.CM "par p11 44" +.VS + 1 Four score and seven years ago, + 2 our fathers brought forth on this + 3 continent a new nation. +.VE +.SH SEE ALSO +.LP +.B par.doc +.SH LIMITATIONS +.LP +If you like two spaces between sentences, too +bad. Differentiating between periods that end +sentences and periods used in abbreviations +is a complex problem beyond the scope of this +simple filter. Consider the following tough case: +.VS + I calc'd the approx. + Fermi level to 3 sig. digits. +.VE +.LP +Suppose that that should be reformatted to: +.VS + I calc'd the approx. Fermi + level to three sig. digits. +.VE +.LP +The program has to decide whether to put 1 or 2 spaces +between \*Qapprox.\*U and \*QFermi\*U. There is no obvious +hint from the original paragraph because there was a line +break between them, and \*QFermi\*U begins with a capital +letter. 
The program would apparently have to understand +English grammar to determine that the sentence does not +end there (and then it would only work for English text). +.LP +If you use tabs, you probably won't like the way +.B par +handles +(or doesn't handle) them. It treats them just like spaces. +I didn't bother trying to make sense of tabs because they +don't make sense to begin with. Not everyone's terminal +has the same tab settings, so text files containing +tabs are sometimes mangled. In fact, almost every text +file containing tabs gets mangled when something is +inserted at the beginning of each line (when quoting +e-mail or commenting out a section of a shell script, for +example), making them a pain to edit. In my opinion, the +world would be a nicer place if everyone stopped using +tabs (so I'm doing my part by not supporting them in +.BR par .) +.LP +There is currently no way for the length of the +output prefix to differ from the length of the +input prefix. Ditto for the suffix. I may consider +adding this capability in a future release, but +right now I'm not sure how I'd want it to work. +.SH BUGS +.LP +If I knew of any bugs, I wouldn't have released the package. +Of course, there may be bugs that I haven't yet discovered. +.LP +If you find any bugs, or if you have +any suggestions, please send e-mail to: +.RS +.LP +amc@wuecl.wustl.edu +.RE +.LP +or send paper mail to: +.RS +.LP +.nf +Adam M. Costello +Campus Box 1045 +Washington University +One Brookings Dr. +St. Louis, MO 63130 +USA +.fi +.RE +.LP +Note that both addresses could +change anytime after June 1994. +.LP +When reporting a bug, please include the exact input and +command line options used, and the version number of +.BR par , +so that I can reproduce it. diff --git a/hw2/doc/par.doc b/hw2/doc/par.doc new file mode 100644 index 0000000..a9f37a1 --- /dev/null +++ b/hw2/doc/par.doc @@ -0,0 +1,486 @@ + ********************* + * par.doc * + * for Par 3.20 * + * Copyright 1993 by * + * Adam M. 
Costello * + ********************* + + + Par 3.20 is a package containing: + + + This doc file. + + A man page based on this doc file. + + The ANSI C source for the filter "par". + + +Contents + + Contents + File List + Rights and Responsibilities + Release Notes + Compilation + Synopsis + Description + Options + Environment + Details + Diagnostics + Examples + Limitations + Bugs + + +File List + + The Par 3.20 package is always distributed with at least the following + files: + + buffer.h + buffer.c + failf.h + failf.c + par.1 + par.c + par.doc + protoMakefile + reformat.h + reformat.c + + Each file is a text file which identifies itself on the second line, and + identifies the version of Par to which it belongs on the third line, + so you can always tell which file is which even if the files have been + renamed. + + The file "par.1" is a man page for the filter par (not to be confused + with the package Par, which contains the source code for par). "par.1" + is based on this doc file, and conveys much (not all) of the same + information, but "par.doc" is the definitive documentation for both par + and Par. + + +Rights and Responsibilities + + The files listed in the Files List section above are each Copyright 1993 + by Adam M. Costello (henceforth "I"). + + I grant everyone permission to use these files in any way, subject to + the following two restrictions: + + 1) No one may distribute modifications of any of the files unless I am + the one who modified them. + + 2) No one may distribute any one of the files unless it is accompanied + by all of the other files. + + I cannot disallow the distribution of patches, but I would prefer that + users send me suggestions for changes so that I can incorporate them + into future versions of Par. See the Bugs section for my addresses. + + Though I have tried to make sure that Par is free of bugs, I make no + guarantees about its soundness. 
Therefore, I am not responsible for any + damage resulting from the use of these files. + + +Compilation + + To compile par, you need an ANSI C compiler. Copy protoMakefile to + Makefile and edit it, following the instructions in the comments. Then + use make (or the equivalent on your system) to compile par. + + If you have no make, compile each .c file into an object file and link + all the object files together by whatever method works on your system. + Then go look for a version of make that works on your system, since it + will come in handy in the future. + + If your compiler warns you about a pointer to a constant being converted + to a pointer to a non-constant in line 289 of reformat.c, ignore it. + Your compiler (like mine) is in error. What it thinks is a pointer to + a constant is actually a pointer to a pointer to a constant, which is + something quite different. The conversion is legal, and a true ANSI C + compiler wouldn't complain. + + If your compiler generates any other warnings that you think are + legitimate, please tell me about them (see the Bugs section). + + +Synopsis + + par [w<width>] [p<prefix>] [s<suffix>] [h[<hang>]] [l[<last>]] + [m[<min>]] [version] + + Things enclosed in [square brackets] are optional. Things enclosed in + <angle brackets> are variables. + + Any option may be immediately preceded by a minus sign (-). + + +Description + + par is a filter which copies its input to its output, reformatting each + paragraph. Paragraphs are delimited by blank lines. + + Each output paragraph is generated from the corresponding input lines as + follows: + + 1. An optional prefix and/or suffix is removed from each input line. + 2. The remainder is divided into words (delimited by white space). + 3. The words are joined into lines to make an eye-pleasing paragraph. + 4. The prefixes and suffixes are reattached. + + +Options + + All options except version are used to set values of variables. Values + set by command line options hold for all paragraphs in the input.
Unset + variables are given default values which are recomputed separately for + each paragraph. + + The approximate role of each variable is described here. See the + Details section for a much more complete and precise description. + + w<width> Sets the value of <width>, the maximum width of the output + paragraph, in characters, not including the trailing newline + characters. Must be an unsigned decimal integer greater than + <prefix> (see below). Defaults to 72. If <width> is 9 or + more, the w is not needed. + + p<prefix> Sets the value of <prefix>, the length of the prefix, in + characters. Must be an unsigned decimal integer. Defaults to + 0 if there are no more than <hang> + 1 lines in the input + paragraph (see the h option). Otherwise defaults to the + length of the longest common prefix of all lines in the + input paragraph except the first <hang> of them. The first + <prefix> characters of each output line are copied from the + first <prefix> characters of the corresponding input line. If + <prefix> is 8 or less, the p is not needed. + + s<suffix> Sets the value of <suffix>, the length of the suffix, in + characters. Must be an unsigned decimal integer. Defaults to + 0 if there is no more than 1 line in the input paragraph. + Otherwise defaults to the length of the longest common suffix + of all lines in the input paragraph, after this common suffix + has been stripped of all initial white characters save the + last. The last <suffix> characters of each output line are + copied from the last <suffix> characters of the corresponding + input line. + + h[<hang>] Sets the value of <hang>. Must be an unsigned decimal + integer. Defaults to 0. Mainly affects the default value of + <prefix> (see the p option). If the h option is given without + a number, the value 1 is assumed. + + l[<last>] Sets the value of <last>. Must be 0 or 1. Defaults to 0. If + <last> is 1, par tries to make the last line of the output + paragraph about the same length as the others. If the l + option is given without a number, the value 1 is assumed. + + m[<min>] Sets the value of <min>. Must be 0 or 1. Defaults to <last>.
+ If <min> is 1, par will try to make the paragraph narrower + without shortening the shortest line. If the m option is + given without a number, the value 1 is assumed. + + version Causes all other options to be ignored. No input is read. + "par 3.20" is printed on the output. Of course, this will + change in future releases of Par. + + If the value of any variable is set more than once, the last value is + used. For each paragraph, default values for any variables not set by + command line options are computed in the following order: + + <width> <hang> <last> <min> <prefix> <suffix> + + No integer appearing in an option may exceed 9999. + + It is an error if <width> <= <prefix> + <suffix>. + + +Environment + + If the environment variable PARINIT is set, par will read command line + options from it before it reads them from the command line. + + +Details + + The white characters are the space, formfeed, newline, carriage return, + tab, and vertical tab. + + Lines are terminated by newline characters, but the newlines are not + considered to be included in the lines. If the last character of the + input is a non-newline, then a newline will be inferred immediately + after it (but if the input is empty, no newline will be inferred; the + number of input lines will be 0). Thus, the input can always be viewed + as a sequence of lines. + + A line is called blank if and only if it contains no non-white + characters. A subsequence of non-blank lines is called maximal if and + only if there is no non-blank line immediately before or after it. + + The process described in the remainder of this section is applied + independently to each maximal subsequence of non-blank input lines. + (Each blank line of the input is transformed into an empty line on the + output). + + After the values of the variables are determined (see the Options + section), the first <prefix> characters and the last <suffix> characters + of each input line are removed and remembered. It is an error for any + line to contain fewer than <prefix> + <suffix> characters.
+ + The remaining text is treated as a sequence of characters, not lines. + The text is broken into words, which are delimited by white characters. + That is, a word is a maximal sub-sequence of non-white characters. If + there is at least one word, and the first word is preceded only by + spaces (strictly spaces, not other white characters), then the first + word is expanded to include those spaces. + + Let <L> = <width> - <prefix> - <suffix>. + + Every word which contains more than <L> characters is broken, after each + <L>th character, into multiple words. + + These words are reassembled, preserving their order, into lines. + Adjacent words within a line are separated by a single space. + + If all the words fit on a single line of no more than <L> characters, + then no line breaks are inserted. Otherwise, line breaks are placed in + such a way that the resulting paragraph satisfies certain properties. If + <min> is 1, those properties are: + + 1. No line contains more than <L> characters. + + 2. The shortest line is as long as possible, subject to 1. + + 3. The longest line is as short as possible, subject to properties 1 + and 2. Call its length <newL>. + + 4. The sum of the squares of the differences between <newL> and the + lengths of the lines is as small as possible, subject to properties + 1, 2, and 3. + + If <last> is 0, then the last line does not count as a line for the + purposes of properties 2 and 4 above. + + If <min> is 0, then property 3 is disregarded, and <newL> is set equal + to <L>. + + If the number of lines in the resultant paragraph is less than <hang>, + then empty lines are added at the end to bring the number of lines up to + <hang>. + + If <suffix> is not 0, then each line is padded at the end with spaces to + bring its length up to <newL>. + + To each line is prepended <prefix> characters. Let <n> be the number of + input lines. The characters which are prepended to the <i>th line are + chosen as follows: + + 1. If <i> <= <n>, then the characters are copied from the ones that + were removed from the beginning of the <i>th input line. + + 2.
If <i> > <n> > <hang>, then the characters are copied from the ones + that were removed from the beginning of the last input line. + + 3. If <i> > <n> and <n> <= <hang>, then the characters are all spaces. + + Then to each line is appended <suffix> characters. The characters which + are appended to the <i>th line are chosen as follows: + + 1. If <i> <= <n>, then the characters are copied from the ones that + were removed from the end of the <i>th input line. + + 2. If <i> > <n> > 0, then the characters are copied from the ones that + were removed from the end of the last input line. + + 3. If <n> = 0, then the characters are all spaces. + + Finally, the lines are printed to the output. + + +Diagnostics + + If there are no errors, par returns EXIT_SUCCESS (see <stdlib.h>). + + If there is an error, then an error message will be printed to the + output, and par will return EXIT_FAILURE. If the error is local to a + single paragraph, then the preceding paragraphs will have been output + before the error was detected. Line numbers in error messages are local + to the input paragraph in which the error occurred. + + Of course, trying to print an error message would be futile if an error + resulted from an output function, so par doesn't bother doing any error + checking on output functions. + + +Examples + + The superiority of par's dynamic programming algorithm over a greedy + algorithm (such as the one used by fmt) can be seen in the following + example: + + Original paragraph: + + We hold these truths to be self evident, + that all men are created equal, + that they are endowed by their creator + with certain unalienable rights, + that among these are + life, liberty, and the + pursuit of happiness. + + After a greedy algorithm with width = 61: + + We hold these truths to be self evident, that all men + are created equal, that they are endowed by their + creator with certain unalienable rights, that among + these are life, liberty, and the pursuit of + happiness.
+ + After "par 61": + + We hold these truths to be self evident, that all + men are created equal, that they are endowed by + their creator with certain unalienable rights, that + among these are life, liberty, and the pursuit of + happiness. + + The line breaks chosen by par are clearly more pleasing. + + I use par in conjunction with the !} command of the vi editor. Other + editors probably provide a similar feature for filtering text. + + The rest of this section is a series of before-and-after pictures + showing some typical uses of par. + + Before: + + Four score and seven years ago, our fathers brought + forth on this continent + a new nation. + + After "par 42": + + Four score and seven years ago, + our fathers brought forth on this + continent a new nation. + + Before: + + /* Four score and seven years */ + /* ago, our */ + /* fathers brought forth on this continent */ + /* a new nation. */ + + After "par 42": + + /* Four score and seven years */ + /* ago, our fathers brought */ + /* forth on this continent a */ + /* new nation. */ + + Or after "par l 42": + + /* Four score and seven */ + /* years ago, our fathers */ + /* brought forth on this */ + /* continent a new nation. */ + + Or after "par l 42 m0": + + /* Four score and seven */ + /* years ago, our fathers */ + /* brought forth on this */ + /* continent a new nation. */ + + Before: + + Gettysburg Address: Four score + and seven years ago, + our fathers brought forth on + this continent + a new nation. + + After "par h 56": + + Gettysburg Address: Four score and seven years + ago, our fathers brought + forth on this continent a + new nation. + + Before: + + 1 Four score and + 2 seven years ago, + 3 our fathers brought + 4 forth on this continent + 5 a new nation. + + After "par p11 44": + + 1 Four score and seven years ago, + 2 our fathers brought forth on this + 3 continent a new nation. + + +Limitations + + If you like two spaces between sentences, too bad. 
Differentiating + between periods that end sentences and periods used in abbreviations is + a complex problem beyond the scope of this simple filter. Consider the + following tough case: + + I calc'd the approx. + Fermi level to 3 sig. digits. + + Suppose that that should be reformatted to: + + I calc'd the approx. Fermi + level to three sig. digits. + + The program has to decide whether to put 1 or 2 spaces between "approx." + and "Fermi". There is no obvious hint from the original paragraph + because there was a line break between them, and "Fermi" begins with a + capital letter. The program would apparently have to understand English + grammar to determine that the sentence does not end there (and then it + would only work for English text). + + If you use tabs, you probably won't like the way par handles (or doesn't + handle) them. It treats them just like spaces. I didn't bother trying + to make sense of tabs because they don't make sense to begin with. Not + everyone's terminal has the same tab settings, so text files containing + tabs are sometimes mangled. In fact, almost every text file containing + tabs gets mangled when something is inserted at the beginning of each + line (when quoting e-mail or commenting out a section of a shell script, + for example), making them a pain to edit. In my opinion, the world would + be a nicer place if everyone stopped using tabs (so I'm doing my part by + not supporting them in par.) + + There is currently no way for the length of the output prefix to differ + from the length of the input prefix. Ditto for the suffix. I may + consider adding this capability in a future release, but right now I'm + not sure how I'd want it to work. + + +Bugs + + If I knew of any bugs, I wouldn't have released the package. Of course, + there may be bugs that I haven't yet discovered. + + If you find any bugs, or if you have any suggestions, please send e-mail + to: + + amc@wuecl.wustl.edu + + or send paper mail to: + + Adam M. 
Costello + Campus Box 1045 + Washington University + One Brookings Dr. + St. Louis, MO 63130 + USA + + Note that both addresses could change anytime after June 1994. + + When reporting a bug, please include the exact input and command line + options used, and the version number of par, so that I can reproduce it. diff --git a/hw2/doc/protoMakefile b/hw2/doc/protoMakefile new file mode 100644 index 0000000..202f85b --- /dev/null +++ b/hw2/doc/protoMakefile @@ -0,0 +1,46 @@ +# ********************* +# * protoMakefile * +# * for Par 3.20 * +# * Copyright 1993 by * +# * Adam M. Costello * +# ********************* + + +# Define CC so that the command +# $(CC) foo.c +# compiles the ANSI C source file "foo.c" into the object file "foo.o". +# +# Example (for Sun workstations): +# CC = acc -c -O -s -Xc + +CC = + +# Define LINK1 and LINK2 so that the command +# $(LINK1) foo1.o foo2.o foo3.o $(LINK2) foo +# links the object files "foo1.o", "foo2.o", "foo3.o" +# into the executable file "foo". +# +# Example (for Sun workstations): +# LINK1 = acc -s +# LINK2 = -o + +LINK1 = +LINK2 = + +# You shouldn't need to modify anything below this line. 
+
+OBJS = buffer.o errmsg.o par.o reformat.o
+
+.c.o:
+	$(CC) $<
+
+par: $(OBJS)
+	$(LINK1) $(OBJS) $(LINK2) par
+
+buffer.o: buffer.c buffer.h errmsg.h
+
+errmsg.o: errmsg.c errmsg.h
+
+par.o: par.c errmsg.h buffer.h reformat.h
+
+reformat.o: reformat.c reformat.h buffer.h errmsg.h
diff --git a/hw2/hw2.sublime-project b/hw2/hw2.sublime-project
new file mode 100644
index 0000000..5465569
--- /dev/null
+++ b/hw2/hw2.sublime-project
@@ -0,0 +1,50 @@
+{
+    "folders":
+    [
+        {
+            "path":".",
+            "name":"Project Base"
+        },
+        {
+            "path": "src",
+            "name": "C Source",
+            "follow_symlinks": false,
+            "file_include_patterns":["*.c"],
+        },
+        {
+            "path": "include",
+            "name": "C Headers",
+            "follow_symlinks": false,
+            "file_include_patterns":["*.h"],
+        },
+        {
+            "path": "tests",
+            "name": "Tests",
+        },
+        {
+            "path": "rsrc",
+            "name": "Resource Files",
+        }
+    ],
+    "settings":
+    {
+    },
+    "build_systems":
+    [
+        {
+            "name": "Release (full build)",
+            "working_dir":"$project_path",
+            "shell_cmd": "make clean all",
+        },
+        {
+            "name": "Debug (full build)",
+            "working_dir":"$project_path",
+            "shell_cmd": "make clean debug",
+        },
+        {
+            "name": "Test",
+            "working_dir":"$project_path",
+            "shell_cmd": "bin/par_tests",
+        }
+    ]
+}
diff --git a/hw2/include/buffer.h b/hw2/include/buffer.h
new file mode 100644
index 0000000..63bb9e8
--- /dev/null
+++ b/hw2/include/buffer.h
@@ -0,0 +1,74 @@
+/*********************/
+/* buffer.h          */
+/* for Par 3.20      */
+/* Copyright 1993 by */
+/* Adam M. Costello  */
+/*********************/
+
+/* This is ANSI C code. */
+
+
+/* Note: Those functions declared here which do not use errmsg    */
+/* always succeed, provided that they are passed valid arguments. */
+
+
+#include <stddef.h>
+
+
+struct buffer;
+
+
+struct buffer *newbuffer(size_t itemsize);
+
+  /* newbuffer(itemsize) returns a pointer to a new empty struct  */
+  /* buffer which holds items of size itemsize.  Any struct buffer */
+  /* *buf passed to any function declared in this header must     */
+  /* have been obtained from this function.  itemsize must not be */
+  /* 0.  newbuffer() uses errmsg, and returns NULL on failure.    */
+
+
+void freebuffer(struct buffer *buf);
+
+  /* freebuffer(buf) frees the memory associated       */
+  /* with *buf.  buf may not be used after this call. */
+
+
+void clearbuffer(struct buffer *buf);
+
+  /* clearbuffer(buf) removes all items from */
+  /* *buf, but does not free any memory.     */
+
+
+void additem(struct buffer *buf, const void *item);
+
+  /* additem(buf,item) copies *item to the end of *buf.  item must point */
+  /* to an object of the proper size for *buf.  additem() uses errmsg.   */
+
+
+int numitems(struct buffer *buf);
+
+  /* numitems(buf) returns the number of items in *buf. */
+
+
+void *copyitems(struct buffer *buf);
+
+  /* copyitems(buf) returns an array of objects of the proper size for    */
+  /* *buf, one for each item in *buf, or (void *) 0 if there are no items */
+  /* in buf.  The elements of the array are copied from the items in *buf, */
+  /* in order.  The array is allocated with malloc(), so it may be freed  */
+  /* with free().  copyitems() uses errmsg, and returns NULL on failure.  */
+
+
+void *nextitem(struct buffer *buf);
+
+  /* When buf was created by newbuffer, a pointer associated with buf  */
+  /* was initialized to point at the first slot in buf.  If there is an */
+  /* item in this slot, nextitem(buf) advances the pointer to the next */
+  /* slot and returns the old value.  If there is no item in the slot, */
+  /* nextitem(buf) leaves the pointer where it is and returns NULL.    */
+
+
+void rewindbuffer(struct buffer *buf);
+
+  /* rewindbuffer(buf) resets the pointer used by   */
+  /* nextitem() to point at the first slot in buf. */
diff --git a/hw2/include/debug.h b/hw2/include/debug.h
new file mode 100644
index 0000000..e8fc8b6
--- /dev/null
+++ b/hw2/include/debug.h
@@ -0,0 +1,95 @@
+#ifndef DEBUG_H
+#define DEBUG_H
+
+/*
+ * Leveled diagnostic macros: debug, info, warn, success and error.  Each
+ * prints a tagged message with file, function and line to stderr when the
+ * matching level macro is defined, and expands to nothing otherwise.
+ * VERBOSE enables every level; COLOR enables ANSI colors for the tags.
+ */
+
+#include <stdio.h>
+
+#define NL "\n"
+
+#ifdef COLOR
+#define KNRM "\033[0m"
+#define KRED "\033[1;31m"
+#define KGRN "\033[1;32m"
+#define KYEL "\033[1;33m"
+#define KBLU "\033[1;34m"
+#define KMAG "\033[1;35m"
+#define KCYN "\033[1;36m"
+#define KWHT "\033[1;37m"
+#define KBWN "\033[0;33m"
+#else
+#define KNRM ""
+#define KRED ""
+#define KGRN ""
+#define KYEL ""
+#define KBLU ""
+#define KMAG ""
+#define KCYN ""
+#define KWHT ""
+#define KBWN ""
+#endif
+
+#ifdef VERBOSE
+#define DEBUG
+#define INFO
+#define WARN
+#define ERROR
+#define SUCCESS
+#endif
+
+#ifdef DEBUG
+#define debug(S, ...) \
+    do { \
+        fprintf(stderr, KMAG "DEBUG: %s:%s:%d " KNRM S NL, __FILE__, \
+                __extension__ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+    } while (0)
+#else
+#define debug(S, ...)
+#endif
+
+#ifdef INFO
+#define info(S, ...) \
+    do { \
+        fprintf(stderr, KBLU "INFO: %s:%s:%d " KNRM S NL, __FILE__, \
+                __extension__ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+    } while (0)
+#else
+#define info(S, ...)
+#endif
+
+#ifdef WARN
+#define warn(S, ...) \
+    do { \
+        fprintf(stderr, KYEL "WARN: %s:%s:%d " KNRM S NL, __FILE__, \
+                __extension__ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+    } while (0)
+#else
+#define warn(S, ...)
+#endif
+
+#ifdef SUCCESS
+#define success(S, ...) \
+    do { \
+        fprintf(stderr, KGRN "SUCCESS: %s:%s:%d " KNRM S NL, __FILE__, \
+                __extension__ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+    } while (0)
+#else
+#define success(S, ...)
+#endif
+
+#ifdef ERROR
+#define error(S, ...) \
+    do { \
+        fprintf(stderr, KRED "ERROR: %s:%s:%d " KNRM S NL, __FILE__, \
+                __extension__ __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+    } while (0)
+#else
+#define error(S, ...)
+#endif
+
+#endif /* DEBUG_H */
diff --git a/hw2/include/errmsg.h b/hw2/include/errmsg.h
new file mode 100644
index 0000000..d68dce1
--- /dev/null
+++ b/hw2/include/errmsg.h
@@ -0,0 +1,19 @@
+/*********************/
+/* errmsg.h          */
+/* for Par 3.20      */
+/* Copyright 1993 by */
+/* Adam M. Costello  */
+/*********************/
+
+/* This is ANSI C code. */
+
+
+extern char errmsg[163];
+
+/* Any function which uses errmsg must, before returning, */
+/* either set errmsg[0] to '\0' (indicating success), or  */
+/* write an error message string into errmsg, (indicating */
+/* failure), being careful not to overrun the space.      */
+
+
+extern const char * const outofmem;  /* "Out of memory.\n" */
diff --git a/hw2/include/reformat.h b/hw2/include/reformat.h
new file mode 100644
index 0000000..2bc1cc7
--- /dev/null
+++ b/hw2/include/reformat.h
@@ -0,0 +1,20 @@
+/*********************/
+/* reformat.h        */
+/* for Par 3.20      */
+/* Copyright 1993 by */
+/* Adam M. Costello  */
+/*********************/
+
+/* This is ANSI C code. */
+
+
+char **reformat(const char * const *inlines, int width,
+                int prefix, int suffix, int hang, int last, int min);
+
+  /* inlines is a NULL-terminated array of pointers to input lines.  The    */
+  /* other parameters are the variables of the same name as described in    */
+  /* "par.doc".  reformat(inlines,width,prefix,suffix,hang,last,min) returns */
+  /* a NULL-terminated array of pointers to output lines containing the     */
+  /* reformatted paragraph, according to the specification in "par.doc".    */
+  /* None of the integer parameters may be negative.  reformat() uses errmsg */
+  /* (see "errmsg.h"), and returns NULL on failure.                         */
diff --git a/hw2/rsrc/banner.txt b/hw2/rsrc/banner.txt
new file mode 100644
index 0000000..c4d1308
--- /dev/null
+++ b/hw2/rsrc/banner.txt
@@ -0,0 +1,13 @@
+  /* We can't simply return c - '0' because this is ANSI */
+  /* C code, so it has to work for any character set, not */
+  /* just ones which put the digits together in order.
*/ + +/* Puts the decimal value of the string s into *pn, returning */ +/* 1 on success. If s is empty, or contains non-digits, */ +/* or represents an integer greater than 9999, then *pn */ +/* is not changed and 0 is returned. Does not use errmsg. */ + +/* Reads lines from stdin until EOF, or until a blank line is encountered, */ +/* in which case the newline is pushed back onto the input stream. Returns */ +/* a NULL-terminated array of pointers to individual lines, stripped of */ +/* their newline characters. Uses errmsg, and returns NULL on failure. */ diff --git a/hw2/rsrc/gettysburg.txt b/hw2/rsrc/gettysburg.txt new file mode 100644 index 0000000..2e5fd96 --- /dev/null +++ b/hw2/rsrc/gettysburg.txt @@ -0,0 +1,5 @@ +Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal. + +Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this. + +But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. 
It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the people, for the people, shall not perish from the earth.
diff --git a/hw2/rsrc/loremipsum.txt b/hw2/rsrc/loremipsum.txt
new file mode 100644
index 0000000..c2eaaee
--- /dev/null
+++ b/hw2/rsrc/loremipsum.txt
@@ -0,0 +1,34 @@
+Lorem ipsum
+dolor sit amet,
+consectetur
+adipiscing
+elit, sed do
+eiusmod tempor
+incididunt ut
+labore et
+dolore magna
+aliqua. Ut enim
+ad minim
+veniam, quis
+nostrud
+exercitation
+ullamco laboris
+nisi ut aliquip
+ex ea commodo
+consequat. Duis
+aute irure
+dolor in
+reprehenderit
+in voluptate
+velit esse
+cillum dolore
+eu fugiat nulla
+pariatur. Excepteur
+sint occaecat
+cupidatat non
+proident, sunt
+in culpa qui
+officia
+deserunt mollit
+anim id est
+laborum.
diff --git a/hw2/src/buffer.c b/hw2/src/buffer.c
new file mode 100644
index 0000000..027e510
--- /dev/null
+++ b/hw2/src/buffer.c
@@ -0,0 +1,217 @@
+/*********************/
+/* buffer.c          */
+/* for Par 3.20      */
+/* Copyright 1993 by */
+/* Adam M. Costello  */
+/*********************/
+
+/* This is ANSI C code. */
+
+
+/* additem(), copyitems(), and nextitem() rely on the fact that */
+/* sizeof (char) is 1.  See section A7.4.8 of The C Programming */
+/* Language, Second Edition, by Kernighan and Ritchie.          */
+
+
+#include "buffer.h"  /* Makes sure we're consistent with the  */
+                     /* prototypes.  Also includes <stddef.h> */
+#include "errmsg.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#undef NULL
+#define NULL ((void *) 0)
+
+
+struct buffer {
+  struct block *firstblk, /* The first block.                    */
+               *current,  /* The last non-empty block, or        */
+                          /* firstblk if all are empty.          */
+               *nextblk;  /* The block containing the item to be */
+                          /* returned by nextitem(), or NULL.    */
+  int nextindex;          /* Index of item in nextblk->items.    */
+  size_t itemsize;        /* The size of an item.                */
+};
+
+struct block {
+  struct block *next;  /* The next block, or NULL if none.              */
+  void *items;         /* Storage for the items in this block.          */
+  int maxhere,         /* Number of items that fit in *items.           */
+      numprevious,     /* Total of numhere for all previous blocks.     */
+      numhere;         /* The first numhere slots in *items are filled. */
+};
+
+
+struct buffer *newbuffer(size_t itemsize)
+{
+  struct buffer *buf;
+  struct block *blk;
+  void *items;
+  int maxhere;
+
+  /* Size the first block at about 124 bytes, but at least 4 items. */
+  maxhere = 124 / itemsize;
+  if (maxhere < 4) maxhere = 4;
+
+  buf = (struct buffer *) malloc(sizeof (struct buffer));
+  blk = (struct block *) malloc(sizeof (struct block));
+  items = malloc(maxhere * itemsize);
+  if (!buf || !blk || !items) {
+    strcpy(errmsg,outofmem);
+    goto nberror;
+  }
+
+  buf->itemsize = itemsize;
+  buf->firstblk = buf->current = buf->nextblk = blk;
+  buf->nextindex = 0;
+  /* malloc() does not zero memory: without this, freebuffer(),  */
+  /* clearbuffer() and additem() would follow a garbage pointer. */
+  blk->next = NULL;
+  blk->numprevious = blk->numhere = 0;
+  blk->maxhere = maxhere;
+  blk->items = items;
+
+  *errmsg = '\0';
+  return buf;
+
+  nberror:
+  if (buf) free(buf);
+  if (blk) free(blk);
+  if (items) free(items);
+  return NULL;
+}
+
+
+void freebuffer(struct buffer *buf)
+{
+  struct block *blk, *tmp;
+
+  blk = buf->firstblk;
+  while (blk) {
+    tmp = blk;
+    blk = blk->next;
+    if (tmp->items) free(tmp->items);
+    free(tmp);
+  }
+
+  free(buf);
+}
+
+
+void clearbuffer(struct buffer *buf)
+{
+  struct block *blk;
+
+  for (blk = buf->firstblk;  blk;  blk = blk->next)
+    blk->numhere = 0;
+
+  buf->current = buf->firstblk;
+}
+
+
+void additem(struct buffer *buf, const void *item)
+{
+  struct block *blk, *new;
+  void *items;
+  int maxhere;
+  size_t itemsize = buf->itemsize;
+
+  blk = buf->current;
+
+  if (blk->numhere == blk->maxhere) {
+    new = blk->next;
+    if (!new) {
+      /* Each new block holds twice as many items as its predecessor. */
+      maxhere = 2 * blk->maxhere;
+      new = (struct block * ) malloc(sizeof (struct block));
+      items = malloc(maxhere * itemsize);
+      if (!new || !items) {
+        strcpy(errmsg,outofmem);
+        goto aierror;
+      }
+      blk->next = new;
+      new->next = NULL;
+      new->maxhere = maxhere;
+      new->numprevious = blk->numprevious + blk->numhere;
+      new->numhere = 0;
+      new->items = items;
+    }
+    blk = buf->current = new;
+  }
+
+  memcpy( ((char *) blk->items) + (blk->numhere * itemsize), item, itemsize );
+
+  ++blk->numhere;
+
+  *errmsg = '\0';
+  return;
+
+  aierror:
+  if (new) free(new);
+  if (items) free(items);
+}
+
+
+int numitems(struct buffer *buf)
+{
+  struct block *blk = buf->current;
+  return blk->numprevious + blk->numhere;
+}
+
+
+void *copyitems(struct buffer *buf)
+{
+  int n;
+  void *r;
+  struct block *blk, *b;
+  size_t itemsize = buf->itemsize;
+
+  b = buf->current;
+  n = b->numprevious + b->numhere;
+  if (!n) {
+    *errmsg = '\0';  /* An empty buffer is not an error. */
+    return NULL;
+  }
+
+  r = malloc(n * itemsize);
+  if (!r) {
+    strcpy(errmsg,outofmem);
+    return NULL;
+  }
+
+  b = b->next;
+
+  for (blk = buf->firstblk;  blk != b;  blk = blk->next)
+    memcpy( ((char *) r) + (blk->numprevious * itemsize),
+            blk->items, blk->numhere * itemsize);
+
+  *errmsg = '\0';
+  return r;
+}
+
+
+void rewindbuffer(struct buffer *buf)
+{
+  buf->nextblk = buf->firstblk;
+  buf->nextindex = 0;
+}
+
+
+void *nextitem(struct buffer *buf)
+{
+  void *r;
+
+  if (!buf->nextblk || buf->nextindex >= buf->nextblk->numhere)
+    return NULL;
+
+  r = ((char *) buf->nextblk->items) + (buf->nextindex * buf->itemsize);
+
+  /* Only a completely full block can be followed by more items. */
+  if (++buf->nextindex >= buf->nextblk->maxhere) {
+    buf->nextblk = buf->nextblk->next;
+    buf->nextindex = 0;
+  }
+
+  return r;
+}
diff --git a/hw2/src/errmsg.c b/hw2/src/errmsg.c
new file mode 100644
index 0000000..3d9b9e9
--- /dev/null
+++ b/hw2/src/errmsg.c
@@ -0,0 +1,16 @@
+/*********************/
+/* errmsg.c          */
+/* for Par 3.20      */
+/* Copyright 1993 by */
+/* Adam M. Costello  */
+/*********************/
+
+/* This is ANSI C code. */
+
+
+#include "errmsg.h"  /* Makes sure we're consistent with the declarations.
*/
+
+
+char errmsg[163];
+
+const char * const outofmem = "Out of memory.\n";
diff --git a/hw2/src/main.c b/hw2/src/main.c
new file mode 100644
index 0000000..b64bf90
--- /dev/null
+++ b/hw2/src/main.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+
+extern int original_main(int argc, const char *const *argv);
+
+int main(int argc, const char *const *argv) {
+    return original_main(argc, argv);
+}
diff --git a/hw2/src/par.c b/hw2/src/par.c
new file mode 100644
index 0000000..d2e0043
--- /dev/null
+++ b/hw2/src/par.c
@@ -0,0 +1,361 @@
+/*********************/
+/* par.c             */
+/* for Par 3.20      */
+/* Copyright 1993 by */
+/* Adam M. Costello  */
+/*********************/
+
+/* This is ANSI C code. */
+
+
+#include "errmsg.h"
+#include "buffer.h"    /* Also includes <stddef.h>. */
+#include "reformat.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#undef NULL
+#define NULL ((void *) 0)
+
+
+const char * const progname = "par";
+const char * const version = "3.20";
+
+
+static int digtoint(char c)
+
+/* Returns the value represented by the digit c,    */
+/* or -1 if c is not a digit.  Does not use errmsg. */
+{
+  return c == '0' ? 0 :
+         c == '1' ? 1 :
+         c == '2' ? 2 :
+         c == '3' ? 3 :
+         c == '4' ? 4 :
+         c == '5' ? 5 :
+         c == '6' ? 6 :
+         c == '7' ? 7 :
+         c == '8' ? 8 :
+         c == '9' ? 9 :
+         -1;
+
+  /* We can't simply return c - '0' because this is ANSI */
+  /* C code, so it has to work for any character set, not */
+  /* just ones which put the digits together in order.    */
+}
+
+
+static int strtoudec(const char *s, int *pn)
+
+/* Puts the decimal value of the string s into *pn, returning */
+/* 1 on success.  If s is empty, or contains non-digits,      */
+/* or represents an integer greater than 9999, then *pn       */
+/* is not changed and 0 is returned.  Does not use errmsg.    */
+{
+  int n = 0;
+
+  if (!*s) return 0;
+
+  do {
+    /* <ctype.h> functions require a value representable as unsigned char. */
+    if (n >= 1000 || !isdigit((unsigned char) *s)) return 0;
+    n = 10 * n + digtoint(*s);
+  } while (*++s);
+
+  *pn = n;
+
+  return 1;
+}
+
+
+static void parseopt(
+  const char *opt, int *pwidth, int *pprefix,
+  int *psuffix, int *phang, int *plast, int *pmin
+)
+/* Parses the single option in opt, setting *pwidth, *pprefix,     */
+/* *psuffix, *phang, *plast, or *pmin as appropriate.  Uses errmsg. */
+/* The "version" option reports the program name and version       */
+/* through errmsg, so the caller treats it like a failure.         */
+{
+  const char *saveopt = opt;
+  char oc;
+  int n, r;
+
+  if (*opt == '-') ++opt;
+
+  if (!strcmp(opt, "version")) {
+    sprintf(errmsg, "%s %s\n", progname, version);
+    return;
+  }
+
+  oc = *opt;
+
+  if (isdigit((unsigned char) oc)) {
+    if (!strtoudec(opt, &n)) goto badopt;
+    if (n <= 8) *pprefix = n;
+    else *pwidth = n;
+  }
+  else {
+    if (!oc) goto badopt;
+    n = 1;
+    r = strtoudec(opt + 1, &n);
+    if (opt[1] && !r) goto badopt;
+
+    if (oc == 'w' || oc == 'p' || oc == 's') {
+      if (!r) goto badopt;
+      if (oc == 'w') *pwidth = n;
+      else if (oc == 'p') *pprefix = n;
+      else *psuffix = n;
+    }
+    else if (oc == 'h') *phang = n;
+    else if (n <= 1) {
+      if (oc == 'l') *plast = n;
+      else if (oc == 'm') *pmin = n;
+      else goto badopt;  /* Unknown option letter. */
+    }
+    else goto badopt;
+  }
+
+  *errmsg = '\0';
+  return;
+
+badopt:
+  sprintf(errmsg, "Bad option: %.149s\n", saveopt);
+}
+
+
+static char **readlines(void)
+
+/* Reads lines from stdin until EOF, or until a blank line is encountered, */
+/* in which case the newline is pushed back onto the input stream. Returns */
+/* a NULL-terminated array of pointers to individual lines, stripped of    */
+/* their newline characters.  Uses errmsg, and returns NULL on failure.    */
+{
+  struct buffer *cbuf = NULL, *pbuf = NULL;
+  int c, blank;
+  char ch, *ln, *nullline = NULL, nullchar = '\0', **lines = NULL;
+
+  cbuf = newbuffer(sizeof (char));
+  if (*errmsg) goto rlcleanup;
+  pbuf = newbuffer(sizeof (char *));
+  if (*errmsg) goto rlcleanup;
+
+  for (blank = 1;  ; ) {
+    c = getchar();
+    if (c == EOF) break;
+    if (c == '\n') {
+      if (blank) {
+        ungetc(c,stdin);
+        break;
+      }
+      additem(cbuf, &nullchar);
+      if (*errmsg) goto rlcleanup;
+      ln = copyitems(cbuf);
+      if (*errmsg) goto rlcleanup;
+      additem(pbuf, &ln);
+      if (*errmsg) { free(ln); goto rlcleanup; }
+      clearbuffer(cbuf);
+      blank = 1;
+    }
+    else {
+      if (!isspace(c)) blank = 0;
+      ch = c;
+      additem(cbuf, &ch);
+      if (*errmsg) goto rlcleanup;
+    }
+  }
+
+  if (!blank) {
+    additem(cbuf, &nullchar);
+    if (*errmsg) goto rlcleanup;
+    ln = copyitems(cbuf);
+    if (*errmsg) goto rlcleanup;
+    additem(pbuf, &ln);
+    if (*errmsg) { free(ln); goto rlcleanup; }
+  }
+
+  additem(pbuf, &nullline);
+  if (*errmsg) goto rlcleanup;
+  lines = copyitems(pbuf);
+
+rlcleanup:
+
+  if (cbuf) freebuffer(cbuf);
+  if (pbuf) {
+    /* On failure the line strings are still owned by pbuf: free them. */
+    if (!lines)
+      for (;;) {
+        lines = nextitem(pbuf);
+        if (!lines) break;
+        free(*lines);
+      }
+    /* The buffer itself must be released on success as well. */
+    freebuffer(pbuf);
+  }
+
+  return lines;
+}
+
+
+static void setdefaults(
+  const char * const *inlines, int *pwidth, int *pprefix,
+  int *psuffix, int *phang, int *plast, int *pmin
+)
+/* If any of *pwidth, *pprefix, *psuffix, *phang, *plast, *pmin are     */
+/* less than 0, sets them to default values based on inlines, according */
+/* to "par.doc".  Does not use errmsg because it always succeeds.       */
+{
+  int numlines;
+  const char *start, *end, * const *line, *p1, *p2;
+
+  if (*pwidth < 0) *pwidth = 72;
+  if (*phang < 0) *phang = 0;
+  if (*plast < 0) *plast = 0;
+  if (*pmin < 0) *pmin = *plast;
+
+  for (line = inlines;  *line;  ++line);
+  numlines = line - inlines;
+
+  /* Default prefix: the longest common prefix of all lines after the */
+  /* first *phang lines.                                              */
+  if (*pprefix < 0) {
+    if (numlines <= *phang + 1)
+      *pprefix = 0;
+    else {
+      start = inlines[*phang];
+      for (end = start;  *end;  ++end);
+      for (line = inlines + *phang + 1;  *line;  ++line) {
+        for (p1 = start, p2 = *line;  p1 < end && *p1 == *p2;  ++p1, ++p2);
+        end = p1;
+      }
+      *pprefix = end - start;
+    }
+  }
+
+  /* Default suffix: the longest common suffix of all lines, trimmed */
+  /* so that it starts with at most one white-space character.       */
+  if (*psuffix < 0) {
+    if (numlines <= 1)
+      *psuffix = 0;
+    else {
+      start = *inlines;
+      for (end = start;  *end;  ++end);
+      for (line = inlines + 1;  *line;  ++line) {
+        for (p2 = *line;  *p2;  ++p2);  /* p2 = end of this line */
+        for (p1 = end;
+             p1 > start && p2 > *line && p1[-1] == p2[-1];
+             --p1, --p2);
+        start = p1;
+      }
+      while (end - start >= 2 && isspace((unsigned char) *start)
+                              && isspace((unsigned char) start[1])) ++start;
+      *psuffix = end - start;
+    }
+  }
+}
+
+
+static void freelines(char **lines)
+/* Frees the strings pointed to in the NULL-terminated array lines, then */
+/* frees the array.  Does not use errmsg because it always succeeds.     */
+{
+  char **line;
+
+  for (line = lines;  *line;  ++line)
+    free(*line);
+
+  free(lines);
+}
+
+
+int original_main(int argc, const char * const *argv)
+/* The real entry point of par; main() in main.c simply calls it.   */
+/* Applies the options in $PARINIT and then those in argv, and      */
+/* filters stdin to stdout one paragraph at a time.  Never returns: */
+/* exits with EXIT_FAILURE if errmsg is set, else EXIT_SUCCESS.     */
+{
+  int width, widthbak = -1, prefix, prefixbak = -1, suffix, suffixbak = -1,
+      hang, hangbak = -1, last, lastbak = -1, min, minbak = -1, c;
+  char *parinit, *picopy = NULL, *opt, **inlines = NULL, **outlines = NULL,
+       **line;
+  const char * const whitechars = " \f\n\r\t\v";
+
+  parinit = getenv("PARINIT");
+  if (parinit) {
+    picopy = malloc((strlen(parinit) + 1) * sizeof (char));
+    if (!picopy) {
+      strcpy(errmsg,outofmem);
+      goto parcleanup;
+    }
+    strcpy(picopy,parinit);
+    opt = strtok(picopy,whitechars);
+    while (opt) {
+      parseopt(opt, &widthbak, &prefixbak,
+               &suffixbak, &hangbak, &lastbak, &minbak);
+      if (*errmsg) goto parcleanup;
+      opt = strtok(NULL,whitechars);
+    }
+    free(picopy);
+    picopy = NULL;
+  }
+
+  while (*++argv) {
+    parseopt(*argv, &widthbak, &prefixbak,
+             &suffixbak, &hangbak, &lastbak, &minbak);
+    if (*errmsg) goto parcleanup;
+  }
+
+  for (;;) {
+    for (;;) {
+      c = getchar();
+      if (c != '\n') break;
+      putchar(c);
+    }
+    /* Without this test the loop would spin forever at end of input, */
+    /* since readlines() keeps returning an empty paragraph.          */
+    if (c == EOF) break;
+    ungetc(c,stdin);
+
+    inlines = readlines();
+    if (*errmsg) goto parcleanup;
+    if (!*inlines) {
+      free(inlines);
+      inlines = NULL;
+      continue;
+    }
+
+    width = widthbak;  prefix = prefixbak;  suffix = suffixbak;
+    hang = hangbak;  last = lastbak;  min = minbak;
+    setdefaults((const char * const *) inlines,
+                &width, &prefix, &suffix, &hang, &last, &min);
+
+    outlines = reformat((const char * const *) inlines,
+                        width, prefix, suffix, hang, last, min);
+    if (*errmsg) goto parcleanup;
+
+    freelines(inlines);
+    inlines = NULL;
+
+    for (line = outlines;  *line;  ++line)
+      puts(*line);
+
+    freelines(outlines);
+    outlines = NULL;
+  }
+
+parcleanup:
+
+  if (picopy) free(picopy);
+  if (inlines) freelines(inlines);
+  if (outlines) freelines(outlines);
+
+  if (*errmsg) {
+    fprintf(stderr, "%.163s", errmsg);
+    exit(EXIT_FAILURE);
+  }
+
+  exit(EXIT_SUCCESS);
+}
diff --git a/hw2/src/reformat.c b/hw2/src/reformat.c
new
file mode 100644
index 0000000..eaf479c
--- /dev/null
+++ b/hw2/src/reformat.c
@@ -0,0 +1,319 @@
+/*********************/
+/* reformat.c        */
+/* for Par 3.20      */
+/* Copyright 1993 by */
+/* Adam M. Costello  */
+/*********************/
+
+/* This is ANSI C code. */
+
+
+#include "reformat.h"  /* Makes sure we're consistent with the prototype. */
+#include "buffer.h"    /* Also includes <stddef.h>.                       */
+#include "errmsg.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#undef NULL
+#define NULL ((void *) 0)
+
+
+struct word {
+  const char *chrs;       /* Pointer to the characters in the word */
+                          /* (NOT terminated by '\0').             */
+  struct word *prev,      /* Pointer to previous word.             */
+              *next,      /* Pointer to next word.                 */
+                          /* Supposing this word were the first... */
+              *nextline;  /* Pointer to first word in next line.   */
+  int linelen,            /* Length of the first line.             */
+      score,              /* Value of objective function.          */
+      length;             /* Length of this word.                  */
+};
+
+
+static int choosebreaks(
+  struct word *head, struct word *tail, int L, int last, int min
+)
+/* Chooses linebreaks in a list of struct words according to   */
+/* the policy in "par.doc" (L is <L>, last is <last>, and      */
+/* min is <min>).  head must point to a dummy word, and tail   */
+/* must point to the last word.  Returns <newL>.  Uses errmsg. */
+{
+  struct word *w1, *w2;
+  int linelen, shortest, newL, score, minlen, diff, sumsqdiff;
+  const char * const impossibility =
+    "Impossibility #%d has occurred.  Please report it.\n";
+
+/* Determine maximum length of the shortest line: */
+
+  /* Initialize words that could fit on the last line: */
+
+  for (w1 = tail, linelen = w1->length;
+       w1 != head && linelen <= L;
+       w1 = w1->prev, linelen += 1 + w1->length) {
+    w1->nextline = NULL;
+    w1->score = last ? linelen : L;
+  }
+
+  /* Then choose line breaks: */
+
+  for ( ;  w1 != head;  w1 = w1->prev) {
+    w1->score = -1;
+    for (linelen = w1->length,  w2 = w1->next;
+         linelen <= L;
+         linelen += 1 + w2->length,  w2 = w2->next) {
+      shortest = linelen <= w2->score ? linelen : w2->score;
+      if (shortest > w1->score) {
+        w1->nextline = w2;
+        w1->score = shortest;
+      }
+    }
+    if (w1->score < 0) {
+      sprintf(errmsg,impossibility,1);
+      return 0;
+    }
+  }
+
+  shortest = head->next ? head->next->score : L;
+
+  if (!min)
+    newL = L;
+  else {
+
+    /* Determine the minimum possible longest line: */
+
+    for (w1 = tail;  w1 != head;  w1 = w1->prev) {
+      w1->score = L + 1;
+      for (linelen = w1->length,  w2 = w1->next;
+           linelen < w1->score;
+           linelen += 1 + w2->length,  w2 = w2->next) {
+        if (w2) {
+          score = w2->score;
+          minlen = shortest;
+        }
+        else {
+          score = 0;
+          minlen = last ? shortest : 0;
+        }
+        if (linelen >= minlen) {
+          newL = linelen >= score ? linelen : score;
+          if (newL < w1->score) {
+            w1->nextline = w2;
+            w1->score = newL;
+          }
+        }
+        if (!w2) break;
+      }
+    }
+
+    newL = head->next ? head->next->score : 0;
+    if (newL > L) {
+      sprintf(errmsg,impossibility,2);
+      return 0;
+    }
+  }
+
+/* Minimize the sum of the squares of the differences */
+/* between newL and the lengths of the lines:         */
+
+  for (w1 = tail;  w1 != head;  w1 = w1->prev) {
+    w1->score = -1;
+    for (linelen = w1->length,  w2 = w1->next;
+         linelen <= newL;
+         linelen += 1 + w2->length,  w2 = w2->next) {
+      diff = newL - linelen;
+      minlen = shortest;
+      if (w2)
+        score = w2->score;
+      else {
+        score = 0;
+        if (!last) diff = minlen = 0;
+      }
+      if (linelen >= minlen  &&  score >= 0) {
+        sumsqdiff = score + diff * diff;
+        if (w1->score < 0  ||  sumsqdiff <= w1->score) {
+          w1->nextline = w2;
+          w1->score = sumsqdiff;
+          w1->linelen = linelen;
+        }
+      }
+      if (!w2) break;
+    }
+  }
+
+  if (head->next && head->next->score < 0) {
+    sprintf(errmsg,impossibility,3);
+    return 0;
+  }
+
+  *errmsg = '\0';
+  return newL;
+}
+
+
+char **reformat(const char * const *inlines, int width,
+                int prefix, int suffix, int hang, int last, int min)
+{
+  int numin, numout, affix, L, linelen, newL;
+  const char * const *line, **suffixes = NULL, **suf, *end, *p1, *p2;
+  char *q1, *q2, **outlines = NULL;  /* NULL until the result is complete */
+  struct word dummy, *head, *tail, *w1, *w2;
+  struct buffer *pbuf = NULL;
+
+/* Initialization: */
+
+  *errmsg = '\0';
+  dummy.next = dummy.prev = NULL;
+  /* choosebreaks() reads head->length, so define every field of the dummy: */
+  dummy.chrs = NULL;
+  dummy.nextline = NULL;
+  dummy.linelen = dummy.score = dummy.length = 0;
+  head = tail = &dummy;
+
+/* Count the input lines: */
+
+  for (line = inlines;  *line;  ++line);
+  numin = line - inlines;
+
+/* Allocate space for pointers to the suffixes: */
+
+  if (numin) {
+    suffixes = malloc(numin * sizeof (const char *));
+    if (!suffixes) {
+      strcpy(errmsg,outofmem);
+      goto rfcleanup;
+    }
+  }
+
+/* Set the pointers to the suffixes, and create the words: */
+
+  affix = prefix + suffix;
+  L = width - prefix - suffix;
+
+  for (line = inlines, suf = suffixes;  *line;  ++line, ++suf) {
+    for (end = *line;  *end;  ++end);
+    if (end - *line < affix) {
+      sprintf(errmsg,
+              "Line %d shorter than <prefix> + <suffix> = %d + %d = %d\n",
+              (int) (line - inlines + 1), prefix, suffix, affix);
+      goto rfcleanup;
+    }
+    end -= suffix;
+    *suf = end;
+    p1 = *line + prefix;
+    for (;;) {
+      while (p1 < end && isspace((unsigned char) *p1)) ++p1;
+      if (p1 == end) break;
+      p2 = p1;
+      while (p2 < end && !isspace((unsigned char) *p2)) ++p2;
+      /* A word needs at least one column: with L <= 0 the truncation */
+      /* below would create empty or negative-length words forever.   */
+      if (L <= 0) {
+        sprintf(errmsg, "<width> (%d) <= <prefix> (%d) + <suffix> (%d)\n",
+                width, prefix, suffix);
+        goto rfcleanup;
+      }
+      if (p2 - p1 > L) p2 = p1 + L;
+      w1 = malloc(sizeof (struct word));
+      if (!w1) {
+        strcpy(errmsg,outofmem);
+        goto rfcleanup;
+      }
+      w1->next = NULL;
+      w1->prev = tail;
+      tail = tail->next = w1;
+      w1->chrs = p1;
+      w1->length = p2 - p1;
+      p1 = p2;
+    }
+  }
+
+/* Expand first word if preceded only by spaces: */
+
+  w1 = head->next;
+  if (w1) {
+    p1 = *inlines + prefix;
+    for (p2 = p1;  isspace((unsigned char) *p2);  ++p2);
+    if (w1->chrs == p2) {
+      w1->chrs = p1;
+      w1->length += p2 - p1;
+    }
+  }
+
+/* Choose line breaks according to policy in "par.doc": */
+
+  newL = choosebreaks(head,tail,L,last,min);
+  if (*errmsg) goto rfcleanup;
+
+/* Construct the lines: */
+
+  pbuf = newbuffer(sizeof (char *));
+  if (*errmsg) goto rfcleanup;
+
+  numout = 0;
+  w1 = head->next;
+  while (numout < hang || w1) {
+    linelen = suffix ? newL + affix :
+                  w1 ? w1->linelen + prefix :
+                       prefix;
+    q1 = malloc((linelen + 1) * sizeof (char));
+    if (!q1) {
+      strcpy(errmsg,outofmem);
+      goto rfcleanup;
+    }
+    additem(pbuf, &q1);
+    if (*errmsg) { free(q1); goto rfcleanup; }
+    ++numout;
+    q2 = q1 + prefix;
+    if      (numout <= numin) memcpy(q1, inlines[numout - 1], prefix);
+    else if (numin > hang)    memcpy(q1, inlines[numin - 1], prefix);
+    else                      while (q1 < q2) *q1++ = ' ';
+    q1 = q2;
+    if (w1)
+      for (w2 = w1;  ;  ) {
+        memcpy(q1, w2->chrs, w2->length);
+        q1 += w2->length;
+        w2 = w2->next;
+        if (w2 == w1->nextline) break;
+        *q1++ = ' ';
+      }
+    q2 += linelen - affix;
+    while (q1 < q2) *q1++ = ' ';
+    q2 = q1 + suffix;
+    if      (numout <= numin) memcpy(q1, suffixes[numout - 1], suffix);
+    else if (numin)           memcpy(q1, suffixes[numin - 1], suffix);
+    else                      while(q1 < q2) *q1++ = ' ';
+    *q2 = '\0';
+    if (w1) w1 = w1->nextline;
+  }
+
+  q1 = NULL;
+  additem(pbuf, &q1);
+  if (*errmsg) goto rfcleanup;
+
+  outlines = copyitems(pbuf);
+
+rfcleanup:
+
+  if (suffixes) free(suffixes);
+
+  while (tail != head) {
+    tail = tail->prev;
+    free(tail->next);
+  }
+
+  if (pbuf) {
+    if (!outlines)
+      for (;;) {
+        outlines = nextitem(pbuf);
+        if (!outlines) break;
+        free(*outlines);
+      }
+    freebuffer(pbuf);
+  }
+
+  return outlines;
+}
diff --git a/hw2/test_output/.git-keep b/hw2/test_output/.git-keep
new file mode 100644
index 0000000..e69de29
diff --git a/hw2/tests/basecode_tests.c b/hw2/tests/basecode_tests.c
new file mode 100644
index 0000000..70cd753
--- /dev/null
+++ b/hw2/tests/basecode_tests.c
@@ -0,0 +1,71 @@
+#include
+
+#include
+#include
+
+#include "test_common.h"
+
+#define STANDARD_LIMITS "ulimit -t 10; ulimit -f 2000"
+
+/*
+ * Start the program and then trigger EOF on input.
+ * The program should exit with EXIT_SUCCESS.
+ */ +Test(base_suite, EOF_test) { + char *name = "EOF"; + sprintf(program_options, "%s", ""); + int err = run_using_system(name, "", "", STANDARD_LIMITS); + assert_expected_status(EXIT_SUCCESS, err); + assert_outfile_matches(name, NULL); +} + +/* + * Run the program with default options on a non-empty input file + * and check the results. + */ +Test(base_suite, basic_test) { + char *name = "basic"; + sprintf(program_options, "%s", ""); + int err = run_using_system(name, "", "", STANDARD_LIMITS); + assert_expected_status(EXIT_SUCCESS, err); + assert_outfile_matches(name, NULL); +} + +/* + * Run the program with default options on an input file with + * prefixes and suffixes and check the results. + */ +Test(base_suite, prefix_suffix_test) { + char *name = "prefix_suffix"; + sprintf(program_options, "%s", "w80"); + int err = run_using_system(name, "", "", STANDARD_LIMITS); + assert_expected_status(EXIT_SUCCESS, err); + assert_outfile_matches(name, NULL); +} + +/* + * Run the program with default options on a non-empty input file + * and use valgrind to check for leaks. + */ +Test(base_suite, valgrind_leak_test) { + char *name = "valgrind_leak"; + sprintf(program_options, "%s", ""); + int err = run_using_system(name, "", "valgrind --leak-check=full --undef-value-errors=no --error-exitcode=37", STANDARD_LIMITS); + assert_no_valgrind_errors(err); + assert_normal_exit(err); + assert_outfile_matches(name, NULL); +} + +/* + * Run the program with default options on a non-empty input file + * and use valgrind to check for uninitialized values. 
+ */
+Test(base_suite, valgrind_uninitialized_test) {
+    char *name = "valgrind_uninitialized";
+    /* This run passes explicit options rather than the defaults. */
+    sprintf(program_options, "%s", "p10 s10");
+    /* --error-exitcode=37 makes valgrind exit with 37 when it finds errors. */
+    int err = run_using_system(name, "", "valgrind --leak-check=no --undef-value-errors=yes --error-exitcode=37", STANDARD_LIMITS);
+    assert_no_valgrind_errors(err);
+    /* The program itself is expected to fail: exit status 1, not 0. */
+    assert_expected_status(0x1, err);
+    /* The reference output for this test is an empty file. */
+    assert_outfile_matches(name, NULL);
+}
+
diff --git a/hw2/tests/rsrc/EOF.in b/hw2/tests/rsrc/EOF.in new file mode 100644 index 0000000..e69de29 diff --git a/hw2/tests/rsrc/EOF.out b/hw2/tests/rsrc/EOF.out new file mode 100644 index 0000000..e69de29 diff --git a/hw2/tests/rsrc/banner.txt b/hw2/tests/rsrc/banner.txt new file mode 100644 index 0000000..c4d1308 --- /dev/null +++ b/hw2/tests/rsrc/banner.txt @@ -0,0 +1,13 @@ + /* We can't simply return c - '0' because this is ANSI */ + /* C code, so it has to work for any character set, not */ + /* just ones which put the digits together in order. */ + +/* Puts the decimal value of the string s into *pn, returning */ +/* 1 on success. If s is empty, or contains non-digits, */ +/* or represents an integer greater than 9999, then *pn */ +/* is not changed and 0 is returned. Does not use errmsg. */ + +/* Reads lines from stdin until EOF, or until a blank line is encountered, */ +/* in which case the newline is pushed back onto the input stream. Returns */ +/* a NULL-terminated array of pointers to individual lines, stripped of */ +/* their newline characters. Uses errmsg, and returns NULL on failure.
*/ diff --git a/hw2/tests/rsrc/basic.in b/hw2/tests/rsrc/basic.in new file mode 120000 index 0000000..7344214 --- /dev/null +++ b/hw2/tests/rsrc/basic.in @@ -0,0 +1 @@ +gettysburg.txt \ No newline at end of file diff --git a/hw2/tests/rsrc/basic.out b/hw2/tests/rsrc/basic.out new file mode 100644 index 0000000..80ec472 --- /dev/null +++ b/hw2/tests/rsrc/basic.out @@ -0,0 +1,24 @@ +Four score and seven years ago our fathers brought forth on this +continent, a new nation, conceived in Liberty, and dedicated to the +proposition that all men are created equal. + +Now we are engaged in a great civil war, testing whether that nation, or +any nation so conceived and so dedicated, can long endure. We are met on +a great battle-field of that war. We have come to dedicate a portion of +that field, as a final resting place for those who here gave their lives +that that nation might live. It is altogether fitting and proper that we +should do this. + +But, in a larger sense, we can not dedicate -- we can not consecrate -- +we can not hallow -- this ground. The brave men, living and dead, who +struggled here, have consecrated it, far above our poor power to add or +detract. The world will little note, nor long remember what we say here, +but it can never forget what they did here. It is for us the living, +rather, to be dedicated here to the unfinished work which they who +fought here have thus far so nobly advanced. It is rather for us to be +here dedicated to the great task remaining before us -- that from these +honored dead we take increased devotion to that cause for which they +gave the last full measure of devotion -- that we here highly resolve +that these dead shall not have died in vain -- that this nation, under +God, shall have a new birth of freedom -- and that government of the +people, by the people, for the people, shall not perish from the earth. 
diff --git a/hw2/tests/rsrc/blank_lines.txt b/hw2/tests/rsrc/blank_lines.txt new file mode 100644 index 0000000..4979e96 --- /dev/null +++ b/hw2/tests/rsrc/blank_lines.txt @@ -0,0 +1,15 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod + + tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim + +veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea + + commodo consequat. Duis aute irure dolor in reprehenderit in voluptate + +velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint + + + +occaecat cupidatat non proident, sunt in culpa qui officia deserunt + +mollit anim id est laborum. diff --git a/hw2/tests/rsrc/gettysburg.txt b/hw2/tests/rsrc/gettysburg.txt new file mode 100644 index 0000000..2e5fd96 --- /dev/null +++ b/hw2/tests/rsrc/gettysburg.txt @@ -0,0 +1,5 @@ +Four score and seven years ago our fathers brought forth on this continent, a new nation, conceived in Liberty, and dedicated to the proposition that all men are created equal. + +Now we are engaged in a great civil war, testing whether that nation, or any nation so conceived and so dedicated, can long endure. We are met on a great battle-field of that war. We have come to dedicate a portion of that field, as a final resting place for those who here gave their lives that that nation might live. It is altogether fitting and proper that we should do this. + +But, in a larger sense, we can not dedicate -- we can not consecrate -- we can not hallow -- this ground. The brave men, living and dead, who struggled here, have consecrated it, far above our poor power to add or detract. The world will little note, nor long remember what we say here, but it can never forget what they did here. It is for us the living, rather, to be dedicated here to the unfinished work which they who fought here have thus far so nobly advanced. 
It is rather for us to be here dedicated to the great task remaining before us -- that from these honored dead we take increased devotion to that cause for which they gave the last full measure of devotion -- that we here highly resolve that these dead shall not have died in vain -- that this nation, under God, shall have a new birth of freedom -- and that government of the people, by the people, for the people, shall not perish from the earth. diff --git a/hw2/tests/rsrc/loremipsum.txt b/hw2/tests/rsrc/loremipsum.txt new file mode 100644 index 0000000..c2eaaee --- /dev/null +++ b/hw2/tests/rsrc/loremipsum.txt @@ -0,0 +1,34 @@ +Lorem ipsum +dolor sit amet, +consectetur +adipiscing +elit, sed do +eiusmod tempor +incididunt ut +labore et +dolore magna +aliqua. Ut enim +ad minim +veniam, quis +nostrud +exercitation +ullamco laboris +nisi ut aliquip +ex ea commodo +consequat. Duis +aute irure +dolor in +reprehenderit +in voluptate +velit esse +cillum dolore +eu fugiat nulla +pariatur. Excepteur +sint occaecat +cupidatat non +proident, sunt +in culpa qui +officia +deserunt mollit +anim id est +laborum. diff --git a/hw2/tests/rsrc/prefix_suffix.in b/hw2/tests/rsrc/prefix_suffix.in new file mode 120000 index 0000000..ac9be5c --- /dev/null +++ b/hw2/tests/rsrc/prefix_suffix.in @@ -0,0 +1 @@ +banner.txt \ No newline at end of file diff --git a/hw2/tests/rsrc/prefix_suffix.out b/hw2/tests/rsrc/prefix_suffix.out new file mode 100644 index 0000000..86e394f --- /dev/null +++ b/hw2/tests/rsrc/prefix_suffix.out @@ -0,0 +1,12 @@ + /* We can't simply return c - '0' because this is ANSI C code, so it has to */ + /* work for any character set, not just ones which put the digits together */ + /* in order. */ + +/* Puts the decimal value of the string s into *pn, returning 1 on success. */ +/* If s is empty, or contains non-digits, or represents an integer greater */ +/* than 9999, then *pn is not changed and 0 is returned. Does not use errmsg. 
*/ + +/* Reads lines from stdin until EOF, or until a blank line is encountered, */ +/* in which case the newline is pushed back onto the input stream. Returns a */ +/* NULL-terminated array of pointers to individual lines, stripped of their */ +/* newline characters. Uses errmsg, and returns NULL on failure. */ diff --git a/hw2/tests/rsrc/valgrind_leak.in b/hw2/tests/rsrc/valgrind_leak.in new file mode 120000 index 0000000..7344214 --- /dev/null +++ b/hw2/tests/rsrc/valgrind_leak.in @@ -0,0 +1 @@ +gettysburg.txt \ No newline at end of file diff --git a/hw2/tests/rsrc/valgrind_leak.out b/hw2/tests/rsrc/valgrind_leak.out new file mode 100644 index 0000000..80ec472 --- /dev/null +++ b/hw2/tests/rsrc/valgrind_leak.out @@ -0,0 +1,24 @@ +Four score and seven years ago our fathers brought forth on this +continent, a new nation, conceived in Liberty, and dedicated to the +proposition that all men are created equal. + +Now we are engaged in a great civil war, testing whether that nation, or +any nation so conceived and so dedicated, can long endure. We are met on +a great battle-field of that war. We have come to dedicate a portion of +that field, as a final resting place for those who here gave their lives +that that nation might live. It is altogether fitting and proper that we +should do this. + +But, in a larger sense, we can not dedicate -- we can not consecrate -- +we can not hallow -- this ground. The brave men, living and dead, who +struggled here, have consecrated it, far above our poor power to add or +detract. The world will little note, nor long remember what we say here, +but it can never forget what they did here. It is for us the living, +rather, to be dedicated here to the unfinished work which they who +fought here have thus far so nobly advanced. 
It is rather for us to be +here dedicated to the great task remaining before us -- that from these +honored dead we take increased devotion to that cause for which they +gave the last full measure of devotion -- that we here highly resolve +that these dead shall not have died in vain -- that this nation, under +God, shall have a new birth of freedom -- and that government of the +people, by the people, for the people, shall not perish from the earth. diff --git a/hw2/tests/rsrc/valgrind_uninitialized.err b/hw2/tests/rsrc/valgrind_uninitialized.err new file mode 100644 index 0000000..72da492 --- /dev/null +++ b/hw2/tests/rsrc/valgrind_uninitialized.err @@ -0,0 +1,15 @@ +==872656== Memcheck, a memory error detector +==872656== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al. +==872656== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info +==872656== Command: bin/par p10 s10 +==872656== +Line 1 shorter than + = 10 + 10 = 20 +==872656== +==872656== HEAP SUMMARY: +==872656== in use at exit: 0 bytes in 0 blocks +==872656== total heap usage: 45 allocs, 45 frees, 5,754 bytes allocated +==872656== +==872656== For a detailed leak analysis, rerun with: --leak-check=full +==872656== +==872656== For lists of detected and suppressed errors, rerun with: -s +==872656== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) diff --git a/hw2/tests/rsrc/valgrind_uninitialized.in b/hw2/tests/rsrc/valgrind_uninitialized.in new file mode 120000 index 0000000..25c76ee --- /dev/null +++ b/hw2/tests/rsrc/valgrind_uninitialized.in @@ -0,0 +1 @@ +loremipsum.txt \ No newline at end of file diff --git a/hw2/tests/rsrc/valgrind_uninitialized.out b/hw2/tests/rsrc/valgrind_uninitialized.out new file mode 100644 index 0000000..e69de29 diff --git a/hw2/tests/test_common.c b/hw2/tests/test_common.c new file mode 100644 index 0000000..bcccf80 --- /dev/null +++ b/hw2/tests/test_common.c @@ -0,0 +1,144 @@ +#include "test_common.h" + +char program_options[500]; 
+char test_infile[100];
+char test_output_subdir[100];
+char test_log_outfile[100];
+
+/*
+ * Sets up to run a test.
+ * Initialize various filenames, using the name of the test as a base,
+ * and then initialize and run a command to remove old output from this test
+ * and to make sure that the test output directory exists.
+ *
+ * name is the test name; it is used as the base of the input file name
+ * under TEST_REF_DIR and of the output names under TEST_OUTPUT_DIR.
+ *
+ * Returns the value returned by system() for the cleanup command, or -1
+ * (without running any command) if a file name or the command itself
+ * would not fit in its fixed-size buffer.
+ */
+int setup_test(char *name)
+{
+    char cmd[1000];
+    int n;
+
+    /* Every string is built with snprintf() so that an over-long test
+     * name is reported instead of overflowing a buffer. */
+    n = snprintf(test_infile, sizeof(test_infile), "%s/%s", TEST_REF_DIR, name);
+    if (n < 0 || (size_t)n >= sizeof(test_infile)) {
+        goto too_long;
+    }
+    n = snprintf(test_log_outfile, sizeof(test_log_outfile), "%s/%s", TEST_OUTPUT_DIR, name);
+    if (n < 0 || (size_t)n >= sizeof(test_log_outfile)) {
+        goto too_long;
+    }
+    n = snprintf(test_output_subdir, sizeof(test_output_subdir), "%s/%s", TEST_OUTPUT_DIR, name);
+    if (n < 0 || (size_t)n >= sizeof(test_output_subdir)) {
+        goto too_long;
+    }
+    n = snprintf(cmd, sizeof(cmd), "rm -f %s.out %s.err; rm -fr %s; mkdir -p %s",
+                 test_log_outfile, test_log_outfile, test_output_subdir,
+                 test_output_subdir);
+    if (n < 0 || (size_t)n >= sizeof(cmd)) {
+        goto too_long;
+    }
+    fprintf(stderr, "setup(%s)\n", cmd);
+    return system(cmd);
+
+too_long:
+    fprintf(stderr, "setup_test: test name too long: %s\n", name);
+    return -1;
+}
+
+/*
+ * Run the program as a "black box" using system().
+ * A shell command is constructed and run that first performs test setup,
+ * then runs the program to be tested with input redirected from a test input
+ * file and standard and error output redirected to separate output files.
+ */
+int run_using_system(char *name, char *pre_cmd, char *valgrind_cmd, char *limits)
+{
+    char cmd[1000];
+    int n;
+
+    /* Do not run the program if the old output could not be cleared or
+     * the output directory could not be created. */
+    if (setup_test(name) != 0) {
+        fprintf(stderr, "run_using_system: setup failed for test %s\n", name);
+        return -1;
+    }
+    /* The command runs as: limits ; pre_cmd valgrind_cmd PROGNAME options,
+     * reading <test_infile>.in and writing <test_log_outfile>.out/.err.
+     * pre_cmd and valgrind_cmd are concatenated with no separator. */
+    n = snprintf(cmd, sizeof(cmd), "%s;%s%s "PROGNAME" %s < %s.in > %s.out 2> %s.err",
+                 limits, pre_cmd,
+                 valgrind_cmd, program_options, test_infile, test_log_outfile,
+                 test_log_outfile);
+    /* A truncated shell command must never be executed. */
+    if (n < 0 || (size_t)n >= sizeof(cmd)) {
+        fprintf(stderr, "run_using_system: command too long for test %s\n", name);
+        return -1;
+    }
+    fprintf(stderr, "run(%s)\n", cmd);
+    return system(cmd);
+}
+
+/*
+ * Assert that the program was not terminated by a signal and that the
+ * status returned by run_using_system() is exactly 0.
+ */
+void assert_normal_exit(int status)
+{
+    cr_assert(!WIFSIGNALED(status),
+              "The program terminated with an unexpected signal (%d).\n",
+              WTERMSIG(status));
+    cr_assert_eq(status, 0,
+                 "The program did not exit normally (status = 0x%x).\n",
+                 status);
+}
+
+/*
+ * Assert that the program was not terminated by a signal and that its
+ * exit status, extracted with WEXITSTATUS(), equals expected.
+ */
+void assert_expected_status(int expected, int status)
+{
+    cr_assert(!WIFSIGNALED(status),
+              "The program terminated with an unexpected signal (%d).\n",
+              WTERMSIG(status));
+    cr_assert_eq(
+        WEXITSTATUS(status), expected,
+        "The program did not exit with the expected status "
+        "(expected 0x%x, was 0x%x).\n",
+        expected, WEXITSTATUS(status));
+}
+
+/*
+ * Assert that the program was terminated by a signal and that the
+ * signal number, extracted with WTERMSIG(), equals sig.
+ */
+void assert_signaled(int sig, int status)
+{
+    cr_assert(WIFSIGNALED(status),
+              "The program did not terminate with a signal.\n");
+    cr_assert(WTERMSIG(status) == sig,
+              "The program did not terminate with the expected signal "
+              "(expected %d, was %d).\n",
+              sig, WTERMSIG(status));
+}
+
+/*
+ * Compare the standard output from the program being tested with reference
+ * output, after first possibly using "grep" to remove lines that match a filter
+ * pattern.
+ */
+void assert_outfile_matches(char *name, char *filter)
+{
+    char cmd[500];
+    int n;
+
+    if (filter) {
+        /* Write filtered copies of the actual and the reference output to
+         * <name>_A.out and <name>_B.out, then diff the two copies. */
+        n = snprintf(cmd, sizeof(cmd),
+                     "grep -v '%s' %s.out > %s_A.out; grep -v '%s' "
+                     "%s/%s.out > %s_B.out; "
+                     "diff --ignore-tab-expansion --ignore-trailing-space "
+                     "--ignore-space-change --ignore-blank-lines %s_A.out "
+                     "%s_B.out",
+                     filter, test_log_outfile, name, filter, TEST_REF_DIR,
+                     name, name, name, name);
+    } else {
+        n = snprintf(cmd, sizeof(cmd),
+                     "diff --ignore-tab-expansion --ignore-trailing-space "
+                     "--ignore-space-change --ignore-blank-lines %s.out "
+                     "%s/%s.out",
+                     test_log_outfile, TEST_REF_DIR, name);
+    }
+    /* A truncated shell command must never be executed. */
+    cr_assert(n >= 0 && (size_t)n < sizeof(cmd),
+              "The comparison command for test %s does not fit in the "
+              "command buffer.\n",
+              name);
+    int err = system(cmd);
+    cr_assert_eq(err, 0,
+                 "The output was not what was expected (diff exited with "
+                 "status %d).\n",
+                 WEXITSTATUS(err));
+}
+
+/*
+ * Compare the standard error output from the program being tested with
+ * reference output, after first possibly using "grep" to remove lines that
+ * match a filter pattern.
+ */
+void assert_errfile_matches(char *name, char *filter)
+{
+    char cmd[500];
+    int n;
+
+    if (filter) {
+        /* Write filtered copies of the actual and the reference error
+         * output to <name>_A.err and <name>_B.err, then diff the copies.
+         * The first diff option previously had three leading dashes,
+         * which diff rejects as an unrecognized option. */
+        n = snprintf(cmd, sizeof(cmd),
+                     "grep -v '%s' %s.err > %s_A.err; grep -v '%s' "
+                     "%s/%s.err > %s_B.err; "
+                     "diff --ignore-tab-expansion --ignore-trailing-space "
+                     "--ignore-space-change --ignore-blank-lines %s_A.err "
+                     "%s_B.err",
+                     filter, test_log_outfile, name, filter, TEST_REF_DIR,
+                     name, name, name, name);
+    } else {
+        n = snprintf(cmd, sizeof(cmd),
+                     "diff --ignore-tab-expansion --ignore-trailing-space "
+                     "--ignore-space-change --ignore-blank-lines %s.err "
+                     "%s/%s.err",
+                     test_log_outfile, TEST_REF_DIR, name);
+    }
+    /* A truncated shell command must never be executed. */
+    cr_assert(n >= 0 && (size_t)n < sizeof(cmd),
+              "The comparison command for test %s does not fit in the "
+              "command buffer.\n",
+              name);
+    int err = system(cmd);
+    cr_assert_eq(err, 0,
+                 "The output was not what was expected (diff exited with "
+                 "status %d).\n",
+                 WEXITSTATUS(err));
+}
+
+/*
+ * Assert that the exit status is not 37, the value the valgrind tests pass
+ * to --error-exitcode to signal that valgrind found errors.
+ */
+void assert_no_valgrind_errors(int status)
+{
+    cr_assert_neq(WEXITSTATUS(status), 37,
+                  "Valgrind reported errors -- see %s.err",
+                  test_log_outfile);
+}
diff --git a/hw2/tests/test_common.h b/hw2/tests/test_common.h new file mode 100644 index 0000000..2b2f4fd --- /dev/null +++ b/hw2/tests/test_common.h @@ -0,0
+1,27 @@
+/*
+ * Declarations shared by the black-box tests: file locations, the global
+ * buffers filled in by setup_test(), and the run/assert helpers defined in
+ * test_common.c.
+ */
+#ifndef TEST_COMMON_H
+#define TEST_COMMON_H
+
+#include
+#include
+#include
+#include
+
+#define TEST_TIMEOUT 15
+
+/* Program under test and the directories holding reference and actual output. */
+#define PROGNAME "bin/par"
+#define TEST_REF_DIR "tests/rsrc"
+#define TEST_OUTPUT_DIR "test_output"
+
+/* NOTE(review): not defined in test_common.c; presumably defined elsewhere -- confirm. */
+extern int errors, warnings;
+
+/* Options placed on the command line of the program under test. */
+extern char program_options[500];
+/* Path names derived from the test name by setup_test(). */
+extern char test_infile[100];
+extern char test_output_subdir[100];
+extern char test_log_outfile[100];
+
+int setup_test(char *name);
+/* Parameter names follow the definition and the callers: pre_cmd, valgrind_cmd, limits. */
+int run_using_system(char *name, char *pre_cmd, char *valgrind_cmd, char *limits);
+void assert_normal_exit(int status);
+/* NOTE(review): no definition of assert_error_exit is visible in test_common.c -- confirm it exists before calling it. */
+void assert_error_exit(int status);
+void assert_expected_status(int expected, int status);
+void assert_signaled(int sig, int status);
+void assert_outfile_matches(char *name, char *filter);
+void assert_errfile_matches(char *name, char *filter);
+void assert_no_valgrind_errors(int status);
+
+#endif /* TEST_COMMON_H */