/*
 * Wrap, an improved ASCII text word wrapper.
 *
 * Reads wide-character text from one or more input files (or STDIN) and
 * writes it back out with line breaks inserted so that no line exceeds the
 * requested display width, subject to the selected wrapping mode.
 *
 * Compile with something like: gcc -std=c99 -Wall -o wrap wrap.c
 *
 * Copyright (C) 2008 by Alex Markley.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You may have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

//Feature-test macros only take effect when defined before the first system header is included.
#ifndef _XOPEN_SOURCE
#define _XOPEN_SOURCE 700
#endif

#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <wchar.h>

//Fallback prototype for toolchains whose <wchar.h> still hides wcwidth(). Harmless when the header already declares it.
int wcwidth(wchar_t c);

#define WRAP "wrap: "
#define VERSION "1.0"

#define MODE_CUT 1 //In 'cut' mode, we disregard word boundaries and simply cut the line at the requested line width.
#define MODE_NICE 2 //In 'nice' mode, we wrap at word boundaries. Words/strings/URIs which are longer than the line width will not be cut.
#define MODE_STRICT 3 //In 'strict' mode, we wrap at word boundaries. Words/strings/URIs which are longer than the line width will be cut.

#define LINE_WIDTH_DEFAULT 75
#define TAB_WIDTH_DEFAULT 8
#define MODE_DEFAULT MODE_NICE

#define SPACE 0x20
#define TAB 0x09
#define NEWLINE 0x0A
#define CARRIAGE_RETURN 0x0D

//Extra characters allocated beyond the line width when a line buffer is created.
#define LINEBUF_SLACK 8

int main(int argc, char **argv);
void usage(char *argzero);
void wrap_worker(void);
wchar_t *new_linebuf(int mywidth);
void myfputwc(wchar_t wc, FILE *stream);

static int parse_int_arg(const char *text, int *value);
static void reset_linebuf(int width);
static void grow_linebuf(void);
static void end_output_line(void);

int i, j, cargv, is_input_file, last_chance, work_performed, line_width, tab_width, mode, dos, dashcut, linebuf_len, current_column, cut_next, cut_here, newline_here, discard_here, goodcut, findnew_goodcut;
FILE *input, *output;
wint_t tempc;
wchar_t c, *linebuf;

//Number of wchar_t elements currently allocated for linebuf.
static size_t linebuf_cap;

/*
 * Parse the command line from left to right. Option arguments update the
 * current settings; every input file argument immediately triggers a wrap
 * using the settings seen so far. If no input file was named at all, STDIN is
 * wrapped once after the last argument has been processed.
 */
int main(int argc, char **argv)
{
	work_performed = 0;
	cargv = 1;
	input = NULL;
	output = NULL;
	linebuf = NULL;
	line_width = LINE_WIDTH_DEFAULT;
	tab_width = TAB_WIDTH_DEFAULT;
	mode = MODE_DEFAULT;
	dos = 0;
	dashcut = 1;

	//Set the locale. Required for handling multi-byte characters.
	if(!setlocale(LC_CTYPE, ""))
	{
		fprintf(stderr, WRAP "could not set specified locale!\n");
		exit(1);
	}

	//Main loop, parse command-line arguments. Runs one extra time (cargv == argc) so STDIN can be processed by default.
	while(cargv <= argc)
	{
		is_input_file = 0;
		last_chance = 0;

		if(cargv == argc)
		{
			//Last chance for the main body to run.
			last_chance = 1;
		}
		else if(strcasecmp(argv[cargv], "-h") == 0 || strcasecmp(argv[cargv], "--help") == 0)
		{
			usage(argv[0]);
			exit(0);
		}
		else if(strcasecmp(argv[cargv], "-t") == 0)
		{
			cargv++;
			if(cargv >= argc)
			{
				fprintf(stderr, WRAP "-t must be followed by an integer. (ran out of arguments to process)\n");
				exit(1);
			}
			if(!parse_int_arg(argv[cargv], &tab_width))
			{
				fprintf(stderr, WRAP "-t must be followed by an integer. (\"%s\" is not an integer)\n", argv[cargv]);
				exit(1);
			}
			if(tab_width < 1)
			{
				fprintf(stderr, WRAP "Tab width must be 1 or greater.\n");
				exit(1);
			}
		}
		else if(strcasecmp(argv[cargv], "-w") == 0)
		{
			cargv++;
			if(cargv >= argc)
			{
				fprintf(stderr, WRAP "-w must be followed by an integer. (ran out of arguments to process)\n");
				exit(1);
			}
			if(!parse_int_arg(argv[cargv], &line_width))
			{
				fprintf(stderr, WRAP "-w must be followed by an integer. (\"%s\" is not an integer)\n", argv[cargv]);
				exit(1);
			}
			if(line_width < 1)
			{
				fprintf(stderr, WRAP "Line width must be 1 or greater.\n");
				exit(1);
			}

			//We'll need a buffer for line fragments, sized for the new width.
			reset_linebuf(line_width);
		}
		else if(strcasecmp(argv[cargv], "-m") == 0)
		{
			cargv++;
			if(cargv >= argc)
			{
				fprintf(stderr, WRAP "-m must be followed by a valid mode.\n");
				exit(1);
			}
			else if(strcasecmp(argv[cargv], "strict") == 0) mode = MODE_STRICT;
			else if(strcasecmp(argv[cargv], "cut") == 0) mode = MODE_CUT;
			else if(strcasecmp(argv[cargv], "nice") == 0) mode = MODE_NICE;
			else
			{
				fprintf(stderr, WRAP "Unknown mode \"%s\"... Reverting to default.\n", argv[cargv]);
				mode = MODE_DEFAULT;
			}
		}
		else if(strcasecmp(argv[cargv], "-n") == 0)
		{
			cargv++;
			if(cargv >= argc)
			{
				fprintf(stderr, WRAP "-n must be followed by a valid newline style.\n");
				exit(1);
			}
			else if(strcasecmp(argv[cargv], "dos") == 0) dos = 1;
			else if(strcasecmp(argv[cargv], "unix") == 0) dos = 0;
			else
			{
				fprintf(stderr, WRAP "Unknown newline style \"%s\".\n", argv[cargv]);
				exit(1);
			}
		}
		else if(strcasecmp(argv[cargv], "-d") == 0)
		{
			cargv++;
			if(cargv >= argc)
			{
				fprintf(stderr, WRAP "-d must be followed by TRUE or FALSE.\n");
				exit(1);
			}
			else if(strcasecmp(argv[cargv], "TRUE") == 0 || strcasecmp(argv[cargv], "YES") == 0 || strcasecmp(argv[cargv], "1") == 0) dashcut = 1;
			else if(strcasecmp(argv[cargv], "FALSE") == 0 || strcasecmp(argv[cargv], "NO") == 0 || strcasecmp(argv[cargv], "0") == 0) dashcut = 0;
			else
			{
				fprintf(stderr, WRAP "Unknown boolean \"%s\".\n", argv[cargv]);
				exit(1);
			}
		}
		else if(strcasecmp(argv[cargv], "-o") == 0)
		{
			cargv++;
			if(cargv >= argc)
			{
				fprintf(stderr, WRAP "-o must be followed by an output file name.\n");
				exit(1);
			}

			if(output != NULL && output != stdout) fclose(output);

			if(strcmp(argv[cargv], "-") == 0) output = stdout;
			else if((output = fopen(argv[cargv], "wb")) == NULL)
			{
				fprintf(stderr, WRAP "failed to open \"%s\" for writing!\n", argv[cargv]);
				exit(1);
			}
		}
		else //Unknown. Must be an input file.
		{
			is_input_file = 1;

			if(input != NULL && input != stdin) fclose(input);

			if(strcmp(argv[cargv], "-") == 0) input = stdin;
			else if((input = fopen(argv[cargv], "rb")) == NULL)
			{
				fprintf(stderr, WRAP "failed to open \"%s\" for reading!\n", argv[cargv]);
				exit(1);
			}
		}

		//If appropriate, launch the primary worker.
		if(is_input_file || (last_chance && !work_performed))
		{
			work_performed = 1;
			wrap_worker();
		}

		//Next argument...
		cargv++;
	}

	exit(0);
}

/*
 * Print the help text (synopsis, option list, modes and license notice) to
 * stderr. argzero is the program name shown in the synopsis.
 */
void usage(char *argzero)
{
	fprintf(stderr, "Wrap, an improved word wrapper. (Version " VERSION ")\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "Usage: %s [-h] [--help] [-m MODE] [-w LINEWIDTH] [-t TABWIDTH] [-n NEWLINESTYLE] [-d BOOL] [-o OUTPUTFILE] [INPUTFILE1] [INPUTFILE2] [INPUTFILEN]\n", argzero);
	fprintf(stderr, " -h This message.\n");
	fprintf(stderr, " --help This message.\n");
	fprintf(stderr, " -m MODE Which word wrapping mode should we employ? See below for details.\n");
	fprintf(stderr, " -w LINEWIDTH Define the maximum number of columns in the output. (Default: %d)\n", LINE_WIDTH_DEFAULT);
	fprintf(stderr, " -t TABWIDTH TABWIDTH defines the maximum width of a tab. (Default: %d)\n", TAB_WIDTH_DEFAULT);
	fprintf(stderr, " -n NEWLINESTYLE Which style of new line sequence should we use? UNIX (default) or DOS?\n");
	fprintf(stderr, " -d BOOL Allow line breaks at hypens and dashes in addition to whitespace? (YES by default.)\n");
	fprintf(stderr, " -o OUTPUTFILE File name to write wrapped text to. Use \"-\" or ommit to dump wrapped text to STDOUT.\n");
	fprintf(stderr, " INPUTFILE File(s) to wrap. Use \"-\" or ommit to wrap text from STDIN.\n");
	fprintf(stderr, "\n");
	fprintf(stderr, " Note that arguments may appear in practically any order. Each INPUTFILE flag constitutes a request to wrap the input\n");
	fprintf(stderr, " text with the preceeding parameters. Futher parameters will be ignored unless they are followed by another INPUTFILE.\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "Acceptable Modes:\n");
	fprintf(stderr, " nice The default. Wrap at word boundries. Allow long words (> LINEWIDTH) to exceed line length.\n");
	fprintf(stderr, " strict Wrap at word boundries. Cut long words (> LINEWIDTH) at maximum line length.\n");
	fprintf(stderr, " cut Ignore word boundries. Cut lines at maximum width.\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "Legal:\n");
	fprintf(stderr, " Copyright (C) 2008 Alex Markley.\n\n");
	fprintf(stderr, " This program is free software: you can redistribute it and/or modify\n it under the terms of the GNU General Public License as published by\n the Free Software Foundation, either version 3 of the License, or\n (at your option) any later version.\n\n");
	fprintf(stderr, " This program is distributed in the hope that it will be useful,\n but WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n GNU General Public License for more details.\n\n");
	fprintf(stderr, " You may have received a copy of the GNU General Public License\n along with this program. If not, see <http://www.gnu.org/licenses/>.\n");
}

/*
 * Wrap everything readable from the global `input` stream onto the global
 * `output` stream using the current line_width, tab_width, mode, dos and
 * dashcut settings. Unset streams default to STDIN/STDOUT and a missing line
 * buffer is allocated on demand. Exits the process on read or write failure.
 */
void wrap_worker(void)
{
	//Defaults for basic operation.
	if(linebuf == NULL) reset_linebuf(line_width);
	if(input == NULL) input = stdin;
	if(output == NULL) output = stdout;

	current_column = 0;
	goodcut = -1;
	findnew_goodcut = 1;
	linebuf_len = 0;
	cut_next = 0;

	while((tempc = fgetwc(input)) != WEOF)
	{
		cut_here = 0;
		newline_here = 0;
		discard_here = 0;
		c = (wchar_t)tempc;

		//The previous character (a dash) asked for a break opportunity in front of this one.
		if(cut_next)
		{
			cut_here = 1;
			cut_next = 0;
		}

		//Figure out the role of the character.
		switch(c)
		{
			default:
				//Characters for which wcwidth() reports a negative width are counted as one column.
				i = wcwidth(c);
				if(i >= 0) current_column = current_column + i;
				else current_column++;
				break;
			case TAB:
				cut_here = 1;
				//Figure out the new current column: advance to the next tab stop.
				current_column = ((int)(current_column / tab_width) + 1) * tab_width;
				break;
			case SPACE:
				cut_here = 1;
				current_column++;
				break;
			case '-':
			case L'\u2014':
			case L'\u2013':
				if(dashcut) cut_next = 1;
				current_column++;
				break;
			case NEWLINE:
				newline_here = 1;
				discard_here = 1;
				break;
			case CARRIAGE_RETURN:
				discard_here = 1;
				break;
		}

		//Place the character in the line buffer.
		if(!discard_here)
		{
			//Zero-width characters lengthen the buffer without advancing the column, so the
			//width check below cannot bound linebuf_len by itself. Grow instead of overflowing.
			if((size_t)linebuf_len >= linebuf_cap) grow_linebuf();

			linebuf[linebuf_len] = c;
			if(cut_here)
			{
				//Only the first cuttable position of a run of cuttable characters is recorded.
				if(findnew_goodcut)
				{
					goodcut = linebuf_len;
					findnew_goodcut = 0;
				}
			}
			else findnew_goodcut = 1; //Not a cuttable character. Find a new good cutting point later.
			linebuf_len++;
		}

		//If the input calls for a new line, let's just give it to them.
		if(newline_here)
		{
			for(i = 0; i < linebuf_len; i++) myfputwc(linebuf[i], output);
			linebuf_len = 0;
			end_output_line();
		}

		//Has our current line exceeded its maximum width?
		if(current_column >= line_width)
		{
			int emit_count; //How many buffered characters are written before the break.
			int break_line; //Whether a line terminator follows them.

			if(mode == MODE_CUT)
			{
				emit_count = linebuf_len;
				break_line = 1;
			}
			else if(goodcut >= 0) //NICE or STRICT with a usable break opportunity.
			{
				emit_count = goodcut;
				break_line = 1;
			}
			else //No good cut? Write all but the newest character; only STRICT breaks the line here.
			{
				emit_count = linebuf_len - 1;
				if(emit_count < 0) emit_count = 0;
				break_line = (mode == MODE_STRICT);
			}

			for(i = 0; i < emit_count; i++) myfputwc(linebuf[i], output);
			if(break_line) end_output_line();

			//No matter what happened above, we have a string fragment at the end of linebuf.
			//It needs to be trimmed and copied to the beginning.
			cut_here = 0;
			j = 0;
			for(i = emit_count; i < linebuf_len; i++)
			{
				//Cut off any white space at the beginning of the fragment.
				if(linebuf[i] != SPACE && linebuf[i] != TAB) cut_here = 1;

				if(cut_here) //Found a trim point already.
				{
					linebuf[j] = linebuf[i];
					j++;
					current_column++;
				}
			}
			linebuf_len = j;
		}
	}

	//Whatever is still buffered when the input ends (text after the last newline) belongs in the output too.
	for(i = 0; i < linebuf_len; i++) myfputwc(linebuf[i], output);
	linebuf_len = 0;

	//Push buffered output to the stream now so write failures are reported instead of being lost at exit.
	if(fflush(output) != 0)
	{
		fprintf(stderr, WRAP "failed writing character to output stream. disk full?\n");
		exit(1);
	}

	//fgetwc() also returns WEOF on read and encoding errors; tell those apart from a real end of file.
	if(ferror(input))
	{
		fprintf(stderr, WRAP "failed reading character from input stream. invalid multi-byte sequence?\n");
		exit(1);
	}
}

/*
 * Allocate a zero-filled line buffer able to hold mywidth + LINEBUF_SLACK wide
 * characters. Never returns NULL: prints an error and exits when memory is
 * exhausted. The caller owns the returned buffer.
 */
wchar_t *new_linebuf(int mywidth)
{
	wchar_t *mylinebuf;

	//The sum is computed in size_t so a very large width cannot overflow int.
	if(mywidth < 0 || (mylinebuf = calloc((size_t)mywidth + LINEBUF_SLACK, sizeof *mylinebuf)) == NULL)
	{
		fprintf(stderr, WRAP "out of memory\n");
		exit(1);
	}

	return mylinebuf;
}

/*
 * Write one wide character to stream, exiting with an error message if the
 * write fails.
 */
void myfputwc(wchar_t wc, FILE *stream)
{
	if(fputwc(wc, stream) == WEOF)
	{
		fprintf(stderr, WRAP "failed writing character to output stream. disk full?\n");
		exit(1);
	}
}

/*
 * Convert text to an int. Returns 1 and stores the result in *value when the
 * whole string is a decimal integer that fits in an int; returns 0 and leaves
 * *value untouched otherwise.
 */
static int parse_int_arg(const char *text, int *value)
{
	char *end;
	long parsed;

	errno = 0;
	parsed = strtol(text, &end, 10);
	if(end == text || *end != '\0' || errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX) return 0;

	*value = (int)parsed;
	return 1;
}

/*
 * Replace the global line buffer with a fresh one sized for the given width
 * and record its capacity.
 */
static void reset_linebuf(int width)
{
	free(linebuf);
	linebuf = new_linebuf(width);
	linebuf_cap = (size_t)width + LINEBUF_SLACK;
}

/*
 * Double the capacity of the global line buffer, keeping its contents. Exits
 * with an error message when the buffer cannot grow any further.
 */
static void grow_linebuf(void)
{
	size_t new_cap = linebuf_cap * 2;
	wchar_t *bigger = NULL;

	//linebuf_len is an int, so capacities beyond INT_MAX could never be indexed anyway.
	if(new_cap > linebuf_cap && new_cap <= (size_t)INT_MAX && new_cap <= SIZE_MAX / sizeof *linebuf)
		bigger = realloc(linebuf, new_cap * sizeof *linebuf);

	if(bigger == NULL)
	{
		fprintf(stderr, WRAP "out of memory\n");
		exit(1);
	}

	linebuf = bigger;
	linebuf_cap = new_cap;
}

/*
 * Terminate the current output line (CR LF in DOS mode, LF otherwise) and
 * reset the column and break-opportunity tracking for the next line.
 */
static void end_output_line(void)
{
	if(dos) myfputwc(CARRIAGE_RETURN, output);
	myfputwc(NEWLINE, output);
	current_column = 0;
	goodcut = -1;
	findnew_goodcut = 1;
}