/*
* Wrap, an improved ASCII text word wrapper.
*
* Compile with something like: gcc -std=c99 -Wall -o wrap wrap.c
*
* Copyright (C) 2008 by Alex Markley.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You may have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <locale.h>
#define _XOPEN_SOURCE
#include <wchar.h>
//wcwidth() is declared by hand here; presumably the system headers do not
//expose its prototype with the compiler settings this file is built with.
int wcwidth(wchar_t c);

//Diagnostic prefix and program version. These stay as macros because they are
//pasted directly in front of adjacent string literals.
#define WRAP "wrap: "
#define VERSION "1.0"

//Wrapping modes.
enum
{
    MODE_CUT = 1,    //Disregard word boundaries and simply cut the line at the requested line width.
    MODE_NICE = 2,   //Wrap at word boundaries. Words/strings/URIs longer than the line width are NOT cut.
    MODE_STRICT = 3  //Wrap at word boundaries. Words/strings/URIs longer than the line width ARE cut.
};

//Settings used when the command line does not override them.
enum
{
    LINE_WIDTH_DEFAULT = 75,
    TAB_WIDTH_DEFAULT = 8,
    MODE_DEFAULT = MODE_NICE
};

//Character codes the wrapper treats specially.
enum
{
    TAB = 0x09,
    NEWLINE = 0x0A,
    CARRIAGE_RETURN = 0x0D,
    SPACE = 0x20
};
//Forward declarations.
//main:        parses the command line and runs wrap_worker() once per input file.
//usage:       prints the help text to stderr.
//wrap_worker: wraps everything readable from 'input' and writes it to 'output'.
//new_linebuf: allocates a zeroed line buffer of mywidth+8 wide characters; exits on failure.
//myfputwc:    writes one wide character to a stream; exits on failure.
int main(int argc, char **argv);
void usage(char *argzero);
void wrap_worker(void);
wchar_t *new_linebuf(int mywidth);
void myfputwc(wchar_t wc, FILE *stream);
//Program-wide state (all of it is shared between main and wrap_worker):
//  i, j              scratch loop indices used by wrap_worker.
//  cargv             index of the argv entry main is currently examining.
//  is_input_file     set by main when the current argument was opened as an input file.
//  last_chance       set by main once cargv has reached argc (no arguments left).
//  work_performed    set by main once wrap_worker has been run at least once.
//  line_width        maximum output width in columns (-w).
//  tab_width         distance between tab stops in columns (-t).
//  mode              one of MODE_CUT / MODE_NICE / MODE_STRICT (-m).
//  dos               non-zero to write CR+LF line endings instead of LF (-n).
//  dashcut           non-zero to allow a line break after a dash (-d).
//  linebuf_len       number of characters currently stored in linebuf.
//  current_column    column reached so far on the line being assembled.
//  cut_next          set after a dash so that the following character becomes a cut point.
//  cut_here          the current character is a permissible cut point.
//  newline_here      the current character is a newline.
//  discard_here      the current character is not stored in linebuf (newline or carriage return).
//  goodcut           index in linebuf of the most recent cut point, or -1 if there is none.
//  findnew_goodcut   non-zero when the next cut-point character should become the new goodcut.
int i, j, cargv, is_input_file, last_chance, work_performed, line_width, tab_width, mode, dos, dashcut, linebuf_len, current_column, cut_next, cut_here, newline_here, discard_here, goodcut, findnew_goodcut;
//Streams currently being read from and written to. NULL until chosen; wrap_worker
//substitutes stdin/stdout for NULL.
FILE *input, *output;
//Raw return value of fgetwc(), kept as wint_t so it can be compared with WEOF.
wint_t tempc;
//c is the character currently being processed; linebuf holds the not-yet-written
//part of the current line.
wchar_t c, *linebuf;
//Parse 'text' as a complete decimal integer.
//Stores the number in *value and returns 1 on success. Returns 0 when 'text'
//is empty, is not a number, or has anything at all after the digits (so
//"75abc" is rejected instead of being silently read as 75).
static int parse_int_arg(const char *text, int *value)
{
    char trailing;

    //%c only converts if something follows the number, which makes the
    //conversion count 2 (or 0 / EOF when there was no number at all).
    return sscanf(text, "%d%c", value, &trailing) == 1;
}

//Report a failed write on the output stream and terminate.
static void output_write_failed(void)
{
    fprintf(stderr, WRAP "failed writing to output stream. disk full?\n");
    exit(1);
}

//Program entry point.
//
//Arguments are processed strictly left to right. Option arguments update the
//global settings; every argument that is not an option is opened as an input
//file and wrapped immediately with the settings seen so far. If the command
//line names no input file at all, stdin is wrapped once after the last
//argument. Exits with status 0 on success and 1 on any error.
int main(int argc, char **argv)
{
    work_performed = 0;
    cargv = 1;
    input = NULL;
    output = NULL;
    linebuf = NULL;
    line_width = LINE_WIDTH_DEFAULT;
    tab_width = TAB_WIDTH_DEFAULT;
    mode = MODE_DEFAULT;
    dos = 0;
    dashcut = 1;

    //Set the locale. Required for handling multi-byte characters.
    if(!setlocale(LC_CTYPE, ""))
    {
        fprintf(stderr, WRAP "could not set specified locale!\n");
        exit(1);
    }

    //Main loop, parse command-line arguments. The loop deliberately runs one
    //step past the last argument so stdin can be wrapped when no input file
    //was named.
    while(cargv <= argc)
    {
        is_input_file = 0;
        last_chance = 0;

        if(cargv == argc)
        {
            //Last chance for the main body to run.
            last_chance = 1;
        }
        else if(strcasecmp(argv[cargv], "-h") == 0 || strcasecmp(argv[cargv], "--help") == 0)
        {
            usage(argv[0]);
            exit(0);
        }
        else if(strcasecmp(argv[cargv], "-t") == 0)
        {
            cargv++;
            if(cargv >= argc)
            {
                fprintf(stderr, WRAP "-t must be followed by an integer. (ran out of arguments to process)\n");
                exit(1);
            }
            if(!parse_int_arg(argv[cargv], &tab_width))
            {
                fprintf(stderr, WRAP "-t must be followed by an integer. (\"%s\" is not an integer)\n", argv[cargv]);
                exit(1);
            }
            if(tab_width < 1)
            {
                fprintf(stderr, WRAP "Tab width must be 1 or greater.\n");
                exit(1);
            }
        }
        else if(strcasecmp(argv[cargv], "-w") == 0)
        {
            cargv++;
            if(cargv >= argc)
            {
                fprintf(stderr, WRAP "-w must be followed by an integer. (ran out of arguments to process)\n");
                exit(1);
            }
            if(!parse_int_arg(argv[cargv], &line_width))
            {
                fprintf(stderr, WRAP "-w must be followed by an integer. (\"%s\" is not an integer)\n", argv[cargv]);
                exit(1);
            }
            if(line_width < 1)
            {
                fprintf(stderr, WRAP "Line width must be 1 or greater.\n");
                exit(1);
            }
            //The line buffer is sized from the line width, so it has to be
            //replaced whenever the width changes. free(NULL) is a no-op.
            free(linebuf);
            linebuf = new_linebuf(line_width);
        }
        else if(strcasecmp(argv[cargv], "-m") == 0)
        {
            cargv++;
            if(cargv >= argc)
            {
                fprintf(stderr, WRAP "-m must be followed by a valid mode.\n");
                exit(1);
            }
            else if(strcasecmp(argv[cargv], "strict") == 0)
                mode = MODE_STRICT;
            else if(strcasecmp(argv[cargv], "cut") == 0)
                mode = MODE_CUT;
            else if(strcasecmp(argv[cargv], "nice") == 0)
                mode = MODE_NICE;
            else
            {
                //Unlike the other options, an unknown mode is only a warning.
                fprintf(stderr, WRAP "Unknown mode \"%s\"... Reverting to default.\n", argv[cargv]);
                mode = MODE_DEFAULT;
            }
        }
        else if(strcasecmp(argv[cargv], "-n") == 0)
        {
            cargv++;
            if(cargv >= argc)
            {
                fprintf(stderr, WRAP "-n must be followed by a valid newline style.\n");
                exit(1);
            }
            else if(strcasecmp(argv[cargv], "dos") == 0)
                dos = 1;
            else if(strcasecmp(argv[cargv], "unix") == 0)
                dos = 0;
            else
            {
                fprintf(stderr, WRAP "Unknown newline style \"%s\".\n", argv[cargv]);
                exit(1);
            }
        }
        else if(strcasecmp(argv[cargv], "-d") == 0)
        {
            cargv++;
            if(cargv >= argc)
            {
                fprintf(stderr, WRAP "-d must be followed by TRUE or FALSE.\n");
                exit(1);
            }
            else if(strcasecmp(argv[cargv], "TRUE") == 0 || strcasecmp(argv[cargv], "YES") == 0 || strcasecmp(argv[cargv], "1") == 0)
                dashcut = 1;
            else if(strcasecmp(argv[cargv], "FALSE") == 0 || strcasecmp(argv[cargv], "NO") == 0 || strcasecmp(argv[cargv], "0") == 0)
                dashcut = 0;
            else
            {
                fprintf(stderr, WRAP "Unknown boolean \"%s\".\n", argv[cargv]);
                exit(1);
            }
        }
        else if(strcasecmp(argv[cargv], "-o") == 0)
        {
            cargv++;
            if(cargv >= argc)
            {
                fprintf(stderr, WRAP "-o must be followed by an output file name.\n");
                exit(1);
            }
            //Closing flushes buffered text, so this is where a full disk
            //shows up for the file being replaced.
            if(output != NULL && output != stdout)
            {
                if(fclose(output) != 0)
                    output_write_failed();
            }
            if(strcmp(argv[cargv], "-") == 0)
                output = stdout;
            else if((output = fopen(argv[cargv], "wb")) == NULL)
            {
                fprintf(stderr, WRAP "failed to open \"%s\" for writing!\n", argv[cargv]);
                exit(1);
            }
        }
        else //Unknown. Must be an input file.
        {
            is_input_file = 1;
            if(input != NULL && input != stdin)
                fclose(input);
            if(strcmp(argv[cargv], "-") == 0)
                input = stdin;
            else if((input = fopen(argv[cargv], "rb")) == NULL)
            {
                fprintf(stderr, WRAP "failed to open \"%s\" for reading!\n", argv[cargv]);
                exit(1);
            }
        }

        //If appropriate, launch the primary worker.
        if(is_input_file || (last_chance && !work_performed))
        {
            work_performed = 1;
            wrap_worker();
        }

        //Next argument...
        cargv++;
    }

    //exit() would flush the output stream without telling us whether that
    //worked; flush explicitly so a failed final write is reported.
    if(output != NULL && fflush(output) != 0)
        output_write_failed();

    exit(0);
}
//Print the help text to stderr.
//
//argzero is the name the program was invoked with (argv[0]); it is shown in
//the synopsis line. The function only prints; the caller decides whether to
//exit afterwards.
void usage(char *argzero)
{
    fprintf(stderr, "Wrap, an improved word wrapper. (Version " VERSION ")\n");
    fprintf(stderr, "\n");
    //The synopsis lists exactly the options main() understands.
    fprintf(stderr, "Usage: %s [-h] [--help] [-m MODE] [-w LINEWIDTH] [-t TABWIDTH] [-n NEWLINESTYLE] [-d BOOL] [-o OUTPUTFILE] [INPUTFILE1] [INPUTFILE2] [INPUTFILEN]\n", argzero);
    fprintf(stderr, " -h This message.\n");
    fprintf(stderr, " --help This message.\n");
    fprintf(stderr, " -m MODE Which word wrapping mode should we employ? See below for details.\n");
    fprintf(stderr, " -w LINEWIDTH Define the maximum number of columns in the output. (Default: %d)\n", LINE_WIDTH_DEFAULT);
    fprintf(stderr, " -t TABWIDTH TABWIDTH defines the maximum width of a tab. (Default: %d)\n", TAB_WIDTH_DEFAULT);
    fprintf(stderr, " -n NEWLINESTYLE Which style of new line sequence should we use? UNIX (default) or DOS?\n");
    fprintf(stderr, " -d BOOL Allow line breaks at hyphens and dashes in addition to whitespace? (YES by default.)\n");
    fprintf(stderr, " -o OUTPUTFILE File name to write wrapped text to. Use \"-\" or omit to dump wrapped text to STDOUT.\n");
    fprintf(stderr, " INPUTFILE File(s) to wrap. Use \"-\" or omit to wrap text from STDIN.\n");
    fprintf(stderr, "\n");
    fprintf(stderr, " Note that arguments may appear in practically any order. Each INPUTFILE flag constitutes a request to wrap the input\n");
    fprintf(stderr, " text with the preceding parameters. Further parameters will be ignored unless they are followed by another INPUTFILE.\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "Acceptable Modes:\n");
    fprintf(stderr, " nice The default. Wrap at word boundaries. Allow long words (> LINEWIDTH) to exceed line length.\n");
    fprintf(stderr, " strict Wrap at word boundaries. Cut long words (> LINEWIDTH) at maximum line length.\n");
    fprintf(stderr, " cut Ignore word boundaries. Cut lines at maximum width.\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "Legal:\n");
    fprintf(stderr, " Copyright (C) 2008 Alex Markley.\n\n");
    fprintf(stderr, " This program is free software: you can redistribute it and/or modify\n it under the terms of the GNU General Public License as published by\n the Free Software Foundation, either version 3 of the License, or\n (at your option) any later version.\n\n");
    fprintf(stderr, " This program is distributed in the hope that it will be useful,\n but WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n GNU General Public License for more details.\n\n");
    fprintf(stderr, " You may have received a copy of the GNU General Public License\n along with this program. If not, see <http://www.gnu.org/licenses/>.\n");
}
void wrap_worker(void)
{
//Defaults for basic operation.
if(linebuf == NULL)
linebuf = new_linebuf(line_width);
if(input == NULL)
input = stdin;
if(output == NULL)
output = stdout;
current_column = 0;
goodcut = -1;
findnew_goodcut = 1;
linebuf_len = 0;
cut_next = 0;
while((tempc = fgetwc(input)) != WEOF)
{
cut_here = 0;
newline_here = 0;
discard_here = 0;
c = (wchar_t)tempc;
//if(tempc > 127) fprintf(stderr, "(WC:%d)", tempc);
if(cut_next)
{
cut_here = 1;
cut_next = 0;
}
//Figure out the role of the character.
switch(c)
{
default:
i = wcwidth(c);
if(i >= 0) current_column = current_column + i;
else current_column++;
break;
case TAB:
cut_here = 1;
//Figure out the new current column.
current_column = ((int)(current_column / tab_width) + 1) * tab_width;
break;
case SPACE:
cut_here = 1;
current_column++;
break;
case '-':
case L'\u2014':
case L'\u2013':
if(dashcut) cut_next = 1;
current_column++;
break;
case NEWLINE:
newline_here = 1;
discard_here = 1;
break;
case CARRIAGE_RETURN:
discard_here = 1;
break;
}
//Place the character in the line buffer.
if(!discard_here)
{
linebuf[linebuf_len] = c;
if(cut_here)
{
if(findnew_goodcut)
{
goodcut = linebuf_len;
findnew_goodcut = 0;
}
}
else //Not a cuttable character. Find a new good cutting point later.
findnew_goodcut = 1;
linebuf_len++;
}
//If the input calls for a new line, let's just give it to them.
if(newline_here)
{
for(i = 0; i < linebuf_len; i++)
myfputwc(linebuf[i], output);
linebuf_len = 0;
if(dos) myfputwc(CARRIAGE_RETURN, output);
myfputwc(NEWLINE, output);
current_column = 0;
goodcut = -1;
findnew_goodcut = 1;
}
//Has our current line exceeded its maximum width?
if(current_column >= line_width)
{
if(mode == MODE_CUT)
{
for(i = 0; i < linebuf_len; i++)
myfputwc(linebuf[i], output);
if(dos) myfputwc(CARRIAGE_RETURN, output);
myfputwc(NEWLINE, output);
current_column = 0;
goodcut = -1;
findnew_goodcut = 1;
}
else //NICE or STRICT
{
if(goodcut >= 0)
{
//fprintf(stderr, WRAP "found a good cut: %d\n", goodcut);
for(i = 0; i < goodcut; i++)
myfputwc(linebuf[i], output);
if(dos) myfputwc(CARRIAGE_RETURN, output);
myfputwc(NEWLINE, output);
current_column = 0;
goodcut = -1;
findnew_goodcut = 1;
}
else //No good cut?
{
for(i = 0; i < (linebuf_len-1); i++)
myfputwc(linebuf[i], output);
if(mode == MODE_STRICT)
{
if(dos) myfputwc(CARRIAGE_RETURN, output);
myfputwc(NEWLINE, output);
current_column = 0;
goodcut = -1;
findnew_goodcut = 1;
}
}
}
//No matter what happened above, we have a string fragment at the end of linebuf.
//It needs to be trimmed and copied to the beginning.
cut_here = 0;
j = 0;
for(;i < linebuf_len;i++)
{
//Cut off any white space at the beginning of the fragment.
if(linebuf[i] != SPACE && linebuf[i] != TAB)
cut_here = 1;
if(cut_here) //Found a trim point already.
{
linebuf[j] = linebuf[i];
j++;
current_column++;
}
}
linebuf_len = j;
}
}
}
//Allocate a zero-filled line buffer with room for mywidth+8 wide characters.
//
//Returns the new buffer; the caller owns it and releases it with free().
//Never returns NULL: if the allocation fails the program prints a message and
//exits with status 1.
wchar_t *new_linebuf(int mywidth)
{
    wchar_t *fresh;

    //The element count is computed as size_t so that a width close to INT_MAX
    //cannot overflow a signed int; calloc() itself rejects a count*size that
    //does not fit.
    fresh = calloc((size_t)mywidth + 8, sizeof *fresh);
    if(fresh == NULL)
    {
        fprintf(stderr, WRAP "out of memory\n");
        exit(1);
    }
    return fresh;
}
//Write the wide character 'wc' to 'stream'.
//If fputwc() reports failure, print a diagnostic to stderr and exit with
//status 1; otherwise return normally.
void myfputwc(wchar_t wc, FILE *stream)
{
    const wint_t written = fputwc(wc, stream);

    if(written != WEOF)
        return;

    fprintf(stderr, WRAP "failed writing character to output stream. disk full?\n");
    exit(1);
}