Added
Link Here
|
1 |
/* vi: set sw=4 ts=4: */ |
2 |
/* |
3 |
* wc implementation for busybox |
4 |
* |
5 |
* Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org> |
6 |
* |
7 |
* Licensed under GPLv2 or later, see file LICENSE in this tarball for details. |
8 |
*/ |
9 |
|
10 |
/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */ |
11 |
/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ |
12 |
|
13 |
/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) |
14 |
* |
15 |
* Rewritten to fix a number of problems and do some size optimizations. |
16 |
* Problems in the previous busybox implementation (besides bloat) included: |
17 |
* 1) broken 'wc -c' optimization (read note below) |
18 |
* 2) broken handling of '-' args |
19 |
* 3) no checking of ferror on EOF returns |
20 |
* 4) isprint() wasn't considered when word counting. |
21 |
* |
22 |
* TODO: |
23 |
* |
24 |
* When locale support is enabled, count multibyte chars in the '-m' case. |
25 |
* |
26 |
* NOTES: |
27 |
* |
28 |
* The previous busybox wc attempted an optimization using stat for the |
29 |
* case of counting chars only. I omitted that because it was broken. |
30 |
* It didn't take into account the possibility of input coming from a |
31 |
* pipe, or input from a file with file pointer not at the beginning. |
32 |
* |
33 |
* To implement such a speed optimization correctly, not only do you |
34 |
* need the size, but also the file position. Note also that the |
35 |
* file position may be past the end of file. Consider the example |
36 |
* (adapted from example in gnu wc.c) |
37 |
* |
38 |
* echo hello > /tmp/testfile && |
39 |
* (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile |
40 |
* |
41 |
* for which 'wc -c' should output '0'. |
42 |
*/ |
43 |
#include <stdio.h> |
44 |
#include <stdlib.h> |
45 |
#include <string.h> |
46 |
#include <unistd.h> |
47 |
/*
 * Locale-independent ASCII character classes.  Any definitions inherited
 * from earlier headers are dropped first so the versions below are the
 * ones used by the rest of this file.
 */
#undef isspace
#undef isprint
/* True for ' ' and for the control characters 9..13 (\t \n \v \f \r). */
#define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9))))
/* True for 0x20..0x7e.  The unsigned cast lets one comparison reject both
 * values below 0x20 (including a negative EOF) and values above 0x7e. */
#define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20))
/* Once isprint(c) is known to hold, the only whitespace left is ' '. */
#define isspace_given_isprint(c) ((c) == ' ')

/* Counter type and its printf conversion.  The two must agree: COUNT_T is
 * unsigned long, so the conversion needs the 'l' length modifier. */
#define COUNT_T unsigned long
#define COUNT_FMT "lu"

/* NOTE(review): this replaces every later use of the identifier `optind`
 * (normally the operand index maintained by getopt()) with the constant 1,
 * so code expanding it always starts at argv[1].  Confirm whether that is
 * intended before relying on it. */
#define optind 1
56 |
/*
 * Open FILENAME for reading, or hand back stdin.
 *
 * A NULL name, an empty name, or a lone "-" selects standard input; nothing
 * is opened in that case and the caller must not fclose() the result.
 * Any other name is passed to fopen(..., "r").
 *
 * Returns the stream, or NULL if fopen() failed.  On failure a diagnostic
 * naming the file is written to stderr with perror().
 */
FILE *fopen_or_warn_stdin(const char *filename)
{
	FILE *fp;

	if (filename == NULL || filename[0] == '\0'
	 || strcmp(filename, "-") == 0) {
		return stdin;
	}

	fp = fopen(filename, "r");
	if (fp == NULL) {
		/* This is the "or_warn" part: report which file and why. */
		perror(filename);
	}

	return fp;
}
66 |
|
67 |
/*
 * Slots of the four-element counter arrays used by main().  The same
 * numbers serve as bit positions in the print mask (1 << WC_xxx), so the
 * values must stay 0..3 in this order.
 */
enum {
	WC_LINES,	/* 0: number of '\n' characters seen */
	WC_WORDS,	/* 1: number of words */
	WC_CHARS,	/* 2: number of characters read */
	WC_LENGTH	/* 3: longest line position reached */
};
73 |
|
74 |
int main(int argc, char **argv) |
75 |
{ |
76 |
FILE *fp; |
77 |
const char *s, *arg; |
78 |
const char *start_fmt = "%9"COUNT_FMT; |
79 |
const char *fname_fmt = " %s\n"; |
80 |
COUNT_T *pcounts; |
81 |
COUNT_T counts[4]; |
82 |
COUNT_T totals[4]; |
83 |
unsigned linepos; |
84 |
unsigned u; |
85 |
int num_files = 0; |
86 |
int c; |
87 |
signed char status = EXIT_SUCCESS; |
88 |
signed char in_word; |
89 |
unsigned print_type; |
90 |
|
91 |
print_type = getopt(argc, argv, "lwcL"); |
92 |
|
93 |
if (print_type == 0) { |
94 |
print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); |
95 |
} |
96 |
|
97 |
argv += optind; |
98 |
if (!argv[0]) { |
99 |
*--argv = (char *) "wc"; |
100 |
fname_fmt = "\n"; |
101 |
if (!((print_type-1) & print_type)) /* exactly one option? */ |
102 |
start_fmt = "%"COUNT_FMT; |
103 |
} |
104 |
|
105 |
memset(totals, 0, sizeof(totals)); |
106 |
|
107 |
pcounts = counts; |
108 |
|
109 |
while ((arg = *argv++) != 0) { |
110 |
++num_files; |
111 |
fp = fopen_or_warn_stdin(arg); |
112 |
if (!fp) { |
113 |
status = EXIT_FAILURE; |
114 |
continue; |
115 |
} |
116 |
|
117 |
memset(counts, 0, sizeof(counts)); |
118 |
linepos = 0; |
119 |
in_word = 0; |
120 |
|
121 |
do { |
122 |
/* Our -w doesn't match GNU wc exactly... oh well */ |
123 |
|
124 |
++counts[WC_CHARS]; |
125 |
c = getc(fp); |
126 |
if (isprint(c)) { |
127 |
++linepos; |
128 |
if (!isspace_given_isprint(c)) { |
129 |
in_word = 1; |
130 |
continue; |
131 |
} |
132 |
} else if (((unsigned int)(c - 9)) <= 4) { |
133 |
/* \t 9 |
134 |
* \n 10 |
135 |
* \v 11 |
136 |
* \f 12 |
137 |
* \r 13 |
138 |
*/ |
139 |
if (c == '\t') { |
140 |
linepos = (linepos | 7) + 1; |
141 |
} else { /* '\n', '\r', '\f', or '\v' */ |
142 |
DO_EOF: |
143 |
if (linepos > counts[WC_LENGTH]) { |
144 |
counts[WC_LENGTH] = linepos; |
145 |
} |
146 |
if (c == '\n') { |
147 |
++counts[WC_LINES]; |
148 |
} |
149 |
if (c != '\v') { |
150 |
linepos = 0; |
151 |
} |
152 |
} |
153 |
} else if (c == EOF) { |
154 |
/* if (ferror(fp)) { |
155 |
status = EXIT_FAILURE; |
156 |
} |
157 |
*/ --counts[WC_CHARS]; |
158 |
goto DO_EOF; /* Treat an EOF as '\r'. */ |
159 |
} else { |
160 |
continue; |
161 |
} |
162 |
|
163 |
counts[WC_WORDS] += in_word; |
164 |
in_word = 0; |
165 |
if (c == EOF) { |
166 |
break; |
167 |
} |
168 |
} while (1); |
169 |
|
170 |
if (totals[WC_LENGTH] < counts[WC_LENGTH]) { |
171 |
totals[WC_LENGTH] = counts[WC_LENGTH]; |
172 |
} |
173 |
totals[WC_LENGTH] -= counts[WC_LENGTH]; |
174 |
|
175 |
if(fp != stdin) |
176 |
fclose(fp); |
177 |
|
178 |
OUTPUT: |
179 |
/* coreutils wc tries hard to print pretty columns |
180 |
* (saves results for all files, find max col len etc...) |
181 |
* we won't try that hard, it will bloat us too much */ |
182 |
s = start_fmt; |
183 |
u = 0; |
184 |
do { |
185 |
if (print_type & (1 << u)) { |
186 |
printf(s, pcounts[u]); |
187 |
s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ |
188 |
} |
189 |
totals[u] += pcounts[u]; |
190 |
} while (++u < 4); |
191 |
printf(fname_fmt, arg); |
192 |
} |
193 |
|
194 |
/* If more than one file was processed, we want the totals. To save some |
195 |
* space, we set the pcounts ptr to the totals array. This has the side |
196 |
* effect of trashing the totals array after outputting it, but that's |
197 |
* irrelavent since we no longer need it. */ |
198 |
if (num_files > 1) { |
199 |
num_files = 0; /* Make sure we don't get here again. */ |
200 |
arg = "total"; |
201 |
pcounts = totals; |
202 |
--argv; |
203 |
goto OUTPUT; |
204 |
} |
205 |
|
206 |
fflush(stdout); |
207 |
exit(status); |
208 |
} |