|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <config.h> |
|
|
|
|
|
#include <getopt.h> |
|
|
#include <sys/types.h> |
|
|
#include "system.h" |
|
|
#include "linebuffer.h" |
|
|
#include "fadvise.h" |
|
|
#include "hard-locale.h" |
|
|
#include "quote.h" |
|
|
#include "stdio--.h" |
|
|
#include "memcmp2.h" |
|
|
#include "xmemcoll.h" |
|
|
|
|
|
|
|
|
/* Name of this program; passed to emit_ancillary_info and to the
   --version handler in main.  */
#define PROGRAM_NAME "comm"

/* Author list handed to case_GETOPT_VERSION_CHAR in main.  */
#define AUTHORS \
  proper_name ("Richard M. Stallman"), \
  proper_name ("David MacKenzie")
|
|
|
|
|
|
|
|
/* Result of hard_locale (LC_COLLATE), recorded by main.  When true, lines
   are compared with xmemcoll; otherwise a bytewise comparison is used.  */
static bool hard_LC_COLLATE;

/* If true, output lines found only in file 1.  Cleared by -1.  */
static bool only_file_1;

/* If true, output lines found only in file 2.  Cleared by -2.  */
static bool only_file_2;

/* If true, output lines found in both files.  Cleared by -3.  */
static bool both;

/* Set by compare_files once it meets a line that is not common to both
   inputs.  In the default checking mode, check_order does nothing until
   this is true.  */
static bool seen_unpairable;

/* issued_disorder_warning[N - 1] becomes true once file N has been
   diagnosed as unsorted, so each file is reported at most once.  */
static bool issued_disorder_warning[2];

/* Input line delimiter: newline by default, NUL with -z.  It also ends
   the --total summary line.  */
static unsigned char delim = '\n';

/* If true (--total), print the three line counts after the comparison.  */
static bool total_option;

/* Policy for verifying that the inputs are sorted.  */
static enum
  {
    /* Check only after an unpairable line has been seen; disorder is
       reported as a warning.  */
    CHECK_ORDER_DEFAULT,

    /* --check-order: always check; disorder is a fatal error.  */
    CHECK_ORDER_ENABLED,

    /* --nocheck-order: never check.  */
    CHECK_ORDER_DISABLED
  } check_input_order;

/* Column separator written before columns 2 and 3, and its length in
   bytes.  col_sep_len stays 0 until --output-delimiter is parsed; main
   then falls back to 1 for the default TAB.  An empty STR is also given
   length 1, so that its terminating NUL byte is what gets written.  */
static char const *col_sep = "\t";
static size_t col_sep_len = 0;
|
|
|
|
|
|
|
|
|
|
|
/* Codes returned by getopt_long for options that have no short form.
   They start above CHAR_MAX so they cannot collide with any
   single-character option.  */
enum
{
  CHECK_ORDER_OPTION = CHAR_MAX + 1,
  NOCHECK_ORDER_OPTION,
  OUTPUT_DELIMITER_OPTION,
  TOTAL_OPTION
};
|
|
|
|
|
static struct option const long_options[] = |
|
|
{ |
|
|
{"check-order", no_argument, nullptr, CHECK_ORDER_OPTION}, |
|
|
{"nocheck-order", no_argument, nullptr, NOCHECK_ORDER_OPTION}, |
|
|
{"output-delimiter", required_argument, nullptr, OUTPUT_DELIMITER_OPTION}, |
|
|
{"total", no_argument, nullptr, TOTAL_OPTION}, |
|
|
{"zero-terminated", no_argument, nullptr, 'z'}, |
|
|
{GETOPT_HELP_OPTION_DECL}, |
|
|
{GETOPT_VERSION_OPTION_DECL}, |
|
|
{nullptr, 0, nullptr, 0} |
|
|
}; |
|
|
|
|
|
|
|
|
void |
|
|
usage (int status) |
|
|
{ |
|
|
if (status != EXIT_SUCCESS) |
|
|
emit_try_help (); |
|
|
else |
|
|
{ |
|
|
printf (_("\ |
|
|
Usage: %s [OPTION]... FILE1 FILE2\n\ |
|
|
"), |
|
|
program_name); |
|
|
fputs (_("\ |
|
|
Compare sorted files FILE1 and FILE2 line by line.\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
\n\ |
|
|
When FILE1 or FILE2 (not both) is -, read standard input.\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
\n\ |
|
|
With no options, produce three-column output. Column one contains\n\ |
|
|
lines unique to FILE1, column two contains lines unique to FILE2,\n\ |
|
|
and column three contains lines common to both files.\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
\n\ |
|
|
-1 suppress column 1 (lines unique to FILE1)\n\ |
|
|
-2 suppress column 2 (lines unique to FILE2)\n\ |
|
|
-3 suppress column 3 (lines that appear in both files)\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
\n\ |
|
|
--check-order check that the input is correctly sorted, even\n\ |
|
|
if all input lines are pairable\n\ |
|
|
--nocheck-order do not check that the input is correctly sorted\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
--output-delimiter=STR separate columns with STR\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
--total output a summary\n\ |
|
|
"), stdout); |
|
|
fputs (_("\ |
|
|
-z, --zero-terminated line delimiter is NUL, not newline\n\ |
|
|
"), stdout); |
|
|
fputs (HELP_OPTION_DESCRIPTION, stdout); |
|
|
fputs (VERSION_OPTION_DESCRIPTION, stdout); |
|
|
fputs (_("\ |
|
|
\n\ |
|
|
Comparisons honor the rules specified by 'LC_COLLATE'.\n\ |
|
|
"), stdout); |
|
|
printf (_("\ |
|
|
\n\ |
|
|
Examples:\n\ |
|
|
%s -12 file1 file2 Print only lines present in both file1 and file2.\n\ |
|
|
%s -3 file1 file2 Print lines in file1 not in file2, and vice versa.\n\ |
|
|
"), |
|
|
program_name, program_name); |
|
|
emit_ancillary_info (PROGRAM_NAME); |
|
|
} |
|
|
exit (status); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void |
|
|
writeline (struct linebuffer const *line, int class) |
|
|
{ |
|
|
switch (class) |
|
|
{ |
|
|
case 1: |
|
|
if (!only_file_1) |
|
|
return; |
|
|
break; |
|
|
|
|
|
case 2: |
|
|
if (!only_file_2) |
|
|
return; |
|
|
if (only_file_1) |
|
|
fwrite (col_sep, 1, col_sep_len, stdout); |
|
|
break; |
|
|
|
|
|
case 3: |
|
|
if (!both) |
|
|
return; |
|
|
if (only_file_1) |
|
|
fwrite (col_sep, 1, col_sep_len, stdout); |
|
|
if (only_file_2) |
|
|
fwrite (col_sep, 1, col_sep_len, stdout); |
|
|
break; |
|
|
} |
|
|
|
|
|
fwrite (line->buffer, sizeof (char), line->length, stdout); |
|
|
|
|
|
if (ferror (stdout)) |
|
|
write_error (); |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void |
|
|
check_order (struct linebuffer const *prev, |
|
|
struct linebuffer const *current, |
|
|
int whatfile) |
|
|
{ |
|
|
|
|
|
if (check_input_order != CHECK_ORDER_DISABLED |
|
|
&& ((check_input_order == CHECK_ORDER_ENABLED) || seen_unpairable)) |
|
|
{ |
|
|
if (!issued_disorder_warning[whatfile - 1]) |
|
|
{ |
|
|
int order; |
|
|
|
|
|
if (hard_LC_COLLATE) |
|
|
order = xmemcoll (prev->buffer, prev->length - 1, |
|
|
current->buffer, current->length - 1); |
|
|
else |
|
|
order = memcmp2 (prev->buffer, prev->length - 1, |
|
|
current->buffer, current->length - 1); |
|
|
|
|
|
if (0 < order) |
|
|
{ |
|
|
error ((check_input_order == CHECK_ORDER_ENABLED |
|
|
? EXIT_FAILURE : 0), |
|
|
0, _("file %d is not in sorted order"), whatfile); |
|
|
|
|
|
|
|
|
|
|
|
issued_disorder_warning[whatfile - 1] = true; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static _Noreturn void |
|
|
compare_files (char **infiles) |
|
|
{ |
|
|
|
|
|
struct linebuffer lba[2][4]; |
|
|
|
|
|
|
|
|
|
|
|
struct linebuffer *thisline[2]; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct linebuffer *all_line[2][4]; |
|
|
|
|
|
|
|
|
int alt[2][3]; |
|
|
|
|
|
|
|
|
FILE *streams[2]; |
|
|
|
|
|
|
|
|
uintmax_t total[] = {0, 0, 0}; |
|
|
|
|
|
int i, j; |
|
|
|
|
|
|
|
|
for (i = 0; i < 2; i++) |
|
|
{ |
|
|
for (j = 0; j < 4; j++) |
|
|
{ |
|
|
initbuffer (&lba[i][j]); |
|
|
all_line[i][j] = &lba[i][j]; |
|
|
} |
|
|
alt[i][0] = 0; |
|
|
alt[i][1] = 0; |
|
|
alt[i][2] = 0; |
|
|
streams[i] = (streq (infiles[i], "-") ? stdin : fopen (infiles[i], "r")); |
|
|
if (!streams[i]) |
|
|
error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); |
|
|
|
|
|
fadvise (streams[i], FADVISE_SEQUENTIAL); |
|
|
|
|
|
thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]], streams[i], |
|
|
delim); |
|
|
if (ferror (streams[i])) |
|
|
error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); |
|
|
} |
|
|
|
|
|
while (thisline[0] || thisline[1]) |
|
|
{ |
|
|
int order; |
|
|
bool fill_up[2] = { false, false }; |
|
|
|
|
|
|
|
|
|
|
|
if (!thisline[0]) |
|
|
order = 1; |
|
|
else if (!thisline[1]) |
|
|
order = -1; |
|
|
else |
|
|
{ |
|
|
if (hard_LC_COLLATE) |
|
|
order = xmemcoll (thisline[0]->buffer, thisline[0]->length - 1, |
|
|
thisline[1]->buffer, thisline[1]->length - 1); |
|
|
else |
|
|
{ |
|
|
size_t len = MIN (thisline[0]->length, thisline[1]->length) - 1; |
|
|
order = memcmp (thisline[0]->buffer, thisline[1]->buffer, len); |
|
|
if (order == 0) |
|
|
order = _GL_CMP (thisline[0]->length, thisline[1]->length); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if (order == 0) |
|
|
{ |
|
|
|
|
|
total[2]++; |
|
|
writeline (thisline[1], 3); |
|
|
} |
|
|
else |
|
|
{ |
|
|
seen_unpairable = true; |
|
|
if (order <= 0) |
|
|
{ |
|
|
|
|
|
total[0]++; |
|
|
writeline (thisline[0], 1); |
|
|
} |
|
|
else |
|
|
{ |
|
|
|
|
|
total[1]++; |
|
|
writeline (thisline[1], 2); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (0 <= order) |
|
|
fill_up[1] = true; |
|
|
if (order <= 0) |
|
|
fill_up[0] = true; |
|
|
|
|
|
for (i = 0; i < 2; i++) |
|
|
if (fill_up[i]) |
|
|
{ |
|
|
|
|
|
alt[i][2] = alt[i][1]; |
|
|
alt[i][1] = alt[i][0]; |
|
|
alt[i][0] = (alt[i][0] + 1) & 0x03; |
|
|
|
|
|
thisline[i] = readlinebuffer_delim (all_line[i][alt[i][0]], |
|
|
streams[i], delim); |
|
|
|
|
|
if (thisline[i]) |
|
|
check_order (all_line[i][alt[i][1]], thisline[i], i + 1); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
else if (all_line[i][alt[i][2]]->buffer) |
|
|
check_order (all_line[i][alt[i][2]], |
|
|
all_line[i][alt[i][1]], i + 1); |
|
|
|
|
|
if (ferror (streams[i])) |
|
|
error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); |
|
|
|
|
|
fill_up[i] = false; |
|
|
} |
|
|
} |
|
|
|
|
|
for (i = 0; i < 2; i++) |
|
|
if (fclose (streams[i]) != 0) |
|
|
error (EXIT_FAILURE, errno, "%s", quotef (infiles[i])); |
|
|
|
|
|
if (total_option) |
|
|
{ |
|
|
|
|
|
if (col_sep_len == 1) |
|
|
{ |
|
|
printf ("%ju%c%ju%c%ju%c%s%c", |
|
|
total[0], *col_sep, |
|
|
total[1], *col_sep, |
|
|
total[2], *col_sep, |
|
|
_("total"), delim); |
|
|
} |
|
|
else |
|
|
{ |
|
|
printf ("%ju%s%ju%s%ju%s%s%c", |
|
|
total[0], col_sep, |
|
|
total[1], col_sep, |
|
|
total[2], col_sep, |
|
|
_("total"), delim); |
|
|
} |
|
|
} |
|
|
|
|
|
if (issued_disorder_warning[0] || issued_disorder_warning[1]) |
|
|
error (EXIT_FAILURE, 0, _("input is not in sorted order")); |
|
|
|
|
|
|
|
|
exit (EXIT_SUCCESS); |
|
|
} |
|
|
|
|
|
int |
|
|
main (int argc, char **argv) |
|
|
{ |
|
|
int c; |
|
|
|
|
|
initialize_main (&argc, &argv); |
|
|
set_program_name (argv[0]); |
|
|
setlocale (LC_ALL, ""); |
|
|
bindtextdomain (PACKAGE, LOCALEDIR); |
|
|
textdomain (PACKAGE); |
|
|
hard_LC_COLLATE = hard_locale (LC_COLLATE); |
|
|
|
|
|
atexit (close_stdout); |
|
|
|
|
|
only_file_1 = true; |
|
|
only_file_2 = true; |
|
|
both = true; |
|
|
|
|
|
seen_unpairable = false; |
|
|
issued_disorder_warning[0] = issued_disorder_warning[1] = false; |
|
|
check_input_order = CHECK_ORDER_DEFAULT; |
|
|
total_option = false; |
|
|
|
|
|
while ((c = getopt_long (argc, argv, "123z", long_options, nullptr)) != -1) |
|
|
switch (c) |
|
|
{ |
|
|
case '1': |
|
|
only_file_1 = false; |
|
|
break; |
|
|
|
|
|
case '2': |
|
|
only_file_2 = false; |
|
|
break; |
|
|
|
|
|
case '3': |
|
|
both = false; |
|
|
break; |
|
|
|
|
|
case 'z': |
|
|
delim = '\0'; |
|
|
break; |
|
|
|
|
|
case NOCHECK_ORDER_OPTION: |
|
|
check_input_order = CHECK_ORDER_DISABLED; |
|
|
break; |
|
|
|
|
|
case CHECK_ORDER_OPTION: |
|
|
check_input_order = CHECK_ORDER_ENABLED; |
|
|
break; |
|
|
|
|
|
case OUTPUT_DELIMITER_OPTION: |
|
|
if (col_sep_len && !streq (col_sep, optarg)) |
|
|
error (EXIT_FAILURE, 0, _("multiple output delimiters specified")); |
|
|
col_sep = optarg; |
|
|
col_sep_len = *optarg ? strlen (optarg) : 1; |
|
|
break; |
|
|
|
|
|
case TOTAL_OPTION: |
|
|
total_option = true; |
|
|
break; |
|
|
|
|
|
case_GETOPT_HELP_CHAR; |
|
|
|
|
|
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); |
|
|
|
|
|
default: |
|
|
usage (EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
if (! col_sep_len) |
|
|
col_sep_len = 1; |
|
|
|
|
|
if (argc - optind < 2) |
|
|
{ |
|
|
if (argc <= optind) |
|
|
error (0, 0, _("missing operand")); |
|
|
else |
|
|
error (0, 0, _("missing operand after %s"), quote (argv[argc - 1])); |
|
|
usage (EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
if (2 < argc - optind) |
|
|
{ |
|
|
error (0, 0, _("extra operand %s"), quote (argv[optind + 2])); |
|
|
usage (EXIT_FAILURE); |
|
|
} |
|
|
|
|
|
compare_files (argv + optind); |
|
|
} |
|
|
|