#!/usr/bin/gawk -f
#
# Use: dtdiff new_file.html old_file.html > diff_file.html
#
# Program dtdiff compares two <DT>-style resource record files, and
# produces a 'diff' report in HTML format. The comparison is done in
# memory, so it is not necessary that either input file be sorted.
# D.Wells, NRAO-CV, 13Feb94.
# --------------------------------------------------------------------
END {
# Load both inputs into memory: pass i=1 reads "new_file" (ARGV[1]),
# pass i=2 reads "old_file" (ARGV[2]).  For each file we keep the title
# and link from its header, plus one records[]/urls[] entry per record.
# NOTE(review): title, link, rr and url are globals that are presumably
# set as side effects of read_header()/read_record() -- confirm against
# those functions, which are defined elsewhere in the file.
for (i = 1; i <= 2; i++) {
    # A header result of zero or less is fatal.
    val = read_header(ARGV[i]);
    if (val <= 0) {
        printf("ABORT: read_header(%s) returned %d.\n", ARGV[i], val);
        exit(13);
    }
    titles[i] = title;
    links[i] = link;

    # Keep reading while read_record() reports a positive result;
    # j counts the records stored so far for this file.
    j = 0;
    while ((val = read_record(ARGV[i])) > 0) {
        j++;
        records[i,j] = rr;
        urls[i,j] = url;
    }
    # The loop ended on zero (no more records) or on a negative value,
    # which is treated as a fatal error.
    if (val < 0) {
        printf("ABORT: read_record(%s) returned %d for j=%d.\n",
               ARGV[i], val, j);
        exit(13);
    }
    num_recs[i] = j;
    close(ARGV[i]);
    #
    # Diagnostic dump of what was just read (record 3 is a spot check):
    print "file=" ARGV[i];
    print num_recs[i] " records";
    print "=" titles[i];
    print "=" links[i];
    print "URL[3]=" urls[i,3];
    print "records[3]=" records[i,3];
    print " ";
}
#
# At this point we have the two sets of records and URLs in memory.
# First we will compare the lists and delete identical records.
# Then we will produce several listings:
#
# Delete identical (same URL, same text) resources.
#
# Each record of list 1 is looked up in list 2.  The first list-2 record
# with the same URL and the same text is taken as its twin, and both are
# removed by moving the tails of the two lists up one place.  The record
# counts live in num_recs[], which is where the reading loop stored them.
#
# After a removal i is deliberately NOT advanced: the record that used to
# sit at i+1 now occupies slot i and has not been examined yet.
num_delete = 0;
i = 1;
while (i <= num_recs[1]) {
    # Find the first twin of record i in list 2 (0 means none found).
    twin = 0;
    for (j = 1; j <= num_recs[2]; j++) {
        if ((urls[1,i] == urls[2,j]) && (records[1,i] == records[2,j])) {
            twin = j;
            break;
        }
    }
    if (twin == 0) {
        i++;
        continue;
    }
    print "equal: " i, twin;
    # Close the gap in list 1, then drop the now-duplicated last slot.
    for (m = i; m < num_recs[1]; m++) {
        records[1,m] = records[1,m+1];
        urls[1,m] = urls[1,m+1];
    }
    delete records[1,num_recs[1]];
    delete urls[1,num_recs[1]];
    num_recs[1]--;
    # Same for list 2, starting at the twin's position.
    for (m = twin; m < num_recs[2]; m++) {
        records[2,m] = records[2,m+1];
        urls[2,m] = urls[2,m+1];
    }
    delete records[2,num_recs[2]];
    delete urls[2,num_recs[2]];
    num_recs[2]--;
    num_delete++;
}
#
printf ("%4d identical resource records deleted.\n", num_delete);
exit(1);
# Merge cases of identical URLs but different text:
num_merge = 0;
for (l = 1; l < n; l++) {
if ((key[l] == key[l+1]) && (resource[l] != resource[l+1])) {
# append resource[l+1] to [l] with list markup to make indentation
# (NOTE: the HTML tag named here was lost from this copy of the file):
resource[l] = resource[l] "
||" resource[l+1] "
||";
for (m = l+1; m < n; m++) { # move rest of list up one place:
key[m] = key[m+1];
resource[m] = resource[m+1];
} # resource[l+1] has been deleted:
num_merge++;
n--; # decrements count of resource[];
}
}
printf ("%4d similar resource records merged. (n=%d)\n",
num_merge, n) | "cat 1>&2";
printf ("%4d resource records edited:\n", num_edit) | "cat 1>&2";
for (i in hit) printf("\t%4d cases of |%s|\n", hit[i], i) | "cat 1>&2";
print "