I did in fact try to solve the "hard problem" that I thought you were posing. The following code looks for the longest string found in both file1 and file2. If there are multiple "longest" strings, it only reports the first one found. It may be helpful to someone at some point (although it may not be the solution you are looking for here):
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/stat.h>
/*
 * Return the size in bytes of the file named FILE_NAME, as reported by
 * stat().
 *
 * This routine never reports an error to its caller: if stat() fails, or if
 * the size is negative or too large for the 'unsigned' return type with one
 * byte to spare, it prints a diagnostic to stderr and terminates the program
 * with EXIT_FAILURE.
 */
static unsigned
get_file_size (const char * file_name)
{
    /* One below the largest 'unsigned' value, so that a caller computing
       "size + 1" (e.g. room for a terminator) cannot wrap around to 0. */
    const unsigned max_size = (unsigned) -1 - 1u;
    struct stat sb;

    if (stat (file_name, & sb) != 0) {
        fprintf (stderr, "'stat' failed for '%s': %s.\n",
                 file_name, strerror (errno));
        exit (EXIT_FAILURE);
    }
    /* st_size is a signed, possibly 64-bit type; refuse to narrow it
       silently into a value that no longer describes the file. */
    if (sb.st_size < 0 || (unsigned long long) sb.st_size > max_size) {
        fprintf (stderr, "'%s' is too large to process.\n", file_name);
        exit (EXIT_FAILURE);
    }
    return (unsigned) sb.st_size;
}
/*
 * Read the entire file named FILE_NAME into a freshly malloc'd buffer and
 * return it.
 *
 * The buffer holds the file's bytes followed by one NUL terminator, so
 * textual content can be handed straight to the <string.h> routines.  The
 * caller owns the buffer and must free() it.
 *
 * Every failure (size lookup, allocation, open, short read, close) prints a
 * diagnostic to stderr and terminates the program with EXIT_FAILURE; the
 * routine never returns NULL.
 */
static unsigned char *
read_whole_file (const char * file_name)
{
    unsigned s;
    unsigned char * contents;
    FILE * f;
    size_t bytes_read;

    s = get_file_size (file_name);
    /* Do the "+ 1" in size_t so it cannot wrap in 'unsigned' arithmetic. */
    contents = malloc ((size_t) s + 1);
    if (! contents) {
        fprintf (stderr, "Not enough memory.\n");
        exit (EXIT_FAILURE);
    }
    /* Binary mode: the byte count must match the size stat() reported, which
       text-mode newline translation would break on some platforms. */
    f = fopen (file_name, "rb");
    if (! f) {
        fprintf (stderr, "Could not open '%s': %s.\n", file_name,
                 strerror (errno));
        exit (EXIT_FAILURE);
    }
    bytes_read = fread (contents, sizeof (unsigned char), s, f);
    if (bytes_read != s) {
        /* errno is only meaningful if the stream reports an error; a plain
           early end-of-file leaves it stale. */
        fprintf (stderr, "Short read of '%s': expected %u bytes "
                 "but got %zu: %s.\n", file_name, s, bytes_read,
                 ferror (f) ? strerror (errno) : "unexpected end of file");
        exit (EXIT_FAILURE);
    }
    /* Terminate the data; the extra byte was allocated for exactly this. */
    contents[bytes_read] = '\0';
    if (fclose (f) != 0) {
        fprintf (stderr, "Error closing '%s': %s.\n", file_name,
                 strerror (errno));
        exit (EXIT_FAILURE);
    }
    return contents;
}
/*
 * Find the longest run of bytes that occurs both in A (LEN_A bytes) and in
 * B (LEN_B bytes).
 *
 * Returns the length of that run and stores its offset within A in *START.
 * When several runs share the maximum length, the one found first wins,
 * scanning start positions in A from left to right and, for each, start
 * positions in B from left to right.  Returns 0 (with *START set to 0) when
 * the inputs share no byte at all.
 */
static size_t
longest_common_run (const unsigned char * a, size_t len_a,
                    const unsigned char * b, size_t len_b,
                    size_t * start)
{
    size_t best = 0;
    size_t ia, ib, n;

    *start = 0;
    /* A match beginning at ia (resp. ib) can be at most len_a - ia (resp.
       len_b - ib) bytes long, so stop as soon as that cannot beat BEST. */
    for (ia = 0; ia < len_a && len_a - ia > best; ia++) {
        /* Every position of B is a candidate, including ones before ia. */
        for (ib = 0; ib < len_b && len_b - ib > best; ib++) {
            n = 0;
            while (ia + n < len_a && ib + n < len_b
                   && a[ia + n] == b[ib + n]) {
                n++;
            }
            /* Strictly greater: ties keep the earlier match. */
            if (n > best) {
                best = n;
                *start = ia;
            }
        }
    }
    return best;
}

/*
 * Usage: prog file1 file2
 *
 * Loads both files into memory and prints the length of the longest string
 * they have in common, followed by the string itself on its own line.
 * Returns EXIT_FAILURE if fewer than two file names are given.
 */
int main(int argc, char* argv[]){
    unsigned char *f1, *f2; // buffers with entire file contents
    size_t l1, l2;
    size_t matchStart, longestFound;

    if (argc < 3) {
        fprintf(stderr, "Usage: %s file1 file2\n",
                argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    f1 = read_whole_file (argv[1]);
    f2 = read_whole_file (argv[2]);
    // NOTE(review): strlen needs NUL-terminated buffers - confirm that
    // read_whole_file terminates what it returns.
    l1 = strlen((const char *) f1);
    l2 = strlen((const char *) f2);

    longestFound = longest_common_run(f1, l1, f2, l2, &matchStart);

    printf("longest string found is %zu characters:\n", longestFound);
    // Print straight from f1: no fixed-size scratch buffer, so the match
    // length is not capped and an empty result prints an empty line.
    fwrite(f1 + matchStart, 1, longestFound, stdout);
    putchar('\n');

    free(f1);
    free(f2);
    return 0;
}
The code for reading entire file contents was found at http://www.lemoda.net/c/read-whole-file/index.html
b
,bl
,bla
,blab
, .. etc. would you expect to get? Is there a "minimum length of match" you would want? I think there is no built-in command that does exactly what you want; if you had the search strings on a line by themselves, it would be easy (using `grep -F file1 file2`), but you don't... Can you do anything to bound the problem better? – Floris