// fawk.c : minimalist awk
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Locates the first occurrence of one length-delimited byte sequence inside
// another. Neither buffer has to be NUL-terminated: only the first str_len
// bytes of str and the first substr_len bytes of substr are examined.
//
//   str, str_len        haystack and the number of bytes to search in it
//   substr, substr_len  needle and its length
//
// Returns the zero-based offset of the first match within str, or -1 when
// the needle does not occur (including when it is longer than the haystack).
// An empty needle (substr_len == 0) matches at offset 0.
int str_idx(char* str, int str_len, char* substr, int substr_len)
{
    int offset;

    // Try every start position that still leaves room for the whole needle.
    for (offset = 0; substr_len <= str_len - offset; ++offset)
    {
        const char* candidate = str + offset;
        int matched = 0;

        // Count how many leading bytes agree at this position.
        while (matched < substr_len && candidate[matched] == substr[matched])
            ++matched;

        if (matched == substr_len)
            return offset;
    }

    return -1;
}
// Writes the command-line help text to standard output and hands back
// ret_code unchanged, so callers can write `return usage(code);` to print
// the help and exit with that status in one statement.
int usage(int ret_code)
{
    static const char help_text[] =
        "usage: fawk <pattern> <column # to print> <input> [options]\n"
        "where\n"
        " <pattern> a word in quotes to search for in the columns - no regexp allowed\n"
        " <column # to print> serial of word to be printed\n"
        " <input> input file to read\n"
        " options:\n"
        " --first_match_only stops searching after the first match\n"
        " --substring_match the word is used as substring (default=exact match)\n"
        "\n"
        "example: fawk \"privvmpages\" 6 /proc/user_beancounters \n";

    // The text contains no conversion specifiers, so it is emitted verbatim.
    fputs(help_text, stdout);
    return ret_code;
}
// Entry point: fawk <pattern> <column # to print> <input> [options]
//
// Reads <input> line by line, splits each line into whitespace-separated
// words and looks for <pattern> among them. For every line that contains a
// matching word, the word at the requested column (1-based) is printed on
// its own line; column 0 prints the whole line.
//
// Options (argv[4] onwards; unrecognised options are ignored):
//   --first_match_only  stop after the first line that produced output
//   --substring_match   the pattern may occur anywhere inside a word;
//                       without it the word must equal the pattern exactly
//
// Return value: 0 on success, 1 for too few arguments, 2 for an empty
// pattern, 3 for an invalid column number, 4 when the input file cannot be
// opened.
//
// Note: lines are read into a fixed 4096-character buffer; a longer input
// line is processed as several consecutive chunks.
int main(int argc, char* argv[])
{
    FILE* fi;
    int i;
    char line[4096+1];
    char* endp;
    long column_arg;
    int column_to_output;
    char* word_to_search_for;
    size_t word_to_search_len;
    int first_match_only = 0;
    int substring_match = 0;

    // Pattern, column and input file are all mandatory; argv[3] is opened
    // below, so fewer than 4 arguments must be rejected here.
    if( argc < 4 ) {
        printf("error: improper number of parameters\n");
        return usage(1);
    }

    word_to_search_for = argv[1];
    word_to_search_len = strlen(word_to_search_for);
    if( word_to_search_len == 0 ) {
        printf("error: invalid word to search for: '%s'\n", argv[1]);
        return usage(2);
    }

    // Parse the column as a signed value so that negative input is detected
    // instead of wrapping; reject trailing junk and values that do not fit
    // in an int.
    errno = 0;
    column_arg = strtol(argv[2], &endp, 10);
    if( endp == argv[2] || *endp != '\0' || errno == ERANGE
        || column_arg < 0 || column_arg > INT_MAX ) {
        printf("error: invalid column number: '%s'\n", argv[2]);
        return usage(3);
    }
    column_to_output = (int)column_arg;

    for(i=4; i < argc; ++i)
    {
        if( strcmp(argv[i], "--first_match_only")==0 )
            first_match_only = 1;
        else if( strcmp(argv[i], "--substring_match")==0 )
            substring_match = 1;
    }

    fi = fopen(argv[3], "rt");
    if( !fi ) {
        printf("error: could not open input file '%s'\n", argv[3]);
        return usage(4);
    }

    while( fgets(line, sizeof(line), fi) )
    {
        char* p;
        size_t len;
        int word_num = 0;
        int match_found = 0;
        const char* word_to_output = NULL;
        size_t word_to_output_len = 0;

        // fgets always NUL-terminates; strip the trailing newline if present
        len = strlen(line);
        if( len > 0 && line[len-1] == '\n' )
            line[--len] = '\0';

        //if 0'th column requested, this means the whole line to be outputted
        if( column_to_output == 0 )
        {
            word_to_output = line;
            word_to_output_len = len;
        }

        //splitting row into words; finding the specified pattern in words.
        //Scanning stops early once both the match and the output word are known.
        p = line;
        while( !match_found || word_to_output_len == 0 )
        {
            char* word_start_p;
            size_t word_len;

            //skip spaces before next word
            //(cast: <ctype.h> functions are undefined for negative char values)
            while( isspace((unsigned char)*p) )
                ++p;
            if( !*p )
                break; //line end, no more words

            //word starting char found
            ++word_num;
            word_start_p = p++;

            //find word end
            while( *p && !isspace((unsigned char)*p) )
                ++p;
            word_len = (size_t)(p - word_start_p);

            //if we are at the N'th column, store the word to be outputted
            if( column_to_output == word_num )
            {
                word_to_output = word_start_p;
                word_to_output_len = word_len;
            }

            //check if this word matches the pattern
            if( !match_found && word_to_search_len <= word_len )
            {
                if( substring_match )
                    //both lengths are bounded by the line buffer, so they fit in int
                    match_found = str_idx(word_start_p, (int)word_len,
                                          word_to_search_for, (int)word_to_search_len) >= 0;
                else
                    //exact match: same length and same bytes (a bare prefix is not enough)
                    match_found = word_len == word_to_search_len
                               && memcmp(word_start_p, word_to_search_for, word_len) == 0;
            }
        }

        if( match_found && word_to_output_len > 0 )
        {
            //the word is not NUL-terminated inside the line, so write exactly
            //its length rather than everything up to the end of the line
            fwrite(word_to_output, 1, word_to_output_len, stdout);
            putchar('\n');
            if( first_match_only )
                break;
        }
    }

    fclose(fi); fi = NULL;
    return 0;
}