From bd471562c72035d588b78e3c80c9eb0e9a9c18ec Mon Sep 17 00:00:00 2001 From: Tyler Murphy Date: Thu, 4 May 2023 12:24:47 -0400 Subject: [PATCH] grep --- readme.md | 2 +- src/command.h | 2 + src/commands/grep.c | 249 ++++++++++++++++++++++++++++++++++++++++++++ src/main.c | 7 +- src/util/shared.c | 8 ++ src/util/shared.h | 1 + 6 files changed, 265 insertions(+), 4 deletions(-) create mode 100644 src/commands/grep.c diff --git a/readme.md b/readme.md index 05eea07..f63aa5e 100644 --- a/readme.md +++ b/readme.md @@ -4,7 +4,7 @@ A terrible busybox/gnu coreutils clone. Currently the only supported commands are: -`dd`, `cat`, `yes`, `echo`, `printf`, `id`, `groups`, `ls`, `tail`, `head`, `ed`, `tee`, `true`, `false`, `tee`, `whoami`, `wc`, `xargs`, `tac`, `rm`, `cp`, `mkdir`, `mv` +`dd`, `cat`, `yes`, `echo`, `printf`, `id`, `groups`, `ls`, `tail`, `head`, `ed`, `tee`, `true`, `false`, `tee`, `whoami`, `wc`, `xargs`, `tac`, `rm`, `cp`, `mkdir`, `mv`, `grep` ## How to diff --git a/src/command.h b/src/command.h index 2e468dd..d614c75 100644 --- a/src/command.h +++ b/src/command.h @@ -1,5 +1,6 @@ #include "util/shared.h" #include "util/stack.h" +#include "util/regex.h" #include #include @@ -37,3 +38,4 @@ COMMAND(rm); COMMAND(cp); COMMAND(makedir); COMMAND(mv); +COMMAND(grep); diff --git a/src/commands/grep.c b/src/commands/grep.c new file mode 100644 index 0000000..ffa1a41 --- /dev/null +++ b/src/commands/grep.c @@ -0,0 +1,249 @@ +#include "../command.h" +#include /* NOTE(review): the header name is missing in this copy of the patch; restore it before applying */ + +static struct { /* Option state for one grep run: written by short_arg(), read by the match and print helpers below. */ + bool filename_prefix; + bool never_file_prefix; + bool line_number; + bool only_matching_names; + bool only_non_matching_names; + bool only_line_count; + bool only_matching_part; + bool quiet; + bool inverse; + bool ignore_case; + bool is_regex; +} flags; + +static int short_arg(char c, char* next) { /* Option callback passed to parse_args(): records flag `c`; returns ARG_INVALID for an unknown flag, ARG_UNUSED otherwise (no flag takes a value). */ + UNUSED(next); + switch (c) { + case 'H': + flags.filename_prefix = true; + break; + case 'h': + flags.never_file_prefix = true; + break; + case 'n': + flags.line_number = true; + break; + case
'l': + flags.only_matching_names = true; + break; + case 'L': + flags.only_non_matching_names = true; + break; + case 'c': + flags.only_line_count = true; + break; + case 'o': + flags.only_matching_part = true; + break; + case 'q': + flags.quiet = true; + break; + case 'v': + flags.inverse = true; + break; + case 'i': + flags.ignore_case = true; + break; + case 'F': + flags.is_regex = false; + break; + case 'E': + flags.is_regex = true; + break; + default: + return ARG_INVALID; + } + return ARG_UNUSED; +} + +static void help(void) { /* Prints the usage text; the flag list in the first line mirrors the cases handled by short_arg(). */ + printf("Usage: grep [-HhnlLcoqviFE] PATTERN [FILE]...\n"); + printf("Search for PATTERN in FILEs (or stdin)\n"); + printf("\t-H\tAdd 'filename:' prefix\n"); + printf("\t-h\tDo not add 'filename:' prefix\n"); + printf("\t-n\tAdd 'line_no:' prefix\n"); + printf("\t-l\tShow only names of files that match\n"); + printf("\t-L\tShow only names of files that don't match\n"); + printf("\t-c\tShow only count of matching lines\n"); + printf("\t-o\tShow only the matching part of line\n"); + printf("\t-q\tQuiet.
Return 0 if PATTERN is found, 1 otherwise\n"); + printf("\t-v\tSelect non-matching lines\n"); + printf("\t-i\tIgnore case\n"); + printf("\t-F\tPATTERN is a literal (not regexp)\n"); + printf("\t-E\tPATTERN is an extended regexp\n"); +} + +static bool match_regex(char** string, re_t pattern) { /* Regex matcher: true when re_matchp() finds a match. With `-o`, *string is advanced to the match and NUL-terminated in place. NOTE(review): flags.ignore_case is not consulted here, so `-i` only affects literal patterns. */ + int len; + int index; + if ((index = re_matchp(pattern, *string, &len)) < 0) return false; + if (flags.only_matching_part) { + (*string) += index; + (*string)[len] = '\0'; + } + return true; +} + +static bool match_literal(char** string, char* pattern) { /* Plain substring search for `pattern` in *string, case-insensitive when `-i` is set. With `-o`, *string is narrowed to the matched text in place. */ + char* match = *string; + size_t match_len = strlen(match); + size_t pattern_len = strlen(pattern); + + if (match_len < pattern_len) return false; /* also keeps the unsigned subtraction below from wrapping */ + + for (size_t i = 0; i < match_len - pattern_len + 1; i++) { + if ( + (!flags.ignore_case && strncmp(match + i, pattern, pattern_len) == 0) || + (flags.ignore_case && strncasecmp(match + i, pattern, pattern_len) == 0) + ) { + if (flags.only_matching_part) { + *string = (*string) + i; + (*string)[pattern_len] = '\0'; + } + return true; + } + } + + return false; +} + +static bool match(char** string, void* pattern) { /* Dispatches to the regex or literal matcher according to flags.is_regex, then applies `-v` inversion. */ + bool result; + if (flags.is_regex) { + result = match_regex(string, (re_t) pattern); + } else { + result = match_literal(string, (char*) pattern); + } + return (flags.inverse ?
!result : result); +} + +static bool match_any(char* path, void* pattern) { /* Returns true at the first selected line of `path` and prints nothing; returns false when the file cannot be opened. Used by the name-only modes in run_match(). */ + FILE* file = get_file_s(path, "r"); + if (file == NULL) return false; + + char* buf = NULL; + size_t offset = 0; /* getline() gets a defined capacity together with the NULL buffer */ + bool matched = false; + + ssize_t read; /* getline() returns ssize_t, not int */ + while ((read = getline(&buf, &offset, file)) > 0) { + if (buf[read-1] == '\n') buf[read-1] = '\0'; + char* save = buf; + if (match(&save, pattern)) { + matched = true; + break; + } + } + + free(buf); /* free(NULL) is a no-op, so no guard is needed */ + if (file != stdin) fclose(file); /* release the stream; stdin is left open */ + + return matched; +} + +static bool match_file(char* path, void* pattern, bool many) { /* Prints every selected line of `path` (or only their count with `-c`, nothing with `-q`) and returns true when at least one line was selected. The 'file:' prefix is printed when `many` or `-H` is set and `-h` is not. */ + FILE* file = get_file_s(path, "r"); + if (file == NULL) return false; + + int num_matched = 0; + int line_num = 0; + char* buf = NULL; + size_t offset = 0; /* getline() gets a defined capacity together with the NULL buffer */ + + ssize_t read; /* getline() returns ssize_t, not int */ + while((read = getline(&buf, &offset, file)) > 0) { + if (buf[read-1] == '\n') buf[read-1] = '\0'; + char* matched = buf; + line_num++; + if (!match(&matched, pattern)) { + continue; + } + num_matched++; + if (flags.only_line_count || flags.quiet) continue; + if ((many || flags.filename_prefix) && !flags.never_file_prefix) { + print_file_path(path); + putchar(':'); + } + if (flags.line_number) { + printf("%d:", line_num); + } + if (flags.only_matching_part) { + printf("%s\n", matched); + } else { + printf("%s\n", buf); + } + } + + if (!flags.quiet && flags.only_line_count) { + if ((many || flags.filename_prefix) && !flags.never_file_prefix) { + print_file_path(path); + putchar(':'); + } + printf("%d\n", num_matched); + } + + free(buf); /* free(NULL) is a no-op, so no guard is needed */ + if (file != stdin) fclose(file); /* release the stream; stdin is left open */ + + return num_matched != 0; +} + +static void* compile(char* pattern) { /* Returns the result of re_compile() in regex mode, or the pattern string itself in literal mode. */ + if (flags.is_regex) { + return re_compile(pattern); + } else { + return pattern; + } +} + +static bool run_match(char* path, void* pattern, bool many) { /* Searches one input. In the name-only modes it prints just the file name when the (possibly negated) result is true; otherwise it defers to match_file(). Returns that result. */ + bool result; + if (flags.only_matching_names || flags.only_non_matching_names) { + result = match_any(path, pattern); + if (flags.only_non_matching_names) result = !result; + if (result && !flags.quiet) { + print_file_path(path); + putchar('\n'); + } + return result; + } else { + return match_file(path,
pattern, many); + } +} + +COMMAND(grep) { /* Entry point: resets the flags, parses options, takes the first operand as PATTERN and the remaining operands as files (stdin when there are none). Exit status is EXIT_SUCCESS when any input produced a hit, EXIT_FAILURE otherwise. */ + + flags.only_matching_part = false; + flags.only_non_matching_names = false; + flags.only_matching_names = false; + flags.only_line_count = false; + flags.quiet = false; + flags.is_regex = true; + flags.line_number = false; + flags.never_file_prefix = false; + flags.filename_prefix = false; + flags.inverse = false; + flags.ignore_case = false; + + int start = parse_args(argc, argv, help, short_arg, NULL); + + if (argc - start < 1) global_help(help); + + char* pattern = argv[start++]; + + bool many = argc - start > 1; /* prefix file names by default only when several files are searched */ + bool ok = false; + + void* compiled = compile(pattern); + if (start >= argc && run_match("-", compiled, many)) ok = true; /* stdin is searched only when no FILE operand is given */ + + for (int i = start; i < argc; i++) { + if (run_match(argv[i], compiled, many)) ok = true; + } + + return ok ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/src/main.c b/src/main.c index e23b915..c5943f0 100644 --- a/src/main.c +++ b/src/main.c @@ -12,7 +12,6 @@ char* cmd; int main (ARGUMENTS) { if (argc < 1) { - fprintf(stderr, "fatal: argument 0 missing"); return EXIT_FAILURE; } @@ -24,7 +23,7 @@ int main (ARGUMENTS) { if (argc < 2) { printf("usage: lazysphere [function [arguments]...]\n\n"); printf("currently defined functions:\n"); - printf("\tdd, cat, yes, echo, printf, id, groups, ls, tail, head, ed, tee, true, false, tee, whoami, wc, xargs, tac, rm, cp, mkdir, mv\n"); + printf("\tdd, cat, yes, echo, printf, id, groups, ls, tail, head, ed, tee, true, false, tee, whoami, wc, xargs, tac, rm, cp, mkdir, mv, grep\n"); return EXIT_SUCCESS; } argc--; @@ -84,8 +83,10 @@ int main (ARGUMENTS) { return makedir(NEXT_ARGS); } else if (streql(cmd, "mv")) { return mv(NEXT_ARGS); + } else if (streql(cmd, "grep")) { + return grep(NEXT_ARGS); } else { - fprintf(stderr, "lazysphere: invalid command %s", cmd); + fprintf(stderr, "lazysphere: invalid command %s\n", cmd); return EXIT_FAILURE; } diff --git a/src/util/shared.c b/src/util/shared.c index 8f2fe93..5e08b8d 100644 ---
a/src/util/shared.c +++ b/src/util/shared.c @@ -170,6 +170,14 @@ void print_date_time(time_t mills, char buf[13]) { snprintf(buf + n, 13 - n, "%d %02d:%02d ", info->tm_mday, info->tm_hour, info->tm_sec); } +void print_file_path(char* path) { /* Prints `path` to stdout without a newline, substituting "(standard input)" when the path is "-". */ + if (streql("-", path)) { + printf("(standard input)"); + } else { + printf("%s", path); + } +} + #ifndef MAJOR #define MAJOR 0 #endif diff --git a/src/util/shared.h b/src/util/shared.h index 26e27c3..07da4a7 100644 --- a/src/util/shared.h +++ b/src/util/shared.h @@ -52,6 +52,7 @@ bool prefix(const char* pre, const char* str); void print_file_size(size_t bytes, char buf[5]); void print_date_time(time_t mills, char buf[13]); +void print_file_path(char* path); #define UNUSED(x) (void)(x) #define ARG_UNUSED 0