From 67d80e8f5e9fb9ae6d816bb807f932ac0a4310c9 Mon Sep 17 00:00:00 2001
From: kalashjain23
Date: Thu, 6 Oct 2022 12:13:22 +0530
Subject: [PATCH 1/3] implemented Boyer_Moore algorithm

---
 utils/boyer_moore_algorithm.py | 77 ++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 utils/boyer_moore_algorithm.py

diff --git a/utils/boyer_moore_algorithm.py b/utils/boyer_moore_algorithm.py
new file mode 100644
index 0000000..210c255
--- /dev/null
+++ b/utils/boyer_moore_algorithm.py
@@ -0,0 +1,77 @@
+from colorama import Fore
+
+
+class BoyerMooreAlgorithm:
+    """Literal substring search based on the Boyer-Moore bad-character rule.
+
+    The pattern is preprocessed once in the constructor; find_pattern() can
+    then be called for any number of texts.
+    """
+
+    def __init__(self, pattern):
+        self.pattern = pattern
+        # Last index at which each character occurs in the pattern.  A dict
+        # (rather than a fixed 256-slot list indexed by ord()) also accepts
+        # characters outside Latin-1 without raising IndexError.
+        self.shift_table = {char: index for index, char in enumerate(pattern)}
+
+    def find_pattern(self, source, file=False, line_number=0):
+        """Print the text with every occurrence of the pattern highlighted.
+
+        source: iterable of strings; joined with single spaces before searching.
+        file: when True, the output is prefixed with "Line N: ".
+        line_number: the N shown in that prefix.
+
+        Nothing is printed when the pattern does not occur.  Returns the list
+        of start offsets of the matches in the joined text.
+        """
+        text = " ".join(source)
+        matches = self._search(text)
+        if not matches:
+            return matches
+
+        if file:
+            print(f"Line {line_number}: ", end="")
+
+        pattern_length = len(self.pattern)
+        printed_upto = 0
+        for start in matches:
+            end = start + pattern_length
+            print(text[printed_upto:start], end="")
+            # Matches may overlap; never print a character twice.
+            highlighted = text[max(start, printed_upto):end]
+            print(Fore.CYAN + highlighted + Fore.RESET, end="")
+            printed_upto = end
+        # Remainder of the text after the last match, then end the line.
+        print(text[printed_upto:])
+        return matches
+
+    def _search(self, text):
+        """Return the start offset of each occurrence of the pattern in text."""
+        pattern = self.pattern
+        pattern_length = len(pattern)
+        last_seen = self.shift_table
+        matches = []
+        shift = 0
+        while shift <= len(text) - pattern_length:
+            # Compare right to left; j reaches -1 only on a full match.
+            j = pattern_length - 1
+            while j >= 0 and pattern[j] == text[shift + j]:
+                j -= 1
+
+            if j < 0:
+                matches.append(shift)
+                # Align the character just past the window with its last
+                # occurrence in the pattern.  At the end of the text there is
+                # no such character, so step by one to leave the loop.
+                if shift + pattern_length < len(text):
+                    next_char = text[shift + pattern_length]
+                    shift += pattern_length - last_seen.get(next_char, -1)
+                else:
+                    shift += 1
+            else:
+                # Align the mismatching text character with its last
+                # occurrence in the pattern.  max() keeps the shift positive
+                # when that occurrence lies to the right of position j.
+                shift += max(1, j - last_seen.get(text[shift + j], -1))
+        return matches

From 8433e753b76dded7b98f2d63f1a1a1ba68305709 Mon Sep 17 00:00:00 2001
From: kalashjain23
Date: Thu, 6 Oct 2022 12:14:18 +0530
Subject: [PATCH 2/3] added a method to run the grep command

---
 pysh.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/pysh.py b/pysh.py
index e288f92..be0bd7d 100644
--- a/pysh.py
+++ b/pysh.py
@@ -3,6 +3,7 @@
 import os
 import subprocess
 from utils.commands_list import *
+from utils.boyer_moore_algorithm import BoyerMooreAlgorithm
 import calendar
 import getpass
 import shutil
@@ -627,6 +628,36 @@ def do_diff(self, *args):
         else:
             print("pysh: diff: incorrect usage: try 'diff [FILE1] [FILE2]'")
 
+    def do_grep(self, *args):
+        commands = args[0].split()
+        self.save_history("grep " + " ".join(commands))
+
+        # An expression plus either inline text or "[FILE] -f" is required.
+        flag_index = commands.index('-f', 1) if '-f' in commands[1:] else None
+        if len(commands) < 2 or flag_index == 1:
+            print("pysh: grep: incorrect usage: try 'grep [expression] [data-set]' "
+                  "or 'grep [expression] [FILE] -f'")
+            return
+
+        pattern = BoyerMooreAlgorithm(commands[0])
+
+        if flag_index is None:
+            pattern.find_pattern(commands[1:])
+            return
+
+        filename = commands[flag_index - 1]
+        try:
+            file = open(filename, 'r', errors='replace')
+        except FileNotFoundError:
+            print("pysh: grep: {}: No such file or directory".format(filename))
+        except OSError as error:
+            print("pysh: grep: {}: {}".format(filename, error.strerror))
+        else:
+            with file:
+                for line_number, line in enumerate(file, start=1):
+                    # Drop the line terminator; find_pattern ends the line itself.
+                    pattern.find_pattern([line.rstrip('\n')], True, line_number)
+
     # help section
 
     def help_exit(self):
@@ -716,6 +747,9 @@ def help_kill(self):
     def help_diff(self):
         print(commands_list_manual['diff'])
 
+    def help_grep(self):
+        print(commands_list_manual['grep'])
+
     def default(self, line: str) -> bool:
         self.stdout.write("pysh: command not found: {}\n".format(line))
 

From 44cfa803beec1d8606a3bf0f1cfa2cc5f492349a Mon Sep 17 00:00:00 2001
From: kalashjain23
Date: Thu, 6 Oct 2022 12:14:45 +0530
Subject: [PATCH 3/3] added the new grep command in the commands list

---
 utils/commands_list.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/utils/commands_list.py b/utils/commands_list.py
index fabaf07..4f9181b 100644
--- a/utils/commands_list.py
+++ b/utils/commands_list.py
@@ -1,7 +1,7 @@
 commands_list = ['lf', 'ldir', 'pwd', 'cd', 'manual', 'mkdir',
                  'calendar', 'calc', 'whoami', 'echo', 'rm', 'cat',
                  'cp', 'mv', 'date', 'file', 'history',
-                 'head', 'tail', 'touch', 'wc', 'ip', 'host', 'arch', 'ps', 'wget', 'kill', 'diff']
+                 'head', 'tail', 'touch', 'wc', 'ip', 'host', 'arch', 'ps', 'wget', 'kill', 'diff', 'grep']
 
 commands_list_manual = {
     'exit': "Exits the shell where it is currently running. \nusage: 'exit'",
@@ -34,5 +34,6 @@
     'ps': "Displays the snapshot of the current processes. \nusage: 'ps'",
     'wget': "Network downloader - download files from internet. \nusage: 'wget [URL]'",
     'kill': "Terminate an unresponsive program. \nusage: 'kill [PID]'",
-    'diff': "Compares files line by line. \nusage: 'diff [FILE1] [FILE2]'"
+    'diff': "Compares files line by line. \nusage: 'diff [FILE1] [FILE2]'",
+    'grep': "Utility for searching plain-text data sets for lines that contain a literal pattern (regular expressions are not supported). \nusage: 'grep [expression] [data-set] -f' {if the data-set is a file}\nusage: 'grep [expression] [data-set]' {if the data-set is an input string}"
 }