/** --------------------------------------------------------------------------- problem : ---------- - find all occurrences of a provided pattern(sub-string) in a given text. --------------------------------------------------------------------------- input : ---------- - unsigned char* text : the text string to be searched. - int n : length of text string. - unsigned char* pattern : the pattern(sub-string) to be searched in the text. - int m : length of pattern. --------------------------------------------------------------------------- output : ---------- - all position within text from where the pattern can be found. --------------------------------------------------------------------------- constraints : ------------- - for each element or letter 't' of text : 0 <= t <= 254. - for each element or letter 'p' of pattern : 0 <= p <= 254. - text of pattern cannot contain any letter or element with value 255. --------------------------------------------------------------------------- /**/ //------------------------------------------------------------------------- #include <iostream> #include <conio.h> #define MAX_ALPHABET_SIZE 256 #define CHARACTER_NOT_IN_THE_TEXT 255 void boyer_moore_horspool(unsigned char* text, int n, unsigned char* pattern, int m) { int d[MAX_ALPHABET_SIZE], i, j, k, lim; // Preprocessing for(k = 0; k < MAX_ALPHABET_SIZE; ++k) { d[k] = m; } for(k = 0; k < m; ++k ) { d[pattern[k]] = m - k; } // To avoid having code pattern[m] = CHARACTER_NOT_IN_THE_TEXT; lim = n - m; // Searching for(k = 0; k <= lim; k += d[text[k + m]]) { i = k; j = 0; while(text[i] == pattern[j]) { ++i; ++j; } if(j == m) { cout << "pattern found in text at position : " << k << endl; if( (k + (m<<1)) >= n) { break; } } } } //------------------------------------------------------------------------- /** --------------------------------------------------------------------------- solution : ------------ - complexity : average case = O(n) , O(length of text, n). worst case = O(nm), O([length of text:n]*[length of pattern:m]). --------------------------------------------------------------------------- /**/ void main() { //positions.....012345678901234567890 char text[] = "ZZACCABCDBDABCDABCDQQ"; char pattern[] = "ABCD"; boyer_moore_horspool(text, strlen(text), pattern, strlen(pattern)); getch(); } /** --------------------------------------------------------------------------- output : ------------ pattern found in text at position : 5 pattern found in text at position : 11 pattern found in text at position : 15 --------------------------------------------------------------------------- /**/Have used the above code from the following link :
Boyer-Moore-Horspool Algorithm