forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrabin_karp.py
More file actions
77 lines (61 loc) · 2.28 KB
/
rabin_karp.py
File metadata and controls
77 lines (61 loc) · 2.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Rabin-Karp rolling hash algorithm for substring search.
Implements the classic Rabin-Karp algorithm using a rolling hash to find
all occurrences of a pattern in a text in O(n) average time.
The algorithm uses a simple polynomial rolling hash with modulo prime to
avoid overflow. It works well for ASCII/Unicode strings.
References:
- Rabin, M. O., & Karp, R. M. (1987). Algorithms for pattern matching.
"""
from typing import List
def rabin_karp(text: str, pattern: str) -> List[int]:
"""Return starting indices of pattern in text using rolling hash.
Args:
text: The text to search within.
pattern: The pattern to find.
Returns:
List of starting indices (0-based) where pattern occurs.
Example:
>>> rabin_karp("abracadabra", "abra")
[0, 7]
"""
# Edge cases
if pattern == "":
# By convention, empty pattern matches at each position plus one
return list(range(len(text) + 1))
if len(pattern) > len(text):
return []
# Rolling hash parameters
base = 256 # number of possible character values (ASCII/extended)
prime = 101 # a small prime for modulus
m, n = len(pattern), len(text)
# Precompute base^(m-1) mod prime for rolling removal
h = 1
for _ in range(m - 1):
h = (h * base) % prime
# Compute initial hash values
pattern_hash = 0
window_hash = 0
for i in range(m):
pattern_hash = (base * pattern_hash + ord(pattern[i])) % prime
window_hash = (base * window_hash + ord(text[i])) % prime
matches: List[int] = []
# Slide the window over text
for i in range(n - m + 1):
if pattern_hash == window_hash:
# Double-check to avoid hash collisions
if text[i:i + m] == pattern:
matches.append(i)
if i < n - m:
# Roll: remove leading char, add trailing char
window_hash = (base * (window_hash - ord(text[i]) * h) + ord(text[i + m])) % prime
if window_hash < 0:
window_hash += prime
return matches
def demo() -> None:
"""Run a simple demonstration."""
text = "abracadabra"
pattern = "abra"
indices = rabin_karp(text, pattern)
print(f"Pattern '{pattern}' found at positions: {indices}")
if __name__ == "__main__":
demo()