-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathsearch.py
More file actions
66 lines (53 loc) · 2 KB
/
search.py
File metadata and controls
66 lines (53 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#-*-encoding: utf-8 -*-
import os, time, fnmatch, codecs
# Number of characters of surrounding text kept on each side of a match
# (used for both the prefix and the suffix slice in Search.cutout_content).
content_extract = 32
# Upper bound on how many matches Search.cutout_content extracts from one
# file's content.
max_cutouts = 20
class Item:
    """Context around a single match: the text before it and the text after it."""

    def __init__(self, prefix, suffix):
        """Keep both context strings on the instance.

        prefix: text preceding the match.
        suffix: text following the match.
        """
        self.prefix, self.suffix = prefix, suffix
class SearchResult:
    """Outcome of searching one file: its path plus the extracted matches."""

    def __init__(self, fullpath, items):
        """Record the file path and its match items.

        fullpath: path of the file that was searched.
        items: whatever the caller extracted for this file (stored as-is).
        """
        # `name` always starts out as None; it is left for callers to fill in.
        self.name = None
        self.items = items
        self.fullpath = fullpath
class Search:
    """Recursively search the files below a directory for a literal string.

    Only files whose name does not start with a dot and matches at least
    one of the ``file_filter`` glob patterns are read.
    """

    def __init__(self, path, search_string, file_filter):
        """Store the search parameters.

        path: root directory handed to ``os.walk``.
        search_string: literal text looked for in every candidate file.
        file_filter: iterable of ``fnmatch``-style patterns for file names.
        """
        self.search_path = path
        self.search_string = search_string
        self.file_filter = file_filter
        # The construction timestamp used to be computed and then discarded;
        # it is kept on the instance so callers can derive the elapsed time.
        self.time_begin = time.time()

    def walk(self):
        """Search every candidate file below ``self.search_path``.

        Returns a list with one SearchResult per examined file. Files that
        do not contain the search string are included too, with ``items``
        set to None.
        """
        results = []
        # NOTE: followlinks=True makes os.walk descend into symlinked
        # directories; os.walk does not track visited directories, so a
        # link pointing at one of its own parents recurses without end.
        for root, _dirs, filenames in os.walk(self.search_path, followlinks=True):
            for filename in filenames:
                if filename.startswith("."):
                    continue
                # any() stops at the first matching pattern, so a file that
                # matches several filters is searched and reported only once.
                if any(fnmatch.fnmatch(filename, pattern)
                       for pattern in self.file_filter):
                    results.append(self.search_file(os.path.join(root, filename)))
        return results

    def search_file(self, filepath):
        """Read ``filepath`` as UTF-8 and look for the search string.

        Returns a SearchResult whose ``items`` is the list produced by
        ``cutout_content`` when the string occurs in the file, else None.
        Errors from opening or decoding the file propagate to the caller.
        """
        # The context manager closes the handle even when read() raises
        # (for instance on bytes that are not valid UTF-8).
        with codecs.open(filepath, mode="r", encoding="utf-8") as handle:
            content = handle.read()
        contents = None
        if self.search_string in content:
            contents = self.cutout_content(content)
        return SearchResult(filepath, contents)

    def cutout_content(self, content):
        """Extract the context around each occurrence of the search string.

        content: the text to scan.

        Returns a list of at most ``max_cutouts`` Item objects, one per
        non-overlapping occurrence in order of appearance. Each Item holds
        up to ``content_extract`` characters before and after the match.
        An empty search string yields an empty list.
        """
        needle = self.search_string
        if not needle:
            # An empty needle "matches" at every offset; there is nothing
            # meaningful to cut out.
            return []
        needle_len = len(needle)
        cutouts = []
        pos = content.find(needle)
        while pos != -1 and len(cutouts) < max_cutouts:
            # Clamp at 0: a negative slice start would wrap around to the
            # end of the string and produce an empty or wrong prefix.
            prefix_start = max(0, pos - content_extract)
            match_end = pos + needle_len
            prefix = content[prefix_start:pos]
            suffix = content[match_end:match_end + content_extract]
            cutouts.append(Item(prefix, suffix))
            # Resume right after this match. The offset is absolute, so it
            # must be assigned, not accumulated onto the previous position.
            pos = content.find(needle, match_end)
        return cutouts