Lines 338-343
Link Here
|
338 |
|
338 |
|
339 |
return paths |
339 |
return paths |
340 |
|
340 |
|
|
|
341 |
@staticmethod |
342 |
def _filter_redundant_regexes(rs): |
343 |
""" |
344 |
Given a list of regular expressions to be joined via "|".join(), |
345 |
removes those branches that have non-unique suffixes: |
346 |
['a$', 'ba$'] --> ['a$'] |
347 |
for the purpose of improving efficiency. |
348 |
|
349 |
This is used in _prepare_search_regex to make filename matching |
350 |
more efficient. |
351 |
|
352 |
@type rs: list |
353 |
@param rs: file path regular expressions each ending with '$' |
354 |
@rtype: list |
355 |
@return: list with possibly fewer branches |
356 |
|
357 |
@raise ValueError: if a regular expression does not end with '$' |
358 |
""" |
359 |
|
360 |
# sort in place |
361 |
rs.sort(key= len, reverse= True) |
362 |
rs_filtered = [] |
363 |
|
364 |
# keep a list item only if no suffix of it is present |
365 |
for i in range(len(rs)): |
366 |
found = False |
367 |
for j in range(i + 1, len(rs)): |
368 |
if rs[i].endswith(rs[j]): |
369 |
found = True |
370 |
break |
371 |
if not found: |
372 |
if not rs[i].endswith('$'): |
373 |
raise ValueError( |
374 |
"_filter_redundant_regexes needs all regexes to end with '$'") |
375 |
rs_filtered.append(rs[i]) |
376 |
|
377 |
# we assume a search with very few hits; sort order is unimportant |
378 |
return rs_filtered |
379 |
|
380 |
|
341 |
def _prepare_search_regex(self, queries): |
381 |
def _prepare_search_regex(self, queries): |
342 |
"""Create a regex out of the queries""" |
382 |
"""Create a regex out of the queries""" |
343 |
|
383 |
|
Lines 357-363
Link Here
|
357 |
else: |
397 |
else: |
358 |
query = "/%s$" % re.escape(query) |
398 |
query = "/%s$" % re.escape(query) |
359 |
result.append(query) |
399 |
result.append(query) |
360 |
result = "|".join(result) |
400 |
|
|
|
401 |
# remove same redundancy here, 9% improvement in run time with "equery belongs" |
402 |
result = "|".join( self._filter_redundant_regexes(result) ) |
361 |
return result |
403 |
return result |
362 |
|
404 |
|
363 |
# ========= |
405 |
# ========= |