blob: 6a41948c3ed47d7879ff99728740c5654279c436 [file] [log] [blame]
Alexander Afanasyevfce5bbd2013-08-07 18:50:00 -07001# -*- Mode:python; c-file-style:"gnu"; indent-tabs-mode:nil -*- */
2#
3# Copyright (c) 2013, Regents of the University of California
4# Yingdi Yu, Alexander Afanasyev
5#
6# BSD license, See the doc/LICENSE file for more information
7#
8# Author: Yingdi Yu <yingdi@cs.ucla.edu>
9# Alexander Afanasyev <alexander.afanasyev@ucla.edu>
10#
11
12import sys
13import re
14import logging
15from Name import Name
16
17_LOG = logging.getLogger ("ndn.nre")
18
19class RegexError(Exception):
20 def __init__(self, msg):
21 self.msg = msg
22 def __str__(self):
23 return self.msg
24
25class BaseMatcher(object):
26 def __init__(self, expr, backRef, exact=True):
27 _LOG.debug(self.__class__.__name__ + ".Constructor")
28 self.expr = expr
29 self.backRef = backRef
30 self.exact = exact
31 self.matchResult = []
32 self.matcherList = []
33
34 def match(self, name, offset, len):
35 _LOG.debug(self.__class__.__name__ + ".match(): " + "expr: " + self.expr + " offset: " + str(offset) + " length: " + str(len))
36 self.matchResult = []
37
38 if self._recursiveMatch(0, name, offset, len):
39 for i in range(offset, offset + len):
40 self.matchResult.append(name[i])
41 return True
42 else:
43 return False
44
45 def _recursiveMatch(self, mId, name, offset, length):
46 _LOG.debug(self.__class__.__name__ + "._recursiveMatch(): " + self.expr)
47 _LOG.debug("mId: " + str(mId) + " name: " + str(name) + " offset: " + str(offset) + " length: " + str(length) + " matcherListSize: " + str(len(self.matcherList)))
48 tried = 0
49
50 if mId >= len(self.matcherList) :
51 if length != 0 :
52 _LOG.debug("Fail " + self.__class__.__name__ + "._recursiveMatch(): no more matcher, but more components")
53 return False
54 else:
55 _LOG.debug("Succeed " + self.__class__.__name__ + "._recursiveMatch(): no more matcher, no more components")
56 return True
57
58 matcher = self.matcherList[mId]
59
60 while tried <= length:
61 if matcher.match(name, offset, tried) and self._recursiveMatch(mId + 1, name, offset + tried, length - tried) :
62 return True
63 _LOG.debug(self.__class__.__name__ + " expr: " + self.expr + " mId: " + str(mId) + " tried: " + str(tried) + " length: " + str(length))
64 tried += 1
65
66 return False
67
68
69 def aggressiveMatch(self, name, offset, len):
70 _LOG.debug(self.__class__.__name__ + ".aggressiveMatch(): " + "expr: " + self.expr + " offset: " + str(offset) + " length: " + str(len))
71 self.matchResult = []
72
73 if self._aRecursiveMatch(0, name, offset, len):
74 for i in range(offset, offset + len):
75 self.matchResult.append(name[i])
76 return True
77 else:
78 return False
79
80 def _aRecursiveMatch(self, mId, name, offset, length):
81 _LOG.debug(self.__class__.__name__ + "._aRecursiveMatch(): " + self.expr)
82 _LOG.debug("mId: " + str(mId) + " name: " + str(name) + " offset: " + str(offset) + " length: " + str(length) + " matcherListSize: " + str(len(self.matcherList)))
83
84 tried = length
85
86 if mId >= len(self.matcherList) :
87 if length != 0 :
88 _LOG.debug("Fail " + self.__class__.__name__ + "._recursiveMatch(): no more matcher, but more components")
89 return False
90 else:
91 _LOG.debug("Succeed " + self.__class__.__name__ + "._recursiveMatch(): no more matcher, no more components")
92 return True
93
94 matcher = self.matcherList[mId]
95
96 while tried >= 0:
97 if matcher.aggressiveMatch(name, offset, tried) and self._aRecursiveMatch(mId + 1, name, offset + tried, length - tried):
98 return True
99 _LOG.debug(self.__class__.__name__ + " expr: " + self.expr + " mId: " + str(mId) + " tried: " + str(tried) + " length: " + str(length))
100 tried -= 1
101
102 return False
103
104
105class ComponentMatcher(BaseMatcher):
106 def __init__(self, expr, backRef, exact=True):
107 _LOG.debug(self.__class__.__name__ + ".Constructor")
108 _LOG.debug("expr " + expr)
109
110 super(ComponentMatcher, self).__init__(expr, backRef, exact)
111
112 pattern = re.compile(self.expr)
113 self.pseudoBackRefMatcher = []
114 for i in range(0, pattern.groups):
115 pseudoMatcher = BaseMatcher("", self.backRef)
116 self.pseudoBackRefMatcher.append(pseudoMatcher)
117 self.backRef.append(pseudoMatcher)
118
119 def _appendBackRef(self, res):
120 if res and 0 < len(res.groups()):
121
122 group = res.groups()
123 for i in range(0, len(group)):
124 self.pseudoBackRefMatcher[i].matchResult = []
125 self.pseudoBackRefMatcher[i].matchResult.append(group[i])
126
127 def match(self, name, offset, len):
128 _LOG.debug(self.__class__.__name__ + ".match(): " + self.expr)
129 _LOG.debug("Name " + str(name) + " offset " + str(offset) + " len " +str(len))
130
131 self.matchResult = []
132
133 if "" == self.expr:
134 res = self.matchResult.append(name[offset])
135 self._appendBackRef(res)
136 _LOG.debug("Succeed " + self.__class__.__name__ + ".match() ")
137 return True
138
139 matcher = re.compile(self.expr)
140 if self.exact:
Alexander Afanasyev36f10532013-08-08 11:25:14 -0700141 res = matcher.match(str(name[offset]))
Alexander Afanasyevfce5bbd2013-08-07 18:50:00 -0700142 if res:
143 self._appendBackRef(res)
144 self.matchResult.append(name[offset])
145 _LOG.debug("Succeed " + self.__class__.__name__ + ".match() ")
146 return True
147 else:
Alexander Afanasyev36f10532013-08-08 11:25:14 -0700148 res = matcher.search(str(name[offset]))
Alexander Afanasyevfce5bbd2013-08-07 18:50:00 -0700149 if res:
150 self._appendBackRef(res)
151 self.matchResult.append(name[offset])
152 return True
153
154 return False
155
156 def aggressiveMatch(self, name, offset, len):
157 return self.match(name, offset, len)
158
159
160class ComponentSetMatcher(BaseMatcher):
161 def __init__(self, expr, backRef, exact=True):
162 _LOG.debug(self.__class__.__name__ + ".Constructor")
163
164 errMsg = "Error: ComponentSetMatcher.Constructor: "
165 self.include = True
166
167 super(ComponentSetMatcher, self).__init__(expr, backRef, exact)
168
169 if '<' == self.expr[0]:
170 self._compileSingleComponent()
171 elif '[' == self.expr[0]:
172 lastIndex = len(self.expr) - 1
173 if ']' != self.expr[lastIndex]:
174 raise RegexError(errMsg + " No matched ']' " + self.expr)
175 if '^' == self.expr[1]:
176 self.include = False
177
178 self._compileMultipleComponents(2, lastIndex)
179 else:
180 self._compileMultipleComponents(1, lastIndex)
181
182
183 def _compileSingleComponent(self):
184 _LOG.debug(self.__class__.__name__ + "._compileSingleComponent")
185
186 errMsg = "Error: ComponentSetMatcher.CompileSingleComponent(): "
187
188 end = self._extractComponent(1)
189
190 if len(self.expr) != end:
191 raise RegexError(errMsg + "out of bound " + self.expr)
192 else:
193 self.matcherList.append(ComponentMatcher(self.expr[1:end-1], self.backRef))
194
195 def _compileMultipleComponents(self, start, lastIndex):
196 _LOG.debug(self.__class__.__name__ + "._compileMultipleComponents")
197
198 errMsg = "Error: ComponentSetMatcher.CompileMultipleComponents(): "
199
200 index = start
201 tmp_index = start
202
203 while index < lastIndex:
204 if '<' != self.expr[index]:
205 raise RegexError(errMsg + "Component expr error " + self.expr)
206 tmp_index = index + 1
207 index = self._extractComponent(tmp_index)
208 self.matcherList.append(ComponentMatcher(self.expr[tmp_index:index-1], self.backRef))
209
210 if index != lastIndex:
211 raise RegexError(errMsg + "Not sufficient expr to parse " + self.expr)
212
213 def _extractComponent(self, index):
214 _LOG.debug(self.__class__.__name__ + "._extractComponent")
215 lcount = 1
216 rcount = 0
217
218 while lcount > rcount :
219 if len(self.expr) == index:
220 break
221 elif '<' == self.expr[index]:
222 lcount += 1
223 elif '>' == self.expr[index]:
224 rcount += 1
225
226 index += 1
227
228 return index
229
230 def match(self, name, offset, len):
231 _LOG.debug(self.__class__.__name__ + ".match(): " + self.expr)
232
233 self.matchResult = []
234
235 matched = False
236
237 if 1 != len:
238 return False
239
240 for matcher in self.matcherList:
241 res = matcher.match(name, offset, len)
242 if True == res:
243 matched = True
244 break
245
246 if(matched if self.include else (not matched)):
247 self.matchResult.append(name[offset])
248 return True
249 else:
250 return False
251
252 def aggressiveMatch(self, name, offset, len):
253 return self.match(name, offset, len)
254
255class BackRefMatcher(BaseMatcher):
256 def __init__(self, expr, backRef, exact=True):
257 _LOG.debug (self.__class__.__name__ + ".Constructor")
258 super(BackRefMatcher, self).__init__(expr, backRef, exact)
259
260 errMsg = "Error: BackRefMatcher Constructor: "
261
262 _LOG.debug ("expr: " + self.expr);
263 _LOG.debug ("backRefManager " + str(self.backRef) + " size: " + str(len(self.backRef)))
264
265 lastIndex = len(self.expr) - 1
266
267 if '(' == self.expr[0] and ')' == self.expr[lastIndex]:
268 self.backRef.append(self)
269 self.matcherList.append(PatternListMatcher(self.expr[1:lastIndex], self.backRef, self.exact))
270 else:
271 raise RegexError(errMsg + " Unrecognoized format " + self.expr)
272
273
274class PatternListMatcher(BaseMatcher):
275 def __init__(self, expr, backRef, exact=True):
276 _LOG.debug(self.__class__.__name__ + ".Constructor")
277 super(PatternListMatcher, self).__init__(expr, backRef, exact)
278 _LOG.debug("expr: " + self.expr)
279
280 exprSize = len(self.expr)
281 index = 0
282 subHead = index
283
284 while index < exprSize:
285 subHead = index
286 (r_res, r_index) = self._extractPattern(subHead, index)
287 index = r_index
288 if not r_res:
289 raise RegexError("Fail to create PatternListMatcher")
290
291
292 def _extractPattern(self, index, next):
293 _LOG.debug(self.__class__.__name__ + "._extractPattern")
294
295 errMsg = "Error: PatternListMatcher._extractPattern: "
296
297 start = index
298 End = index
299 indicator = index
300
301 _LOG.debug ("expr: " + self.expr + " index: " + str(index))
302
303 if '(' == self.expr[index]:
304 index += 1
305 index = self._extractSubPattern('(', ')', index)
306 indicator = index
307 end = self._extractRepetition(index)
308 if indicator == end:
309 self.matcherList.append(BackRefMatcher(self.expr[start:end], self.backRef, self.exact))
310 else:
311 self.matcherList.append(RepeatMatcher(self.expr[start:end], self.backRef, indicator-start, self.exact))
312 elif '<' == self.expr[index]:
313 index += 1
314 index = self._extractSubPattern('<', '>', index)
315 indicator = index
316 end = self._extractRepetition(index)
317 self.matcherList.append(RepeatMatcher(self.expr[start:end], self.backRef, indicator-start, self.exact))
318 _LOG.debug("start: " + str(start) + " end: " + str(end) + " indicator: " + str(indicator))
319 elif '[' == self.expr[index]:
320 index += 1
321 index = self._extractSubPattern('[', ']', index)
322 indicator = index
323 end = self._extractRepetition(index)
324 self.matcherList.append(RepeatMatcher(self.expr[start:end], self.backRef, indicator-start, self.exact))
325 _LOG.debug("start: " + str(start) + " end: " + str(end) + " indicator: " + str(indicator))
326 else:
327 raise RegexError(errMsg +"unexpected syntax")
328
329
330
331 return (True, end)
332
333 def _extractSubPattern(self, left, right, index):
334 _LOG.debug(self.__class__.__name__ + "._extractSubPattern")
335
336 lcount = 1
337 rcount = 0
338
339 while lcount > rcount:
340 if index >= len(self.expr):
341 raise RegexError("Error: parenthesis mismatch")
342 if left == self.expr[index]:
343 lcount += 1
344 if right == self.expr[index]:
345 rcount += 1
346 index += 1
347
348 return index
349
350 def _extractRepetition(self, index):
351 _LOG.debug(self.__class__.__name__ + "._extractRepetition")
352
353 exprSize = len(self.expr)
354
355 _LOG.debug("expr: " + self.expr + " index: " + str(index))
356
357 errMsg = "Error: PatternListMatcher._extractRepetition: "
358
359 if index == exprSize:
360 return index
361
362 if '+' == self.expr[index] or '?' == self.expr[index] or '*' == self.expr[index] :
363 index += 1
364 return index
365
366 if '{' == self.expr[index]:
367 while '}' != self.expr[index]:
368 index += 1
369 if index == exprSize:
370 break
371 if index == exprSize:
372 raise RegexError(errMsg + "Missing right brace bracket")
373 else:
374 index += 1
375 return index
376 else:
377 _LOG.debug ("return index: " + str(index))
378 return index
379
380class RepeatMatcher(BaseMatcher):
381 def __init__(self, expr, backRef, indicator, exact=True):
382 _LOG.debug(self.__class__.__name__ + ".Constructor")
383 _LOG.debug("expr: " + expr);
384 super(RepeatMatcher, self).__init__(expr, backRef, exact)
385 self.indicator = indicator
386 if '(' == self.expr[0]:
387 self.matcherList.append(BackRefMatcher(self.expr[0:self.indicator], self.backRef))
388 else:
389 self.matcherList.append(ComponentSetMatcher(self.expr[0:self.indicator], self.backRef))
390
391 self._parseRepetition()
392 _LOG.debug("repeatMin: " + str(self.repeatMin) + " repeatMax: " + str(self.repeatMax))
393
394 def _parseRepetition(self):
395 _LOG.debug(self.__class__.__name__ + "._parseRepetition")
396
397 errMsg = "Error: RepeatMatcher._parseRepetition(): ";
398
399 exprSize = len(self.expr)
400 intMax = sys.maxint
401
402 if exprSize == self.indicator:
403 self.repeatMin = 1
404 self.repeatMax = 1
405 return
406 else:
407 if exprSize == (self.indicator + 1):
408 if '?' == self.expr[self.indicator]:
409 self.repeatMin = 0
410 self.repeatMax = 1
411 if '+' == self.expr[self.indicator]:
412 self.repeatMin = 1
413 self.repeatMax = intMax
414 if '*' == self.expr[self.indicator]:
415 self.repeatMin = 0
416 self.repeatMax = intMax
417 return
418 else:
419 repeatStruct = self.expr[self.indicator:exprSize]
420 min = 0
421 max = 0
422
423 if re.match('{[0-9]+,[0-9]+}$', repeatStruct):
424 repeats = repeatStruct[1:-1].split(',')
425 min = int(repeats[0])
426 max = int(repeats[1])
427 elif re.match('{[0-9]+,}$', repeatStruct):
428 repeats = repeatStruct[1:-1].split(',')
429 min = int(repeats[0])
430 max = intMax
431 elif re.match('{,[0-9]+}$', repeatStruct):
432 repeats = repeatStruct[1:-1].split(',')
433 min = 0
434 max = int(repeats[1])
435 elif re.match('{[0-9]+}$', repeatStruct):
436 min = int(repeatStruct[1:- 1])
437 max = min;
438 else:
439 raise RegexError(errMsg + "Unrecognized format "+ self.expr);
440
441 if min > intMax or max > intMax or min > max:
442 raise RegexError(errMsg + "Wrong number " + self.expr);
443
444 self.repeatMin = min
445 self.repeatMax = max
446
447 def match(self, name, offset, len):
448 _LOG.debug(self.__class__.__name__ + ".match(): " + "expr: " + self.expr + " offset: " + str(offset) + " len: " + str(len) + " repeatMin: " + str(self.repeatMin))
449 self.matchResult = []
450
451 if 0 == self.repeatMin:
452 if 0 == len:
453 return True
454
455 if self._recursiveMatch(0, name, offset, len):
456 for i in range(offset, offset+len):
457 self.matchResult.append(name[i])
458 return True
459 else:
460 return False
461
462 def _recursiveMatch(self, repeat, name, offset, len):
463 _LOG.debug (self.__class__.__name__ + "._recursiveMatch()" + " repeat: " + str(repeat) + " offset: " + str(offset) + " len: " + str(len) + " rMin: " + str(self.repeatMin) + " rMax: " + str(self.repeatMax))
464 tried = 0
465 matcher = self.matcherList[0]
466
467 if 0 < len and repeat >= self.repeatMax:
468 _LOG.debug("Match Fail: Reach m_repeatMax && More components")
469 return False
470
471 if 0 == len and repeat < self.repeatMin:
472 _LOG.debug("Match Fail: No more components && have NOT reached m_repeatMin " + str(len) + ", " + str(self.repeatMin))
473 return False
474
475 if 0 == len and repeat >= self.repeatMin:
476 _LOG.debug("Match Succeed: No more components && reach m_repeatMin")
477 return True
478
479 while tried <= len:
480 _LOG.debug("Attempt tried: " + str(tried))
481
482 if matcher.match(name, offset, tried) and self._recursiveMatch(repeat + 1, name, offset + tried, len - tried):
483 return True;
484 _LOG.debug("Failed at tried: " + str(tried));
485 tried += 1
486
487 return False
488
489
490 def aggressiveMatch(self, name, offset, len):
491 _LOG.debug(self.__class__.__name__ + ".aggressiveMatch(): " + "expr: " + self.expr + " offset: " + str(offset) + " len: " + str(len) + " repeatMin: " + str(self.repeatMin))
492 self.matchResult = []
493
494 if 0 == self.repeatMin:
495 if 0 == len:
496 return True
497
498 if self._aRecursiveMatch(0, name, offset, len):
499 for i in range(offset, offset+len):
500 self.matchResult.append(name[i])
501 return True
502 else:
503 return False
504
505 def _aRecursiveMatch(self, repeat, name, offset, len):
506 _LOG.debug (self.__class__.__name__ + "._aRecursiveMatch()" + " repeat: " + str(repeat) + " offset: " + str(offset) + " len: " + str(len) + " rMin: " + str(self.repeatMin) + " rMax: " + str(self.repeatMax))
507 tried = len
508 matcher = self.matcherList[0]
509
510 if 0 < len and repeat >= self.repeatMax:
511 _LOG.debug("Match Fail: Reach m_repeatMax && More components")
512 return False
513
514 if 0 == len and repeat < self.repeatMin:
515 _LOG.debug("Match Fail: No more components && have NOT reached m_repeatMin " + str(len) + ", " + str(self.repeatMin))
516 return False
517
518 if 0 == len and repeat >= self.repeatMin:
519 _LOG.debug("Match Succeed: No more components && reach m_repeatMin")
520 return True
521
522 while tried >= 0:
523 _LOG.debug("Attempt tried: " + str(tried))
524
525 if matcher.aggressiveMatch(name, offset, tried) and self._aRecursiveMatch(repeat + 1, name, offset + tried, len - tried):
526 return True;
527 _LOG.debug("Failed at tried: " + str(tried));
528 tried -= 1
529
530 return False
531
532
533
534class RegexMatcher(BaseMatcher):
535 def __init__(self, expr, exact=True):
536 _LOG.debug(self.__class__.__name__ + ".Constructor")
537 super(RegexMatcher, self).__init__(expr, None, exact)
538
539 self.backRef = []
540 self.second_backRef = []
541
542 self.secondaryMatcher = None
543
544
545 errMsg = "Error: RegexTopMatcher Constructor: "
546 tmp_expr = self.expr
547
548 if '$' != tmp_expr[-1]:
549 tmp_expr = tmp_expr + "<.*>*";
550 else:
551 tmp_expr = tmp_expr[0:-1]
552
553 if '^' != tmp_expr[0]:
554 self.secondaryMatcher = PatternListMatcher("<.*>*" + tmp_expr, self.second_backRef, self.exact)
555 else:
556 tmp_expr = tmp_expr[1:]
557
558 _LOG.debug ("reconstructed expr " + tmp_expr);
559
560 self.primaryMatcher = PatternListMatcher(tmp_expr, self.backRef, self.exact)
561
562
563
564 def firstMatcher():
565 return None
566
567 def matchName(self, name):
568 _LOG.debug(self.__class__.__name__ + ".matchName")
569
570 self.secondaryUsed = False
571
572 res = self.primaryMatcher.match(name, 0, len(name))
573 self.matchResult += self.primaryMatcher.matchResult
574 if False == res and None != self.secondaryMatcher:
575 res = self.secondaryMatcher.match(name, 0, len(name))
576 self.matchResult += self.secondaryMatcher.matchResult
577 self.secondaryUsed = True
578 return res
579
580 def extract(self, rule):
581 _LOG.debug(self.__class__.__name__ + ".extract")
582
583 if not re.match('(\\\\[0-9]+)+$', rule):
584 raise RegexError("Wrong format of rule")
585
586 refs = rule.split('\\')
587 refs.pop(0)
588
589 backRef = self.backRef
590 if self.secondaryUsed:
591 backRef = self.second_backRef
592
593 result = []
594 for index in refs:
595 i = int(index) - 1
596
597 if len(backRef) <= i or 0 > i:
598 raise RegexError("Wrong back reference number!")
599
600 result += backRef[i].matchResult
601
602 return result
603
604 def matchN(self, name):
605 _LOG.debug(self.__class__.__name__ + ".matchN")
606
607 self.secondaryUsed = False
608
609 res = self.primaryMatcher.aggressiveMatch(name, 0, len(name))
610 self.matchResult += self.primaryMatcher.matchResult
611 if False == res and None != self.secondaryMatcher:
612 res = self.secondaryMatcher.aggressiveMatch(name, 0, len(name))
613 self.matchResult += self.secondaryMatcher.matchResult
614 self.secondaryUsed = True
615 return res
616
617 def expand (self, rule):
618 return self.extract (rule)
619
620def match (pattern, name, flags=0):
621 """
622 If zero or more characters at the beginning of string match the regular expression pattern, return a corresponding matches as a list. Return None if the string does not match the pattern.
623"""
624 if not isinstance (name, Name):
625 raise TypeError ("name is not ndn.Name type")
626
627 m = RegexMatcher (pattern)
628 res = m.matchN (Name (name))
629 if not res:
630 return None
631 return m