| Home | Trees | Indices | Help |
|
|---|
|
|
1 """Base classes for match providers.
2
3 They are used by business objects to give
4 phrasewheels the ability to guess phrases.
5
6 Copyright (C) GNUMed developers
7 license: GPL
8 """
9 __version__ = "$Revision: 1.34 $"
10 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>, I.Haywood <ihaywood@gnu.org>, S.J.Tan <sjtan@bigpond.com>"
11
12 # std lib
13 import string, types, time, sys, re as regex, logging
14
15
16 # GNUmed
17 from Gnumed.pycommon import gmPG2
18
19
20 _log = logging.getLogger('gm.ui')
21 _log.info(__version__)
22
23
# Characters removed from a fragment before it is matched.
# The double quote must live *inside* the character class: the previous
# literal appended it after the closing "]+", which turned the pattern into
# "a run of ignored characters followed by a double quote", so nothing was
# stripped unless a quote happened to follow.
default_ignored_chars = r"""[?!.'"(){}\[\]<>~#*$%^_]+"""
# Runs of these characters are treated as one word boundary.
default_word_separators = r'[- \t=+&:@]+'
26 #============================================================
28 """Base class for match providing objects.
29
30 Match sources might be:
31 - database tables
32 - flat files
33 - previous input
34 - config files
35 - in-memory list created on the fly
36 """
37 print_queries = False
38 #--------------------------------------------------------
40 self.setThresholds()
41
42 self._context_vals = {}
43 self.__ignored_chars = regex.compile(default_ignored_chars)
44 # used to normalize word boundaries:
45 self.__word_separators = regex.compile(default_word_separators)
46 #--------------------------------------------------------
47 # actions
48 #--------------------------------------------------------
def getMatches(self, aFragment=None):
    """Return matches according to aFragment and the matching thresholds.

    The fragment is lower-cased, stripped of ignored characters and has
    runs of word separators collapsed into single blanks. The length of
    the normalized fragment then selects the search that is run:
    substring, word-start or phrase-start.

    aFragment -- the text to match; u'*' explicitly requests all items

    Returns whatever the selected getMatchesBy*() / getAllMatches()
    method returns, or (False, []) if the normalized fragment is shorter
    than every threshold.

    Raises ValueError if aFragment is None.

    FIXME: design decision: we don't worry about data source changes
           during the lifetime of a MatchProvider
    FIXME: append _("*get all items*") on truncation
    """
    # NOTE(review): the "def" line is missing from the listing this was
    # recovered from; the None default is assumed from the check below.
    if aFragment is None:
        raise ValueError('Cannot find matches without a fragment.')

    # user explicitly wants all matches
    if aFragment == u'*':
        return self.getAllMatches()

    # case insensitivity
    fragment = aFragment.lower()
    if self.__ignored_chars is not None:
        fragment = self.__ignored_chars.sub('', fragment)
    # normalize word separators
    if self.__word_separators is not None:
        fragment = u' '.join(self.__word_separators.split(fragment))
    # only significant characters count towards the thresholds
    significant = len(fragment)

    # order is important: the largest threshold must be tested first
    if significant >= self.__threshold_substring:
        return self.getMatchesBySubstr(fragment)
    if significant >= self.__threshold_word:
        return self.getMatchesByWord(fragment)
    if significant >= self.__threshold_phrase:
        return self.getMatchesByPhrase(fragment)
    return (False, [])
84 #--------------------------------------------------------
87 #--------------------------------------------------------
90 #--------------------------------------------------------
93 #--------------------------------------------------------
96 #--------------------------------------------------------
97 # configuration
98 #--------------------------------------------------------
def setThresholds(self, aPhrase=1, aWord=3, aSubstring=5):
    """Set match location thresholds.

    The fragment passed to getMatches() must contain at least this many
    characters before it triggers a match search at:

    aPhrase    -- start of phrase (first word)
    aWord      -- start of any word within phrase
    aSubstring -- _inside_ any word within phrase

    The thresholds must satisfy aPhrase <= aWord <= aSubstring.

    Returns True if the thresholds were stored, False (after logging an
    error) if they are inconsistent; in that case nothing is changed.
    """
    # NOTE(review): the "def" line is missing from the listing this was
    # recovered from; the defaults 1/3/5 are assumed -- confirm against the
    # repository before applying.
    if aSubstring < aWord:
        _log.error(
            'Setting substring threshold (%s) lower than word-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aSubstring, aWord, self.__threshold_substring, self.__threshold_word
        )
        return False
    if aWord < aPhrase:
        # report the two values this message is actually about
        # (the old code logged aSubstring/aWord here)
        _log.error(
            'Setting word-start threshold (%s) lower than phrase-start threshold (%s) does not make sense. Retaining original thresholds (%s:%s, respectively).',
            aWord, aPhrase, self.__threshold_word, self.__threshold_phrase
        )
        return False

    # now actually reassign thresholds
    self.__threshold_phrase = aPhrase
    self.__threshold_word = aWord
    self.__threshold_substring = aSubstring

    return True
122 #--------------------------------------------------------
124 if word_separators is None:
125 self.__word_separators = None
126 else:
127 self.__word_separators = regex.compile(word_separators)
128
133
134 word_separators = property(_get_word_separators, _set_word_separators)
135 #--------------------------------------------------------
137 if ignored_chars is None:
138 self.__ignored_chars = None
139 else:
140 self.__ignored_chars = regex.compile(ignored_chars)
141
146
147 ignored_chars = property(_get_ignored_chars, _set_ignored_chars)
148 #--------------------------------------------------------
150 """Set value to provide context information for matches.
151
152 The matching code may ignore it depending on its exact
153 implementation. Names and values of the context depend
154 on what is being matched.
155
156 <context> -- the *placeholder* key *inside* the context
157 definition, not the context *definition* key
158 """
159 if context is None:
160 return False
161 self._context_vals[context] = val
162 return True
163 #--------------------------------------------------------
169 #------------------------------------------------------------
170 # usable instances
171 #------------------------------------------------------------
173 """Match provider where all possible options can be held
174 in a reasonably sized, pre-allocated list.
175 """
177 """aSeq must be a list or tuple of dicts. Each dict must have the keys (data, list_label, weight)
178 """
179 if not type(aSeq) in [types.ListType, types.TupleType]:
180 _log.error('fixed list match provider argument must be a list or tuple of dicts')
181 raise TypeError('fixed list match provider argument must be a list or tuple of dicts')
182
183 self.__items = aSeq
184 cMatchProvider.__init__(self)
185 #--------------------------------------------------------
186 # internal matching algorithms
187 #
188 # if we end up here:
189 # - aFragment will not be "None"
190 # - aFragment will be lower case
191 # - we _do_ deliver matches (whether we find any is a different story)
192 #--------------------------------------------------------
def getMatchesByPhrase(self, aFragment):
    """Return matches for aFragment at start of phrases.

    aFragment -- fragment to look for; compared against the lower-cased
                 'list_label' of every item

    Returns (True, <matching items, ordered by the item comparator>)
    or (False, []) if nothing matches.
    """
    # key-based sorting works on Python 2.7 and 3.x alike
    from functools import cmp_to_key

    matches = [
        item for item in self.__items
        if item['list_label'].lower().startswith(aFragment)
    ]
    if not matches:
        return (False, [])

    matches.sort(key=cmp_to_key(self.__cmp_items))
    return (True, matches)
207 #--------------------------------------------------------
def getMatchesByWord(self, aFragment):
    """Return matches for aFragment at start of words inside phrases.

    An item matches if its lower-cased 'list_label' starts with the
    fragment or contains the fragment directly after a blank. Every
    occurrence is considered, not just the first one.

    aFragment -- fragment to look for

    Returns (True, <matching items, ordered by the item comparator>)
    or (False, []) if nothing matches.
    """
    # key-based sorting works on Python 2.7 and 3.x alike
    from functools import cmp_to_key

    at_word_start = u' ' + aFragment
    matches = []
    for item in self.__items:
        label = item['list_label'].lower()
        if label.startswith(aFragment) or at_word_start in label:
            matches.append(item)
    if not matches:
        return (False, [])

    matches.sort(key=cmp_to_key(self.__cmp_items))
    return (True, matches)
228 #--------------------------------------------------------
def getMatchesBySubstr(self, aFragment):
    """Return matches for aFragment as a true substring.

    aFragment -- fragment to look for anywhere inside the lower-cased
                 'list_label' of every item

    Returns (True, <matching items, ordered by the item comparator>)
    or (False, []) if nothing matches.
    """
    # key-based sorting works on Python 2.7 and 3.x alike
    from functools import cmp_to_key

    matches = [
        item for item in self.__items
        if aFragment in item['list_label'].lower()
    ]
    if not matches:
        return (False, [])

    matches.sort(key=cmp_to_key(self.__cmp_items))
    return (True, matches)
242 #--------------------------------------------------------
def getAllMatches(self):
    """Return all items.

    Returns (True, <new list of all items, ordered by the item
    comparator>) or (False, []) if there are no items.

    The stored sequence is not modified: it may be a tuple (which cannot
    be sorted in place), and callers must not be handed the internal list.
    """
    # key-based sorting works on Python 2.7 and 3.x alike
    from functools import cmp_to_key

    if not self.__items:
        return (False, [])

    return (True, sorted(self.__items, key=cmp_to_key(self.__cmp_items)))
252 #--------------------------------------------------------
254 """items must be a list of dicts. Each dict must have the keys (data, list_label, weight)"""
255 self.__items = items
256 #--------------------------------------------------------
267 # ===========================================================
269 """Match provider which searches matches
270 in the results of a function call.
271 """
273 """get_candidates() must return a list of dicts, each of which has at least the key 'list_label'."""
274 if get_candidates is None:
275 _log.error('must define function to retrieve match candidates list')
276 raise ValueError('must define function to retrieve match candidates list')
277
278 self._get_candidates = get_candidates
279 cMatchProvider.__init__(self)
280 #--------------------------------------------------------
281 # internal matching algorithms
282 #
283 # if we end up here:
284 # - aFragment will not be "None"
285 # - aFragment will be lower case
286 # - we _do_ deliver matches (whether we find any is a different story)
287 #--------------------------------------------------------
def getMatchesByPhrase(self, aFragment):
    """Return matches for aFragment at start of phrases.

    A candidate matches if its lower-cased 'list_label' starts with the
    fragment. (The test used to be the other way round, so only labels
    that were a prefix of the fragment matched.)

    aFragment -- fragment to look for

    Returns (True, <matching candidates, ordered by the candidate
    comparator>) or (False, []) if nothing matches.
    """
    # key-based sorting works on Python 2.7 and 3.x alike
    from functools import cmp_to_key

    matches = [
        candidate for candidate in self._get_candidates()
        if candidate['list_label'].lower().startswith(aFragment)
    ]
    if not matches:
        return (False, [])

    matches.sort(key=cmp_to_key(self.__cmp_candidates))
    return (True, matches)
304 #--------------------------------------------------------
def getMatchesByWord(self, aFragment):
    """Return matches for aFragment at start of words inside phrases.

    A candidate matches if its lower-cased 'list_label' starts with the
    fragment or contains the fragment directly after a blank. Candidates
    that do not contain the fragment at all never match (previously the
    "not found" index -1 was used to inspect the label, which produced
    false matches or an IndexError).

    aFragment -- fragment to look for

    Returns (True, <matching candidates, ordered by the candidate
    comparator>) or (False, []) if nothing matches.

    FIXME: use word seps
    """
    # key-based sorting works on Python 2.7 and 3.x alike
    from functools import cmp_to_key

    at_word_start = u' ' + aFragment
    matches = []
    for candidate in self._get_candidates():
        label = candidate['list_label'].lower()
        if label.startswith(aFragment) or at_word_start in label:
            matches.append(candidate)
    if not matches:
        return (False, [])

    matches.sort(key=cmp_to_key(self.__cmp_candidates))
    return (True, matches)
325 #--------------------------------------------------------
def getMatchesBySubstr(self, aFragment):
    """Return matches for aFragment as a true substring.

    aFragment -- fragment to look for anywhere inside the lower-cased
                 'list_label' of every candidate

    Returns (True, <matching candidates, ordered by the candidate
    comparator>) or (False, []) if nothing matches.
    """
    # key-based sorting works on Python 2.7 and 3.x alike
    from functools import cmp_to_key

    matches = [
        candidate for candidate in self._get_candidates()
        if aFragment in candidate['list_label'].lower()
    ]
    if not matches:
        return (False, [])

    matches.sort(key=cmp_to_key(self.__cmp_candidates))
    return (True, matches)
341 #--------------------------------------------------------
345 #--------------------------------------------------------
349 # FIXME: do ordering
350 # if candidate1 < candidate2:
351 # return -1
352 # if candidate1 == candidate2:
353 # return 0
354 # return 1
355
356 # ===========================================================
358 """Match provider which searches matches
359 in possibly several database tables.
360
361 queries:
362 - a list of unicode strings
363 - each string is a query
364 - each string must contain: "... where <column> %(fragment_condition)s ..."
365 - each string can contain in the where clause: "... %(<context_key>)s ..."
366 - each query must return (data, label)
367
368 context definitions to be used in the queries
369 example: {'ctxt_country': {'where_part': 'and country = %(country)s', 'placeholder': 'country'}}
370 """
372 if type(queries) != types.ListType:
373 queries = [queries]
374
375 self._queries = queries
376
377 if context is None:
378 self._context = {}
379 else:
380 self._context = context
381
382 self._args = {}
383 cMatchProvider.__init__(self)
384 #--------------------------------------------------------
385 # internal matching algorithms
386 #
387 # if we end up here:
388 # - aFragment will not be "None"
389 # - aFragment will be lower case
390 # - we _do_ deliver matches (whether we find any is a different story)
391 #--------------------------------------------------------
393 """Return matches for aFragment at start of phrases."""
394
395 fragment_condition = u"ILIKE %(fragment)s"
396 self._args['fragment'] = u"%s%%" % aFragment
397
398 return self._find_matches(fragment_condition)
399 #--------------------------------------------------------
401 """Return matches for aFragment at start of words inside phrases."""
402
403 fragment_condition = u"~* %(fragment)s"
404 aFragment = gmPG2.sanitize_pg_regex(expression = aFragment, escape_all = False)
405 self._args['fragment'] = u"( %s)|(^%s)" % (aFragment, aFragment)
406
407 return self._find_matches(fragment_condition)
408 #--------------------------------------------------------
410 """Return matches for aFragment as a true substring."""
411
412 fragment_condition = u"ILIKE %(fragment)s"
413 self._args['fragment'] = u"%%%s%%" % aFragment
414
415 return self._find_matches(fragment_condition)
416 #--------------------------------------------------------
420 #--------------------------------------------------------
422 if self.print_queries:
423 print "----------------------"
424 matches = []
425 for query in self._queries:
426 where_fragments = {'fragment_condition': fragment_condition}
427
428 for context_key, context_def in self._context.items():
429 try:
430 placeholder = context_def['placeholder']
431 where_part = context_def['where_part']
432 self._args[placeholder] = self._context_vals[placeholder]
433 # we do have a context value for this key, so add the where condition
434 where_fragments[context_key] = where_part
435 if self.print_queries:
436 print "ctxt ph:", placeholder
437 print "ctxt where:", where_part
438 print "ctxt val:", self._context_vals[placeholder]
439 except KeyError:
440 # we don't have a context value for this key, so skip the where condition
441 where_fragments[context_key] = u''
442
443 cmd = query % where_fragments
444
445 if self.print_queries:
446 print "class:", self.__class__.__name__
447 print "ctxt:", self._context_vals
448 print "args:", self._args
449 print "query:", cmd
450
451 try:
452 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': self._args}])
453 except:
454 _log.exception('[%s]: error running match provider SQL, dropping query', self.__class__.__name__)
455 idx = self._queries.index(query)
456 del self._queries[idx]
457 break
458
459 # no matches found: try next query
460 if len(rows) == 0:
461 continue
462
463 for row in rows:
464 match = {'weight': 0}
465
466 try:
467 match['data'] = row['data']
468 except KeyError:
469 match['data'] = row[0]
470
471 try:
472 match['list_label'] = row['list_label']
473 except KeyError:
474 match['list_label'] = row[1]
475
476 # explicit "field_label" in result ?
477 try:
478 match['field_label'] = row['field_label']
479 # no
480 except KeyError:
481 # but does row[2] exist ?
482 try:
483 match['field_label'] = row[2]
484 # no: reuse "list_label"
485 except IndexError:
486 match['field_label'] = match['list_label']
487
488 # try:
489 # match['label'] = row['label']
490 # except KeyError:
491 # match['label'] = match['list_label']
492
493 matches.append(match)
494
495 return (True, matches)
496
497 # none found whatsoever
498 return (False, [])
499 #================================================================
500 if __name__ == '__main__':
501 pass
502
503 #================================================================
504
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Jun 7 03:58:43 2011 | http://epydoc.sourceforge.net |