From: gfawcett
Date: Thu, 30 Dec 2010 16:48:25 +0000 (+0000)
Subject: Tune item-sorting to ignore punctuation
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=9100d3da889da04236c3044880e975efed39d0b2;p=Syrup.git

Tune item-sorting to ignore punctuation

Article titles with quotation marks in them were throwing off the sort.

git-svn-id: svn://svn.open-ils.org/ILS-Contrib/servres/trunk@1155 6d9bc8c9-1ec2-4278-b937-99fde70a366f
---

diff --git a/conifer/syrup/models.py b/conifer/syrup/models.py
index 182541b..b8c7e29 100644
--- a/conifer/syrup/models.py
+++ b/conifer/syrup/models.py
@@ -307,10 +307,14 @@ class Site(BaseModel):
         # TODO: internationalize the stopwords list.
         STOPWORDS = set(['a', 'an', 'that', 'there', 'the', 'this'])
 
+        RE_PUNCTUATION = re.compile("""[,'".:;]""")
+
         def sort_title(item):
             """First cut of a stop words routine."""
-            normal_text = [t for t in item.lower().split() if t not in STOPWORDS]
-            return " ".join(normal_text)
+            text = item.lower()
+            text = RE_PUNCTUATION.sub('', text) # remove common punctuation
+            words = [t for t in text.split() if t not in STOPWORDS]
+            return " ".join(words)
 
         items = self.items()
         # make a node-lookup table