From: dbs <dbs@dcc99617-32d9-48b4-a31d-7c20da2025e4>
Date: Sun, 6 Jul 2008 16:01:49 +0000 (+0000)
Subject: Tighten up our entity checks; include a more complete range of legal entity chars
X-Git-Url: https://old-git.evergreen-ils.org/?a=commitdiff_plain;h=08ea9b17f4755c02816a96d1b144c775ce4ce029;p=Evergreen.git

Tighten up our entity checks; include a more complete range of legal entity chars
Add a missing entity (staff.cat.z3950.query.description) discovered by our more stringent entity tests


git-svn-id: svn://svn.open-ils.org/ILS/trunk@9969 dcc99617-32d9-48b4-a31d-7c20da2025e4
---

diff --git a/Open-ILS/web/opac/locale/en-US/lang.dtd b/Open-ILS/web/opac/locale/en-US/lang.dtd
index 7aeafa709a..58c1247990 100644
--- a/Open-ILS/web/opac/locale/en-US/lang.dtd
+++ b/Open-ILS/web/opac/locale/en-US/lang.dtd
@@ -2026,6 +2026,7 @@
 <!ENTITY staff.cat.z3950.catalog_service.tooltiptext "Evergreen Native Catalog">
 <!ENTITY staff.cat.z3950.save_creds.label "Save as Default">
 <!ENTITY staff.cat.z3950.save_creds.accesskey "D">
+<!ENTITY staff.cat.z3950.query.description "Description">
 <!ENTITY staff.cat.z3950.query.label "Query">
 <!ENTITY staff.cat.z3950.query.label "Tip: Click a search field label and then the 'Save as Default' button to have that field focused by default.">
 <!ENTITY staff.cat.z3950.clear.label "Clear Form">
diff --git a/build/i18n/tests/check_entities.py b/build/i18n/tests/check_entities.py
index 3355581a9f..784effc942 100644
--- a/build/i18n/tests/check_entities.py
+++ b/build/i18n/tests/check_entities.py
@@ -62,6 +62,8 @@ def parse_entities():
     prefix = os.path.commonprefix(dtd_files)
 
     for d_file in dtd_files:
+		if DEBUG:
+			print "Checking %s\n" % (d_file)
 
         # Get the shortest unique address for this file
         short_df = d_file[len(prefix):]
@@ -79,7 +81,7 @@ def parse_entities():
             # Parse entity/value 
             unpack = re.search(r'<!ENTITY\s+(.+?)\s+([\'"])(.*?)\2\s*>', line)
             if DEBUG and unpack:
-                print unpack.groups()
+                print(unpack.groups())
 
             # Skip anything other than entity definitions
             # Note that this makes some massive assumptions:
@@ -96,7 +98,7 @@ def parse_entities():
 
             entity_key, quote, value = unpack.groups()
             if DEBUG:
-                print entity_key, value
+                print(entity_key, value)
 
             if not entities.has_key(entity_key):
                 entities[entity_key] = [{'value': value, 'file': short_df}]
@@ -139,23 +141,35 @@ def check_xul(root, filename, entities):
 
     # Typical entity usage:
     # &blah.blah.blah_bity.blah;
-    strings = re.compile(r'''&([a-zA-Z._]+);''')
+    strings = re.compile(r'''&([a-zA-Z:_][a-zA-Z0-9:_\-.]+);''')
 
     xul = open(os.path.join(root, filename), 'r')
     content = xul.read()
     xul.close()
 
     if DEBUG:
-        print "File: %s" % (os.path.normpath(os.path.join(root, filename)))
+        print("File: %s" % (os.path.normpath(os.path.join(root, filename))))
 
     for s_match in strings.finditer(content):
         num_strings += 1
         if not entities.has_key(s_match.group(1)):
-            print "File: %s" % (os.path.normpath(os.path.join(root, filename)))
-            print "\tEntity %s not found, expected in %s" % (s_match.group(1), 'lang.dtd')
+            print("File: %s" % (os.path.normpath(os.path.join(root, filename))))
+            print("\tEntity %s not found, expected in %s" % (s_match.group(1), 'lang.dtd'))
+
+	# Find bad entities
+	bad_strings = re.compile(r'''&([^a-zA-Z:_]?[a-zA-Z0-9:_]*[^a-zA-Z0-9:_\-.;][a-zA-Z0-9:_\-.]*);''')
+
+	# Match character entities (&#0129; etc), which are okay
+	char_entity = re.compile(r'''^((#([0-9])+)|(#x([0-9a-fA-F])+))$''')
+
+	for s_match in bad_strings.finditer(content):
+		# Rule out character entities and URL concatenation
+		if (not char_entity.search(s_match.group(1))) and s_match.group(1) != "'":
+			print("File: %s" % (os.path.normpath(os.path.join(root, filename))))
+			print("\tBad entity: %s" % (s_match.group(1)))
 
     if DEBUG:
-        print "\t%d entities found" % (num_strings)
+        print("\t%d entities found" % (num_strings))
 
 if __name__ == '__main__':
     entities = parse_entities()