From f5727683ea8cabd74bb2ccf8e6e41429ef1b9889 Mon Sep 17 00:00:00 2001 From: Pasi Kallinen Date: Thu, 21 Mar 2013 12:01:24 +0200 Subject: [PATCH] Improve the db-seed-i18n.py script to actually grab all strings. Previously the script could only handle oils_i18n_gettext markers if the marker and its parameters were all on one line. Now it also correctly parses the PostgreSQL E'' escaped strings, and removes SQL string concatenations. Also adds test cases. Signed-off-by: Pasi Kallinen Signed-off-by: Dan Scott --- build/i18n/scripts/db-seed-i18n.py | 74 +++++++++-------- build/i18n/tests/data/sql2pot.pot | 158 ++++++++++++++++++++++++++++++++++++ build/i18n/tests/data/sqlsource.sql | 116 ++++++++++++++++++++++++++ 3 files changed, 314 insertions(+), 34 deletions(-) diff --git a/build/i18n/scripts/db-seed-i18n.py b/build/i18n/scripts/db-seed-i18n.py index 990afcc08e..7ecd7ce31d 100755 --- a/build/i18n/scripts/db-seed-i18n.py +++ b/build/i18n/scripts/db-seed-i18n.py @@ -54,44 +54,50 @@ class SQL(basel10n.BaseL10N): self.pothead() num = 0 - findi18n = re.compile(r'.*?oils_i18n_gettext\((.*?)\'\)') - intkey = re.compile(r'\s*(?P\d+)\s*,\s*\'(?P.+?)\',\s*\'(?P.+?)\',\s*\'(?P.+?)$') - textkey = re.compile(r'\s*\'(?P.*?)\'\s*,\s*\'(?P.+?)\',\s*\'(?P.+?)\',\s*\'(?P.+?)$') + findi18n = re.compile(r'oils_i18n_gettext\((.*?)\'\s*\)', re.UNICODE+re.MULTILINE+re.DOTALL) + intkey = re.compile(r'\s*(?P\d+)\s*,\s*E?\'(?P.+?)\',\s*\'(?P.+?)\',\s*\'(?P.+?)$', re.UNICODE+re.MULTILINE+re.DOTALL) + textkey = re.compile(r'\s*\'(?P.*?)\'\s*,\s*E?\'(?P.+?)\',\s*\'(?P.+?)\',\s*\'(?P.+?)$', re.UNICODE+re.MULTILINE+re.DOTALL) serts = dict() # Iterate through the source SQL grabbing table names and l10n strings sourcefile = codecs.open(source, encoding='utf-8') - for line in sourcefile: - try: - num = num + 1 - entry = findi18n.search(line) - if entry is None: - continue - for parms in entry.groups(): - # Try for an integer-based primary key parameter first - fi18n = intkey.search(parms) - if
fi18n is None: - # Otherwise, it must be a text-based primary key parameter - fi18n = textkey.search(parms) - fq_field = "%s.%s" % (fi18n.group('class'), fi18n.group('property')) - # Unescape escaped SQL single-quotes for translators' sanity - msgid = re.compile(r'\'\'').sub("'", fi18n.group('string')) - - # Hmm, sometimes people use ":" in text identifiers and - # polib doesn't seem to like that; urlencode the colon - occurid = re.compile(r':').sub("%3A", fi18n.group('id')) - - if (msgid in serts): - serts[msgid].occurrences.append((os.path.basename(source), num)) - serts[msgid].tcomment = ' '.join((serts[msgid].tcomment, 'id::%s__%s' % (fq_field, occurid))) - else: - poe = polib.POEntry() - poe.tcomment = 'id::%s__%s' % (fq_field, occurid) - poe.occurrences = [(os.path.basename(source), num)] - poe.msgid = msgid - serts[msgid] = poe - except Exception, exc: - print "Error in line %d of SQL source file: %s" % (num, exc) + sourcelines = sourcefile.read() + try: + for match in findi18n.finditer(sourcelines): + parms = match.group(1) + num = sourcelines[:match.start()].count('\n') + 1 # ugh + + # Try for an integer-based primary key parameter first + fi18n = intkey.search(parms) + if fi18n is None: + # Otherwise, it must be a text-based primary key parameter + fi18n = textkey.search(parms) + if fi18n is None: + raise Exception("Cannot parse the source. 
Empty strings in there?") + + fq_field = "%s.%s" % (fi18n.group('class'), fi18n.group('property')) + + # strip sql string concatenation + strx = re.sub(r'\'\s*\|\|\s*\'', '', fi18n.group('string')) + + # Unescape escaped SQL single-quotes for translators' sanity + msgid = re.compile(r'\'\'').sub("'", strx) + + # Hmm, sometimes people use ":" in text identifiers and + # polib doesn't seem to like that; urlencode the colon + occurid = re.compile(r':').sub("%3A", fi18n.group('id')) + + if (msgid in serts): + serts[msgid].occurrences.append((os.path.basename(source), num)) + serts[msgid].tcomment = ' '.join((serts[msgid].tcomment, 'id::%s__%s' % (fq_field, occurid))) + else: + poe = polib.POEntry() + poe.tcomment = 'id::%s__%s' % (fq_field, occurid) + poe.occurrences = [(os.path.basename(source), num)] + poe.msgid = msgid + serts[msgid] = poe + except Exception, exc: + print "Error in oils_i18n_gettext line %d of SQL source file: %s" % (num, exc) for poe in serts.values(): self.pot.append(poe) diff --git a/build/i18n/tests/data/sql2pot.pot b/build/i18n/tests/data/sql2pot.pot index 4eb2a3d6a5..67800f1626 100644 --- a/build/i18n/tests/data/sql2pot.pot +++ b/build/i18n/tests/data/sql2pot.pot @@ -11,6 +11,26 @@ msgstr "" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8-bit\n" +# id::TEST110A.TEST110B__Str10 +#: sqlsource.sql:221 +msgid "TEST'110" +msgstr "" + +# id::csc.name__1 +#: sqlsource.sql:154 +msgid "Test Carrier" +msgstr "" + +# id::TEST010A.TEST010B__10 +#: sqlsource.sql:193 +msgid "TEST'010" +msgstr "" + +# id::TEST108A.TEST108B__Str8 +#: sqlsource.sql:214 +msgid "TEST108" +msgstr "" + # id::cbs.source__2 #: sqlsource.sql:5 msgid "System Local" @@ -21,6 +41,11 @@ msgstr "" msgid "3_days_1_renew" msgstr "" +# id::TEST001A.TEST001B__1 +#: sqlsource.sql:169 +msgid "TEST001" +msgstr "" + # id::cit.name__3 #: sqlsource.sql:53 msgid "Other" @@ -36,6 +61,11 @@ msgstr "" msgid "oclc" msgstr "" +# id::TEST101A.TEST101B__Str1 +#: sqlsource.sql:197 
+msgid "TEST101" +msgstr "" + # id::ccpbt.label__staff_client id::cbrebt.label__staff_client #: sqlsource.sql:102 sqlsource.sql:105 msgid "General Staff Client container" @@ -46,6 +76,11 @@ msgstr "" msgid "Subunit" msgstr "" +# id::csc.region__1 +#: sqlsource.sql:148 +msgid "Local" +msgstr "" + # id::cblvl.value__b #: sqlsource.sql:80 msgid "Serial component part" @@ -56,6 +91,11 @@ msgstr "" msgid "28_days_2_renew" msgstr "" +# id::coust.label__acq.copy_creator_uses_receiver +#: sqlsource.sql:117 +msgid "Set copy creator as receiver" +msgstr "" + # id::crcd.name__6 #: sqlsource.sql:67 msgid "35_days_1_renew" @@ -76,6 +116,11 @@ msgstr "" msgid "English (US)" msgstr "" +# id::TEST007A.TEST007B__7 +#: sqlsource.sql:184 +msgid "TEST007" +msgstr "" + # id::ccpbt.label__misc id::ccnbt.label__misc id::cbrebt.label__misc #: sqlsource.sql:101 sqlsource.sql:103 sqlsource.sql:104 msgid "Miscellaneous" @@ -86,26 +131,82 @@ msgstr "" msgid "Collection" msgstr "" +# id::ppl.description__1 +#: sqlsource.sql:109 +msgid "EVERYTHING" +msgstr "" + +# id::ccvm.description__488 +#: sqlsource.sql:142 +msgid "The item is intended for children, approximate ages 0-5 years." 
+msgstr "" + # id::cblvl.value__i #: sqlsource.sql:83 msgid "Integrating resource" msgstr "" +# id::coust.label__vandelay.default_match_set +# id::coust.description__vandelay.default_match_set +#: sqlsource.sql:125 sqlsource.sql:131 +msgid "Default Record Match Set" +msgstr "" + +# id::TEST103A.TEST103B__Str3 +#: sqlsource.sql:201 +msgid "TEST103" +msgstr "" + +# id::TEST102A.TEST102B__Str2 +#: sqlsource.sql:199 +msgid "TEST102" +msgstr "" + +# id::ccvm.value__487 +#: sqlsource.sql:141 +msgid "Unknown or unspecified" +msgstr "" + +# id::TEST003A.TEST003B__3 +#: sqlsource.sql:173 +msgid "TEST003" +msgstr "" + +# id::TEST002A.TEST002B__2 +#: sqlsource.sql:171 +msgid "TEST002" +msgstr "" + # id::vqbrad.description__4 #: sqlsource.sql:78 msgid "Pagination" msgstr "" +# id::TEST109A.TEST109B__Str9 +#: sqlsource.sql:196 +msgid "TEST109" +msgstr "" + # id::cnct.name__1 #: sqlsource.sql:45 msgid "Paperback Book" msgstr "" +# id::TEST006A.TEST006B__6 +#: sqlsource.sql:178 +msgid "TEST006" +msgstr "" + # id::i18n_l.description__es-US #: sqlsource.sql:99 msgid "American Spanish" msgstr "" +# id::TEST004A.TEST004B__4 +#: sqlsource.sql:173 +msgid "TEST004" +msgstr "" + # id::i18n_l.name__en-CA #: sqlsource.sql:92 msgid "English (Canada)" @@ -116,6 +217,31 @@ msgstr "" msgid "Spanish (US)" msgstr "" +# id::TEST009A.TEST009B__9 +#: sqlsource.sql:168 +msgid "TEST009" +msgstr "" + +# id::TEST008A.TEST008B__8 +#: sqlsource.sql:186 +msgid "TEST008" +msgstr "" + +# id::TEST106A.TEST106B__Str6 +#: sqlsource.sql:206 +msgid "TEST106" +msgstr "" + +# id::TEST107A.TEST107B__Str7 +#: sqlsource.sql:212 +msgid "TEST107" +msgstr "" + +# id::TEST104A.TEST104B__Str4 +#: sqlsource.sql:201 +msgid "TEST104" +msgstr "" + # id::i18n_l.description__en-CA #: sqlsource.sql:93 msgid "Canadian English" @@ -136,11 +262,26 @@ msgstr "" msgid "Good" msgstr "" +# id::TEST005A.TEST005B__5 +#: sqlsource.sql:175 +msgid "TEST005" +msgstr "" + # id::vqbrad.description__3 #: sqlsource.sql:77 msgid "Language of 
work" msgstr "" +# id::ccvm.description__487 +#: sqlsource.sql:141 +msgid "The target audience for the item not known or not specified." +msgstr "" + +# id::TEST105A.TEST105B__Str5 +#: sqlsource.sql:203 +msgid "TEST105" +msgstr "" + # id::acpl.name__1 #: sqlsource.sql:72 msgid "Stacks" @@ -186,11 +327,28 @@ msgstr "" msgid "SSN" msgstr "" +# id::coust.description__acq.copy_creator_uses_receiver +#: sqlsource.sql:120 +msgid "" +"When receiving a copy in acquisitions, set the copy \"creator\" to be the " +"staff that received the copy" +msgstr "" + +# id::ccvm.value__488 +#: sqlsource.sql:142 +msgid "Preschool" +msgstr "" + # id::cblvl.value__s #: sqlsource.sql:85 msgid "Serial" msgstr "" +# id::ppl.description__1 +#: sqlsource.sql:111 +msgid "Allow a user to log in to the OPAC" +msgstr "" + # id::crcd.name__5 #: sqlsource.sql:65 msgid "2_months_2_renew" diff --git a/build/i18n/tests/data/sqlsource.sql b/build/i18n/tests/data/sqlsource.sql index dda3df85c6..4b7df68e2b 100644 --- a/build/i18n/tests/data/sqlsource.sql +++ b/build/i18n/tests/data/sqlsource.sql @@ -103,3 +103,119 @@ INSERT INTO container.copy_bucket_type (code,label) VALUES ('staff_client', oils INSERT INTO container.call_number_bucket_type (code,label) VALUES ('misc', oils_i18n_gettext('misc', 'Miscellaneous', 'ccnbt', 'label')); INSERT INTO container.biblio_record_entry_bucket_type (code,label) VALUES ('misc', oils_i18n_gettext('misc', 'Miscellaneous', 'cbrebt', 'label')); INSERT INTO container.biblio_record_entry_bucket_type (code,label) VALUES ('staff_client', oils_i18n_gettext('staff_client', 'General Staff Client container', 'cbrebt', 'label')); + +-- 950..data.seed-values.sql +INSERT INTO permission.perm_list ( id, code, description ) VALUES + ( -1, 'EVERYTHING', oils_i18n_gettext( -1, + 'EVERYTHING', 'ppl', 'description' )), + ( 1, 'OPAC_LOGIN', oils_i18n_gettext( 1, + 'Allow a user to log in to the OPAC', 'ppl', 'description' )); + +INSERT into config.org_unit_setting_type +( name, grp, label, 
description, datatype, fm_class ) VALUES +( 'acq.copy_creator_uses_receiver', 'acq', + oils_i18n_gettext('acq.copy_creator_uses_receiver', + 'Set copy creator as receiver', + 'coust', 'label'), + oils_i18n_gettext('acq.copy_creator_uses_receiver', + 'When receiving a copy in acquisitions, set the copy "creator" to be the staff that received the copy', + 'coust', 'description'), + 'bool', null), +,( 'vandelay.default_match_set', 'vandelay', + oils_i18n_gettext( + 'vandelay.default_match_set', + 'Default Record Match Set', + 'coust', + 'label' + ), + oils_i18n_gettext( + 'vandelay.default_match_set', + 'Default Record Match Set', + 'coust', + 'description' + ), + 'string', null) +; + +INSERT INTO config.coded_value_map (id, ctype, code, value, description) VALUES + (487,'audience', ' ', oils_i18n_gettext('487', 'Unknown or unspecified', 'ccvm', 'value'), oils_i18n_gettext('487', 'The target audience for the item not known or not specified.', 'ccvm', 'description')), + (488,'audience', 'a', oils_i18n_gettext('488', 'Preschool', 'ccvm', 'value'), oils_i18n_gettext('488', 'The item is intended for children, approximate ages 0-5 years.', 'ccvm', 'description')) +; + +INSERT INTO config.sms_carrier VALUES + ( + 1, + oils_i18n_gettext( + 1, + 'Local', + 'csc', + 'region' + ), + oils_i18n_gettext( + 1, + 'Test Carrier', + 'csc', + 'name' + ), + 'opensrf+$number@localhost', + FALSE + ) +; + +-- specific contrived test cases + +-- first, with numeric ID +oils_i18n_gettext(9, 'TEST009', 'TEST009A', 'TEST009B') + oils_i18n_gettext(1, 'TEST001', 'TEST001A', 'TEST001B') + + oils_i18n_gettext(2, 'TEST002', 'TEST002A', 'TEST002B') + +oils_i18n_gettext(3, 'TEST003', 'TEST003A', 'TEST003B'), oils_i18n_gettext(4, 'TEST004', 'TEST004A', 'TEST004B') + +oils_i18n_gettext(5, + 'TEST005', 'TEST005A', 'TEST005B'); + + oils_i18n_gettext(6, +'TEST006', + 'TEST006A', +'TEST006B' +) + +oils_i18n_gettext(7, 'TEST' || '007', 'TEST007A', 'TEST007B') + + oils_i18n_gettext(8, 'TEST' || +'008', + + 
'TEST008A', + +'TEST008B' ) + +oils_i18n_gettext(10, 'TEST''010', 'TEST010A', 'TEST010B') + +-- then the same tests with string ID +oils_i18n_gettext('Str9', 'TEST109', 'TEST109A', 'TEST109B') + oils_i18n_gettext('Str1', 'TEST101', 'TEST101A', 'TEST101B') + + oils_i18n_gettext('Str2', 'TEST102', 'TEST102A', 'TEST102B') + +oils_i18n_gettext('Str3', 'TEST103', 'TEST103A', 'TEST103B'), oils_i18n_gettext('Str4', 'TEST104', 'TEST104A', 'TEST104B') + +oils_i18n_gettext('Str5', + 'TEST105', 'TEST105A', 'TEST105B'); + + oils_i18n_gettext('Str6', +'TEST106', + 'TEST106A', +'TEST106B' +) + +oils_i18n_gettext('Str7', 'TEST' || '107', 'TEST107A', 'TEST107B') + + oils_i18n_gettext('Str8', 'TEST' || +'108', + + 'TEST108A', + +'TEST108B' ) + +oils_i18n_gettext('Str10', 'TEST''110', 'TEST110A', 'TEST110B') -- 2.11.0