Handle ebrary records per our spec (no proxy prefix; substitute the library-specific channel)
author dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Thu, 25 Nov 2010 04:22:46 +0000 (04:22 +0000)
committer dbs <dbs@6d9bc8c9-1ec2-4278-b937-99fde70a366f>
Thu, 25 Nov 2010 04:22:46 +0000 (04:22 +0000)
git-svn-id: svn://svn.open-ils.org/ILS-Contrib/conifer/branches/rel_1_6_1@1086 6d9bc8c9-1ec2-4278-b937-99fde70a366f

tools/ebooks/prep_ebook_records.py

index 5fe7c1b..7f630cb 100644
@@ -13,7 +13,7 @@ requirements that would be the same for each record and therefore can
 be accommodated in batch load.
 """
 
-import os, os.path, sys, getopt, pymarc, pymarc.marc8
+import os, os.path, sys, getopt, pymarc, pymarc.marc8, re
 
 class Institution():
     """Defines standard settings for each Conifer institution"""
@@ -262,21 +262,47 @@ def process_urls(field, options):
         print "* No subfield 'u' found in this 856"
         return None
 
+
     for lib in options['libraries']:
         data = options['settings'].get_settings(lib)
+        subs = get_subfields(field, data)
         eight_five_six = pymarc.Field(tag = '856',
             indicators = ['4', '0'],
-            subfields = [
-                'u', data['proxy'] + field['u'],
-                'y', data['link_text'],
-                'z', data['public_note'],
-                '9', data['code']
-            ]
+            subfields = subs
         )
         new_fields.append(eight_five_six)
 
     return new_fields
 
+def get_subfields(field, data):
+    """Creates 856 subfields required by Conifer"""
+
+    subs = []
+    url = field['u']
+
+    # Is this an ebrary URL?
+    ebrary = False
+    if url.find('.ebrary.com') > -1:
+        ebrary = True
+
+    # ebrary URLs look like: http://site.ebrary.com/lib/<channel>/Doc?id=2001019
+    # we need to replace <channel> with the library-specific channel
+    if ebrary:
+        ebrary_url = re.search(r'^(.+?/lib/).+?(/.+?)$', url)
+        url = ebrary_url.group(1) + data['ebrary_code'] + ebrary_url.group(2)
+        subs.extend(['u', url])
+    else:
+        subs.extend(['u', data['proxy'] + field['u']])
+
+    subs.extend([
+            'y', data['link_text'],
+            'z', data['public_note'],
+            '9', data['code']
+    ])
+
+    return subs
+
+
 if __name__ == '__main__':
 
     process_records(parse_opts())
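
For reference, here is a minimal standalone sketch of the subfield handling that get_subfields() performs, using the same regular expression as the patch. The sample URL and the ebrary_code / proxy / link_text / public_note / code values below are invented for illustration only; in the script they come from each institution's settings via options['settings'].get_settings(lib).

import re

# Hypothetical per-library settings, for illustration only.
data = {
    'ebrary_code': 'examplechannel',
    'proxy': 'http://proxy.example.ca/login?url=',
    'link_text': 'Available online',
    'public_note': 'Access restricted to example users',
    'code': 'EXAMPLE',
}

url = 'http://site.ebrary.com/lib/somechannel/Doc?id=2001019'

subs = []
if '.ebrary.com' in url:
    # Swap the channel segment between '/lib/' and the next '/' for the
    # library-specific ebrary channel; no proxy prefix is prepended.
    match = re.search(r'^(.+?/lib/).+?(/.+?)$', url)
    url = match.group(1) + data['ebrary_code'] + match.group(2)
    subs.extend(['u', url])
else:
    # Non-ebrary URLs are proxied as before.
    subs.extend(['u', data['proxy'] + url])

subs.extend(['y', data['link_text'],
             'z', data['public_note'],
             '9', data['code']])

# subs is the flat code/value list passed as the subfields argument to
# pymarc.Field(tag='856', ...) in process_urls(); url is now
# http://site.ebrary.com/lib/examplechannel/Doc?id=2001019
print(subs)

As the commit message notes, ebrary links are left unproxied; only the channel segment of the URL is rewritten, while all other 856 subfields are built the same way for every library.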