Check for a hit in SFX for every library we have enabled
author Dan Scott <dan@coffeecode.net>
Wed, 22 Jun 2011 18:50:08 +0000 (14:50 -0400)
committer Dan Scott <dscott@laurentian.ca>
Mon, 12 Nov 2012 17:57:51 +0000 (12:57 -0500)
It's possible that one of the libraries has not yet turned on a given
target in SFX, so checking only a single library's SFX instance could
produce a false negative. And if a library hasn't opted into a particular
package, it shouldn't be among the enabled library flags passed to this
script.

Also, clean up ISBN marking and access note creation, as we were
modifying the wrong MARC record.

Signed-off-by: Dan Scott <dscott@laurentian.ca>
tools/ebooks/prep_ebook_records.py

index b18c548..f6489be 100644 (file)
@@ -296,7 +296,7 @@ def process_fields(record, options):
 
     add_publisher(record, new_record, options)
     add_restriction(new_record, options)
-    marked_isbn = mark_isbn_for_sfx(record, options)
+    marked_isbn = mark_isbn_for_sfx(new_record, options)
     if not marked_isbn:
         print("No matching ISBN target found in SFX for %s" %
             (new_record['856']['u'])
@@ -359,49 +359,43 @@ def mark_isbn_for_sfx(record, options):
     matches.
     """
 
+    # For every ISBN in the record
     for isbn in record.get_fields('020'):
         for isbnval in isbn.get_subfields('a'):
             isbnval = clean_isbn(isbnval)
-            sfx = 'http://sfx.scholarsportal.info/windsor'
-            # check to see if there are holdings in SFX knowledgebase
-            # use one of the participating libraries
-            if 'windsor' in options['libraries']:
-                sfx = options['settings'].get_settings('windsor')['sfx_url']
-            elif 'laurentian' in options['libraries']:
-                sfx = options['settings'].get_settings('laurentian')['sfx_url']
-            elif 'algoma' in options['libraries']:
-                sfx = options['settings'].get_settings('algoma')['sfx_url']
+            # And for every library we have enabled
+            for lib in options['libraries']:
+                sfx = options['settings'].get_settings(lib)['sfx_url']
+                url = "%s?url_ver=Z39.88-2004&url_ctx_fmt=infofi/fmt:kev:mtx:ctx&" \
+                    "ctx_enc=UTF-8&ctx_ver=Z39.88-2004&rfr_id=info:sid/evergreen&" \
+                    "sfx.ignore_date_threshold=1&" \
+                    "sfx.response_type=multi_obj_detailed_xml" \
+                    "&__service_type=getFullTxt&rft.isbn=%s" % (sfx, isbnval)
+
+                try:
+                    req = urllib2.urlopen(url)
+                    sfx_res = BeautifulSoup(req.read())
+                except urllib2.HTTPError, ex:
+                    print("%s for URL %s" % (ex, url))
+                    continue
+                except urllib2.URLError, ex:
+                    print("%s for URL %s" % (ex, url))
+                    continue
             
-            url = "%s?url_ver=Z39.88-2004&url_ctx_fmt=infofi/fmt:kev:mtx:ctx&" \
-                "ctx_enc=UTF-8&ctx_ver=Z39.88-2004&rfr_id=info:sid/evergreen&" \
-                "sfx.ignore_date_threshold=1&" \
-                "sfx.response_type=multi_obj_detailed_xml" \
-                "&__service_type=getFullTxt&rft.isbn=%s" % (sfx, isbnval)
-
-            try:
-                req = urllib2.urlopen(url)
-                sfx_res = BeautifulSoup(req.read())
-            except urllib2.HTTPError, ex:
-                print("%s for URL %s" % (ex, url))
-                return None
-            except urllib2.URLError, ex:
-                print("%s for URL %s" % (ex, url))
-                return None
-        
-            # We want a target with a service_type element of 'getFullTxt'
-            targets = sfx_res.ctx_obj.ctx_obj_targets.findAll(
-                'target', recursive=False
-            )
+                # We want a target with a service_type element of 'getFullTxt'
+                targets = sfx_res.ctx_obj.ctx_obj_targets.findAll(
+                    'target', recursive=False
+                )
 
-            if len(targets) == 0:
-                # No SFX targets found for this ISBN - next!
-                continue
+                if len(targets) == 0:
+                    # No SFX targets found for this ISBN - next!
+                    continue
 
-            for target in targets:
-                if target.service_type.renderContents() == 'getFullTxt':
-                    # Add the $9 subfield to mark this as a good one
-                    isbn.add_subfield('9', 'SFX')
-                    return True
+                for target in targets:
+                    if target.service_type.renderContents() == 'getFullTxt':
+                        # Add the $9 subfield to mark this as a good one
+                        isbn.add_subfield('9', 'SFX')
+                        return True
     return False
 
 def clean_isbn(isbn):