From c076d094f5dbf7507e7836291c07d719f99e6729 Mon Sep 17 00:00:00 2001 From: gfawcett Date: Sun, 9 Jan 2011 00:56:13 +0000 Subject: [PATCH] Hash-based storage for uploaded files Two files with same content will share the same stored data. git-svn-id: svn://svn.open-ils.org/ILS-Contrib/servres/trunk@1171 6d9bc8c9-1ec2-4278-b937-99fde70a366f --- .../0012_auto__add_field_item_fileobj_origname.py | 189 +++++++++++++++++++++ conifer/syrup/models.py | 3 +- conifer/syrup/views/items.py | 31 +++- 3 files changed, 219 insertions(+), 4 deletions(-) create mode 100644 conifer/syrup/migrations/0012_auto__add_field_item_fileobj_origname.py diff --git a/conifer/syrup/migrations/0012_auto__add_field_item_fileobj_origname.py b/conifer/syrup/migrations/0012_auto__add_field_item_fileobj_origname.py new file mode 100644 index 0000000..ec82f04 --- /dev/null +++ b/conifer/syrup/migrations/0012_auto__add_field_item_fileobj_origname.py @@ -0,0 +1,189 @@ +# encoding: utf-8 +import datetime +from south.db import db +from south.v2 import SchemaMigration +from django.db import models + +class Migration(SchemaMigration): + + def forwards(self, orm): + + # Adding field 'Item.fileobj_origname' + db.add_column('syrup_item', 'fileobj_origname', self.gf('django.db.models.fields.CharField')(max_length=2048, null=True, blank=True), keep_default=False) + + + def backwards(self, orm): + + # Deleting field 'Item.fileobj_origname' + db.delete_column('syrup_item', 'fileobj_origname') + + + models = { + 'auth.group': { + 'Meta': {'object_name': 'Group'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '80'}), + 'permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}) + }, + 'auth.permission': { + 'Meta': {'unique_together': "(('content_type', 'codename'),)", 'object_name': 'Permission'}, + 'codename': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'content_type': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['contenttypes.ContentType']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '50'}) + }, + 'auth.user': { + 'Meta': {'object_name': 'User'}, + 'date_joined': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'email': ('django.db.models.fields.EmailField', [], {'max_length': '75', 'blank': 'True'}), + 'first_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'groups': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Group']", 'symmetrical': 'False', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'is_active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}), + 'is_staff': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}), + 'is_superuser': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}), + 'last_login': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now'}), + 'last_name': ('django.db.models.fields.CharField', [], {'max_length': '30', 'blank': 'True'}), + 'password': ('django.db.models.fields.CharField', [], {'max_length': '128'}), + 'user_permissions': ('django.db.models.fields.related.ManyToManyField', [], {'to': "orm['auth.Permission']", 'symmetrical': 'False', 'blank': 'True'}), + 'username': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '30'}) + }, + 'contenttypes.contenttype': { + 'Meta': {'unique_together': "(('app_label', 'model'),)", 'object_name': 'ContentType', 'db_table': "'django_content_type'"}, + 'app_label': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'model': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}) + }, + 'syrup.config': { + 'Meta': {'object_name': 'Config'}, + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '256'}), + 'value': ('django.db.models.fields.CharField', [], {'max_length': '8192'}) + }, + 'syrup.course': { + 'Meta': {'object_name': 'Course'}, + 'code': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}), + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'department': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.Department']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '1024'}) + }, + 'syrup.declaration': { + 'Meta': {'object_name': 'Declaration'}, + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'item': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.Item']"}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}) + }, + 'syrup.department': { + 'Meta': {'object_name': 'Department'}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}), + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '256'}), + 'service_desk': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.ServiceDesk']"}) + }, + 'syrup.group': { + 'Meta': {'object_name': 'Group'}, + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'external_id': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'site': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.Site']"}) + }, + 'syrup.item': { + 'Meta': {'object_name': 'Item'}, + 'author': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '8192', 'null': 'True', 'blank': 'True'}), + 'bib_id': ('django.db.models.fields.CharField', [], {'max_length': '256', 'null': 'True', 'blank': 'True'}), + 'copyright_status': ('django.db.models.fields.CharField', [], {'default': "'UK'", 'max_length': '2'}), + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'fileobj': ('django.db.models.fields.files.FileField', [], {'default': 'None', 'max_length': '255', 'null': 'True', 'blank': 'True'}), + 'fileobj_mimetype': ('django.db.models.fields.CharField', [], {'max_length': '128', 'null': 'True', 'blank': 'True'}), + 'fileobj_origname': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'isbn': ('django.db.models.fields.CharField', [], {'max_length': '17', 'null': 'True', 'blank': 'True'}), + 'issue': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}), + 'item_type': ('django.db.models.fields.CharField', [], {'max_length': '7'}), + 'itemtype': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '1', 'null': 'True', 'blank': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'marcxml': ('django.db.models.fields.TextField', [], {'null': 'True', 'blank': 'True'}), + 'pages': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}), + 'parent_heading': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.Item']", 'null': 'True', 'blank': 'True'}), + 'published': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}), + 'publisher': ('django.db.models.fields.CharField', [], {'max_length': '8192', 'null': 'True', 'blank': 'True'}), + 'site': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.Site']"}), + 'source_title': ('django.db.models.fields.CharField', [], {'db_index': 'True', 'max_length': '8192', 'null': 'True', 'blank': 'True'}), + 'title': ('django.db.models.fields.CharField', [], {'max_length': '8192', 'db_index': 'True'}), + 'url': ('django.db.models.fields.URLField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'volume': ('django.db.models.fields.CharField', [], {'max_length': '64', 'null': 'True', 'blank': 'True'}) + }, + 'syrup.membership': { + 'Meta': {'unique_together': "(('group', 'user'),)", 'object_name': 'Membership'}, + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'group': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.Group']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'role': ('django.db.models.fields.CharField', [], {'default': "'STUDT'", 'max_length': '6'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}) + }, + 'syrup.servicedesk': { + 'Meta': {'object_name': 'ServiceDesk'}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}), + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'external_id': ('django.db.models.fields.CharField', [], {'max_length': '256', 'null': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '100'}) + }, + 'syrup.site': { + 'Meta': {'unique_together': "(('course', 'start_term', 'owner'),)", 'object_name': 'Site'}, + 'access': ('django.db.models.fields.CharField', [], {'default': "'MEMBR'", 'max_length': '5'}), + 'course': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.Course']"}), + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'end_term': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'end_term'", 'to': "orm['syrup.Term']"}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'owner': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}), + 'service_desk': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['syrup.ServiceDesk']"}), + 'start_term': ('django.db.models.fields.related.ForeignKey', [], {'related_name': "'start_term'", 'to': "orm['syrup.Term']"}), + 'uwindsor_bookbag': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}), + 'uwindsor_eres': ('django.db.models.fields.CharField', [], {'max_length': '2048', 'null': 'True', 'blank': 'True'}) + }, + 'syrup.term': { + 'Meta': {'object_name': 'Term'}, + 'code': ('django.db.models.fields.CharField', [], {'unique': 'True', 'max_length': '64'}), + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'finish': ('django.db.models.fields.DateField', [], {}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '256'}), + 'start': ('django.db.models.fields.DateField', [], {}) + }, + 'syrup.userprofile': { + 'Meta': {'object_name': 'UserProfile'}, + 'created': ('django.db.models.fields.DateTimeField', [], {'auto_now_add': 'True', 'blank': 'True'}), + 'external_memberships_checked': ('django.db.models.fields.DateTimeField', [], {'null': 'True', 'blank': 'True'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'ils_userid': ('django.db.models.fields.CharField', [], {'max_length': '50', 'null': 'True', 'blank': 'True'}), + 'last_email_notice': ('django.db.models.fields.DateTimeField', [], {'default': 'datetime.datetime.now', 'null': 'True', 'blank': 'True'}), + 'last_modified': ('django.db.models.fields.DateTimeField', [], {'auto_now': 'True', 'blank': 'True'}), + 'user': ('django.db.models.fields.related.ForeignKey', [], {'to': "orm['auth.User']"}), + 'wants_email_notices': ('django.db.models.fields.BooleanField', [], {'default': 'False', 'blank': 'True'}) + }, + 'syrup.z3950target': { + 'Meta': {'object_name': 'Z3950Target'}, + 'active': ('django.db.models.fields.BooleanField', [], {'default': 'True', 'blank': 'True'}), + 'database': ('django.db.models.fields.CharField', [], {'max_length': '50'}), + 'host': ('django.db.models.fields.CharField', [], {'max_length': '50'}), + 'id': ('django.db.models.fields.AutoField', [], {'primary_key': 'True'}), + 'name': ('django.db.models.fields.CharField', [], {'max_length': '100'}), + 'port': ('django.db.models.fields.IntegerField', [], {'default': '210'}), + 'syntax': ('django.db.models.fields.CharField', [], {'default': "'USMARC'", 'max_length': '10'}) + } + } + + complete_apps = ['syrup'] diff --git a/conifer/syrup/models.py b/conifer/syrup/models.py index 8962dd1..3c4cdf8 100644 --- a/conifer/syrup/models.py +++ b/conifer/syrup/models.py @@ -643,8 +643,9 @@ class Item(BaseModel): url = m.URLField(blank=True, null=True, max_length=2048) # for items of type ELEC (attached electronic document) - fileobj = m.FileField(upload_to='uploads/%Y/%m/%d', max_length=255, + fileobj = m.FileField(upload_to='uploads', max_length=255, blank=True, null=True, default=None) + fileobj_origname = m.CharField(max_length=2048, blank=True, null=True) fileobj_mimetype = m.CharField(max_length=128, blank=True, null=True) diff --git a/conifer/syrup/views/items.py b/conifer/syrup/views/items.py index 8227eb0..b51251b 100644 --- a/conifer/syrup/views/items.py +++ b/conifer/syrup/views/items.py @@ -1,3 +1,5 @@ +import os.path +import hashlib from _common import * from conifer.plumbing.hooksystem import * from conifer.syrup import integration @@ -191,8 +193,26 @@ def item_add(request, site_id, item_id): item_type='ELEC', parent_heading=parent_item, fileobj_mimetype = upload.content_type, + fileobj_origname = upload.name, **data) - item.fileobj.save(upload.name, upload) + + # we'll save the file with a name based on the hash of its + # contents. + hash = hashlib.md5() + for x in upload: + hash.update(x) + + savename = hash.hexdigest() + prefix = models.Item._meta.get_field('fileobj').upload_to + fullpath = os.path.join(settings.MEDIA_ROOT, prefix, savename) + + if os.path.isfile(fullpath): + # just use the existing copy + item.fileobj.name = os.path.join(prefix, savename) + else: + # save a new copy + item.fileobj.save(savename, upload) + item.save() else: raise NotImplementedError @@ -353,8 +373,13 @@ def item_download(request, site_id, item_id, filename): fileiter = item.fileobj.chunks() resp = HttpResponse(fileiter) - resp['Content-Type'] = item.fileobj_mimetype or 'application/octet-stream' - #resp['Content-Disposition'] = 'attachment; filename=%s' % name + mime = item.fileobj_mimetype or 'application/octet-stream' + resp['Content-Type'] = mime + + if item.fileobj_origname: + disposition = 'attachment' if mime.startswith('application/') else 'inline' + resp['Content-Disposition'] = '%s; filename=%s' % (disposition, + item.fileobj_origname) return resp -- 2.11.0