diff --git a/README.md b/README.md index 7e422bd..1735316 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,7 @@ MONGODB_SEPARATE_COLLECTIONS = True | --- | --- | --- | --- | | `MONGODB_DATABASE` | scrapy-mongodb | No | Database to use. Does not need to exist. | | `MONGODB_COLLECTION` | items | No | Collection within the database to use. Does not need to exist. | +| `MONGODB_ITEM_COLLECTION` | None | No | Name of an item field. If an item contains this field, the field's value is used as the name of the collection the item is stored in; otherwise the collection is chosen as usual (`MONGODB_SEPARATE_COLLECTIONS` / `MONGODB_COLLECTION`). The collection does not need to exist. | | `MONGODB_URI` | mongodb://localhost:27017 | No | URI to the MongoDB instance or replica sets you want to connect to. It must start with `mongodb://` (see more in the [MongoDB docs][1]). E.g.: `mongodb://user:pass@host:port`, `mongodb://user:pass@host:port,host2:port2` | | `MONGODB_UNIQUE_KEY` | None | No | If you want to have a unique key in the database, enter the key name here. `scrapy-mongodb` will ensure the key is properly indexed. | | `MONGODB_BUFFER_DATA` | None | No | To ease the load on MongoDB, set this option to the number of items you want to buffer in the client before sending them to database. Setting a `MONGODB_UNIQUE_KEY` together with `MONGODB_BUFFER_DATA` is not supported. 
| diff --git a/scrapy_mongodb.py b/scrapy_mongodb.py index 9c9a80a..7de69e8 100644 --- a/scrapy_mongodb.py +++ b/scrapy_mongodb.py @@ -32,6 +32,7 @@ class MongoDBPipeline(BaseItemExporter): 'write_concern': 0, 'database': 'scrapy-mongodb', 'collection': 'items', + 'item_collection': None, 'separate_collections': False, 'replica_set': None, 'unique_key': None, @@ -82,7 +83,7 @@ def open_spider(self, spider): # Set up the database self.database = connection[self.config['database']] - self.collections = {'default': self.database[self.config['collection']]} + self.collections = {} self.logger.info(u'Connected to MongoDB {0}, using "{1}"'.format( self.config['uri'], @@ -140,6 +141,7 @@ def configure(self): ('write_concern', 'MONGODB_REPLICA_SET_W'), ('database', 'MONGODB_DATABASE'), ('collection', 'MONGODB_COLLECTION'), + ('item_collection', 'MONGODB_ITEM_COLLECTION'), ('separate_collections', 'MONGODB_SEPARATE_COLLECTIONS'), ('replica_set', 'MONGODB_REPLICA_SET'), ('unique_key', 'MONGODB_UNIQUE_KEY'), @@ -219,7 +221,7 @@ def insert_item(self, item, spider): if self.config['append_timestamp']: item['scrapy-mongodb'] = {'ts': datetime.datetime.utcnow()} - collection_name, collection = self.get_collection(spider.name) + collection_name, collection = self.get_collection(spider.name, item) if self.config['unique_key'] is None: try: @@ -253,17 +255,20 @@ def insert_item(self, item, spider): return item - def get_collection(self, name): - if self.config['separate_collections']: - collection = self.collections.get(name) + def get_collection(self, name, item): + + collection_name = self.config['collection'] + + if self.config['item_collection'] and self.config['item_collection'] in item: + collection_name = item[self.config['item_collection']] + elif self.config['separate_collections']: collection_name = name - if not collection: - collection = self.database[name] - self.collections[name] = collection - else: - collection = self.collections.get('default') - collection_name = 
self.config['collection'] + collection = self.collections.get(collection_name) + + if not collection: + collection = self.database[collection_name] + self.collections[collection_name] = collection # Ensure unique index if self.config['unique_key']: