# 1. MongoDB pipeline — add the settings below to settings.py:
MONGO_URL = 'localhost'
MONGO_DB = 'image360'
import pymongo
class HtestPipeline(object):
    """Default no-op pipeline: hands every item through unchanged."""

    def process_item(self, item, spider):
        # Nothing to do for this project; pass the item along the chain.
        return item
class MongoDBPipeline(object):
    """Store scraped items in MongoDB.

    Connection settings (MONGO_URL, MONGO_DB) are read from the Scrapy
    settings via ``from_crawler``; each item is inserted into the
    collection named by ``item.collection``.
    """

    def __init__(self, mongo_uri, mongo_db):
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        # Alternate constructor Scrapy calls to build the pipeline from
        # the project settings.
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URL'),
            mongo_db=crawler.settings.get('MONGO_DB'),
        )

    def open_spider(self, spider):
        # One client per spider run; paired with close_spider below.
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        self.client.close()

    def process_item(self, item, spider):
        # Collection.insert() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document API.
        self.db[item.collection].insert_one(dict(item))
        return item
# 2. MySQL pipeline — create the target table in advance;
#    add the settings below to settings.py:
MYSQL_HOST = 'localhost'
MYSQL_DATABASER = 'imag360'
MYSQL_USER = 'root'
MYSQL_PASSWORD = '123456'
MYSQL_PORT = 3306
import pymysql
class MySqlDbPipelines(object):
    """Store scraped items in MySQL.

    The target table (named by ``item.table``) must exist already;
    connection settings are read from the Scrapy settings via
    ``from_crawler``.
    """

    def __init__(self, host, database, user, password, port):
        self.host = host
        self.database = database
        self.user = user
        self.password = password
        self.port = port

    @classmethod
    def from_crawler(cls, crawler):
        # Alternate constructor Scrapy calls to build the pipeline from
        # the project settings. Note the settings key is spelled
        # 'MYSQL_DATABASER' — it must match settings.py exactly.
        return cls(
            host=crawler.settings.get('MYSQL_HOST'),
            database=crawler.settings.get('MYSQL_DATABASER'),
            user=crawler.settings.get('MYSQL_USER'),
            password=crawler.settings.get('MYSQL_PASSWORD'),
            port=crawler.settings.get('MYSQL_PORT'),
        )

    def open_spider(self, spider):
        # PyMySQL 1.0+ made Connection's arguments keyword-only, so the
        # original positional call pymysql.connect(host, user, ...) raises
        # a TypeError there; pass everything by keyword.
        self.db = pymysql.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            database=self.database,
            charset='utf8',
            port=self.port,
        )
        self.cursor = self.db.cursor()

    def close_spider(self, spider):
        self.db.close()

    def process_item(self, item, spider):
        data = dict(item)
        # Backtick-quote the column names so reserved words are safe;
        # values go through %s placeholders (parameterized, not
        # string-interpolated). The table name still comes from
        # item.table, which is trusted project code, not scraped input.
        columns = ', '.join('`%s`' % key for key in data)
        placeholders = ', '.join(['%s'] * len(data))
        sql = 'INSERT INTO `%s` (%s) VALUES (%s)' % (
            item.table, columns, placeholders)
        try:
            self.cursor.execute(sql, tuple(data.values()))
            self.db.commit()
        except Exception:
            # Roll back the failed statement so the connection is left in
            # a usable state for the next item, then surface the error.
            self.db.rollback()
            raise
        return item