版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_27378621/article/details/81112370
可以使用create_index或者ensure_index方法,两种方法的调用语法相同。注意:ensure_index自PyMongo 3.0起已被标记为废弃(DEPRECATED,见下方源码),并在PyMongo 4.0中被移除,新代码建议统一使用create_index。
首先,连接数据库中的目标集合:
col = MongoClient(the_client).get_database(the_db).get_collection(the_col)
然后,创建唯一索引,不加unique的话默认是普通的索引,即unique=False:
col.create_index([("索引字段名", 1)], unique=True)
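其中的1和-1分别表示升序(pymongo.ASCENDING)与降序(pymongo.DESCENDING)排列。注意,需要指定排序方向时,索引要写成由(字段名, 方向)元组组成的列表,即使用中括号——[("索引", 1)];如果只是对单个字段创建升序索引,也可以直接传入字段名字符串,例如create_index("索引")。具体参见下方源码说明。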
实例:
# -*- coding:utf-8 -*-
# 给mongodb集合创建索引
from pymongo import MongoClient
def create_mongodb_index(the_data_client, the_data_db, the_data_cl, index_name, unique=False):
    """Create a single-field ascending index on a MongoDB collection.

    :param the_data_client: value passed straight to ``MongoClient`` to open
        the connection.
    :param the_data_db: name of the database holding the collection.
    :param the_data_cl: name of the collection to index.
    :param index_name: name of the document field to index (ascending order).
    :param unique: when True a unique index is requested; defaults to False,
        i.e. an ordinary index.
    :return: whatever ``create_index`` returns (the name of the index).
    """
    data_client = MongoClient(the_data_client)
    try:
        data_col = data_client.get_database(the_data_db).get_collection(the_data_cl)
        # %-formatting keeps the output identical under Python 2 and Python 3.
        print("start, the index is: %s" % (index_name,))
        # create_index replaces the deprecated ensure_index; same call syntax.
        created_name = data_col.create_index([(index_name, 1)], unique=unique)
        print("run over")
    finally:
        # Always release the connection, even if index creation failed.
        data_client.close()
    return created_name
if __name__ == '__main__':
    # Placeholders: fill in the connection target, database name, collection
    # name and the field to index before running this script.
    DataClient = DataDB = DataCol = IndexName = ''
    create_mongodb_index(the_data_client=DataClient,
                         the_data_db=DataDB,
                         the_data_cl=DataCol,
                         index_name=IndexName,
                         unique=False)
附:PyMongo(3.6及以上版本)中Collection的create_index、__create_index与ensure_index方法源代码
def create_index(self, keys, session=None, **kwargs):
    """Build an index on this collection and return its name.

    ``keys`` may be a single key or a list of (key, direction) pairs.
    Each key must be an instance of :class:`basestring`
    (:class:`str` in python 3) and each direction must be one of
    (:data:`~pymongo.ASCENDING`, :data:`~pymongo.DESCENDING`,
    :data:`~pymongo.GEO2D`, :data:`~pymongo.GEOHAYSTACK`,
    :data:`~pymongo.GEOSPHERE`, :data:`~pymongo.HASHED`,
    :data:`~pymongo.TEXT`).

    A single key ascending index on the key ``'mike'`` only needs a
    string argument::

      >>> my_collection.create_index("mike")

    A compound index on ``'mike'`` descending and ``'eliot'`` ascending
    needs a list of tuples::

      >>> my_collection.create_index([("mike", pymongo.DESCENDING),
      ...                             ("eliot", pymongo.ASCENDING)])

    Every optional index creation parameter is passed as a keyword
    argument, for example::

      >>> my_collection.create_index([("mike", pymongo.DESCENDING)],
      ...                            background=True)

    Valid options include, but are not limited to:

      - `name`: custom name to use for this index - if none is
        given, a name will be generated.
      - `unique`: if ``True`` creates a uniqueness constraint on the
        index.
      - `background`: if ``True`` this index should be created in the
        background.
      - `sparse`: if ``True``, omit from the index any documents that
        lack the indexed field.
      - `bucketSize`: for use with geoHaystack indexes.
        Number of documents to group together within a certain
        proximity to a given longitude and latitude.
      - `min`: minimum value for keys in a :data:`~pymongo.GEO2D`
        index.
      - `max`: maximum value for keys in a :data:`~pymongo.GEO2D`
        index.
      - `expireAfterSeconds`: <int> Used to create an expiring (TTL)
        collection. MongoDB will automatically delete documents from
        this collection after <int> seconds. The indexed field must
        be a UTC datetime or the data will not expire.
      - `partialFilterExpression`: A document that specifies a filter
        for a partial index.
      - `collation` (optional): An instance of
        :class:`~pymongo.collation.Collation`. This option is only
        supported on MongoDB 3.4 and above.

    See the MongoDB documentation for a full list of supported options
    by server version.

    .. warning:: `dropDups` is not supported by MongoDB 3.0 or newer. The
      option is silently ignored by the server and unique index builds
      using the option will fail if a duplicate value is detected.

    .. note:: `partialFilterExpression` requires server version
      **>= 3.2**

    .. note:: The :attr:`~pymongo.collection.Collection.write_concern`
      of this collection is automatically applied to this operation
      when using MongoDB >= 3.4.

    :Parameters:
      - `keys`: a single key or a list of (key, direction)
        pairs specifying the index to create
      - `session` (optional): a
        :class:`~pymongo.client_session.ClientSession`.
      - `**kwargs` (optional): any additional index creation
        options (see the above list) should be passed as keyword
        arguments

    :Returns:
      The index name: the `name` option when supplied, otherwise the
      generated one.

    .. versionchanged:: 3.6
       Added ``session`` parameter. Added support for passing maxTimeMS
       in kwargs.
    .. versionchanged:: 3.4
       Apply this collection's write concern automatically to this
       operation when connected to MongoDB >= 3.4. Support the
       `collation` option.
    .. versionchanged:: 3.2
       Added partialFilterExpression to support partial indexes.
    .. versionchanged:: 3.0
       Renamed `key_or_list` to `keys`. Removed the `cache_for` option.
       :meth:`create_index` no longer caches index names. Removed
       support for the drop_dups and bucket_size aliases.

    .. mongodoc:: indexes
    """
    index_keys = helpers._index_list(keys)
    generated_name = helpers._gen_index_name(index_keys)
    # An explicit `name` option wins; otherwise the generated name is
    # stored in kwargs so it travels with the other index options.
    index_name = kwargs.setdefault("name", generated_name)

    # maxTimeMS is taken out of the index options and handed to the
    # helper as a separate keyword argument instead.
    if "maxTimeMS" in kwargs:
        command_options = {"maxTimeMS": kwargs.pop("maxTimeMS")}
    else:
        command_options = {}

    self.__create_index(index_keys, kwargs, session, **command_options)
    return index_name
def __create_index(self, keys, index_options, session, **kwargs):
    """Internal helper that sends a ``createIndexes`` command.

    The index specification is the key document built from `keys` merged
    with `index_options`. Note that the ``'collation'`` entry, if any, is
    popped out of `index_options` (the dict is modified in place),
    validated, and only re-attached once the server is known to support
    it.

    :Parameters:
      - `keys`: a list of tuples [(key, type), (key, type), ...]
      - `index_options`: a dict of index options.
      - `session` (optional): a
        :class:`~pymongo.client_session.ClientSession`.
      - `**kwargs`: extra fields merged into the command document itself.

    :Raises:
      :class:`ConfigurationError` when a collation is supplied but the
      connection's ``max_wire_version`` is below 5.
    """
    key_document = helpers._index_document(keys)
    raw_collation = index_options.pop('collation', None)
    collation = validate_collation_or_none(raw_collation)

    index_spec = {"key": key_document}
    index_spec.update(index_options)

    with self._socket_for_writes() as sock_info:
        if collation is not None:
            # Collations need wire version 5 or newer.
            if sock_info.max_wire_version < 5:
                raise ConfigurationError(
                    'Must be connected to MongoDB 3.4+ to use collations.')
            index_spec['collation'] = collation

        command = SON([('createIndexes', self.name),
                       ('indexes', [index_spec])])
        command.update(kwargs)
        self._command(
            sock_info,
            command,
            read_preference=ReadPreference.PRIMARY,
            codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
            write_concern=self._write_concern_for(session),
            session=session)
def ensure_index(self, key_or_list, cache_for=300, **kwargs):
    """**DEPRECATED** - Make sure an index exists on this collection.

    Emits a :class:`DeprecationWarning` on every call. The index is only
    sent to the server when the client does not already report it as
    cached; after creation it is registered in the client's index cache
    together with `cache_for`.

    :Parameters:
      - `key_or_list`: the key(s) of the index, in any form accepted by
        ``helpers._index_list``.
      - `cache_for` (optional): must be an integer or a float; handed to
        the client's index cache.
      - `**kwargs` (optional): index options. The legacy aliases
        ``drop_dups`` and ``bucket_size`` are renamed to ``dropDups``
        and ``bucketSize``.

    :Returns:
      The index name when the index was created by this call, ``None``
      when it was already cached.

    :Raises:
      :class:`TypeError` if `cache_for` is neither an integer nor a float.

    .. versionchanged:: 3.0
        **DEPRECATED**
    """
    warnings.warn("ensure_index is deprecated. Use create_index instead.",
                  DeprecationWarning, stacklevel=2)

    # The types supported by datetime.timedelta.
    if not isinstance(cache_for, (integer_types, float)):
        raise TypeError("cache_for must be an integer or float.")

    # Translate the legacy snake_case aliases to the server option names.
    for alias, option in (("drop_dups", "dropDups"),
                          ("bucket_size", "bucketSize")):
        if alias in kwargs:
            kwargs[option] = kwargs.pop(alias)

    index_keys = helpers._index_list(key_or_list)
    index_name = kwargs.setdefault(
        "name", helpers._gen_index_name(index_keys))

    client = self.__database.client
    database_name = self.__database.name

    # Note that there is a race condition here. A thread may see the
    # index as not cached and then be preempted before it creates and
    # caches it, so several threads ensuring the same index at once can
    # send it to the server two or more times. This has no practical
    # impact other than wasted round trips.
    if client._cached(database_name, self.__name, index_name):
        return None

    self.__create_index(index_keys, kwargs, session=None)
    client._cache_index(database_name, self.__name, index_name, cache_for)
    return index_name