安装和基本调用
安装
conda install elasticsearch
基本调用
from elasticsearch import Elasticsearch, helpers
HOSTS = 'http://abc.com'  # endpoint handed to the Elasticsearch client
INDEX = 'abc'  # name of the index to query
es = Elasticsearch(HOSTS)
# Pass index/body by keyword: the positional order of these two parameters is
# not the same in every elasticsearch-py release, and newer clients accept
# keyword arguments only.
js = es.search(index=INDEX, body={'query': {'match_all': {}}})
print(js)
封装自用
from elasticsearch import Elasticsearch, helpers
# Endpoint passed to the Elasticsearch client constructor.
HOSTS = 'http://abc.com'
INDEX = 'abc' # index name
# Default `size` used by ES.query() and ES.scroll().
SIZE = 100
# Value sent as the `scroll` parameter of search/scroll requests.
SCROLL = '5m'
SORT_KEY = '_id' # name of the sort key (not referenced by the code visible here)
class ES:
    """Small convenience wrapper around an ``Elasticsearch`` client for ``HOSTS``.

    Every method defaults to the module-level ``INDEX``. All client calls pass
    ``index`` and ``body`` by keyword because their positional order is not
    the same in every elasticsearch-py release.
    """

    def __init__(self):
        self.es = Elasticsearch(HOSTS)

    def search(self, body, hits=True, index=INDEX):
        """Run a single search request.

        Args:
            body: Request body (query DSL dict).
            hits: If true, return only the ``'hits'`` section of the
                response; otherwise return the whole response.
            index: Index to search.
        """
        js = self.es.search(index=index, body=body)
        return js['hits'] if hits else js

    def query(self, query, size=SIZE, start=0, hits=True):
        """Search the default index with ``query`` and simple paging.

        Args:
            query: Value placed under the ``'query'`` key of the body.
            size: Number of results to return.
            start: Offset of the first result (sent as ``'from'``).
            hits: Forwarded to :meth:`search`.
        """
        body = {'query': query, 'from': start, 'size': size}
        return self.search(body, hits)

    def scroll(self, body, size=SIZE, return_ls=False, index=INDEX):
        """Fetch all results of ``body`` page by page using the scroll API.

        This is a generator: no request is sent until it is first iterated.

        Args:
            body: Request body (query DSL dict).
            size: Sent as the ``size`` request parameter of the initial search.
            return_ls: If true, yield each page as a list of hits; otherwise
                yield the hits one at a time.
            index: Index to search.

        Yields:
            A list of hit dicts per page, or individual hit dicts.
        """
        js = self.es.search(index=index, body=body, scroll=SCROLL, size=size)
        hits = js['hits']['hits']
        # Stop on the first empty page instead of computing a page count from
        # hits.total: that value can be a dict such as
        # {'value': ..., 'relation': ...} rather than an int.
        while hits:
            if return_ls:
                yield hits
            else:
                yield from hits
            # Always continue from the scroll ID of the latest response.
            js = self.es.scroll(scroll_id=js['_scroll_id'], scroll=SCROLL)
            hits = js['hits']['hits']

    def index(self, body, index=INDEX):
        """Write one document, using ``body['id']`` as the document ID.

        Raises:
            KeyError: If ``body`` has no ``'id'`` key.
        """
        self.es.index(index=index, body=body, id=body['id'])

    def bulk(self, dt, index=INDEX):
        """Write documents through ``helpers.bulk``.

        Args:
            dt: A single document, or a list/tuple of documents. Each
                document must contain an ``'id'`` key, which becomes ``_id``.
            index: Target index.
        """
        # Anything that is not a list/tuple is treated as one document, which
        # keeps the original single-document call working unchanged.
        docs = dt if isinstance(dt, (list, tuple)) else [dt]
        actions = [
            {
                '_index': index,
                '_source': doc,
                '_id': doc['id'],
            }
            for doc in docs
        ]
        helpers.bulk(self.es, actions)

    def delete_by_query(self, body, index=INDEX):
        """Delete every document in ``index`` matched by the query in ``body``."""
        self.es.delete_by_query(index=index, body=body)
# Module-level wrapper instance, created when this module is imported.
es = ES()

if __name__ == '__main__':
    _body = {
        # '_source': ["dataType"],
        'size': 5,  # is 10000 the maximum?
        'query': {'range': {'id': {'gt': '616000000000000000'}}},
    }
    # One-shot search first, then the same body through the scroll generator.
    print(es.search(_body))
    for hit in es.scroll(_body):
        print(hit)
{
'took': 614,
'timed_out': False,
'_shards': {
'total': 1,
'successful': 1,
'skipped': 0,
'failed': 0
},
'hits': {
'total': {
'value': 10000,
'relation': 'gte'
},
'max_score': 1.0,
'hits': [{
'_index': 'abc',
'_type': '_doc',
'_id': '9948942229923430',
'_score': 1.0,
'_source': {
'title': '森林公园林地资源',
'url': 'bbb',
}
}, {
'_index': 'abc',
'_type': '_doc',
'_id': '9948937613253017',
'_score': 1.0,
'_source': {
'title': '小型微型企业创业创新示范基地',
'url': 'aaa',
}
}]
}
}
常用查询语句
# Return every document.
match_all = {
    'query': {
        'match_all': {},
    },
}
# Match query: all valid records (valid == 'Y'), returning only the `id` field.
match_valid = {
    '_source': ['id'],
    'query': {
        'match': {'valid': 'Y'},
    },
}
# Match query: all valid records (valid == 'Y'), sorted ascending.
# Sorts on `_id`, like the other queries in this file; the `_uid` metadata
# field is not available on Elasticsearch 7.x.
match_valid_sort = {
    '_source': ['id'],
    'from': 0,
    'query': {'match': {'valid': 'Y'}},
    'sort': {'_id': {'order': 'asc'}},
}
# Range query: records whose `id` lies in the half-open range [gte, lt).
range_gt = {
    '_source': ['id'],
    'query': {
        'range': {
            'id': {'gte': '615000000000000000', 'lt': '616000000000000000'},  # >= lower, < upper
        },
    },
    'sort': {'_id': {'order': 'asc'}},
}
# Match + range: valid records whose `id` lies in [gte, lt); both clauses must hold.
match_range = {
    '_source': ['id'],
    'query': {
        'bool': {
            'must': [
                {'match': {'valid': 'Y'}},
                {'range': {'id': {'gte': '615000000000000000', 'lt': '616000000000000000'}}},
            ],
        },
    },
    'sort': {'_id': {'order': 'asc'}},
}
# Match + phrase: valid records whose `title` matches the given phrase;
# returns at most 20 results with only the `title` field.
match_phrase = {
    '_source': ['title'],
    'size': 20,
    'query': {
        'bool': {
            'must': [
                {'match': {'valid': 'Y'}},
                {'match_phrase': {'title': '复工'}},
            ],
        },
    },
    'sort': {'_id': {'order': 'asc'}},
}
# Match + regexp: valid records whose `title` satisfies the regular expression.
# NOTE(review): Elasticsearch applies regexp patterns to whole indexed terms;
# confirm this alternation behaves as intended for the `title` mapping.
match_regexp = {
    '_source': ['title'],
    # 'size': 20,
    'query': {
        'bool': {
            'must': [
                {'match': {'valid': 'Y'}},
                {'regexp': {'title': '复工复产|扶持'}},
            ],
        },
    },
    'sort': {'_id': {'order': 'asc'}},
}
if __name__ == '__main__':
    from elastic_search import es

    # Run the match + regexp example through the imported wrapper and print
    # whatever it returns.
    print(es.search(match_regexp))