安装和基本调用

安装

conda install elasticsearch

基本调用

from elasticsearch import Elasticsearch, helpers

HOSTS = 'http://abc.com'
INDEX = 'abc'

# Minimal example: connect, run a match_all search against INDEX, print the raw response.
es = Elasticsearch(HOSTS)
# index/body are passed by keyword so the call does not depend on the
# client's positional parameter order.
js = es.search(index=INDEX, body={'query': {'match_all': {}}})
print(js)

封装自用

from elasticsearch import Elasticsearch, helpers
HOSTS = 'http://abc.com'  # address handed to Elasticsearch() by ES.__init__
INDEX = 'abc'  # index name
SIZE = 100  # default `size` for ES.query and ES.scroll
SCROLL = '5m'  # value passed as the `scroll` argument by ES.scroll
SORT_KEY = '_id'  # name of the sort key


class ES:
    """Small convenience wrapper around an ``Elasticsearch`` client.

    The client connects to ``HOSTS``; every method targets ``INDEX`` unless
    another index is passed explicitly.
    """

    def __init__(self):
        self.es = Elasticsearch(HOSTS)

    def search(self, body, hits=True, index=INDEX):
        """Run a search request.

        Args:
            body: Search request body (dict).
            hits: If true, return only the ``hits`` section of the response;
                otherwise return the whole response.
            index: Index to search.
        """
        # Keyword arguments keep the call independent of the client's
        # positional parameter order.
        js = self.es.search(index=index, body=body)
        return js['hits'] if hits else js

    def query(self, query, size=SIZE, start=0, hits=True):
        """Search with a bare query clause.

        Args:
            query: Value placed under the ``query`` key of the request body.
            size: Number of results to return.
            start: Offset of the first result (sent as ``from``).
            hits: Forwarded to :meth:`search`.
        """
        body = {'query': query, 'from': start, 'size': size}
        return self.search(body, hits)

    def _scroll_batches(self, body, size, index):
        """Yield each non-empty page of hits of a scrolled search."""
        js = self.es.search(index=index, body=body, scroll=SCROLL, size=size)
        while True:
            batch = js['hits']['hits']
            if not batch:
                return
            yield batch
            # Continue with the scroll id of the latest response; fall back
            # to the previous one if a response does not carry it.
            scroll_id = js['_scroll_id']
            js = self.es.scroll(scroll_id=scroll_id, scroll=SCROLL)
            js.setdefault('_scroll_id', scroll_id)

    def scroll(self, body, size=SIZE, return_ls=False, index=INDEX):
        """Fetch a result set in batches through the scroll API.

        Pages are requested until an empty page comes back. The page count is
        not derived from ``hits.total`` because that field is an object
        (``{'value': ..., 'relation': ...}``) in the response format this
        file shows, so it cannot be used for arithmetic.

        Args:
            body: Search request body (dict).
            size: Number of hits per page.
            return_ls: If true, yield each page as a list of hits; otherwise
                yield hits one by one. Empty pages are never yielded.
            index: Index to search.
        """
        batches = self._scroll_batches(body, size, index)
        if return_ls:
            yield from batches
        else:
            for batch in batches:
                yield from batch

    def index(self, body, index=INDEX):
        """Write one document, using ``body['id']`` as the document id."""
        self.es.index(index=index, body=body, id=body['id'])

    def bulk(self, dt, index=INDEX):
        """Write one document through ``helpers.bulk``, with ``dt['id']`` as its id."""
        action = {
            '_index': index,
            '_source': dt,
            '_id': dt['id'],
        }
        helpers.bulk(self.es, [action])

    def delete_by_query(self, body, index=INDEX):
        """Call the client's ``delete_by_query`` with ``body`` on ``index``."""
        self.es.delete_by_query(index=index, body=body)


# Module-level client instance, created on import.
es = ES()


if __name__ == '__main__':
    # Demo: one plain search, then iterate the same request via scroll.
    _body = {
        # '_source': ["dataType"],
        'size': 5,  # is 10000 the maximum?
        'query': {'range': {'id': {'gt': '616000000000000000'}}},
    }
    print(es.search(_body))
    for hit in es.scroll(_body):
        print(hit)

print 输出示例

{
	'took': 614,
	'timed_out': False,
	'_shards': {
		'total': 1,
		'successful': 1,
		'skipped': 0,
		'failed': 0
	},
	'hits': {
		'total': {
			'value': 10000,
			'relation': 'gte'
		},
		'max_score': 1.0,
		'hits': [{
			'_index': 'abc',
			'_type': '_doc',
			'_id': '9948942229923430',
			'_score': 1.0,
			'_source': {
				'title': '森林公园林地资源',
				'url': 'bbb',
			}
		}, {
			'_index': 'abc',
			'_type': '_doc',
			'_id': '9948937613253017',
			'_score': 1.0,
			'_source': {
				'title': '小型微型企业创业创新示范基地',
				'url': 'aaa',
			}
		}]
	}
}

常用查询语句

## Query for all results
match_all = {'query': {'match_all': {}}}

## Match query: all valid records (valid == 'Y'), with `_source` limited to `id`
match_valid = {'_source': ['id'], 'query': {'match': {'valid': 'Y'}}}

## Match query: all valid records, sorted in ascending order
## Sorts on `_id`, like the other queries in this file; the `_uid` meta field
## was removed in Elasticsearch 7, so sorting on it fails there.
match_valid_sort = {
    '_source': ['id'],
    'from': 0,
    'query': {'match': {'valid': 'Y'}},
    'sort': {'_id': {'order': 'asc'}},
}

## Range query: records whose id lies between two bounds, sorted by _id ascending
range_gt = {
    '_source': ['id'],
    'query': {'range': {'id': {
        'gte': '615000000000000000',  # greater than or equal to
        'lt': '616000000000000000',  # less than
    }}},
    'sort': {'_id': {'order': 'asc'}},
}

## Match + range: both clauses must hold; sorted by _id ascending
match_range = {
    '_source': ['id'],
    'query': {'bool': {'must': [
        {'match': {'valid': 'Y'}},
        {'range': {'id': {
            'gte': '615000000000000000',
            'lt': '616000000000000000',
        }}},
    ]}},
    'sort': {'_id': {'order': 'asc'}},
}

## Match + phrase: a `match` clause on `valid` plus a `match_phrase` clause on `title`
match_phrase = {
    '_source': ['title'],
    'size': 20,
    'query': {'bool': {'must': [
        {'match': {'valid': 'Y'}},
        {'match_phrase': {'title': '复工'}},
    ]}},
    'sort': {'_id': {'order': 'asc'}},
}


## Match + regexp: a `match` clause on `valid` plus a `regexp` clause on `title`
match_regexp = {
    '_source': ['title'],
    # 'size': 20,
    'query': {'bool': {'must': [
        {'match': {'valid': 'Y'}},
        {'regexp': {'title': '复工复产|扶持'}},
    ]}},
    'sort': {'_id': {'order': 'asc'}},
}


if __name__ == '__main__':
    # Demo: run the regexp query through the `es` object exported by the
    # `elastic_search` module and print what it returns.
    from elastic_search import es
    print(es.search(match_regexp))

Python读写ElasticSearch【自用】

安装和基本调用

常用查询语句

猜你喜欢