import json
import hashlib
from pprint import pprint
from elasticsearch import Elasticsearch
# Connect to the Elasticsearch node and make sure the target index exists.
# NOTE(review): pre-7.x client style (host/port kwargs, ignore=400) — consistent
# with the doc_type usage elsewhere in this file; verify against installed client.
es = Elasticsearch(hosts="10.109.24.153", port=9200)
# ignore=400 swallows "resource_already_exists" so reruns are idempotent.
es.indices.create(index="newlegalindex", ignore=400)
# with open("/Users/rouckiechen/PycharmProjects/falv/falv_zhongguorenda_spider/id_num.txt", encoding="utf-8") as f:
# temp_str = f.readlines()
# for i in temp_str:
# item = json.loads(i, strict=False)
# href = item["sourceURL"]
# uuid = hashlib.md5(href.encode(encoding='UTF-8')).hexdigest()
# item["uuid"] = uuid
# res = es.index(index="newlegalindex", doc_type="mytype", body=item, id=uuid)
# print(i.replace("\n", ""))
# _id = i.replace("\n", "")
# print(href)
# print(uuid)
# if item["fgtitle"] == "" or item["fgtext"] == "":
# print(item["fgtitle"], item["sourceURL"])
# es.update(index="newlegalindex", doc_type="mytype", id=_id, body={"doc": {"fbdept": "全国人大常委会"}})
# Bool query: hard filter on fbdept == "全国人大常委会"; the should-match on
# sourcenum "1003" only affects scoring here (no minimum_should_match is set,
# so with a must clause present it does not narrow the result set).
_must_clause = {"term": {"fbdept": "全国人大常委会"}}
_should_clauses = [{"match": {"sourcenum": "1003"}}]
query_json = {"bool": {"must": _must_clause, "should": _should_clauses}}
# Fetch the matching documents, then patch each one in place.
source_arr = ["_id", "fbdept", "sourceURL"]
search_body = {
    "query": query_json,
    "_source": source_arr,
    "from": 0,
    # Single page of up to 3000 hits — assumes the match set is smaller than
    # that (TODO confirm; use scroll/search_after otherwise).
    "size": 3000,
}
res = es.search(index="newlegalindex", body=search_body)  # 获取所有数据
id_list = res["hits"]["hits"]
for hit in id_list:
    doc_id = hit["_id"]
    # Partial update: rewrite routing/status fields on each matched doc.
    es.update(
        index="newlegalindex",
        doc_type="mytype",
        id=doc_id,
        body={"doc": {"sourcenum": "2001", "status": "2", "fgarea": ""}},
    )
    print(doc_id)
# ES 上传、更新数据 (ES upload / update data)
# --- blog-page residue below, kept as comments so the file parses ---
# 猜你喜欢
# 转载自 blog.csdn.net/Rouckie/article/details/88850901
# 今日推荐
# 周排行