文章目录
hdfs包提交
链接: https://pan.baidu.com/s/12EsTYabf9ivxV0nLQ4yxYQ 提取码: aids 复制这段内容后打开百度网盘手机App,操作更方便哦
下载我分享的hdfs.zip即可,然后按如下命令向集群提交任务:
# Submit the job to YARN in client mode, shipping the hdfs package to executors.
# PYSPARK_PYTHON pins the Python interpreter used on the driver/executors.
# --py-files distributes hdfs.zip so `import hdfs` works cluster-wide.
# stdout is appended to XXX.log, stderr to XXX.err (paths are placeholders).
PYSPARK_PYTHON=/data/anaconda3/bin/python3 \
/opt/spark/bin/spark-submit \
--master yarn \
--deploy-mode client \
--py-files /xxx/hdfs.zip \
XXX.py >>./log/XXX.log 2>>./log/XXX.err
向HDFS读写json
import json

# Connect to the HDFS NameNode WebHDFS endpoint (host/port are placeholders).
# `root` scopes all relative paths; `session=False` disables connection reuse.
hdfsConn = Client('http://172.00.100.00:9870', root='/xxx/xxx', timeout=1000, session=False)
di = {
    'dw': 0, 'd2w': 1, 'dw1': 2}
# Serialize with json.dumps, not str(): str() produces single-quoted text
# that is NOT valid JSON and previously forced an unsafe eval() on read.
payload = json.dumps(di, ensure_ascii=False)
print(payload)
# 写入json — write the JSON document, replacing any existing file.
hdfsConn.write('/user/renwanxin/ad_ctr_test/data/user_modelMake222.json', data=payload, overwrite=True, append=False)
# 读json — read the whole file back and parse it.
with hdfsConn.read('/user/renwanxin/ad_ctr_test/data/user_modelMake222.json', encoding='utf-8') as f:
    content = f.read()
# json.loads restores the dict safely; the old `dict(di)` call on a string
# raised ValueError, and eval() on remote file content is a security risk.
di = json.loads(content)
print(di)
运行输出(写入前打印与读回解析后打印,两者一致):
{'dw': 0, 'd2w': 1, 'dw1': 2}
{'dw': 0, 'd2w': 1, 'dw1': 2}