【python】【待整理】

时间原因待整理,先简记

csv读写

csv格式变换

  • 转置
  • 循环放大
  • 按照某标准分组

数值处理

  • 平均值填补
  • 0值处理
import pandas as pd

# Load the raw table; 'ANSI' selects the Windows system code page codec.
csv_data = pd.read_csv('C:/Users/hyq68/Desktop/DATAA.csv', encoding = 'ANSI')

# Convert the UN_Code column to text and left-pad with zeros to at least
# three characters (e.g. 4 -> '004', 36 -> '036'); longer codes are unchanged.
# This step was previously wrapped in a string literal, which left UN_Code
# undefined and made the to_csv call below fail with NameError.
UN_Code = pd.DataFrame([str(code).zfill(3) for code in csv_data.UN_Code])

# NOTE: this writes back to the same path that was read above, so the input
# file is overwritten with the single padded-code column.
UN_Code.to_csv('C:/Users/hyq68/Desktop/DATAA.csv',encoding='utf-8')

###数据清洗模板
import pandas as pd
import numpy as np

# Read the wide-format table: one row per country, one column per year.
data = pd.read_csv('C:/Users/hyq68/Desktop/population.csv',encoding = 'ANSI')

# Build the three long-format columns directly, country by country and,
# within each country, year by year (2000 through 2015 inclusive).
years = range(2000, 2016)
row_labels = range(len(data))

Country_Code = [data['Country Code'][row] for row in row_labels for _ in years]
Year = [str(year) for _ in row_labels for year in years]
Population = [data[str(year)][row] for row in row_labels for year in years]

# Stack the columns and flip them so that each row is (code, year, value).
Data = np.transpose([Country_Code, Year, Population])

# Write the long-format table.
population = pd.DataFrame(columns = ['Country_Code','Year','Population'], data = Data)
population.to_csv('C:/Users/hyq68/Desktop/population_new.csv',encoding = 'utf-8')

#-----------------------缺失值处理:平均值---------------------------#
import numpy as np
import pandas as pd

data = pd.read_csv('C:/Users/hyq68/Desktop/data_raw.csv', encoding = 'ANSI')
World_Average = pd.read_csv('C:/Users/hyq68/Desktop/World_Average.csv', encoding = 'utf-8')
WA_ME = World_Average['Military_Expenditure (% of GDP)']

# Group the column by country: the rows are consumed in consecutive runs of
# 16 values, one run per country. Trailing rows that do not fill a complete
# run are ignored.
YEARS_PER_COUNTRY = 16
Military_Expenditure = data['Military_Expenditure (% of GDP)']
country_Military_Expenditure = [
    [Military_Expenditure[c * YEARS_PER_COUNTRY + y] for y in range(YEARS_PER_COUNTRY)]
    for c in range(len(data) // YEARS_PER_COUNTRY)
]

# Per-country mean over the values that are not the '..' missing marker.
# A country with no usable values gets an average of 0.
avg = []
for series in country_Military_Expenditure:
    known = [float(value) for value in series if value != '..']
    avg.append(sum(known) / len(known) if known else 0)

# Replace every '..' with that country's own average. Positions come from
# enumerate rather than list.index(), which searched by value and was both
# quadratic and fragile when several entries compared equal.
for c, series in enumerate(country_Military_Expenditure):
    for y, value in enumerate(series):
        if value == "..":
            series[y] = avg[c]

# Replace remaining zeros with the world average at the same position within
# the run. Using the actual position (not the first zero found) keeps the
# lookup correct even when a world-average value is itself 0.
# NOTE: string entries such as '0' do not compare equal to 0 and are left
# as they are, exactly as before.
for series in country_Military_Expenditure:
    for y, value in enumerate(series):
        if value == 0:
            series[y] = WA_ME[y]

# Flatten back to a single vector, converting every entry to float.
result = [float(value) for series in country_Military_Expenditure for value in series]

# Write the cleaned column into the mature data set (file is updated in place).
data_mature = pd.read_csv('C:/Users/hyq68/Desktop/data_mature.csv', encoding = 'utf-8')
data_mature['Military Expenditure (% of GDP)'] = result
data_mature.to_csv('C:/Users/hyq68/Desktop/data_mature.csv',encoding = 'utf-8')

猜你喜欢

转载自blog.csdn.net/why_not_study/article/details/103535958