python批量处理csv文件

Posted on 2016-04-12 In Tool

# -*- coding:utf-8 -*-
#encoding:utf-8

为了使处理文件时用 utf-8 编码，做如上编码声明

import hashlib
def md5(text):
    """Return the hexadecimal MD5 digest of *text*.

    Args:
        text: Data to hash. Byte strings (and other buffer objects) are
            hashed as-is; unicode text is encoded as UTF-8 first, because
            hashlib only accepts bytes.

    Returns:
        The 32-character lowercase hexadecimal digest string.
    """
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # this check identifies text input on both interpreters.
    if isinstance(text, type(u'')):
        text = text.encode('utf-8')
    return hashlib.md5(text).hexdigest()

利用 hashlib 计算字符串的 MD5 哈希值，并以十六进制字符串的形式返回

def import_points_csv(filename,outfilename):
    """Copy a CSV file, filling columns 21 and 22 with MD5-derived ids.

    For every input line, the station id is the MD5 of the stripped name
    (column 1) + stripped state (column 0) + a fixed type label, and is
    stored in column 21. The record id is the MD5 of the date (column 20) +
    the station id, and is stored in column 22. Each processed row is
    written to the output file as one comma-separated line.

    Args:
        filename: Path of the CSV file to read.
        outfilename: Path of the CSV file to write (truncated if it exists).

    Raises:
        IndexError: If a line has fewer than 21 comma-separated fields.
    """
    print(filename)
    thistype = '国控'
    # Both files are managed by ``with`` so they are closed even when a
    # malformed row raises part-way through.
    with open(filename, 'r') as f, open(outfilename, 'w+') as g:
        for line in f:
            # Drop the line terminator so the last field carries no newline.
            line = line.replace('\n', '').replace('\r', '')
            # NOTE: a plain split does not understand quoted fields that
            # themselves contain commas.
            row = line.split(',')
            state = row[0].strip()
            date = row[20]
            print(thistype)
            name = row[1].strip()
            # Make sure the two output columns exist before assigning to
            # them; rows with only 21 or 22 fields would otherwise fail.
            if len(row) < 23:
                row.extend([''] * (23 - len(row)))
            stationid = md5(name + state + thistype)
            row[21] = stationid
            row[22] = md5(date + stationid)
            print(row)
            # Write one comma-separated record per line.
            g.write(','.join(row) + '\n')

# Python 2: the literal is a UTF-8 byte string (per the source encoding
# declaration); re-encode it to GBK so the Chinese file name can be opened.
import_points_csv('国控2.csv'.decode('utf-8').encode('gbk'),'contry_2.csv')

由于文件名是中文，而源文件以 UTF-8 编码保存，因此在 Python 2 中需要先将文件名按 UTF-8 解码、再编码为 GBK（例如在中文 Windows 环境下），程序才能找到该文件；否则运行时文件名会显示为乱码且无法读取