: :办公软件 2019-10-08 17:52:13
3074337170,40960,41215,2111136891
转换后示例如下:
after_ip,begain_port,end_port,before_ip
['183.62.169.146'],40960,41215,['125.213.100.123']
同时转换后的 [' 及 '] 符号没有想到办法去掉。
期望输出(可以使用UE进行二次处理,去除符号):
183.62.169.146,40960,41215,125.213.100.123
import sys
import csv
import string
import pandas as pd
import numpy as np
def decode_after_ip(ips=None):
    """Decode integer-encoded ``after_ip`` values into dotted-quad IPv4 strings.

    Each value is split into four base-256 digits, most significant first,
    and the digits are joined with dots (e.g. 3074337170 -> '183.62.169.146').

    Args:
        ips: Iterable of integer-encoded addresses. When omitted, the
            module-level ``after_ip`` column set by the chunk loop is used,
            which keeps existing zero-argument calls working.

    Returns:
        A list with one plain string per input value. The strings are NOT
        wrapped in single-element lists, so they serialize to CSV without
        the ``['...']`` decoration.
    """
    if ips is None:
        ips = after_ip  # column published by the chunk-processing loop
    decoded = []
    for ip in ips:
        octets = []
        remainder = ip
        # Peel off one base-256 digit per step: 256**3, 256**2, 256**1, 256**0.
        for power in reversed(range(4)):
            quotient, remainder = divmod(remainder, 256 ** power)
            octets.append(str(quotient))
        decoded.append('.'.join(octets))
    return decoded
def decode_before_ip(ips=None):
    """Decode integer-encoded ``before_ip`` values into dotted-quad IPv4 strings.

    Each value is split into four base-256 digits, most significant first,
    and the digits are joined with dots (e.g. 2111136891 -> '125.213.100.123').

    Args:
        ips: Iterable of integer-encoded addresses. When omitted, the
            module-level ``before_ip`` column set by the chunk loop is used,
            which keeps existing zero-argument calls working.

    Returns:
        A list with one plain string per input value. The strings are NOT
        wrapped in single-element lists, so they serialize to CSV without
        the ``['...']`` decoration.
    """
    if ips is None:
        ips = before_ip  # column published by the chunk-processing loop
    decoded = []
    for ip in ips:
        octets = []
        remainder = ip
        # Peel off one base-256 digit per step: 256**3, 256**2, 256**1, 256**0.
        for power in reversed(range(4)):
            quotient, remainder = divmod(remainder, 256 ** power)
            octets.append(str(quotient))
        decoded.append('.'.join(octets))
    return decoded
def decode_begin_port(ports=None):
    """Return the start ports as a list of strings.

    Args:
        ports: Iterable of port values. When omitted, the module-level
            ``begain_port`` column set by the chunk loop is used, which
            keeps existing zero-argument calls working.

    Returns:
        A list containing ``str(port)`` for every input value, in order.
    """
    if ports is None:
        ports = begain_port  # spelling matches the CSV column name
    return [str(port) for port in ports]
def decode_end_port(ports=None):
    """Return the end ports as a list of strings.

    Args:
        ports: Iterable of port values. When omitted, the module-level
            ``end_port`` column set by the chunk loop is used, which keeps
            existing zero-argument calls working.

    Returns:
        A list containing ``str(port)`` for every input value, in order.
    """
    if ports is None:
        ports = end_port  # column published by the chunk-processing loop
    return [str(port) for port in ports]
# Debug configuration: nrows caps the total number of rows read and chunksize
# sets how many rows each loop iteration receives.
csv_reader = pd.read_csv(
    'explongip.csv',
    usecols=['after_ip', 'begain_port', 'end_port', 'before_ip'],
    nrows=200,
    chunksize=100,
)
# For the real file, drop nrows and read 1,000,000 rows per chunk instead:
# csv_reader = pd.read_csv('explongip.csv', usecols=['after_ip', 'begain_port', 'end_port', 'before_ip'], chunksize=1000000)
csv_file = pd.DataFrame()
count = 0  # number of chunks processed so far
for chunk in csv_reader:  # read the input chunk by chunk
    # DataFrame.append was removed in pandas 2.0. Because csv_file is emptied
    # after every chunk, appending to it was equivalent to taking the chunk
    # with a fresh 0..n-1 index.
    csv_file = chunk.reset_index(drop=True)
    # Publish the columns as module-level names; the decode_* helpers read them.
    after_ip = csv_file.loc[:, 'after_ip']
    begain_port = csv_file.loc[:, 'begain_port']
    end_port = csv_file.loc[:, 'end_port']
    before_ip = csv_file.loc[:, 'before_ip']
    # Assemble the decoded columns. The dict is deliberately not named `csv`,
    # which would shadow the imported csv module.
    decoded_columns = {
        'after_ip': decode_after_ip(),
        'begain_port': begain_port,
        'end_port': end_port,
        'before_ip': decode_before_ip(),
    }
    exp_file = pd.DataFrame(decoded_columns)
    # Append to the output file; emit the header row for the first chunk only
    # so it is not repeated in the middle of the data.
    exp_file.to_csv("decode_ip.csv", mode='a', index=False, header=(count == 0))
    count += 1
    print("The loop count:", count)
    # Drop the processed chunk; otherwise large inputs run into MemoryError.
    csv_file = pd.DataFrame()
    # Print it to confirm the frame really was reset to empty.
    print(csv_file)
09-26python求序列周期
08-21python库快速安装工具
08-02还不错的python的识别库
01-23NR_5G_Utils_Python应用
01-23Python读取shp文件
01-23python翻译小工具
01-23深度学习入门基于Python实现源码
10-12Python 3.10发布,带来诸多改进
10-10Python代码实现验证码识别功能
10-08Python通过缩进来识别代码
09-17讲解Python常用绘图库的绘图原理
09-11最佳的学习的Python编程开源库