程序的跑批日志累积多了以后,文件有时会变得特别大。如果我们只需要获取文件的最后几行,就没有必要从头逐行遍历整个文件,可以直接从文件末尾向前读取。
自定义方法
import os
def file_get_last_lines(file_path, num):
    """
    Read the last few lines of a (possibly very large) file.

    The file is read backwards from its end in fixed-size blocks, so only
    the tail of the file is loaded instead of iterating over every line.

    :param file_path: path of the file to read
    :param num: number of trailing lines wanted (anything ``int()`` accepts)
    :return: list of at most ``num`` lines as ``bytes``, in file order.
        Lines are split on the LF byte only: the LF itself is removed, but
        a CR in front of it (Windows line endings) is kept. A final line
        without a trailing LF still counts as a line. Returns an empty
        list when ``num <= 0`` or the file is empty.
    :raises OSError: if the file cannot be opened, or if a block read
        returns fewer bytes than requested
    """
    num = int(num)
    blk_size_max = 4096
    blocks = []  # raw blocks, collected from the end of the file backwards
    newline_total = 0
    with open(file_path, 'rb') as fp:
        # Checked after open() so that a bad path still raises for any num.
        if num <= 0:
            return []
        fp.seek(0, os.SEEK_END)
        cur_pos = fp.tell()
        # num + 1 newlines guarantee num complete lines even when the file
        # ends with a newline: one terminates each wanted line, and one more
        # marks where the first wanted line begins.
        while cur_pos > 0 and newline_total <= num:
            blk_size = min(blk_size_max, cur_pos)
            cur_pos -= blk_size
            fp.seek(cur_pos, os.SEEK_SET)
            blk_data = fp.read(blk_size)
            if len(blk_data) != blk_size:
                # Explicit raise instead of assert, which vanishes under -O.
                raise OSError(
                    'short read from %r: expected %d bytes, got %d'
                    % (file_path, blk_size, len(blk_data))
                )
            blocks.append(blk_data)
            newline_total += blk_data.count(b'\n')
    # Join before splitting so that a line spanning a block boundary (or
    # longer than a whole block) is never cut into separate entries.
    lines = b''.join(reversed(blocks)).split(b'\n')
    # A file ending with a newline yields one empty trailing element that
    # is not a real line.
    if not lines[-1]:
        del lines[-1]
    return lines[-num:]
使用示例
# Usage example: fetch the last five lines of a CSV file.
file_path = 'D:/python/PyCharmProjects/jianbao/Trans/20201130/SH603131.csv'
lines = file_get_last_lines(file_path, 5)

# Show the raw result first: a list of bytes objects.
print(lines)
print('--------------------------------')

# The returned lines are bytes, so decode each one to get a text string.
for line in lines:
    print(str(line, 'utf-8'))
输出:
[b'140306,810,27.13,8123,3905856,S,6439959,5587158,5,1,14:05:09.589\r', b'140306,810,27.13,8677,3905857,S,6439959,5587159,5,1,14:05:09.590\r', b'140307,60,27.13,300,3905925,S,6440135,5587159,5,1,14:05:10.034\r', b'140402,470,27.13,500,3916594,S,6458210,5587159,5,1,14:06:05.042\r', b'140434,340,27.13,300,3922790,S,6468534,5587159,5,1,14:06:37.083\r']
--------------------------------
140306,810,27.13,8123,3905856,S,6439959,5587158,5,1,14:05:09.589
140306,810,27.13,8677,3905857,S,6439959,5587159,5,1,14:05:09.590
140307,60,27.13,300,3905925,S,6440135,5587159,5,1,14:05:10.034
140402,470,27.13,500,3916594,S,6458210,5587159,5,1,14:06:05.042
140434,340,27.13,300,3922790,S,6468534,5587159,5,1,14:06:37.083
参考: