import pandas as pd
import numpy as np
import datetime as dt
import glob
import os
import re
from itertools import islice
from matplotlib import pyplot as plt
#
# Read lines from the open input file
def get_wenjian(file, n):
    """Read up to ``n`` lines from ``file`` and return them stripped.

    Args:
        file: An iterator of text lines (e.g. an open file object). It is
            advanced by the lines consumed, so successive calls continue
            where the previous one stopped.
        n: Maximum number of lines to take.

    Returns:
        A list of at most ``n`` strings with surrounding whitespace removed.
        The list is shorter than ``n`` (possibly empty) when the iterator
        runs out.
    """
    return [raw.strip() for raw in islice(file, n)]
def chuli_head(line):
    """Parse the header lines of one ensemble into a dict.

    Args:
        line: Sequence of header text lines; indices 0-5 are read.

    Returns:
        dict with the keys:
            'ens'   - int taken from token 7 of line 0.
            'date'  - datetime built from tokens 0-5 of line 0; the year
                      token is offset by 2000.
            'h'     - mean of tokens 8-11 of line 1, each rounded to 3
                      decimals first (mean depth).
            'b'     - token 0 of line 2 rounded to 2 decimals.
                      NOTE(review): the caller stores this as 'dist';
                      confirm the physical meaning.
            'lat', 'lon' - tokens 0 and 1 of line 3 as floats, or the string
                      'NA' when the value equals 30000.
            'top_q' - token 1 of line 4, rounded to 3 decimals (top discharge).
            'b_q'   - token 2 of line 4, rounded to 3 decimals (bottom
                      discharge).
            'nbins' - int from token 0 of line 5 (number of depth layers).

    Raises:
        IndexError: if a required line or token is missing.
        ValueError: if a token cannot be converted to a number.
    """
    # Tokenise every header line once instead of re-splitting per field.
    fields = [text.split() for text in line]
    stamp = fields[0]

    res = {}
    res['ens'] = int(stamp[7])
    yy, month, day, hour, minute, second = (int(tok) for tok in stamp[:6])
    res['date'] = dt.datetime(year=2000 + yy, month=month, day=day,
                              hour=hour, minute=minute, second=second)

    # Mean of the four depth readings; indexing (not slicing) keeps the
    # IndexError on a short line.
    depths = np.array([round(float(fields[1][i]), 3) for i in range(8, 12)])
    res['h'] = np.mean(depths)

    res['b'] = round(float(fields[2][0]), 2)

    # 30000. marks a missing coordinate and is reported as 'NA'.
    lat = float(fields[3][0])
    res['lat'] = lat if lat != 30000. else 'NA'
    lon = float(fields[3][1])
    res['lon'] = lon if lon != 30000. else 'NA'

    res['top_q'] = round(float(fields[4][1]), 3)  # top-layer discharge
    res['b_q'] = round(float(fields[4][2]), 3)  # bottom discharge
    res['nbins'] = int(fields[5][0])  # number of layers
    return res
def chuli_shuju(ens, ensnum, ensdist, ensh, enslat, enslon, topq, botq, date):
    """Convert the data lines of one ensemble into a long-format DataFrame.

    Args:
        ens: List of text lines, each holding 13 whitespace-separated
            numeric tokens (hb, v, d, v1-v4, bs1-bs4, percgood, q).
        ensnum: Ensemble (profile) number stored in column 'ens'.
        ensdist: Value stored in column 'dist'.
        ensh: Water depth from the header, stored in column 'h'.
        enslat: Latitude from the header, stored in column 'lat'.
        enslon: Longitude from the header, stored in column 'lon'.
        topq: Unused; kept for interface compatibility.
        botq: Unused; kept for interface compatibility.
        date: Timestamp stored in column 'date'.

    Returns:
        DataFrame with columns ens, dist, lon, lat, hb, h, date, variable,
        value, where 'variable' is one of 'v', 'd', 'bs' and 'value' holds
        the corresponding number.
    """
    columns = ['hb', 'v', 'd', 'v1', 'v2', 'v3', 'v4',
               'bs1', 'bs2', 'bs3', 'bs4', 'percgood', 'q']
    # Explicit conversion: non-numeric tokens raise here instead of
    # surfacing later.
    df = pd.DataFrame([row.split() for row in ens], columns=columns).astype('float')

    # Drop the columns that are not needed downstream.
    df = df.drop(['v1', 'v2', 'v3', 'v4', 'percgood', 'q'], axis=1)

    # Turn the sentinel values into NaN and discard rows containing any.
    # NOTE(review): this applies to every remaining column, so a genuine
    # value of 255 in 'd' or 'hb' is dropped too - confirm that is intended.
    df = df.replace([-32768, 2147483647, 255], np.nan).dropna()

    # Replace the four per-beam echo intensities with their mean.
    beams = ['bs1', 'bs2', 'bs3', 'bs4']
    df['bs'] = df[beams].mean(axis=1)
    df = df.drop(beams, axis=1)

    df['ens'] = ensnum  # profile number
    df['date'] = date
    df['dist'] = ensdist
    df['h'] = ensh  # water depth from the header
    df['lat'] = enslat
    df['lon'] = enslon
    df['v'] = df['v'] * 0.01

    # Round everything except the coordinates to 3 decimals.
    to_round = [col for col in df.columns
                if 'lat' not in col and 'lon' not in col]
    df[to_round] = df[to_round].round(3)

    # Stack v, d and bs into 'variable'/'value'; id_vars stay as columns.
    return pd.melt(df, id_vars=['ens', 'dist', 'lon', 'lat', 'hb', 'h', 'date'])
def file_proc(path_in, path_out):  # main routine
    """Convert one ASCII export file into a long-format CSV.

    The first three lines of the input are skipped. The rest is read as
    repeated records: six header lines (parsed by ``chuli_head``) followed
    by ``nbins`` data lines (parsed by ``chuli_shuju``). All records are
    concatenated and written to ``path_out`` with NaN encoded as -32768.

    Args:
        path_in: Path of the text file to read.
        path_out: Path of the CSV file to write.

    Returns:
        None.
    """
    frames = []
    with open(path_in, "r") as f:
        # Discard the three leading lines that carry no ensemble data.
        for _ in range(3):
            f.readline()

        head = get_wenjian(f, 6)
        # An empty or all-blank header (e.g. trailing newlines) means the
        # data has ended; `while head` alone would treat [''] as a record.
        while any(head):
            opr = chuli_head(head)
            chunk = get_wenjian(f, opr['nbins'])  # the nbins data lines
            frames.append(
                chuli_shuju(chunk, opr['ens'], opr['b'], opr['h'], opr['lat'],
                            opr['lon'], opr['top_q'], opr['b_q'], opr['date'])
            )
            head = get_wenjian(f, 6)  # header of the next record, if any

    # One concat instead of DataFrame.append per record: append was removed
    # in pandas 2.0 and copies the whole frame on every call.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    df.to_csv(path_out, index=False, na_rep='-32768')
    print('Finished ' + path_in)
    return
def csv_name_for(path_in):
    """Return ``path_in`` with its file extension replaced by ``.csv``.

    Only the extension is changed; a "txt" elsewhere in the name is kept.
    """
    return os.path.splitext(path_in)[0] + '.csv'


if __name__ == "__main__":
    # Convert every *_ASC.txt file in the current working directory.
    ff = glob.glob("*_ASC.txt")
    if not ff:
        print('No files to convert.')
        raise SystemExit
    print("Detected ASCII *_ASC.txt files:\n", "\n".join(ff))
    for f in ff:
        file_proc(f, csv_name_for(f))