python使用正则搜索字符串或文件中的浮点数代码实例

464次阅读  |  发布于5年以前

平时经常用 Python 和 numpy 处理数据,因此写了几个小函数,可以方便地从字符串或文本文件中读取数值数据,并把数组写回文件:


    # -*- coding: utf-8 -*-
    #----------------------------------------------------------------------
    # FileName:gettxtdata.py
    #功能:读取字符串和文件中的数值数据(浮点数)
    #主要提供类似matlab中的dlmread和dlmwrite函数
    #同时提供loadtxtdata和savetxtdata函数
    #Date: 2013-1-10
    #Author:吴徐平
    #----------------------------------------------------------------------
    import numpy
    #----------------------------------------------------------------------
    def StringToDoubleArray(String):
      """
      Extract every floating-point number found in a string as one flat array.

      Processing steps:
        1. Text from a '#' up to the end of its line is discarded as a comment.
        2. Every character that cannot be part of a float literal (anything
           other than digits, '.', 'e'/'E', '+', '-') is replaced by a space;
           this includes newlines, so the result is a single row of tokens.
        3. Runs of '.', 'e', '+', '-' left dangling at the end of a token are
           removed.
        4. The remaining whitespace-separated tokens are parsed with
           numpy.genfromtxt.

      Args:
        String: text to scan; may span several lines.

      Returns:
        A 1-D numpy.ndarray of float64. It is empty when no numeric token
        remains. A token that survives the clean-up but is still not a valid
        float (for example '1.2.3' or 'e5') is left to numpy.genfromtxt, which
        reports it as nan.
      """
      import re
      # The StringIO module only exists on Python 2; Python 3 moved it to io.
      try:
        from StringIO import StringIO
      except ImportError:
        from io import StringIO

      DataArray=numpy.empty([0],numpy.float64)
      if not String.strip():
        return DataArray

      # re.MULTILINE makes '$' match at every line end, so comments on every
      # line are removed, not only a comment on the last line.
      doublestring=re.sub(r'#.*$', " ", String, flags=re.MULTILINE)
      # Blank out everything that cannot belong to a float literal.
      doublestring=re.sub(r'[^0-9.e+-]', " ", doublestring, flags=re.IGNORECASE)
      # Drop dangling '.', 'e', '+', '-' at the end of a token. The '+'
      # quantifier removes a run of any length in one pass (e.g. '----').
      doublestring=re.sub(r'[.e+-]+(?=\s|$)', " ", doublestring, flags=re.IGNORECASE)
      doublestring=doublestring.strip()

      if doublestring:
        # genfromtxt returns a 0-d array for a single value; atleast_1d keeps
        # the documented "always 1-D" contract.
        DataArray=numpy.atleast_1d(numpy.genfromtxt(StringIO(doublestring)))

      return DataArray

    #----------------------------------------------------------------------
    def GetDoubleListFromString(String):
      """
      Parse a multi-line string into one float array per line.

      The string is split on '\\n'. For each line, text from a '#' to the end
      of the line is discarded as a comment, every character that cannot be
      part of a float literal (anything other than digits, '.', 'e'/'E', '+',
      '-') is replaced by a space, and runs of '.', 'e', '+', '-' left dangling
      at the end of a token are removed. Lines with nothing left are skipped.

      Args:
        String: text to scan, lines separated by '\\n'.

      Returns:
        A list of 1-D numpy.ndarray objects (float64), one per line that
        contains at least one numeric token. Rows may have different lengths.
        A token that survives the clean-up but is still not a valid float is
        left to numpy.genfromtxt, which reports it as nan.
      """
      import re
      # The StringIO module only exists on Python 2; Python 3 moved it to io.
      try:
        from StringIO import StringIO
      except ImportError:
        from io import StringIO

      # Compile once: the same three substitutions run on every line.
      CommentPattern=re.compile(r'#.*$')
      NonNumericPattern=re.compile(r'[^0-9.e+-]', re.IGNORECASE)
      # '+' removes a dangling run of any length in a single pass.
      DanglingPattern=re.compile(r'[.e+-]+(?=\s|$)', re.IGNORECASE)

      DoubleList=[]
      for Line in String.split('\n'):
        doublestring=CommentPattern.sub(" ", Line)
        doublestring=NonNumericPattern.sub(" ", doublestring)
        doublestring=DanglingPattern.sub(" ", doublestring).strip()
        if doublestring:
          # genfromtxt yields a 0-d array for a single number; atleast_1d
          # keeps every row indexable and gives it a meaningful length.
          DoubleList.append(numpy.atleast_1d(numpy.genfromtxt(StringIO(doublestring))))
      return DoubleList

    #----------------------------------------------------------------------
    def GetDoubleListFromFile(FileName):
      """
      Read a text file and return the numbers found on each of its lines.

      The whole file is read into memory and handed to
      GetDoubleListFromString, which does the actual parsing.

      Args:
        FileName: path of the text file to read (opened in mode 'r').

      Returns:
        Whatever GetDoubleListFromString returns for the file content: a list
        with one numpy array per line that contains numbers. Rows may hold
        different numbers of values.
      """
      # 'with' closes the handle even if read() raises; the original left it
      # open in that case and also shadowed the builtin name 'file'.
      with open(FileName, 'r') as file_handle:
        read_file=file_handle.read()
      return GetDoubleListFromString(read_file)

    def dlmread(FileName,dtype=numpy.float64):
      """
      Load numeric data from a text file into a 2-D array.

      Parsing is delegated to GetDoubleListFromFile, so the file may mix
      arbitrary separators and text with the numbers. This is the most
      tolerant reader in this module.

      Rows may contain different numbers of values. The result is truncated
      to the shortest row: values beyond that column count are silently
      dropped.

      Args:
        FileName: path of the text file to read.
        dtype: element type of the returned array (default numpy.float64).

      Returns:
        numpy.ndarray of shape (rows, shortest_row_length). A file without any
        numeric row yields an empty array of shape (0, 0) instead of raising.
      """
      # atleast_1d guards against 0-d rows (a line holding a single number),
      # which can be neither sliced nor indexed.
      DoubleList=[numpy.atleast_1d(dL) for dL in GetDoubleListFromFile(FileName)]
      if not DoubleList:
        # min() of an empty sequence would raise ValueError.
        return numpy.empty([0,0],dtype=dtype)

      # Smallest column count over all rows; every row is cut to this width.
      MinColumnSize=min(dL.size for dL in DoubleList)

      DoubleArray=numpy.empty([len(DoubleList),MinColumnSize],dtype=dtype)
      for i,dL in enumerate(DoubleList):
        DoubleArray[i,:]=dL[:MinColumnSize]

      return DoubleArray
    #----------------------------------------------------------------------

    def loadtxtdata(filename,delimiter=""):
      """
      Load numeric data from a regularly formatted text file.

      Every character of `delimiter`, plus ',' and ';', is replaced by a space
      and the result is parsed with numpy.genfromtxt using its default
      settings. No other clean-up is done, so the file must contain only
      numbers and separators, with the same number of columns on every row.

      Args:
        filename: path of the text file to read.
        delimiter: extra separator characters. Each character is treated as
          an individual separator, not the string as a whole.

      Returns:
        The numpy.ndarray produced by numpy.genfromtxt (2-D for a file with
        several rows and several columns).
      """
      # The StringIO module only exists on Python 2; Python 3 moved it to io.
      try:
        from StringIO import StringIO
      except ImportError:
        from io import StringIO

      # 'with' closes the handle even if read() raises.
      with open(filename,'r') as file_handle:
        LinesALL=file_handle.read()

      # Normalise every separator character to a plain space so genfromtxt's
      # default whitespace splitting applies.
      for RChar in delimiter+",;":
        LinesALL=LinesALL.replace(RChar," ")

      return numpy.genfromtxt(StringIO(LinesALL))

    #----------------------------------------------------------------------  
    def savetxtdata(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
      """
      Write the array X to a text file by delegating to numpy.savetxt.

      Args:
        filename: destination passed straight to numpy.savetxt.
        X: data to write.
        fmt: format string applied to each value (default '%.8e').
        delimiter: string placed between columns (default one space).
        newline: string placed after each row (default '\\n').

      Returns:
        True once numpy.savetxt has returned; errors propagate as exceptions.
      """
      # Collect the formatting options first, then forward them in one call.
      options=dict(fmt=fmt, delimiter=delimiter, newline=newline)
      numpy.savetxt(filename, X, **options)
      return True

    #----------------------------------------------------------------------
    def dlmwrite(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
      """
      Write the array X to a delimited text file (counterpart of dlmread).

      This is a thin wrapper around numpy.savetxt.

      Args:
        filename: destination passed straight to numpy.savetxt.
        X: data to write.
        fmt: format string applied to each value (default '%.8e').
        delimiter: string placed between columns (default one space).
        newline: string placed after each row (default '\\n').

      Returns:
        True once numpy.savetxt has returned; errors propagate as exceptions.
      """
      # Arguments are forwarded in numpy.savetxt's own positional order:
      # fname, X, fmt, delimiter, newline.
      numpy.savetxt(filename, X, fmt, delimiter, newline)
      return True

    #----------------------------------------------------------------------
    #测试程序 
    #----------------------------------------------------------------------
    if __name__ == '__main__':
      # Round-trip demo: write a random matrix, then read it back with each
      # of the readers defined above.
      import os
      import tempfile

      # 3x4 matrix of normally distributed random numbers.
      data=numpy.random.randn(3,4)
      # Use the system temp directory: the original hard-coded 'D:/x.txt'
      # only works on Windows machines that have a D: drive.
      filename=os.path.join(tempfile.gettempdir(),'x.txt')
      # Write the matrix to the file.
      dlmwrite(filename,data)
      # Read it back as a list of per-line arrays.
      x=GetDoubleListFromFile(filename)
      print(x)
      # Read it back as a single 2-D array.
      print(dlmread(filename))
      # Extract the numbers embedded in an arbitrary string.
      y=StringToDoubleArray('79l890joj')
      print(y)
      # Read it back with the fast, strict loader.
      z=loadtxtdata(filename)
      print(z)

我只在 Python 2.7 中测试过。Python 3.x 中已没有独立的 `StringIO` 模块(对应的类在 `io` 模块中,即 `from io import StringIO`),如果要在 Python 3.x 中使用,请自行测试。

Copyright© 2013-2020

All Rights Reserved 京ICP备2023019179号-8