python开发之基于thread线程搜索本地文件的方法 - 哈喽比特

1282次阅读 | 发布于6年以前
本文实例讲述了python开发之基于thread线程搜索本地文件的方法。分享给大家供大家参考，具体如下：
先来看看运行效果图：
利用多个线程来处理搜索问题，可以发现它的速度很快。
下面是代码部分：

    # A parallelized "find(1)" using the thread module.
    # This demonstrates the use of a work queue and worker threads.
    # It really does do more stats/sec when using multiple threads,
    # although the improvement is only about 20-30 percent.
    # (That was 8 years ago. In 2002, on Linux, I can't measure
    # a speedup. :-( )
    # I'm too lazy to write a command line parser for the full find(1)
    # command line syntax, so the predicate it searches for is wired-in,
    # see function selector() below. (It currently searches for files with
    # world write permission.)
    # Usage: parfind.py [-w nworkers] [directory] ...
    # Default nworkers is 4
    import sys
    import getopt
    import time
    import os
    from stat import *
    import _thread as thread
    # Work queue class. Usage:
    #  wq = WorkQ()
    #  wq.addwork(func, (arg1, arg2, ...)) # one or more calls
    #  wq.run(nworkers)
    # The work is done when wq.run() completes.
    # The function calls executed by the workers may add more work.
    # Don't use keyboard interrupts!
    class WorkQ:
      """Queue of (func, args) jobs executed by a pool of worker threads.

      Jobs are added with addwork() and executed by run(nworkers), which
      returns once the queue is empty and no job is still running.  A job
      may itself call addwork() to schedule more work.
      """
      # Invariants:
      # - busy and work are only modified when mutex is locked
      # - len(work) is the number of jobs ready to be taken
      # - busy is the number of jobs being done
      # - todo is locked iff there is no work and somebody is busy
      # Every decision to release todo is taken while mutex is held, so two
      # threads can never both (or neither) act on the same state change.

      def __init__(self):
        """Create an empty queue; todo starts locked because there is no work."""
        self.mutex = thread.allocate_lock()
        self.todo = thread.allocate_lock()
        self.todo.acquire()
        self.work = []
        self.busy = 0

      def addwork(self, func, args):
        """Queue the call func(*args) for execution by a worker."""
        with self.mutex:
          self.work.append((func, args))
          # Only the empty -> non-empty transition wakes a waiting worker.
          # Checking under the mutex prevents a lost or duplicated wakeup.
          if len(self.work) == 1:
            self.todo.release()

      def _getwork(self):
        """Block until a job is available and return it.

        Returns None when the queue is empty and no job is running, i.e.
        when all work is finished.
        """
        self.todo.acquire()
        with self.mutex:
          if self.busy == 0 and not self.work:
            # All done: pass the signal on so the next waiter sees it too.
            self.todo.release()
            return None
          job = self.work.pop(0)
          self.busy += 1
          if self.work:
            # More jobs remain; let another worker through.
            self.todo.release()
        return job

      def _donework(self):
        """Record that one job finished; signal completion if it was the last."""
        with self.mutex:
          self.busy -= 1
          if self.busy == 0 and not self.work:
            self.todo.release()

      def _worker(self):
        """Take and execute jobs until _getwork() reports that all is done."""
        time.sleep(0.00001)   # Let other threads run
        while True:
          job = self._getwork()
          if job is None:
            break
          func, args = job
          try:
            func(*args)
          finally:
            # Always account for the job, even if it raised; otherwise busy
            # never drops to zero and the remaining workers wait forever.
            self._donework()

      def run(self, nworkers):
        """Process all queued work using nworkers threads.

        The calling thread acts as one of the workers, so nworkers-1 extra
        threads are started.  Returns immediately if no work is queued.
        """
        if not self.work:
          return # Nothing to do
        for _ in range(nworkers - 1):
          thread.start_new_thread(self._worker, ())
        self._worker()
        # Take the completion signal back so todo is locked again,
        # as it was right after __init__.
        self.todo.acquire()
    # Main program
    def main():
      """Parse the command line, run the parallel search and report the time.

      Usage: [-w nworkers] [directory] ...
      The default is 4 workers and the current directory.  An unknown
      option or a non-integer worker count prints a usage message to
      stderr and exits with status 2.
      """
      nworkers = 4
      try:
        opts, args = getopt.getopt(sys.argv[1:], 'w:')
        for opt, arg in opts:
          if opt == '-w':
            nworkers = int(arg)
      except (getopt.GetoptError, ValueError) as err:
        sys.stderr.write('%s\n' % err)
        sys.stderr.write(
            'Usage: %s [-w nworkers] [directory] ...\n' % sys.argv[0])
        sys.exit(2)
      if not args:
        args = [os.curdir]
      wq = WorkQ()
      # Seed the queue with one job per starting directory; find() adds
      # further jobs for the subdirectories it encounters.
      for top in args:
        wq.addwork(find, (top, selector, wq))
      t1 = time.time()
      wq.run(nworkers)
      t2 = time.time()
      sys.stderr.write('Total time %r sec.\n' % (t2-t1))
    # The predicate -- defines what files we look for.
    # Feel free to change this to suit your purpose
    def selector(dir, name, fullname, stat):
      """Return True for a world-writable entry that is not a symlink.

      Only the mode field of the stat result is examined; dir, name and
      fullname are accepted so that other predicates can use them.
      """
      mode = stat[ST_MODE]
      if not mode & 0o002:
        # The "other" write bit is clear: not world writable
        return False
      return not S_ISLNK(mode)
    # The find procedure -- calls wq.addwork() for subdirectories
    def find(dir, pred, wq):
      """List one directory, print matching entries and queue subdirectories.

      Each entry is lstat()ed and passed to pred(dir, name, fullname, stat);
      the full path is printed when pred returns a true value.  For every
      subdirectory that is not a mount point, a new find job is added to wq.
      Listing or stat errors are printed and the offending item is skipped.
      """
      try:
        entries = os.listdir(dir)
      except OSError as err:
        print(repr(dir), ':', err)
        return
      skipped = (os.curdir, os.pardir)
      for entry in entries:
        if entry in skipped:
          continue
        path = os.path.join(dir, entry)
        try:
          info = os.lstat(path)
        except OSError as err:
          print(repr(path), ':', err)
          continue
        if pred(dir, entry, path, info):
          print(path)
        # Descend into real directories only, and never cross a mount point
        if S_ISDIR(info[ST_MODE]) and not os.path.ismount(path):
          wq.addwork(find, (path, pred, wq))
    # Call the main program, but only when executed as a script, so that
    # importing this module does not start a search
    if __name__ == '__main__':
      main()
希望本文所述对大家Python程序设计有所帮助。