Python的多线程与多进程实践

浏览数：22 / 时间：2015年06月08日

最近要产生大量的“假”的电话号码，所以就写了一个自动产生电话号码的程序，产生了一百万条数据，然后把数据放到一个文件中。

死跑版：

# -*- coding: utf-8 -*-
# 以下是中国的手机号码分段情况 
#   新联通　　（中国联通+中国网通）手机号码开头数字 130、131、132、145、 
               # 155、156、185、186 
#   新移动　　（中国移动+中国铁通）手机号码开头数字 134、135、136、137、 
               # 138、139、147、150、151、152、157、158、159、182、183、187、188 
#   新电信　　（中国电信+中国卫通）手机号码开头数字 133、153、189、180 
# this script generates telephone numbers in mobile_china, unicom_china,tele_china
import random
from random import choice
import time# mobile info
# Three-digit prefixes of Chinese mobile numbers, grouped by carrier
# (China Mobile, China Unicom, China Telecom).
mobile_china = [
    '134', '135', '136', '137', '138', '139', '147', '150', '151',
    '152', '157', '158', '159', '182', '183', '187', '188',
]
unicom_china = ['130', '131', '132', '145', '155', '156', '185', '186']
tele_china = ['133', '153', '189', '180']
# the heading three numbers of a telephone number
all_three = mobile_china + unicom_china + tele_china
# the tailing eight numbers of a telephone number (largest value drawn)
all_eight = 99999999
# 1 million -- the total count of telephone numbers
# 100 million records is too much for my computer
Max = 1000000
def telenum_gen():
    """Write ``Max`` random phone numbers to ``tele_book_solo.txt``.

    Each number is a prefix chosen from ``all_three`` followed by eight
    zero-padded random digits, written one per line.  A progress line is
    printed for every record.
    """
    # ``with`` closes the file even if generation or a write raises.
    with open('tele_book_solo.txt', 'w') as tele_book:
        for i in xrange(Max):
            three = choice(all_three)
            # Left-pad with zeros so small random values still give 8 digits.
            eight = str(random.randint(0, all_eight)).rjust(8, '0')
            tele_num = three + eight
            print("now the %r th record is %r " % (i, tele_num))
            tele_book.write(tele_num + "\n")
    
# begin time
# time.time() is wall-clock on every platform.  time.clock() returns CPU
# time on Unix (excluding time spent waiting on file/console I/O) and
# wall-clock only on Windows, so its result is not comparable across systems.
start = time.time()

telenum_gen()
# end time
end = time.time()
print("Running time is %d seconds." % (end - start))

算法很简单，把所有可能的中国联通，中国移动，还有中国电信的前三位号码放到一个列表中，后面八位随机数产生，然后二者拼接成字符串即可。

但是这个程序运行得太慢了，跑了4000多秒。如果要产生1亿条数据，计算机直接挂掉了。

于是就考虑，能不能使用多线程或者多进程呢？

于是，就有了多线程版本的电话号码生成器：

多线程版：

# -*- coding: utf-8 -*-
# 以下是中国的手机号码分段情况 
#   新联通　　（中国联通+中国网通）手机号码开头数字 130、131、132、145、 
               # 155、156、185、186 
#   新移动　　（中国移动+中国铁通）手机号码开头数字 134、135、136、137、 
               # 138、139、147、150、151、152、157、158、159、182、183、187、188 
#   新电信　　（中国电信+中国卫通）手机号码开头数字 133、153、189、180 
# this script generates telephone numbers in mobile_china, unicom_china,tele_china
import random
from random import choice
import time
import threading 

# mobile info
# Three-digit prefixes of Chinese mobile numbers, grouped by carrier
# (China Mobile, China Unicom, China Telecom).
mobile_china = [
    '134', '135', '136', '137', '138', '139', '147', '150', '151',
    '152', '157', '158', '159', '182', '183', '187', '188',
]
unicom_china = ['130', '131', '132', '145', '155', '156', '185', '186']
tele_china = ['133', '153', '189', '180']
# the heading three numbers of a telephone number
all_three = mobile_china + unicom_china + tele_china
# the tailing eight numbers of a telephone number (largest value drawn)
all_eight = 99999999
# 1 million -- the total count of telephone numbers
# 100 million records is too much for my computer
Max = 1000000
# number of worker threads; each one generates Max // worker_num records
worker_num = 5

def telenum_gen(file):
    """Write one worker's share of random phone numbers to ``file``.

    ``file`` is an already-open writable file object; this function does
    not close it.  ``Max // worker_num`` numbers are written, one per
    line, and a progress line is printed for each.
    """
    # Floor division keeps the count an int; if Max is not a multiple of
    # worker_num the remainder is simply not generated.
    split_work = Max // worker_num
    for i in xrange(split_work):
        three = choice(all_three)
        # Left-pad with zeros so small random values still give 8 digits.
        eight = str(random.randint(0, all_eight)).rjust(8, '0')
        tele_num = three + eight
        file.write(tele_num + "\n")
        print("now the %r th record is %r " % (i, tele_num))
        

if __name__ == '__main__':
    # begin time
    # Wall-clock time: time.clock() is CPU time on Unix and wall-clock only
    # on Windows, so it is not a portable way to time this run.
    start = time.time()
    # Every worker thread writes to the same file object; ``with`` makes
    # sure it is closed once all of them have been joined (or on error).
    with open('tele_book_thread.txt', 'w') as tele_book:
        threads = [
            threading.Thread(target=telenum_gen, args=(tele_book,))
            for _ in xrange(worker_num)
        ]
        # begin to work
        for worker in threads:
            worker.start()
        # wait for every worker before the file is closed
        for worker in threads:
            worker.join()
    # end time
    end = time.time()
    print("Running time is %d seconds." % (end - start))

产生了5个线程，共同对同一个文件做写入操作。

结果，运行时间要比直接产生还要慢，我实验了10万条数据，慢了20秒左右。

于是，又想，是不是多进程会更好一点呢？就查找资料，写了多进程版本的：

多进程版：

# -*- coding: utf-8 -*-
# 以下是中国的手机号码分段情况 
#   新联通　　（中国联通+中国网通）手机号码开头数字 130、131、132、145、 
               # 155、156、185、186 
#   新移动　　（中国移动+中国铁通）手机号码开头数字 134、135、136、137、 
               # 138、139、147、150、151、152、157、158、159、182、183、187、188 
#   新电信　　（中国电信+中国卫通）手机号码开头数字 133、153、189、180 
# this script generates telephone numbers in mobile_china, unicom_china,tele_china
import random
from random import choice
import time
import multiprocessing
from multiprocessing import Process
# mobile info
# Three-digit prefixes of Chinese mobile numbers, grouped by carrier
# (China Mobile, China Unicom, China Telecom).
mobile_china = [
    '134', '135', '136', '137', '138', '139', '147', '150', '151',
    '152', '157', '158', '159', '182', '183', '187', '188',
]
unicom_china = ['130', '131', '132', '145', '155', '156', '185', '186']
tele_china = ['133', '153', '189', '180']
# the heading three numbers of a telephone number
all_three = mobile_china + unicom_china + tele_china
# the tailing eight numbers of a telephone number (largest value drawn)
all_eight = 99999999
# 100 thousand -- the total count of telephone numbers
# 100 million records is too much for my computer
Max = 100000
# number of producer processes and also of consumer processes;
# each one handles Max // worker_num records
worker_num = 5
# input worker
def telenum_gen(queue):
    """Producer: put one worker's share of random phone numbers on ``queue``.

    ``Max // worker_num`` numbers are generated; each is put on the queue
    as a string (prefix from ``all_three`` plus eight zero-padded digits)
    and a progress line is printed.
    """
    # Floor division keeps the count an int; if Max is not a multiple of
    # worker_num the remainder is simply not generated.
    split_work = Max // worker_num
    for i in xrange(split_work):
        three = choice(all_three)
        # Left-pad with zeros so small random values still give 8 digits.
        eight = str(random.randint(0, all_eight)).rjust(8, '0')
        tele_num = three + eight
        queue.put(tele_num)
        print("now the %r th record is %r " % (i, tele_num))
# output worker 
def write_telenum(queue):
    """Consumer: append ``Max // worker_num`` queued numbers to the output file.

    Items are taken from ``queue`` (blocking until one is available) and
    appended to ``tele_book_process.txt``, one per line.
    """
    split_work = Max // worker_num
    # Append mode, so an existing file is extended rather than truncated.
    # NOTE(review): if several processes run this at once, each has its own
    # write buffer; a flush may end mid-line and interleave with another
    # process's output -- verify the file contents or funnel all writes
    # through a single writer.
    with open('tele_book_process.txt', 'a') as tele_book:
        for _ in xrange(split_work):
            tele_book.write(queue.get() + "\n")
            
if __name__ == '__main__':
    print("author: YQ")
    # begin time
    # Wall-clock time: on Unix time.clock() is the CPU time of this (parent)
    # process only, which mostly sits in join() while the children work.
    start = time.time()
    # producers fill the queue, consumers drain it into the output file
    input_workers = []
    output_workers = []

    # maxsize <= 0 means the queue is unbounded
    queue = multiprocessing.Queue(-1)

    # input processes
    for x in xrange(worker_num):
        process = multiprocessing.Process(target=telenum_gen, args=(queue,))
        process.start()
        input_workers.append(process)
    # output processes; started before joining the producers so the queue
    # is being drained while they are still putting items on it
    for x in xrange(worker_num):
        process = multiprocessing.Process(target=write_telenum, args=(queue,))
        process.start()
        output_workers.append(process)
    # begin to work
    print("this is the length of input workers: %d" % len(input_workers))
    for p in input_workers:
        p.join()
    queue.close()  # No more object will come, close the queue
    for p in output_workers:
        p.join()
    # end time
    end = time.time()
    print("Running time is %d seconds." % (end - start))