例行程序
def routine():
    """Run the daily job once per calendar day during the 06:00 hour.

    Polls the local clock every 600 seconds. The first poll of a day that
    falls in hour 6 calls ``run_job`` with the previous day's date formatted
    as ``YYYYMMDD``. The loop never terminates, so this function does not
    return.

    NOTE(review): ``run_job`` is not defined in this snippet; it is assumed
    to be provided elsewhere in the module.
    """
    lastday = ''
    while True:
        # Read the clock once per poll so the day, the hour and the job date
        # are all derived from the same instant.
        now = datetime.datetime.now()
        day = now.strftime("%Y%m%d")
        if day != lastday and now.hour == 6:  # exec when 6 a.m.
            cal_date_str = (now - datetime.timedelta(days=1)).strftime('%Y%m%d')
            run_job(cal_date_str)
            # Remember the day so the job is not re-run on later polls
            # within the same 06:00 hour.
            lastday = day
        time.sleep(600)
if __name__ == '__main__':
    # sys.exit is used instead of the bare exit() helper: exit() is injected
    # by the site module for interactive use and is absent under `python -S`.
    if len(sys.argv) < 2:
        sys.exit(0)
    # Only the 'routine' sub-command is recognised; anything else is a no-op.
    if sys.argv[1] == 'routine':
        routine()
ngram
用法:zip(*[iter(a[i:]) for i in xrange(n)])
>>> a=range(10)
>>> zip(*[a[i:] for i in xrange(2)])
[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 9)]
还可以用itertools实现pairwise
hash
hash(text) % 99999999
md5计算
import hashlib
# Incremental form: create the hash object, feed it the data, read the hex digest.
m = hashlib.md5()
m.update(text)
text_md5 = m.hexdigest()
# One-shot form: same digest for the same text, written as a single expression.
# NOTE(review): on Python 3 hashlib requires bytes, so text would need encoding first.
text_md5 = hashlib.md5(text).hexdigest()
数值
sys.maxint
sys.float_info.max
sys.float_info.epsilon
sorted
# Build a list of (key, value) pairs ordered by value, largest value first.
sorted(dictobj.items(), key=lambda x:x[1], reverse=True)
# NOTE(review): this compares the dict itself with the sorted list of pairs;
# the intent of the comparison is unclear from here - confirm.
dictobj == sorted(dictobj.items(), key=lambda x:x[1], reverse=True)
comparison
使用multiprocessing多进程
example 1:
from multiprocessing import Process, Lock
def f(l, i):
    """Print ``hello world <i>`` while holding the lock *l*.

    Args:
        l: a lock object usable as a context manager (acquire on entry,
            release on exit).
        i: value appended to the greeting.
    """
    # `with` guarantees the lock is released even if printing raises, which
    # the manual acquire()/release() pair did not.
    with l:
        # Single-argument print(...) produces the same output on Python 2
        # and Python 3.
        print('hello world %s' % (i,))
if __name__ == '__main__':
    # A single lock is handed to every child so their output does not interleave.
    shared_lock = Lock()
    for idx in range(10):
        child = Process(target=f, args=(shared_lock, idx))
        child.start()
example 2:
from multiprocessing import Process, Manager
def f(d, l):
    """Insert three fixed entries into mapping *d* and reverse list *l* in place."""
    fixed_entries = ((1, '1'), ('2', 2), (0.25, None))
    for key, value in fixed_entries:
        d[key] = value
    l.reverse()
if __name__ == '__main__':
    # Manager-backed dict and list proxies are handed to the child process;
    # the parent prints them after the child has finished.
    manager = Manager()
    d = manager.dict()
    l = manager.list(range(10))

    p = Process(target=f, args=(d, l))
    p.start()
    p.join()  # wait for the child before reading the shared objects

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(d)
    print(l)
will print
{0.25: None, 1: '1', '2': 2}
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
example 3:
from multiprocessing import Process, Pool
from multiprocessing.managers import BaseManager
class MySharedClass(object):
    """Minimal holder for a single value with get/set accessors."""

    # Class-level default; set() shadows it with a per-instance attribute.
    stored_value = 0

    def set(self, new_value):
        """Store *new_value* on this instance and return the stored value."""
        self.stored_value = new_value
        return new_value

    def get(self):
        """Return the currently stored value (0 until set() is called)."""
        return self.stored_value
class MyManager(BaseManager):
    """BaseManager subclass with no additions of its own; types are registered on it."""
MyManager.register('MySharedClass', MySharedClass)
def worker(proxy_object, i):
    """Add *i* to the value held by *proxy_object*, print the new sum, return the object.

    The object only needs ``get()`` and ``set(value)`` methods.
    """
    updated_total = proxy_object.get() + i
    proxy_object.set(updated_total)
    # The value is read back from the object rather than reusing the local sum.
    report = "id %d, sum %d" % (i, proxy_object.get())
    print(report)
    return proxy_object
if __name__ == '__main__':
    # Start the manager server process and create one shared instance in it.
    manager = MyManager()
    manager.start()
    shared = manager.MySharedClass()

    pool = Pool(5)
    # pool.apply() blocks until each call has returned, so the 33 updates
    # are applied one after another.
    for i in range(33):
        pool.apply(func=worker, args=(shared, i))
    pool.close()
    pool.join()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("result: %d" % shared.get())
使用threading多线程
# encoding: utf-8
import sys, os, threading, time
from datetime import datetime
def simple_function(tname, delay):
    """Print *tname* five times, sleeping *delay* seconds after each print.

    Args:
        tname: value to print on each iteration.
        delay: seconds passed to ``time.sleep`` after every print.
    """
    for _ in range(5):
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print(tname)
        time.sleep(delay)
class MyThread(threading.Thread):
    """Thread that announces its id and then runs ``simple_function``."""

    def __init__(self, thread_id):
        """Create the thread and remember *thread_id* (an integer, printed with %d)."""
        super(MyThread, self).__init__()
        self.thread_id = thread_id

    def run(self):
        """Print '<id> start', then call simple_function with a 0.01 s delay."""
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print('%d start' % self.thread_id)
        simple_function(self.thread_id, 0.01)
def main():
    """Start 30 MyThread workers, then report the live thread count every 5 seconds.

    The reporting loop never ends, so this function does not return. If
    creating or starting a thread fails, an error line is printed and the
    process exits with status 1.
    """
    try:
        threadpool = [MyThread(i) for i in range(30)]
        for t in threadpool:
            t.start()
    except Exception:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being reported as a thread error.
        print('create thread error')
        sys.exit(1)

    while True:
        time.sleep(5)
        print('main alive')
        # threading.enumerate() includes the main thread itself.
        print('running thread num: %d' % len(threading.enumerate()))


if __name__ == '__main__':
    main()
itertools
- mapreduce中使用groupby
import sys, os
import itertools
import operator
# Group tab-separated stdin records by their first five columns and print each
# key followed by the records in its group.
# groupby only merges *adjacent* records with equal keys, so the input is
# assumed to arrive already sorted/grouped by those columns - TODO confirm.
# Only the line terminator is stripped: a bare rstrip() would also remove
# trailing tabs and silently drop empty trailing columns.
# The generator expression replaces itertools.imap, which exists only on Python 2.
records = (line.rstrip('\r\n').split('\t') for line in sys.stdin)
for k, vs in itertools.groupby(records, operator.itemgetter(0, 1, 2, 3, 4)):
    print(k)
    for v in vs:
        print(v)
- pairwise
import itertools


def pairwise(iterable):
    """Yield overlapping pairs: s -> (s0, s1), (s1, s2), (s2, s3), ...

    Args:
        iterable: any iterable; it is consumed lazily.

    Returns:
        An iterator of 2-tuples. Empty when the input has fewer than two items.
    """
    a, b = itertools.tee(iterable)
    # Advance the second copy by one so zipping yields consecutive items.
    next(b, None)
    # itertools.izip exists only on Python 2; the built-in zip is already lazy
    # on Python 3.
    lazy_zip = getattr(itertools, 'izip', zip)
    return lazy_zip(a, b)
使用requests访问网站
#encoding:utf-8
import requests
import sys
# For every query line in the input file (argv[1]), call the query service and
# write each response line containing 'result' to the output file (argv[2]) as
# "<query>\t<line>".
# The input file is opened once and both files are closed by the `with` block
# even if a request fails; previously the input was opened twice and one
# handle was never read.
with open(sys.argv[1]) as fr, open(sys.argv[2], 'w') as fw:
    for q in fr:
        q = q.strip()
        # NOTE(review): q is appended to the URL without percent-encoding, so a
        # query containing '&' or '#' changes the request - confirm whether the
        # service expects the raw text before switching to requests' `params=`.
        p = requests.get('http://10.6.131.76:5000/query/?d=2&q=' + q)
        for line in p.content.split('\n'):
            if 'result' in line:
                fw.write('%s\t%s\n' % (q, line))
获取主机名和ip
import socket
# Fully qualified domain name derived from the local host name.
hostname = socket.getfqdn(socket.gethostname())
# Address string that the name above resolves to (raises if it cannot be resolved).
hostaddr = socket.gethostbyname(hostname)
URL解析
>>> import urlparse
>>> parsedTuple = urlparse.urlparse("http://www.google.com/search?hl=en&q=python&btnG=Google+Search")
>>> parsedTuple
ParseResult(scheme='http', netloc='www.google.com', path='/search', params='', query='hl=en&q=python&btnG=Google+Search', fragment='')
URL解码
>>> import urllib
>>> rawurl='%7B%22keyword%22%3A%22%25E9%25BB%2584%25E6%2599%25AF%25E7%2591%259C%22%2C%22from%22%3A%22content%22%7D'
>>> url=urllib.unquote(rawurl)
>>> print url
{"keyword":"%E9%BB%84%E6%99%AF%E7%91%9C","from":"content"}
基础代码模板
def print_usage():
    """Print the usage line and one example invocation to stdout.

    The script name shown in both lines is taken from ``sys.argv[0]``.
    """
    prog = sys.argv[0]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("usage: python %s -i [input_file] -o [output_file] -vec [poivector_file] -tag [poitag_file] -type [assess/itemcf]" % prog)
    print("example: python %s -i assess_pois.gbk -o assess_result.v7 -vec vectors-poi-v7.txt.part -type assess" % prog)
def _arg_after(flag):
    """Return the argv token that follows *flag*; print usage and exit(1) if none follows."""
    pos = sys.argv.index(flag)
    if pos + 1 >= len(sys.argv):
        # A flag given as the very last token used to raise IndexError.
        print_usage()
        sys.exit(1)
    return sys.argv[pos + 1]


# At least two flag/value pairs are required (script name + 4 tokens).
if len(sys.argv) < 5:
    print_usage()
    sys.exit(1)

# Each variable is only bound when its flag is present on the command line.
if '-i' in sys.argv:
    input_filename = _arg_after('-i')
if '-o' in sys.argv:
    output_filename = _arg_after('-o')
if '-vec' in sys.argv:
    poivector_filename = _arg_after('-vec')
    sim_type = 'vec'
# '-tag' is checked after '-vec', so when both are given sim_type ends up 'tag'.
if '-tag' in sys.argv:
    poitag_filename = _arg_after('-tag')
    sim_type = 'tag'
if '-type' in sys.argv:
    task_type = _arg_after('-type')

if __name__ == '__main__':
    # Template placeholder: the script's entry-point logic goes here.
    pass
类属
>>> class Person(): pass
...
>>> p = Person()
>>> isinstance(p, Person)
True
>>> def person(): pass
...
>>> person.name = 'mike'
>>> person
<function person at 0xec37d0>
>>> person.name
'mike'
>>> hasattr(person, 'name')
True