import random
# Sample data pools used by the log-line generator functions in this file.

# Request paths written into the url field.
url_path = [
    "class/112.html",
    "class/118.html",
    "learn/821",
    "course/list",
]

# Numbers joined with "." to form the ip field.
# NOTE(review): 595 and 654 exceed 255, so the resulting strings are not
# valid IPv4 addresses - confirm whether that is intentional for test data.
ip_slice = [156, 65, 595, 654]

# Referrer URL templates; "{query}" is filled in with a search keyword.
http_reference = [
    "http://www.baidu.coms?sd={query}",
    "http://www.sougou.web?sd={query}",
    "http://www.google.coms?sd={query}",
    "http://www.yahoo.coms?sd={query}",
]

# Keywords substituted into the referrer templates.
search_keyword = [
    "spark",
    "hadoop",
    "zookeeper",
]

# Values written into the status field.
# NOTE(review): 250 is not a standard HTTP status code - confirm.
states = [404, 500, 400, 250]
def sample_status():
    """Return one value picked at random from the module-level ``states`` list."""
    # random.choice draws a single element directly, replacing the
    # roundabout random.sample(states, 1)[0].
    return random.choice(states)
def sample_reference():
    """Return the referrer field for one log line.

    When the random draw is above 0.2 (roughly 80% of calls) the
    placeholder ``"-"`` is returned. Otherwise a template is picked from
    ``http_reference`` and its ``{query}`` field is filled with a keyword
    picked from ``search_keyword``.
    """
    if random.uniform(0, 1) > 0.2:
        return "-"
    # Template first, then keyword: same draw order as before, but with
    # random.choice instead of random.sample(..., 1)[0].
    template = random.choice(http_reference)
    keyword = random.choice(search_keyword)
    return template.format(query=keyword)
def sample_url():
    """Return one request path picked at random from ``url_path``."""
    # random.choice is the direct single-element draw; it replaces
    # random.sample(url_path, 1)[0].
    return random.choice(url_path)
def sample_ip():
    """Return four values from ``ip_slice`` joined with dots.

    The values are drawn without replacement via ``random.sample``, so
    ``ip_slice`` must hold at least four entries; ``random.sample`` raises
    ``ValueError`` otherwise.
    """
    # Named "parts" rather than "slice" so the builtin slice type is not
    # shadowed inside this function.
    parts = random.sample(ip_slice, 4)
    return ".".join(str(part) for part in parts)
def generatelog(count=10):
    """Print ``count`` tab-separated sample log lines to standard output.

    Each line carries four fields in this order: url, ip, reference and
    status, produced by ``sample_url``, ``sample_ip``, ``sample_reference``
    and ``sample_status``.

    Args:
        count: Number of lines to print. A value below 1 prints nothing.
    """
    # NOTE(review): the leading "$" is emitted verbatim before the url
    # field - confirm that downstream consumers expect it.
    line_template = "${url}\t{ip}\t{reference}\t{st}"
    remaining = count
    while remaining >= 1:
        querty_log = line_template.format(
            url=sample_url(),
            ip=sample_ip(),
            reference=sample_reference(),
            st=sample_status(),
        )
        # The parenthesized single-argument form prints the same text under
        # the Python 2 print statement and the Python 3 print() function.
        print(querty_log)
        remaining -= 1
# Script entry point: print the default batch of sample log lines when this
# file is executed directly rather than imported.
if __name__ == "__main__":
    generatelog()
# NOTE(review): this block repeats the constant definitions found earlier in
# this file with the same values; the duplication looks unintentional.

# Request paths written into the url field.
url_path = [
    "class/112.html",
    "class/118.html",
    "learn/821",
    "course/list",
]

# Numbers joined with "." to form the ip field.
# NOTE(review): 595 and 654 exceed 255, so the resulting strings are not
# valid IPv4 addresses - confirm whether that is intentional for test data.
ip_slice = [156, 65, 595, 654]

# Referrer URL templates; "{query}" is filled in with a search keyword.
http_reference = [
    "http://www.baidu.coms?sd={query}",
    "http://www.sougou.web?sd={query}",
    "http://www.google.coms?sd={query}",
    "http://www.yahoo.coms?sd={query}",
]

# Keywords substituted into the referrer templates.
search_keyword = [
    "spark",
    "hadoop",
    "zookeeper",
]

# Values written into the status field.
# NOTE(review): 250 is not a standard HTTP status code - confirm.
states = [404, 500, 400, 250]
def sample_status():
    """Pick and return a single entry of the module-level ``states`` list."""
    # One direct draw with random.choice instead of building a one-element
    # sample list and indexing into it.
    return random.choice(states)
def sample_reference():
    """Build the referrer field of a log line.

    Returns ``"-"`` whenever the random draw exceeds 0.2 (about 80% of
    calls). In the remaining cases a template from ``http_reference`` is
    formatted with a keyword from ``search_keyword`` as its ``query``.
    """
    if random.uniform(0, 1) > 0.2:
        return "-"
    # random.choice replaces random.sample(..., 1)[0]; the template is
    # still drawn before the keyword.
    chosen_template = random.choice(http_reference)
    chosen_keyword = random.choice(search_keyword)
    return chosen_template.format(query=chosen_keyword)
def sample_url():
    """Pick and return a single request path from ``url_path``."""
    # One direct draw with random.choice instead of
    # random.sample(url_path, 1)[0].
    return random.choice(url_path)
def sample_ip():
    """Return a dotted string made of four values drawn from ``ip_slice``.

    ``random.sample`` draws without replacement, so ``ip_slice`` needs at
    least four entries; otherwise ``random.sample`` raises ``ValueError``.
    """
    # The local is called "octets" so it no longer shadows the builtin
    # ``slice``.
    octets = random.sample(ip_slice, 4)
    return ".".join(str(octet) for octet in octets)
def generatelog(count=10):
    """Write ``count`` sample log lines to standard output.

    A line consists of the url, ip, reference and status fields separated
    by tab characters; the values come from ``sample_url``, ``sample_ip``,
    ``sample_reference`` and ``sample_status`` respectively.

    Args:
        count: How many lines to print. Nothing is printed when it is
            below 1.
    """
    # NOTE(review): the template starts with a literal "$" that ends up in
    # front of the url field - confirm that this is intended.
    line_template = "${url}\t{ip}\t{reference}\t{st}"
    remaining = count
    while remaining >= 1:
        querty_log = line_template.format(
            url=sample_url(),
            ip=sample_ip(),
            reference=sample_reference(),
            st=sample_status(),
        )
        # print(...) with a single argument behaves identically as a
        # Python 2 statement and as the Python 3 function.
        print(querty_log)
        remaining -= 1
# Run the generator with its default line count when executed as a script.
if __name__ == "__main__":
    generatelog()