Hello World问题
已知一个字母字符的集合,利用其中的字符和字母猜测出设定的密码(例如Hello world)。
1.初次尝试
字符集合与猜测目标密码如下:
# Alphabet the guesser may draw from: space, a-z, A-Z, '!' and '.'.
geneSet = (
    " "
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "!."
)
# Password the search is trying to reproduce.
target = "Hello World!"
下面这个函数表示产生一个最初的猜测:
import random
def generate_parent(length):
    """Build a random initial guess of ``length`` characters.

    Characters are drawn from the module-level ``geneSet`` with
    ``random.sample``. When ``length`` exceeds ``len(geneSet)``, further
    batches are drawn until enough characters have been collected.

    Args:
        length: Number of characters the guess must contain.

    Returns:
        A string of ``length`` characters taken from ``geneSet``.
    """
    genes = []
    while len(genes) < length:
        # random.sample cannot return more items than the population holds.
        sampleSize = min(length - len(genes), len(geneSet))
        genes.extend(random.sample(geneSet, sampleSize))
    return ''.join(genes)
为了便于快速猜测出目标密码,需要给出一个反馈来告诉猜测者当前猜测是否更接近答案,如果不是,就可以舍弃该猜测。下面的函数用来计算适应值,它反映当前猜测和目标密码有多少个相同的字母:
def get_fitness(guess):
    """Count the positions at which ``guess`` matches the global ``target``.

    Args:
        guess: Candidate password string.

    Returns:
        The number of indices where both strings hold the same character.
        ``zip`` stops at the shorter string, so extra characters are ignored.
    """
    return sum(1 for expected, actual in zip(target, guess)
               if expected == actual)
下面这个函数为变异,即对之前的猜测中的某个字母替换,得到新的猜测。
def mutate(parent):
    """Return a copy of ``parent`` with one randomly chosen character replaced.

    Two characters are sampled from the module-level ``geneSet``. The first
    is used unless it equals the character already at the chosen position,
    in which case the second is used instead. When ``geneSet`` holds no
    duplicate characters the child therefore always differs from the parent.

    Args:
        parent: The current guess (non-empty string).

    Returns:
        A new string of the same length differing from ``parent`` in at
        most one position.
    """
    index = random.randrange(0, len(parent))
    childGenes = list(parent)
    newGene, alternate = random.sample(geneSet, 2)
    # Must be a single conditional expression; split over several lines
    # without parentheses it is a syntax error.
    childGenes[index] = alternate if newGene == childGenes[index] else newGene
    return ''.join(childGenes)
下面这个函数为输出过程中的结果,输出其对应的猜测字符串、该猜测的适应值、从开始到本次猜测结束的时间。
import datetime
def display(guess, startTime):
    """Print a guess, its fitness and the time elapsed since ``startTime``.

    Args:
        guess: Candidate password string.
        startTime: ``datetime.datetime`` at which the search began.
    """
    timeDiff = datetime.datetime.now() - startTime
    fitness = get_fitness(guess)
    print("{} {} {}".format(guess, fitness, timeDiff))
下面是主函数部分,主要包含产生一个初始猜测、以及重要的一个循环。循环的过程包括:产生一个猜测;计算该猜测的适应值;将该猜测的适应值与之前最好的适应值相比较;保留最高的适应值。
if __name__ == "__main__":
    random.seed()
    startTime = datetime.datetime.now()
    bestParent = generate_parent(len(target))
    bestFitness = get_fitness(bestParent)
    display(bestParent, startTime)

    while True:
        child = mutate(bestParent)
        childFitness = get_fitness(child)
        # Throw away mutations that are not strictly better than the best so far.
        if bestFitness >= childFitness:
            continue
        display(child, startTime)
        # Fitness equal to the guess length means every position matches.
        if childFitness >= len(bestParent):
            break
        bestParent = child
        bestFitness = childFitness
guessPassword.py完整代码:
import random
import datetime
# Alphabet the guesser may draw from: space, a-z, A-Z, '!' and '.'.
geneSet = (
    " "
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "!."
)
# Password the search is trying to reproduce.
target = "Hello World!"
def generate_parent(length):
    """Build a random initial guess of ``length`` characters.

    Characters are drawn from the module-level ``geneSet`` with
    ``random.sample``. When ``length`` exceeds ``len(geneSet)``, further
    batches are drawn until enough characters have been collected.

    Args:
        length: Number of characters the guess must contain.

    Returns:
        A string of ``length`` characters taken from ``geneSet``.
    """
    genes = []
    while len(genes) < length:
        # random.sample cannot return more items than the population holds.
        sampleSize = min(length - len(genes), len(geneSet))
        genes.extend(random.sample(geneSet, sampleSize))
    return ''.join(genes)
def get_fitness(guess):
    """Count the positions at which ``guess`` matches the global ``target``.

    Args:
        guess: Candidate password string.

    Returns:
        The number of indices where both strings hold the same character.
        ``zip`` stops at the shorter string, so extra characters are ignored.
    """
    return sum(1 for expected, actual in zip(target, guess)
               if expected == actual)
def mutate(parent):
    """Return a copy of ``parent`` with one randomly chosen character replaced.

    Two characters are sampled from the module-level ``geneSet``. The first
    is used unless it equals the character already at the chosen position,
    in which case the second is used instead. When ``geneSet`` holds no
    duplicate characters the child therefore always differs from the parent.

    Args:
        parent: The current guess (non-empty string).

    Returns:
        A new string of the same length differing from ``parent`` in at
        most one position.
    """
    index = random.randrange(0, len(parent))
    childGenes = list(parent)
    newGene, alternate = random.sample(geneSet, 2)
    # Must be a single conditional expression; split over several lines
    # without parentheses it is a syntax error.
    childGenes[index] = alternate if newGene == childGenes[index] else newGene
    return ''.join(childGenes)
def display(guess, startTime):
    """Print a guess, its fitness and the time elapsed since ``startTime``.

    Args:
        guess: Candidate password string.
        startTime: ``datetime.datetime`` at which the search began.
    """
    timeDiff = datetime.datetime.now() - startTime
    fitness = get_fitness(guess)
    print("{} {} {}".format(guess, fitness, timeDiff))
if __name__ == "__main__":
    random.seed()
    startTime = datetime.datetime.now()
    bestParent = generate_parent(len(target))
    bestFitness = get_fitness(bestParent)
    display(bestParent, startTime)

    while True:
        child = mutate(bestParent)
        childFitness = get_fitness(child)
        # Throw away mutations that are not strictly better than the best so far.
        if bestFitness >= childFitness:
            continue
        display(child, startTime)
        # Fitness equal to the guess length means every position matches.
        if childFitness >= len(bestParent):
            break
        bestParent = child
        bestFitness = childFitness
代码执行结果如下图:
2.可复用的代码
首先编写一个可以复用的进行密码猜测的文件 genetic.py ,其完整代码如下:
import random
def _generate_parent(length, genSet):
    """Build a random string of ``length`` characters drawn from ``genSet``.

    Args:
        length: Number of characters to produce.
        genSet: Sequence of allowed characters.

    Returns:
        A string of ``length`` characters. Characters are taken in batches
        with ``random.sample``, each batch at most ``len(genSet)`` long.
    """
    genes = []
    while len(genes) < length:
        # random.sample cannot return more items than the population holds.
        sampleSize = min(length - len(genes), len(genSet))
        genes.extend(random.sample(genSet, sampleSize))
    return ''.join(genes)
def _mutate(parent, geneSet):
    """Return a copy of ``parent`` with one randomly chosen character replaced.

    Two characters are sampled from ``geneSet``. The first is used unless it
    equals the character already at the chosen position, in which case the
    second is used instead.

    Args:
        parent: The current guess (non-empty string).
        geneSet: Sequence of allowed characters (at least two).

    Returns:
        A new string of the same length as ``parent``.
    """
    index = random.randrange(0, len(parent))
    childGenes = list(parent)
    newGene, alternate = random.sample(geneSet, 2)
    # Must be a single conditional expression; split over several lines
    # without parentheses it is a syntax error.
    childGenes[index] = alternate if newGene == childGenes[index] else newGene
    return ''.join(childGenes)
def get_best(get_fitness, targenLen, optimalFitness, geneSet, dispaly):
    """Mutate a random guess until its fitness reaches ``optimalFitness``.

    Args:
        get_fitness: Callable taking a guess string and returning its score.
        targenLen: Length of the string to generate.
        optimalFitness: Score at which the search stops.
        geneSet: Sequence of characters guesses may contain.
        dispaly: Callable taking a guess string; called for the initial
            guess and for every improvement.

    Returns:
        The first guess whose fitness is at least ``optimalFitness``.
    """
    random.seed()
    bestParent = _generate_parent(targenLen, geneSet)
    bestFitness = get_fitness(bestParent)
    dispaly(bestParent)
    if bestFitness >= optimalFitness:
        return bestParent

    while True:
        child = _mutate(bestParent, geneSet)
        childFitness = get_fitness(child)
        # Keep only strict improvements.
        if bestFitness >= childFitness:
            continue
        dispaly(child)
        if childFitness >= optimalFitness:
            return child
        bestParent = child
        bestFitness = childFitness
其中,函数_generate_parent和函数_mutate的名字以下划线开头,按照Python的命名约定,这表示它们是模块内部使用的(非公开)函数,只应由本模块中的其他函数调用。对于函数get_best,其形参get_fitness和display(代码中拼写为dispaly)所接收的函数都只有一个参数,即猜测产生的字符串,因为get_best不需要知道目标target是什么,也不需要知道过了多少时间。
上面的代码由文件guess_Password.py中的函数调用,其代码如下:
import datetime
import genetic
def test_Hello_World():
    """Run the password search against the string "Hello World!"."""
    target = "Hello World!"
    guess_password(target)
def guess_password(target):
    """Search for ``target`` with the genetic engine, printing each improvement.

    Args:
        target: The password string to be discovered.
    """
    # 'j' was previously mistyped as a second 'g', so passwords containing
    # 'j' could never be reached.
    geneSet = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!."
    startTime = datetime.datetime.now()

    # The engine only passes the guess; bind target and start time here.
    def fnGetFitness(guess):
        return get_fitness(guess, target)

    def fnDisplay(genes):
        display(genes, target, startTime)

    optimalFitness = len(target)
    genetic.get_best(fnGetFitness, len(target), optimalFitness, geneSet, fnDisplay)
def display(genes, target, startime):
    """Print a guess, its fitness against ``target`` and the elapsed time.

    Args:
        genes: Candidate password string.
        target: The password being searched for.
        startime: ``datetime.datetime`` at which the search began.
    """
    timeDiff = datetime.datetime.now() - startime
    fitness = get_fitness(genes, target)
    print("{} {} {}".format(genes, fitness, timeDiff))
def get_fitness(genes, target):
    """Count the positions at which ``genes`` and ``target`` agree.

    Args:
        genes: Candidate password string.
        target: The password being searched for.

    Returns:
        The number of indices where both strings hold the same character.
        ``zip`` stops at the shorter string, so extra characters are ignored.
    """
    return sum(1 for expected, actual in zip(target, genes)
               if expected == actual)
if __name__ == '__main__':
    # Run the demo only when executed as a script, not on import.
    test_Hello_World()
3.使用python的unittest架构
使用unittest架构,需要将主要的测试功能移入一个继承自unittest.TestCase的类中,并且必须将self作为每个函数的第一个参数,因为它们将成为测试类的方法。
这里只需要修改guessPassword.py,修改后的整体代码如下:
import datetime
import genetic
import unittest
class GuessPasswordTests(unittest.TestCase):
    """unittest cases that drive the genetic engine to guess a password."""

    # 'j' was previously mistyped as a second 'g', so passwords containing
    # 'j' could never be reached.
    geneSet = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!."

    def test_Hello_World(self):
        """Guess the string "Hello World!"."""
        target = "Hello World!"
        self.guess_password(target)

    def guess_password(self, target):
        """Search for ``target`` and assert the engine returns it.

        Args:
            target: The password string to be discovered.
        """
        startTime = datetime.datetime.now()

        # The engine only passes the guess; bind target and start time here.
        def fnGetFitness(guess):
            return get_fitness(guess, target)

        def fnDisplay(genes):
            display(genes, target, startTime)

        optimalFitness = len(target)
        best = genetic.get_best(fnGetFitness, len(target), optimalFitness,
                                self.geneSet, fnDisplay)
        self.assertEqual(best, target)
def display(genes, target, startime):
    """Print a guess, its fitness against ``target`` and the elapsed time.

    Args:
        genes: Candidate password string.
        target: The password being searched for.
        startime: ``datetime.datetime`` at which the search began.
    """
    timeDiff = datetime.datetime.now() - startime
    fitness = get_fitness(genes, target)
    print("{} {} {}".format(genes, fitness, timeDiff))
def get_fitness(genes, target):
    """Count the positions at which ``genes`` and ``target`` agree.

    Args:
        genes: Candidate password string.
        target: The password being searched for.

    Returns:
        The number of indices where both strings hold the same character.
        ``zip`` stops at the shorter string, so extra characters are ignored.
    """
    return sum(1 for expected, actual in zip(target, genes)
               if expected == actual)
if __name__ == '__main__':
    # Hand control to unittest's command-line runner when run as a script.
    unittest.main()
genetic.py的内容不变,为了方便,再次粘贴如下:
import random
def _generate_parent(length, genSet):
    """Build a random string of ``length`` characters drawn from ``genSet``.

    Args:
        length: Number of characters to produce.
        genSet: Sequence of allowed characters.

    Returns:
        A string of ``length`` characters. Characters are taken in batches
        with ``random.sample``, each batch at most ``len(genSet)`` long.
    """
    genes = []
    while len(genes) < length:
        # random.sample cannot return more items than the population holds.
        sampleSize = min(length - len(genes), len(genSet))
        genes.extend(random.sample(genSet, sampleSize))
    return ''.join(genes)
def _mutate(parent, geneSet):
    """Return a copy of ``parent`` with one randomly chosen character replaced.

    Two characters are sampled from ``geneSet``. The first is used unless it
    equals the character already at the chosen position, in which case the
    second is used instead.

    Args:
        parent: The current guess (non-empty string).
        geneSet: Sequence of allowed characters (at least two).

    Returns:
        A new string of the same length as ``parent``.
    """
    index = random.randrange(0, len(parent))
    childGenes = list(parent)
    newGene, alternate = random.sample(geneSet, 2)
    # Must be a single conditional expression; split over several lines
    # without parentheses it is a syntax error.
    childGenes[index] = alternate if newGene == childGenes[index] else newGene
    return ''.join(childGenes)
def get_best(get_fitness, targenLen, optimalFitness, geneSet, dispaly):
    """Mutate a random guess until its fitness reaches ``optimalFitness``.

    Args:
        get_fitness: Callable taking a guess string and returning its score.
        targenLen: Length of the string to generate.
        optimalFitness: Score at which the search stops.
        geneSet: Sequence of characters guesses may contain.
        dispaly: Callable taking a guess string; called for the initial
            guess and for every improvement.

    Returns:
        The first guess whose fitness is at least ``optimalFitness``.
    """
    random.seed()
    bestParent = _generate_parent(targenLen, geneSet)
    bestFitness = get_fitness(bestParent)
    dispaly(bestParent)
    if bestFitness >= optimalFitness:
        return bestParent

    while True:
        child = _mutate(bestParent, geneSet)
        childFitness = get_fitness(child)
        # Keep only strict improvements.
        if bestFitness >= childFitness:
            continue
        dispaly(child)
        if childFitness >= optimalFitness:
            return child
        bestParent = child
        bestFitness = childFitness
此外,该程序的命令行测试输入及输出如下:
4.测试更长的密码
在上述guessPassword.py中的GuessPasswordTests类中加入一个函数,代码如下:
def test_For_I_am_fearfully_and_wonderfully_made(self):
    """Guess a longer, 40-character sentence."""
    target = "For I am fearfully and wonderfully made."
    self.guess_password(target)
部分执行结果如下图:
5.引入染色体Chromosome类
这里引入染色体类,其中有Genes和Fitness两个属性,这会使genetic引擎更加灵活。
其中 genetic.py 更改之后的整体代码如下:
import random
class Chromosome:
    """A candidate solution bundled with its fitness score."""

    def __init__(self, genes, fitness):
        """Store the candidate's genes and the fitness computed for them.

        Args:
            genes: The candidate itself (a string in this tutorial).
            fitness: The score the fitness function returned for ``genes``.
        """
        self.Genes = genes
        self.Fitness = fitness
def _generate_parent(length, genSet, get_fitness):
    """Create a random ``Chromosome`` of ``length`` characters.

    Args:
        length: Number of characters to produce.
        genSet: Sequence of allowed characters.
        get_fitness: Callable taking the gene string and returning its score.

    Returns:
        A ``Chromosome`` holding the generated string and its fitness.
    """
    genes = []
    while len(genes) < length:
        # random.sample cannot return more items than the population holds.
        sampleSize = min(length - len(genes), len(genSet))
        genes.extend(random.sample(genSet, sampleSize))
    genes = ''.join(genes)
    fitness = get_fitness(genes)
    return Chromosome(genes, fitness)
def _mutate(parent, geneSet, get_fitness):
    """Return a new ``Chromosome`` differing from ``parent`` in one position.

    Two characters are sampled from ``geneSet``. The first is used unless it
    equals the character already at the chosen position, in which case the
    second is used instead.

    Args:
        parent: ``Chromosome`` whose ``Genes`` string is to be mutated.
        geneSet: Sequence of allowed characters (at least two).
        get_fitness: Callable taking the gene string and returning its score.

    Returns:
        A ``Chromosome`` holding the mutated string and its fitness.
    """
    index = random.randrange(0, len(parent.Genes))
    childGenes = list(parent.Genes)
    newGene, alternate = random.sample(geneSet, 2)
    # Must be a single conditional expression; split over several lines
    # without parentheses it is a syntax error.
    childGenes[index] = alternate if newGene == childGenes[index] else newGene
    genes = ''.join(childGenes)
    fitness = get_fitness(genes)
    return Chromosome(genes, fitness)
def get_best(get_fitness, targenLen, optimalFitness, geneSet, dispaly):
    """Mutate a random candidate until its fitness reaches ``optimalFitness``.

    Args:
        get_fitness: Callable taking a gene string and returning its score.
        targenLen: Length of the gene string to generate.
        optimalFitness: Score at which the search stops.
        geneSet: Sequence of characters candidates may contain.
        dispaly: Callable taking a ``Chromosome``; called for the initial
            candidate and for every improvement.

    Returns:
        The first ``Chromosome`` whose ``Fitness`` is at least
        ``optimalFitness``.
    """
    random.seed()
    bestParent = _generate_parent(targenLen, geneSet, get_fitness)
    dispaly(bestParent)
    if bestParent.Fitness >= optimalFitness:
        return bestParent

    while True:
        child = _mutate(bestParent, geneSet, get_fitness)
        # Keep only strict improvements.
        if bestParent.Fitness >= child.Fitness:
            continue
        dispaly(child)
        if child.Fitness >= optimalFitness:
            return child
        bestParent = child
更改后的 guessPassword.py 整体代码如下:
import datetime
import genetic
import unittest
class GuessPasswordTests(unittest.TestCase):
    """unittest cases that drive the genetic engine to guess a password."""

    # 'j' was previously mistyped as a second 'g', so passwords containing
    # 'j' could never be reached.
    geneSet = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!."

    def test_Hello_World(self):
        """Guess the string "Hello World!"."""
        target = "Hello World!"
        self.guess_password(target)

    def test_For_I_am_fearfully_and_wonderfully_made(self):
        """Guess a longer, 40-character sentence."""
        target = "For I am fearfully and wonderfully made."
        self.guess_password(target)

    def guess_password(self, target):
        """Search for ``target`` and assert the engine's best genes equal it.

        Args:
            target: The password string to be discovered.
        """
        startTime = datetime.datetime.now()

        # The engine only passes the guess/candidate; bind the rest here.
        def fnGetFitness(guess):
            return get_fitness(guess, target)

        def fnDisplay(candidate):
            display(candidate, startTime)

        optimalFitness = len(target)
        best = genetic.get_best(fnGetFitness, len(target), optimalFitness,
                                self.geneSet, fnDisplay)
        self.assertEqual(best.Genes, target)
def display(candidate, startime):
    """Print a candidate's genes, its fitness and the elapsed time.

    Args:
        candidate: Object exposing ``Genes`` and ``Fitness`` attributes.
        startime: ``datetime.datetime`` at which the search began.
    """
    timeDiff = datetime.datetime.now() - startime
    print("{} {} {}".format(candidate.Genes, candidate.Fitness, timeDiff))
def get_fitness(genes, target):
    """Count the positions at which ``genes`` and ``target`` agree.

    Args:
        genes: Candidate password string.
        target: The password being searched for.

    Returns:
        The number of indices where both strings hold the same character.
        ``zip`` stops at the shorter string, so extra characters are ignored.
    """
    return sum(1 for expected, actual in zip(target, genes)
               if expected == actual)
if __name__ == '__main__':
    # Hand control to unittest's command-line runner when run as a script.
    unittest.main()
6.Benchmarking
接下来为genetic增加对benchmarking(基准测试)的支持,因为它能够使你知道引擎寻找解的平均时间和标准差。修改后的 genetic.py 完整代码如下:
import random
import statistics
import time
import sys
def _generate_parent(length, geneSet, get_fitness):
    """Create a random ``Chromosome`` of ``length`` characters.

    Args:
        length: Number of characters to produce.
        geneSet: Sequence of allowed characters.
        get_fitness: Callable taking the gene string and returning its score.

    Returns:
        A ``Chromosome`` holding the generated string and its fitness.
    """
    genes = []
    while len(genes) < length:
        # random.sample cannot return more items than the population holds.
        sampleSize = min(length - len(genes), len(geneSet))
        genes.extend(random.sample(geneSet, sampleSize))
    genes = ''.join(genes)
    fitness = get_fitness(genes)
    return Chromosome(genes, fitness)
def _mutate(parent, geneSet, get_fitness):
    """Return a new ``Chromosome`` differing from ``parent`` in one position.

    Two characters are sampled from ``geneSet``. The first is used unless it
    equals the character already at the chosen position, in which case the
    second is used instead.

    Args:
        parent: ``Chromosome`` whose ``Genes`` string is to be mutated.
        geneSet: Sequence of allowed characters (at least two).
        get_fitness: Callable taking the gene string and returning its score.

    Returns:
        A ``Chromosome`` holding the mutated string and its fitness.
    """
    index = random.randrange(0, len(parent.Genes))
    childGenes = list(parent.Genes)
    newGene, alternate = random.sample(geneSet, 2)
    childGenes[index] = alternate if newGene == childGenes[index] else newGene
    genes = ''.join(childGenes)
    fitness = get_fitness(genes)
    return Chromosome(genes, fitness)
def get_best(get_fitness, targetLen, optimalFitness, geneSet, display):
    """Mutate a random candidate until its fitness reaches ``optimalFitness``.

    Args:
        get_fitness: Callable taking a gene string and returning its score.
        targetLen: Length of the gene string to generate.
        optimalFitness: Score at which the search stops.
        geneSet: Sequence of characters candidates may contain.
        display: Callable taking a ``Chromosome``; called for the initial
            candidate and for every improvement.

    Returns:
        The first ``Chromosome`` whose ``Fitness`` is at least
        ``optimalFitness``.
    """
    random.seed()
    bestParent = _generate_parent(targetLen, geneSet, get_fitness)
    display(bestParent)
    if bestParent.Fitness >= optimalFitness:
        return bestParent

    while True:
        child = _mutate(bestParent, geneSet, get_fitness)
        # Keep only strict improvements.
        if bestParent.Fitness >= child.Fitness:
            continue
        display(child)
        if child.Fitness >= optimalFitness:
            return child
        bestParent = child
class Chromosome:
    """A candidate solution bundled with its fitness score."""

    def __init__(self, genes, fitness):
        """Store the candidate's genes and the fitness computed for them.

        Args:
            genes: The candidate itself (a string in this tutorial).
            fitness: The score the fitness function returned for ``genes``.
        """
        self.Genes = genes
        self.Fitness = fitness
class Benchmark:
    """Times repeated runs of a callable and prints running statistics."""

    @staticmethod
    def run(function):
        """Call ``function`` 100 times and print mean and stdev of its run time.

        Output of ``function`` itself is suppressed by setting ``sys.stdout``
        to ``None`` for the duration of each call. A statistics row (run
        number, mean seconds, standard deviation) is printed after each of
        the first 10 runs and after every 10th run thereafter.

        Args:
            function: Zero-argument callable to benchmark.

        Raises:
            Whatever ``function`` raises; ``sys.stdout`` is restored first.
        """
        timings = []
        stdout = sys.stdout
        for i in range(100):
            sys.stdout = None
            startTime = time.time()
            try:
                function()
                seconds = time.time() - startTime
            finally:
                # Restore the real stream even when ``function`` raises;
                # otherwise every later print() would be silently discarded.
                sys.stdout = stdout
            timings.append(seconds)
            mean = statistics.mean(timings)
            if i < 10 or i % 10 == 9:
                # The first two rows (i == 0 and i == 1) report 0 for stdev.
                print("{} {:3.2f} {:3.2f}".format(
                    1 + i, mean,
                    statistics.stdev(timings, mean) if i > 1 else 0))
修改后的 guessPassword.py 完整代码如下:
import datetime
import unittest
import genetic
def get_fitness(guess, target):
    """Count the positions at which ``guess`` and ``target`` agree.

    Args:
        guess: Candidate password string.
        target: The password being searched for.

    Returns:
        The number of indices where both strings hold the same character.
        ``zip`` stops at the shorter string, so extra characters are ignored.
    """
    return sum(1 for expected, actual in zip(target, guess)
               if expected == actual)
def display(candidate, startTime):
    """Print a candidate's genes, its fitness and the elapsed time.

    Args:
        candidate: Object exposing ``Genes`` and ``Fitness`` attributes.
        startTime: ``datetime.datetime`` at which the search began.
    """
    timeDiff = datetime.datetime.now() - startTime
    print("{} {} {}".format(
        candidate.Genes, candidate.Fitness, timeDiff))
class GuessPasswordTests(unittest.TestCase):
    """Benchmarks the genetic engine on a 40-character password."""

    geneset = " abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!.,"

    # No "test" prefix, so unittest's default discovery does not run this
    # on its own; it is invoked only through test_benchmark.
    def For_I_am_fearfully_and_wonderfully_made(self):
        """Guess the sentence "For I am fearfully and wonderfully made."."""
        target = "For I am fearfully and wonderfully made."
        self.guess_password(target)

    def guess_password(self, target):
        """Search for ``target`` and assert the engine's best genes equal it.

        Args:
            target: The password string to be discovered.
        """
        startTime = datetime.datetime.now()

        # The engine only passes the genes/candidate; bind the rest here.
        def fnGetFitness(genes):
            return get_fitness(genes, target)

        def fnDisplay(candidate):
            display(candidate, startTime)

        optimalFitness = len(target)
        best = genetic.get_best(fnGetFitness, len(target), optimalFitness,
                                self.geneset, fnDisplay)
        self.assertEqual(best.Genes, target)

    def test_benchmark(self):
        """Run the long-password search through ``genetic.Benchmark.run``."""
        genetic.Benchmark.run(self.For_I_am_fearfully_and_wonderfully_made)
if __name__ == '__main__':
    # Hand control to unittest's command-line runner when run as a script.
    unittest.main()