Using DEAP the evolutionary computation framework¶
Download: example_19_using_deap.py
Less overhead version: example_19b_using_deap_less_overhead.py
Less overhead and post-processing version: example_19c_using_deap_with_post_processing.py
This shows an example of how to use pypet in combination with the evolutionary computation framework DEAP.
Note storing during a single run as in the example adds a lot of overhead and only makes sense if your fitness evaluation takes quite long. There’s also an example with less overhead at the middle section.
Moreover, if you are interested in using DEAP in a post-processing scheme (Adding Post-Processing), at the very bottom you can find an example using post-processing.
""" An example showing how to use DEAP optimization (http://pythonhosted.org/deap/).
DEAP can be combined with *pypet* to keep track of all the data and the full trajectory
of points created by a genetic algorithm.
Note that *pypet* adds quite some overhead to the optimization algorithm.
Using *pypet* in combination with DEAP is only suitable in case the
evaluation of an individual (i.e. a single run) takes a considerable amount of time
(i.e. 1 second or longer) and, thus, pypet's overhead is only marginal.
This *OneMax* problem serves only as an example and is not a well suited problem.
Suitable would be the genetic optimization of neural networks where running and evaluating
the network may take a few seconds.
"""
__author__ = 'Robert Meyer'
import random
from deap import base
from deap import creator
from deap import tools
from pypet import Environment, cartesian_product
def eval_one_max(traj, individual):
"""The fitness function"""
traj.f_add_result('$set.$.individual', list(individual))
fitness = sum(individual)
traj.f_add_result('$set.$.fitness', fitness)
traj.f_store() # We switched off automatic storing, so we need to store manually
return (fitness,) # DEAP wants a tuple here!
def main():
env = Environment(trajectory='deap',
overwrite_file=True,
multiproc=True,
ncores=4,
log_level=50, # only display ERRORS
log_stdout=False,
wrap_mode='QUEUE',
use_pool=True,
freeze_input=True, # To avoid copying the trajectory for each run
automatic_storing=False, # This is important, we want to run several
# batches with the Environment so we want to avoid re-storing all
# data over and over again to save some overhead.
comment='Using pypet and DEAP'
)
traj = env.traj
# ------- Add parameters ------- #
traj.f_add_parameter('popsize', 100, comment='Population size')
traj.f_add_parameter('CXPB', 0.5, comment='Crossover term')
traj.f_add_parameter('MUTPB', 0.2, comment='Mutation probability')
traj.f_add_parameter('NGEN', 20, comment='Number of generations')
traj.f_add_parameter('generation', 0, comment='Current generation')
traj.f_add_parameter('ind_idx', 0, comment='Index of individual')
traj.f_add_parameter('ind_len', 50, comment='Length of individual')
traj.f_add_parameter('indpb', 0.005, comment='Mutation parameter')
traj.f_add_parameter('tournsize', 3, comment='Selection parameter')
traj.f_add_parameter('seed', 42, comment='Seed for RNG')
# ------- Create and register functions with DEAP ------- #
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_bool", random.randint, 0, 1)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
toolbox.attr_bool, traj.ind_len)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Operator registering
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=traj.indpb)
toolbox.register("select", tools.selTournament, tournsize=traj.tournsize)
toolbox.register("evaluate", eval_one_max)
toolbox.register("map", env.run_map) # Important to use `run_map` instead of `run`
# because we pass an iterable argument to our fitness function
# ------- Initialize Population -------- #
random.seed(traj.seed)
pop = toolbox.population(n=traj.popsize)
CXPB, MUTPB, NGEN = traj.CXPB, traj.MUTPB, traj.NGEN
print("Start of evolution")
for g in range(traj.NGEN):
# ------- Evaluate current generation -------- #
print("-- Generation %i --" % g)
# Determine individuals that need to be evaluated
eval_pop = [ind for ind in pop if not ind.fitness.valid]
# Add as many explored runs as individuals that need to be evaluated
traj.f_expand(cartesian_product({'generation': [g], 'ind_idx': range(len(eval_pop))}))
fitnesses_results = toolbox.map(toolbox.evaluate, eval_pop) # evaluate using our fitness function
# fitnesses_results is a list of
# a nested tuple: [(run_idx, (fitness,)), ...]
for idx, result in enumerate(fitnesses_results):
# Update fitnesses_results
_, fitness = result # The environment returns tuples: [(run_idx, run), ...]
eval_pop[idx].fitness.values = fitness
print(" Evaluated %i individuals" % len(fitnesses_results))
# Gather all the fitnesses_results in one list and print the stats
fits = [ind.fitness.values[0] for ind in pop]
length = len(pop)
mean = sum(fits) / length
sum2 = sum(x*x for x in fits)
std = abs(sum2 / length - mean**2)**0.5
print(" Min %s" % min(fits))
print(" Max %s" % max(fits))
print(" Avg %s" % mean)
print(" Std %s" % std)
# ------- Create the next generation by crossover and mutation -------- #
if g < traj.NGEN -1: # not necessary for the last generation
# Select the next generation individuals
offspring = toolbox.select(pop, len(pop))
# Clone the selected individuals
offspring = list(map(toolbox.clone, offspring))
# Apply crossover and mutation on the offspring
for child1, child2 in zip(offspring[::2], offspring[1::2]):
if random.random() < CXPB:
toolbox.mate(child1, child2)
del child1.fitness.values
del child2.fitness.values
for mutant in offspring:
if random.random() < MUTPB:
toolbox.mutate(mutant)
del mutant.fitness.values
# The population is entirely replaced by the offspring
pop[:] = offspring
print("-- End of (successful) evolution --")
best_ind = tools.selBest(pop, 1)[0]
print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values))
traj.f_store() # We switched off automatic storing, so we need to store manually
if __name__ == "__main__":
main()
Less Overhead Version¶
""" An example showing how to use DEAP optimization (http://pythonhosted.org/deap/).
DEAP can be combined with *pypet* to keep track of all the data and the full trajectory
of points created by a genetic algorithm.
This is a version with less overhead added by *pypet*.
We avoid a lot of overhead by not storing data during a single run.
As a consequence we also have all fitnesses in a single list in the end
and not scattered across the hdf5 file in sub-groups.
Multiprocessing is also causing some overhead since `eval_one_max` is not heavy
enough to justify the overhead of sending data to other processes.
"""
__author__ = 'Robert Meyer'
import random
from deap import base
from deap import creator
from deap import tools
from pypet import Environment, cartesian_product
def eval_one_max(traj):
"""The fitness function"""
fitness = sum(traj.individual)
return (fitness,) # DEAP wants a tuple here!
def main():
env = Environment(trajectory='faster_deap',
overwrite_file=True,
multiproc=False,
log_stdout=False,
log_level=50, # only display ERRORS
automatic_storing=False, # This is important, we want to run several
# batches with the Environment so we want to avoid re-storing all
# data over and over again to save some overhead.
comment='Using pypet and DEAP with less overhead'
)
traj = env.traj
# ------- Add parameters ------- #
traj.f_add_parameter('popsize', 100, comment='Population size')
traj.f_add_parameter('CXPB', 0.5, comment='Crossover term')
traj.f_add_parameter('MUTPB', 0.2, comment='Mutation probability')
traj.f_add_parameter('NGEN', 20, comment='Number of generations')
traj.f_add_parameter('generation', 0, comment='Current generation')
traj.f_add_parameter('ind_idx', 0, comment='Index of individual')
traj.f_add_parameter('ind_len', 50, comment='Length of individual')
traj.f_add_parameter('indpb', 0.005, comment='Mutation parameter')
traj.f_add_parameter('tournsize', 3, comment='Selection parameter')
traj.f_add_parameter('seed', 42, comment='Seed for RNG')
# Placeholders for individuals and results that are about to be explored
traj.f_add_derived_parameter('individual', [0 for x in range(traj.ind_len)],
'An indivudal of the population')
traj.f_add_result('fitnesses', [], comment='Fitnesses of all individuals')
# ------- Create and register functions with DEAP ------- #
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_bool", random.randint, 0, 1)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
toolbox.attr_bool, traj.ind_len)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Operator registering
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=traj.indpb)
toolbox.register("select", tools.selTournament, tournsize=traj.tournsize)
toolbox.register("evaluate", eval_one_max)
toolbox.register("map", env.run) # We pass the individual as part of traj, so
# we no longer need the `run_map` but just `run`
# ------- Initialize Population -------- #
random.seed(traj.seed)
pop = toolbox.population(n=traj.popsize)
CXPB, MUTPB, NGEN = traj.CXPB, traj.MUTPB, traj.NGEN
print("Start of evolution")
for g in range(traj.NGEN):
# ------- Evaluate current generation -------- #
print("-- Generation %i --" % g)
# Determine individuals that need to be evaluated
eval_pop = [ind for ind in pop if not ind.fitness.valid]
# Add as many explored runs as individuals that need to be evaluated.
# Furthermore, add the individuals as explored parameters.
# We need to convert them to lists or write our own custom IndividualParameter ;-)
# Note the second argument to `cartesian_product`:
# This is for only having the cartesian product
# between ``generation x (ind_idx AND individual)``, so that every individual has just one
# unique index within a generation.
traj.f_expand(cartesian_product({'generation': [g],
'ind_idx': range(len(eval_pop)),
'individual':[list(x) for x in eval_pop]},
[('ind_idx', 'individual'),'generation']))
fitnesses_results = toolbox.map(toolbox.evaluate) # evaluate using our fitness function
# fitnesses_results is a list of
# a nested tuple: [(run_idx, (fitness,)), ...]
for idx, result in enumerate(fitnesses_results):
# Update fitnesses
_, fitness = result # The environment returns tuples: [(run_idx, run), ...]
eval_pop[idx].fitness.values = fitness
# Append all fitnesses (note that DEAP fitnesses are tuples of length 1
# but we are only interested in the value)
traj.fitnesses.extend([x.fitness.values[0] for x in eval_pop])
print(" Evaluated %i individuals" % len(fitnesses_results))
# Gather all the fitnesses in one list and print the stats
fits = [ind.fitness.values[0] for ind in pop]
length = len(pop)
mean = sum(fits) / length
sum2 = sum(x*x for x in fits)
std = abs(sum2 / length - mean**2)**0.5
print(" Min %s" % min(fits))
print(" Max %s" % max(fits))
print(" Avg %s" % mean)
print(" Std %s" % std)
# ------- Create the next generation by crossover and mutation -------- #
if g < traj.NGEN -1: # not necessary for the last generation
# Select the next generation individuals
offspring = toolbox.select(pop, len(pop))
# Clone the selected individuals
offspring = list(map(toolbox.clone, offspring))
# Apply crossover and mutation on the offspring
for child1, child2 in zip(offspring[::2], offspring[1::2]):
if random.random() < CXPB:
toolbox.mate(child1, child2)
del child1.fitness.values
del child2.fitness.values
for mutant in offspring:
if random.random() < MUTPB:
toolbox.mutate(mutant)
del mutant.fitness.values
# The population is entirely replaced by the offspring
pop[:] = offspring
print("-- End of (successful) evolution --")
best_ind = tools.selBest(pop, 1)[0]
print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values))
traj.f_store() # We switched off automatic storing, so we need to store manually
if __name__ == "__main__":
main()
Less Overhead and Post-Processing Version¶
""" An example showing how to use DEAP optimization (http://pythonhosted.org/deap/).
DEAP can be combined with *pypet* to keep track of all the data and the full trajectory
of points created by a genetic algorithm.
This is a version with less overhead added by *pypet*
and using the post processing functionality.
"""
__author__ = 'Robert Meyer'
import random
from deap import base
from deap import creator
from deap import tools
from pypet import Environment, cartesian_product
def eval_one_max(traj):
"""The fitness function"""
fitness = sum(traj.individual)
return (fitness,) # DEAP wants a tuple here!
class Postprocessing(object):
"""Callable that implements postprocessing.
It is realised as an object to be able to keep track of current generation
and the population.
"""
def __init__(self, pop, eval_pop, toolbox, g=0):
self.g = g # the current generation
self.pop = pop # the current population
self.toolbox = toolbox # the DEAP toolbox
self.eval_pop = eval_pop # the currently evaluated population
def __call__(self, traj, fitnesses_results):
"""Implements post-processing"""
CXPB, MUTPB, NGEN = traj.CXPB, traj.MUTPB, traj.NGEN
while fitnesses_results:
result = fitnesses_results.pop()
# Update fitnesses
run_index, fitness = result # The environment returns tuples: [(run_idx, run), ...]
# We need to convert the current run index into an ind_idx
# (index of individual within one generation)
traj.v_idx = run_index
ind_index = traj.par.ind_idx
# Use the ind_idx to update the fintess
self.eval_pop[ind_index].fitness.values = fitness
traj.v_idx = -1 # set the trajectory back to default
# Append all fitnesses (note that DEAP fitnesses are tuples of length 1
# but we are only interested in the value)
traj.fitnesses.extend([x.fitness.values[0] for x in self.eval_pop])
print(" Evaluated %i individuals" % len(fitnesses_results))
# Gather all the fitnesses in one list and print the stats
fits = [ind.fitness.values[0] for ind in self.pop]
length = len(self.pop)
mean = sum(fits) / length
sum2 = sum(x*x for x in fits)
std = abs(sum2 / length - mean**2)**0.5
print(" Min %s" % min(fits))
print(" Max %s" % max(fits))
print(" Avg %s" % mean)
print(" Std %s" % std)
# ------- Create the next generation by crossover and mutation -------- #
if self.g < traj.NGEN -1: # not necessary for the last generation
# Select the next generation individuals
offspring = self.toolbox.select(self.pop, len(self.pop))
# Clone the selected individuals
offspring = list(map(self.toolbox.clone, offspring))
# Apply crossover and mutation on the offspring
for child1, child2 in zip(offspring[::2], offspring[1::2]):
if random.random() < CXPB:
self.toolbox.mate(child1, child2)
del child1.fitness.values
del child2.fitness.values
for mutant in offspring:
if random.random() < MUTPB:
self.toolbox.mutate(mutant)
del mutant.fitness.values
# The population is entirely replaced by the offspring
self.pop[:] = offspring
self.eval_pop = [ind for ind in self.pop if not ind.fitness.valid]
# Add as many explored runs as individuals that need to be evaluated.
# Furthermore, add the individuals as explored parameters.
# We need to convert them to lists or write our own custom IndividualParameter ;-)
# Note the second argument to `cartesian_product`:
# This is for only having the cartesian product
# between ``generation x (ind_idx AND individual)``,
# so that every individual has just one
# unique index within a generation.
self.g += 1 # Update generation counter
traj.f_expand(cartesian_product({'generation': [self.g],
'ind_idx': range(len(self.eval_pop)),
'individual':[list(x) for x in self.eval_pop]},
[('ind_idx', 'individual'),'generation']))
def main():
env = Environment(trajectory='postproc_deap',
overwrite_file=True,
log_stdout=False,
log_level=50, # only display ERRORS
automatic_storing=True, # Since we us post-processing, we
# can safely enable automatic storing, because everything will
# only be stored once at the very end of all runs.
comment='Using pypet and DEAP with less overhead'
)
traj = env.traj
# ------- Add parameters ------- #
traj.f_add_parameter('popsize', 100, comment='Population size')
traj.f_add_parameter('CXPB', 0.5, comment='Crossover term')
traj.f_add_parameter('MUTPB', 0.2, comment='Mutation probability')
traj.f_add_parameter('NGEN', 20, comment='Number of generations')
traj.f_add_parameter('generation', 0, comment='Current generation')
traj.f_add_parameter('ind_idx', 0, comment='Index of individual')
traj.f_add_parameter('ind_len', 50, comment='Length of individual')
traj.f_add_parameter('indpb', 0.005, comment='Mutation parameter')
traj.f_add_parameter('tournsize', 3, comment='Selection parameter')
traj.f_add_parameter('seed', 42, comment='Seed for RNG')
# Placeholders for individuals and results that are about to be explored
traj.f_add_derived_parameter('individual', [0 for x in range(traj.ind_len)],
'An indivudal of the population')
traj.f_add_result('fitnesses', [], comment='Fitnesses of all individuals')
# ------- Create and register functions with DEAP ------- #
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)
toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_bool", random.randint, 0, 1)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
toolbox.attr_bool, traj.ind_len)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Operator registering
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=traj.indpb)
toolbox.register("select", tools.selTournament, tournsize=traj.tournsize)
# ------- Initialize Population and Trajectory -------- #
random.seed(traj.seed)
pop = toolbox.population(n=traj.popsize)
eval_pop = [ind for ind in pop if not ind.fitness.valid]
traj.f_explore(cartesian_product({'generation': [0],
'ind_idx': range(len(eval_pop)),
'individual':[list(x) for x in eval_pop]},
[('ind_idx', 'individual'),'generation']))
# ----------- Add postprocessing ------------------ #
postproc = Postprocessing(pop, eval_pop, toolbox) # Add links to important structures
env.add_postprocessing(postproc)
# ------------ Run applying post-processing ---------- #
env.run(eval_one_max)
# ------------ Finished all runs and print result --------------- #
print("-- End of (successful) evolution --")
best_ind = tools.selBest(pop, 1)[0]
print("Best individual is %s, %s" % (best_ind, best_ind.fitness.values))
if __name__ == "__main__":
main()