import random
import numpy as np
from dragon.search_space.dag_encoding import AdjMatrix, fill_adj_matrix
[docs]
class DAGTwoPoint:
"""DAGTwoPoint
Implementation of a two-point crossover on an array.
A DAG-based crossover is used if some of the exchanged values are DAGs.
Parameters
----------
size: int, default=10
Maximum size a DAG may have.
Examples
----------
>>> from dragon.search_operators.crossover import DAGTwoPoint
>>> from dragon.search_space.base_variables import Constant, FloatVar, IntVar, CatVar, ArrayVar
>>> arr = ArrayVar(IntVar("Number of features", 1, 10), CatVar("Optimizer", features=['Adam', 'SGD', 'AdamMax']), FloatVar('learning rate', 0.001, 0.5), Constant('Seed', value=0))
>>> parent_1 = arr.random()
>>> parent_2 = arr.random()
>>> crossover = DAGTwoPoint(arr)
>>> print(parent_1)
[5, 'AdamMax', 0.16718361674068502, 0]
>>> print(parent_2)
[9, 'SGD', 0.28364322926906005, 0]
>>> print(crossover(parent_1, parent_2))
([5, 'AdamMax', 0.28364322926906005, 0], [9, 'SGD', 0.16718361674068502, 0])
>>> from dragon.search_space.dag_variables import HpVar, NodeVariable, EvoDagVariable
>>> from dragon.search_space.bricks import MLP, MaxPooling1D, AVGPooling1D
>>> from dragon.search_space.base_variables import DynamicBlock
>>> from dragon.search_space.bricks_variables import activation_var
>>> mlp = HpVar("Operation", Constant("MLP operation", MLP), hyperparameters={"out_channels": IntVar("out_channels", 1, 10)})
>>> pooling = HpVar("Operation", CatVar("Pooling operation", [MaxPooling1D, AVGPooling1D]), hyperparameters={"pool_size": IntVar("pool_size", 1, 5)})
>>> candidates = NodeVariable(label = "Candidates",
... combiner=CatVar("Combiner", features=['add', 'concat']),
... operation=CatVar("Candidates", [mlp, pooling]),
... activation_function=activation_var("Activation"))
>>> operations = DynamicBlock("Operations", candidates, repeat=3)
>>> dag = EvoDagVariable(label="DAG", operations=operations)
>>> arr = ArrayVar(dag, dag, Constant('Seed', value=0))
>>> parent_1 = arr.random()
>>> parent_2 = arr.random()
>>> crossover = DAGTwoPoint(arr)
>>> print(parent_1)
[NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.pooling.AVGPooling1D'> -- (hp) {'pool_size': 3} -- (activation) ELU(alpha=1.0) -- ] | MATRIX:[[0, 1], [0, 0]], NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 10} -- (activation) SiLU() -- ,
(combiner) add -- (name) <class 'dragon.search_space.bricks.pooling.MaxPooling1D'> -- (hp) {'pool_size': 4} -- (activation) Sigmoid() -- ] | MATRIX:[[0, 1, 1], [0, 0, 1], [0, 0, 0]], 0]
>>> print(parent_2)
[NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.pooling.AVGPooling1D'> -- (hp) {'pool_size': 1} -- (activation) LeakyReLU(negative_slope=0.01) -- ] | MATRIX:[[0, 1], [0, 0]], NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 9} -- (activation) ELU(alpha=1.0) -- ] | MATRIX:[[0, 1], [0, 0]], 0]
>>> print(crossover(parent_1, parent_2))
([NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.pooling.AVGPooling1D'> -- (hp) {'pool_size': 3} -- (activation) ELU(alpha=1.0) -- ] | MATRIX:[[0, 1], [0, 0]], NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 10} -- (activation) SiLU() -- ,
(combiner) add -- (name) <class 'dragon.search_space.bricks.pooling.MaxPooling1D'> -- (hp) {'pool_size': 4} -- (activation) Sigmoid() -- ] | MATRIX:[[0, 1, 1], [0, 0, 1], [0, 0, 0]], 0], [NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.pooling.AVGPooling1D'> -- (hp) {'pool_size': 1} -- (activation) LeakyReLU(negative_slope=0.01) -- ] | MATRIX:[[0, 1], [0, 0]], NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Identity() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 9} -- (activation) ELU(alpha=1.0) -- ] | MATRIX:[[0, 1], [0, 0]], 0])
"""
def __init__(self, size=10):
self.size = size
def __call__(self, ind1, ind2):
"""
Crossover implementation:
- Two crossover points `cxpoint1` and `cxpoint2` are picked randomly from the parent
- Exchange the parts between `cxpoint1` and `cxpoint2`
- For each element between those points, if one of them is an `AdjMatrix`: use the `adj_matrix_crossover` function to mix the graphs.
"""
size = min(len(ind1), len(ind2))
cxpoint1 = random.randint(1, size)
cxpoint2 = random.randint(1, size - 1)
if cxpoint2 >= cxpoint1:
cxpoint2 += 1
else: # Swap the two cx points
cxpoint1, cxpoint2 = cxpoint2, cxpoint1
ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = ind2[cxpoint1:cxpoint2], ind1[cxpoint1:cxpoint2]
for i in range(cxpoint1, cxpoint2):
if isinstance(ind1[i], AdjMatrix):
ind1[i], ind2[i] = adj_matrix_crossover(ind1[i], ind2[i], self.size)
return ind1, ind2
[docs]
def adj_matrix_crossover(p1, p2, size=10):
"""
adj_matrix_crossover(p1, p2)
DAG-based crossover
- Select the index of the operations that would be exchange in each graphs.
- Remove the corresponding lines and columns from the adjacency matrices
- Compute the index where the new nodes will be inserted
- Insert the new rows and columns within the adjacency matrices
- Make sure no nodes without incoming or outgoing connections are remaining within the matrices
- Make sure the new matrices are upper-triangular
- Recreate the list of nodes
- Create new `AdjMatrix` variables with the new nodes and matrices
Parameters
----------
p1: `AdjMatrix`
First parent
p2: `AdjMatrix
Second parent
size: int, default=10
Maximum number of nodes that an offspring graph could have
Returns
----------
f1: `AdjMatrix`
First offspring
f2: `AdjMatrix`
Second offspring
Examples
----------
>>> import numpy as np
>>> from dragon.search_space.dag_variables import AdjMatrix
>>> import torch.nn as nn
>>> from dragon.search_space.bricks import MLP, Identity
>>> from dragon.search_space.dag_variables import Node
>>> node_1 = Node(combiner="add", operation=MLP, hp={"out_channels": 10}, activation=nn.ReLU())
>>> node_2 = Node(combiner="add", operation=MLP, hp={"out_channels": 5}, activation=nn.ReLU())
>>> node_3 = Node(combiner="concat", operation=Identity, hp={}, activation=nn.Softmax())
>>> op1 = [node_1, node_2, node_3]
>>> op2 = [node_1, node_3]
>>> m1 = np.array([[0, 1, 1],
... [0, 0, 1],
... [0, 0, 0]])
>>> m2 = np.array([[0, 1],
... [0, 0]])
>>> print(AdjMatrix(op1, m1))
NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 10} -- (activation) ReLU() -- ,
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 5} -- (activation) ReLU() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Softmax(dim=None) -- ] | MATRIX:[[0, 1, 1], [0, 0, 1], [0, 0, 0]]
>>> print(AdjMatrix(op2, m2))
NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 10} -- (activation) ReLU() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Softmax(dim=None) -- ] | MATRIX:[[0, 1], [0, 0]]
>>> f1, f2 = adj_matrix_crossover(AdjMatrix(op1, m1), AdjMatrix(op2, m2))
>>> print(f1)
NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 10} -- (activation) ReLU() -- ,
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 5} -- (activation) ReLU() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Softmax(dim=None) -- ] | MATRIX:[[0, 1, 0], [0, 0, 1], [0, 0, 0]]
>>> print(f2)
NODES: [
(combiner) add -- (name) <class 'dragon.search_space.bricks.basics.MLP'> -- (hp) {'out_channels': 10} -- (activation) ReLU() -- ,
(combiner) concat -- (name) <class 'dragon.search_space.bricks.basics.Identity'> -- (hp) {} -- (activation) Softmax(dim=None) -- ] | MATRIX:[[0, 1], [0, 0]]
"""
crossed = False
while not crossed:
op1 = p1.operations
op2 = p2.operations
m1 = p1.matrix
m2 = p2.matrix
# Randomly select the points that would be exchange from each graph
s1 = list(set(np.random.choice(range(1, len(op1)), size=len(op1) - 1)))
s2 = list(set(np.random.choice(range(1, len(op2)), size=len(op2) - 1)))
s1.sort()
s2.sort()
# Remove the corresponding points from the adjacency matrices
it = 0
for i1 in s1:
m1 = np.delete(m1, i1 - it, axis=0)
m1 = np.delete(m1, i1 - it, axis=1)
it+=1
it = 0
for i2 in s2:
m2 = np.delete(m2, i2 - it, axis=0)
m2 = np.delete(m2, i2 - it, axis=1)
it+=1
# Compute the index where the new nodes will be inserted
old_s1 = np.array(list(set(range(len(op1))) - set(s1)))
old_s2 = np.array(list(set(range(len(op2))) - set(s2)))
new_s1 = [np.argmin(np.abs(old_s2 - s1[0]))]
if new_s1[0] == old_s2[new_s1[0]]:
new_s1[0] += 1
for i1 in range(1, len(s1)):
new_s1.append(min(s1[i1] - s1[i1-1] + new_s1[i1-1], len(old_s2) + len(new_s1)))
new_s2 = [np.argmin(np.abs(old_s1 - s2[0]))]
if new_s2[0] == old_s1[new_s2[0]]:
new_s2[0] += 1
for i2 in range(1, len(s2)):
new_s2.append(min(s2[i2] - s2[i2 - 1] + new_s2[i2-1], len(old_s1) + len(new_s2)))
# Insert the new nodes
m1 = np.insert(m1, np.clip(new_s2, 0, m1.shape[0]), 0, axis=0)
m1 = np.insert(m1, np.clip(new_s2, 0, m1.shape[1]), 0, axis=1)
m2 = np.insert(m2, np.clip(new_s1, 0, m2.shape[0]), 0, axis=0)
m2 = np.insert(m2, np.clip(new_s1, 0, m2.shape[1]), 0, axis=1)
# Make sure no nodes without incoming or outgoing connections are remaining within the matrices
for i in range(len(s1)):
diff = new_s1[i] - s1[i]
if diff >= 0:
length = min(m2.shape[0] - diff, p1.matrix.shape[0])
m2[diff:diff+length, new_s1[i]] = p1.matrix[:length, s1[i]]
m2[new_s1[i], diff:diff+length] = p1.matrix[s1[i], :length]
if diff < 0:
length = min(m2.shape[0], p1.matrix.shape[0]+diff)
m2[:length, new_s1[i]] = p1.matrix[-diff:-diff+length, s1[i]]
m2[new_s1[i], :length] = p1.matrix[s1[i], -diff:-diff+length]
for i in range(len(s2)):
diff = new_s2[i] - s2[i]
if diff >= 0:
length = min(m1.shape[0] - diff, p2.matrix.shape[0])
m1[diff:diff+length, new_s2[i]] = p2.matrix[:length, s2[i]]
m1[new_s2[i], diff:diff+length] = p2.matrix[s2[i], :length]
if diff < 0:
length = min(m1.shape[0], p2.matrix.shape[0]+diff)
m1[:length, new_s2[i]] = p2.matrix[-diff:-diff + length, s2[i]]
m1[new_s2[i], :length] = p2.matrix[s2[i], -diff:-diff + length]
# Make sure the new matrices are upper-triangular
m1 = np.triu(m1, k=1)
m1 = fill_adj_matrix(m1)
m2 = np.triu(m2, k=1)
m2 = fill_adj_matrix(m2)
# Recreate the list of nodes
op1 = [op1[i] for i in range(len(op1)) if i not in s1]
op2 = [op2[i] for i in range(len(op2)) if i not in s2]
for i in range(len(new_s1)):
op2 = op2[:new_s1[i]] + [p1.operations[s1[i]]] + op2[new_s1[i]:]
for i in range(len(new_s2)):
op1 = op1[:new_s2[i]] + [p2.operations[s2[i]]] + op1[new_s2[i]:]
if max(len(op1), len(op2)) <= size:
crossed = True
for j in range(1, len(op1)):
if hasattr(op1[j], "modification"):
if hasattr(op1[j], "input_shape"):
input_shapes = [op1[i].output_shape for i in range(j) if m1[i, j] == 1]
op1[j].modification(input_shapes=input_shapes)
for j in range(1, len(op2)):
if hasattr(op2[j], "modification"):
if hasattr(op2[j], "input_shape"):
input_shapes = [op2[i].output_shape for i in range(j) if m2[i, j] == 1]
op2[j].modification(input_shapes=input_shapes)
# Create new `AdjMatrix` variables with the new nodes and matrices
return AdjMatrix(op1, m1), AdjMatrix(op2, m2)