109 changes: 44 additions & 65 deletions exploration/pareto.py
@@ -5,13 +5,12 @@
import heapq
import numpy as np
from eden.util import timeit
### from graphlearn.local_substitution_graph_grammar import LocalSubstitutionGraphGrammar as lsggold ########## TMP
from graphlearn.cipcorevector import LsggCoreVec as lsgg
from toolz.curried import compose, map, concat
from exploration.pareto_funcs import _manage_int_or_float
logger = logging.getLogger(__name__)
import structout as so
from exploration import pareto_funcs as paretof, cost_estimator as costs
from exploration import pareto_options, pareto_funcs as paretof, cost_estimator as costs
from extensions import lsggscramble as lsggs
from sklearn.metrics.pairwise import euclidean_distances

@@ -223,39 +222,43 @@ def optimize(self, graphs):

def optimize_step(self, graphs):
# filter, expand, check duplicates
costs = self.get_costs(graphs)
status = self.checkstatus(costs, graphs)
step_start_time = time.time()
graphlen_start = len(graphs)
graphs, status = self.filter_by_cost(graphs)
if status: return [],True,None
graphs = self.filter_by_cost(costs, graphs)
graphlen_filter = len(graphs)
logger.log(10, f"cost_filter: Got {graphlen_start} graphs, reduced to {graphlen_filter} ({time.time()-step_start_time})")
num_graphs = len(graphs)
if self.grammar.cipselector == new_cipselector0: ### SPECIAL CASE
logger.log(10, "USING CIPSELECTOR 0")
graphs = self._expand_neighbors2(graphs)
else:
graphs = self._expand_neighbors(graphs)
avg_productions = len(graphs)/num_graphs
logger.log(10, f"Average productions per graph: {avg_productions}")
graphlen_expand = len(graphs)
avg_productions = graphlen_expand/graphlen_filter
logger.log(10, f"graph generation: Got {avg_productions} per graph. ({time.time()-step_start_time})")
graphs = self.duplicate_rm(graphs)
logger.log(10, f"duplicate_rm: {graphlen_expand} -> {len(graphs)} graphs. ({time.time()-step_start_time})")
return graphs, status, avg_productions



def filter_by_cost(self,costs,graphs):
def filter_by_cost(self,graphs):
"""expand "keepgraphs" graphs, divided between top graphs in everything
and pareto front, discard rest"""
timenow=time.time()
in_count = len(graphs)
keepgraphs = self.keepgraphs

if True: # Greedy cost calculation doesn't work at the moment... self.pareto_option != 'greedy':
costs = self.get_costs(graphs)
status = self.checkstatus(costs, graphs)
if status:
# Some graph has distance == 0
return graphs, True

if in_count <= self.keepgraphs:
if len(graphs) <= self.keepgraphs:
# Only a few graphs remain, so just return all of them.
logger.debug('cost_filter: keep all %d graphs' % in_count)
return graphs
logger.log(10, "cost_filter: keep all graphs")
return graphs, False

elif self.pareto_option == "random":
# Return randomly selected graphs without any application of pareto.
res = random.sample(graphs, keepgraphs)

## elif self.prefilter_kick!=0:
## # DELETE THE 25% worst in each category
## assert False
@@ -264,45 +267,37 @@ def filter_by_cost(self,costs,graphs):
## keep = [i for i in range(len(graphs)) if i not in trash]
## graphs = [graphs[i] for i in keep]
## costs = costs[keep]

elif self.pareto_option == 'greedy':
# Return graphs with the lowest euclidean distance to the target vector
distances = []
for g in graphs:
distances.append(euclidean_distances(self.target_graph_vector, vertex_vec(g, self.decomposer).sum(axis=0))[0][0])
ranked_distances = np.argsort(distances)[:keepgraphs]
res = [graphs[i] for i in ranked_distances]
return pareto_options.greedy(graphs, self.target_graph_vector, self.decomposer, keepgraphs)

elif self.pareto_option == "random":
# Return randomly selected graphs without any application of pareto.
return random.sample(graphs, keepgraphs), False

elif self.pareto_option == "default":
# Take best graphs from estimators and pareto front
costs_ranked = np.argsort(costs,axis=0)[:int(keepgraphs/6),[0,1,3]]
want , counts = np.unique(costs_ranked,return_counts=True)
res = [graphs[idd] for idd,count in zip( want,counts) if count > 0 ]
dontwant = [i for i in range(len(graphs)) if i not in want]
restgraphs = [graphs[i] for i in dontwant]
restcosts = costs[dontwant][:,[0,1,2]]
paretoselectedgraphs = paretof._pareto_set(restgraphs, restcosts)
random.shuffle(paretoselectedgraphs)
res += paretoselectedgraphs[:int(keepgraphs/2)]
return pareto_options.default(graphs, costs, keepgraphs), False

elif self.pareto_option == "paretogreed":
# 1. choose pareto graphs
# 2. new score is the average rank over all costs
# 3. choose k best of those
graphs, costs = paretof._pareto_set(graphs, costs,return_costs=True)
costs_ranked = np.argsort(costs,axis=0).sum(axis=1)
choosegr = np.argsort(costs_ranked)
res = [graphs[x] for x in choosegr[:keepgraphs]]
else:
paretoselectedgraphs = paretof._pareto_set(graphs, costs)
random.shuffle(paretoselectedgraphs)
return pareto_options.paretogreed(graphs, costs, keepgraphs), False

paretoselectedgraphs = paretof._pareto_set(graphs, costs)
random.shuffle(paretoselectedgraphs)

if self.pareto_option == "pareto_only":
# Return only graphs from the pareto front
res = paretoselectedgraphs[:keepgraphs]

elif self.pareto_option == "all":
# Return ALL graphs from the pareto front
res = paretoselectedgraphs
if self.pareto_option == "pareto_only":
# Return only graphs from the pareto front
return paretoselectedgraphs[:keepgraphs], False

elif self.pareto_option == "all":
# Return ALL graphs from the pareto front
return paretoselectedgraphs, False
else:
raise ValueError("Invalid Pareto Option")

## # DEBUG TO SHOW THE REAL DISTANCE
## if self.cheat:
@@ -320,16 +315,10 @@ def filter_by_cost(self,costs,graphs):
## from util import util
## util.dumpfile(graphs[costs_ranked[stuff][0]],"gr")
## print ("graph dumped")
logger.log(10, f"cost_filter: got {in_count} graphs, reduced to {len(res)} (%.2fs)"%(time.time()-timenow))

return res


def duplicate_rm(self,graphs):
timenow=time.time()
count = len(graphs)
graphs = list(self._duplicate_rm(graphs))
logger.debug("duplicate_rm: %d -> %d graphs (%.2fs)" % (count, len(graphs), time.time()-timenow))
return graphs

def _duplicate_rm(self,graphs):
@@ -360,7 +349,7 @@ def get_costs(self, graphs):
for i,e in enumerate(costs[:,2]):
nucol[i,2] = resdic[e]
costs = np.hstack((costs, np.sum(nucol,axis =1).reshape(-1,1)))
logger.debug("costs: best dist: %f (%.2fs)" % (np.min(costs[:,0]) ,time.time()-timenow))
logger.log(10, f"costs: best dist: {np.min(costs[:,0])} ({time.time()-timenow})")
return costs

def _get_neighbors(self, graph):
@@ -373,17 +362,14 @@ def _get_neighbors(self, graph):
return neighs

def _expand_neighbors(self, graphs):
timenow = time.time()
global _decomposer ##### Stupid hack but I don't know how else to allow lambda functions in multiprocessing
_decomposer = self.decomposer #####
if self.multiproc>1:
with multiprocessing.Pool(self.multiproc) as p:
res = list(concat(p.map(self._get_neighbors,graphs)))
logger.debug("graph generation: %.2fs" % (time.time()-timenow))
return res
else:
res = list(concat(map(self._get_neighbors,graphs)))
logger.debug("graph generation: %.2fs" % (time.time()-timenow))
return res


Expand All @@ -398,14 +384,11 @@ def _get_score_substitution(self, graph): #

def _expand_neighbors2(self, graphs): #
"""Only used with Cipselector Option 0. Replaces _expand_neighbors"""
timenow = time.time()
if self.multiproc>1:
with multiprocessing.Pool(self.multiproc) as p:
res = list(concat(p.map(self._get_score_substitution,graphs)))
logger.debug("graph generation: %.2fs" % (time.time()-timenow))
else:
res = list(concat(map(self._get_score_substitution,graphs)))
logger.debug("graph generation: %.2fs" % (time.time()-timenow))
res.sort(reverse=True, key=lambda a: a[0])
counter = 0
grlist = []
@@ -499,10 +482,6 @@ def __init__(
self.output_k_best = output_k_best
self.decomposer = decomposer
self.grammar = lsgg_size_hack(radii=core_sizes, thickness=context_size, core_vec_decomposer=decomposer, cipselector=cipselector, nodelevel_radius_and_thickness=True) #cip_root_all=False, half_step_distance=True)
### self.grammar = lsggold(nodelevel_radius_and_thickness=True) #cip_root_all=False, half_step_distance=True)
## self.grammar.radii = core_sizes #self.grammar.set_core_size(core_sizes)
## self.grammar.thickness = context_size #self.grammar.decomposition_args['thickness_list'] = [context_size]
#self.grammar.set_min_count(min_count) interfacecount 1 makes no sense
self.grammar.filter_min_cip = min_count #self.grammar.filter_args['min_cip_count'] = min_count
self.optiopts = kwargs
self.cs2cs = cs2cs
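For orientation, here is a minimal sketch of the refactored control flow, not the actual implementation: under the new contract, filter_by_cost computes the costs itself and returns a (graphs, done) pair, so optimize_step can exit as soon as a distance of 0 signals a successful reconstruction.

    # Illustrative sketch only; simplified from the diff above.
    def optimize_step(self, graphs):
        graphs, done = self.filter_by_cost(graphs)  # costs computed inside
        if done:                                    # some graph reached the target
            return [], True, None
        kept = len(graphs)
        graphs = self._expand_neighbors(graphs)     # apply grammar productions
        avg_productions = len(graphs) / kept
        graphs = self.duplicate_rm(graphs)          # remove duplicate graphs
        return graphs, False, avg_productions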
53 changes: 53 additions & 0 deletions exploration/pareto_options.py
@@ -0,0 +1,53 @@
import exploration.pareto_funcs as paretof
from graphlearn.cipcorevector import vertex_vec
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import random


def greedy(graphs, target, decomposer, keepgraphs):
"""
Return graphs with the lowest euclidean distance to the target vector.
Also returns whether one of the distances equals 0.
"""
distances = []
#### distances = euclidean_distances(target, vertex_vec(graphs, decomposer))
for g in graphs:
distances.append(euclidean_distances(target, vertex_vec(g, decomposer).sum(axis=0))[0][0])
if min(distances) == 0:
return graphs, True
ranked_distances = np.argsort(distances)[:keepgraphs]
res = [graphs[i] for i in ranked_distances]
if distances[ranked_distances[0]] < 0.000001:
## => At least 1 distance is 0 => Successful reconstruction
return res, True
return res, False


def default(graphs, costs, keepgraphs):
"""
Take best graphs from estimators and pareto front.
"""
costs_ranked = np.argsort(costs,axis=0)[:int(keepgraphs/6),[0,1,3]]
want , counts = np.unique(costs_ranked,return_counts=True)
res = [graphs[idd] for idd,count in zip( want,counts) if count > 0 ]
dontwant = [i for i in range(len(graphs)) if i not in want]
restgraphs = [graphs[i] for i in dontwant]
restcosts = costs[dontwant][:,[0,1,2]]
paretoselectedgraphs = paretof._pareto_set(restgraphs, restcosts)
random.shuffle(paretoselectedgraphs)
res += paretoselectedgraphs[:int(keepgraphs/2)]
return res


def paretogreed(graphs, costs, keepgraphs):
"""
1. choose pareto graphs
2. new score is the average rank over all costs
3. choose k best of those
"""
graphs, costs = paretof._pareto_set(graphs, costs,return_costs=True)
costs_ranked = np.argsort(costs,axis=0).sum(axis=1)
choosegr = np.argsort(costs_ranked)
res = [graphs[x] for x in choosegr[:keepgraphs]]
return res
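A small usage sketch of the new module, with toy stand-ins: paretogreed only indexes into graphs, so any sequence works for illustration, while a real caller passes networkx graphs together with the (n, 4) cost matrix produced by get_costs().

    import numpy as np
    from exploration import pareto_options

    # Toy data: 100 "graphs" (only indexed, never inspected here) and a
    # cost matrix shaped like the output of get_costs().
    rng = np.random.default_rng(0)
    graphs = list(range(100))
    costs = rng.random((100, 4))

    keep = pareto_options.paretogreed(graphs, costs, keepgraphs=30)
    print(len(keep))  # at most 30: best average cost ranks on the pareto front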
15 changes: 10 additions & 5 deletions reconstruct.py
@@ -45,7 +45,7 @@ def maketasks(params):
## OPTIONS FOR GRAPHS
##########################################

EXPERIMENT_REPEATS = 50 #### CHANGE THIS BACK TO 100! 50 only for chemsets
EXPERIMENT_REPEATS = 50
# 1. param dict

params_graphs = {
@@ -117,24 +117,27 @@ def maketasks(params):
# Pareto Option "default": (3*5 best graphs for each category + 15 pareto front)
# Pareto Option "random": (No pareto front and no 3*5 best graphs. Just take 30 random graphs total)
# Pareto Option "greedy": (Instead of using the pareto front, take graphs with the lowest direct distance to the target)
# Pareto Option "paretogreed": (Greedy approach for pareto front)
# Pareto Option "pareto_only": (Instead of using the 3*5 best graphs it takes double the graphs from the pareto front.
# Pareto Option: "all": (Takes EVERY graph from the pareto front)
parser = argparse.ArgumentParser()
parser.add_argument('--core_sizes', nargs='*', type=int, default=[0,1,2],
help='Core sizes/Radii')
parser.add_argument('--context_size', nargs=1, type=float, default=[1],
help='Context sizes/Thickness')
parser.add_argument('--cipselector_option', nargs=1, type=int, default=[1], ## Change this back
parser.add_argument('--cipselector_option', nargs=1, type=int, default=[2], ## Change this back
choices=[0, 1, 2],
help='1: Take k best from all, 2: Take k best from each current cip')
parser.add_argument('--cipselector_k', nargs=1, type=int, default=[100],
parser.add_argument('--cipselector_k', nargs=1, type=int, default=[10],
help='k for Cipselector')
parser.add_argument('--pareto_option', nargs=1, type=str, default=['greedy'],
choices=['default', 'random', 'greedy', 'pareto_only', 'all'],
choices=['default', 'random', 'greedy', 'paretogreed', 'pareto_only', 'all'],
help='Pareto option for optimization')
parser.add_argument('--keepgraphs', nargs=1, type=int, default=[30],
help='Number of graphs kept from the pareto part')
parser.add_argument('--use_normalization', nargs=1, type=int, default=[1], choices=[1,0],
help='If 1, normalization will be applied for cipselection')
parser.add_argument('--min_count', nargs=1, type=int, default=[2],
parser.add_argument('--min_count', nargs=1, type=int, default=[1],
help='Also called min_cip')
parser.add_argument('--graph_size_limiter', nargs=1, type=int, default=[1], choices=[1,0],
help='If 0, graph size limiter is only used with a graphs >100')
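One detail worth noting about the options above: every argument is declared with nargs=1, so argparse stores one-element lists (hence list defaults like [30]), and consuming code is expected to unpack index 0. A minimal self-contained illustration:

    import argparse

    # nargs=1 always yields a list, even for a single value.
    parser = argparse.ArgumentParser()
    parser.add_argument('--pareto_option', nargs=1, type=str, default=['greedy'])
    args = parser.parse_args(['--pareto_option', 'paretogreed'])
    assert args.pareto_option == ['paretogreed']
    pareto_option = args.pareto_option[0]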
@@ -339,10 +342,12 @@ def report(folder = '.res', tasklist=None):
lsuccess = [int(succ) for data in dat.values() for v in data.values() for succ,steps,times,avg in v]
avg_productions = np.array([int(avg) for data in dat.values() for v in data.values() for succ,steps,times,avg in v])
rnd = [int(steps) for data in dat.values() for v in data.values() for succ,steps,times,avg in v]
times_arr = np.array([int(times) for data in dat.values() for v in data.values() for succ,steps,times,avg in v]) # renamed so the time module is not shadowed
print ("nores",nores)
print ('nosucc',nosucc)
print ("sumsuccess:", sum(lsuccess), lsuccess)
print ("Average productions:", avg_productions.mean(), avg_productions)
print ("Average times:", np.average(time), time)
# print ("maxrnd:", max([int(b) for c in dat.values() for a,b,_ in c.values()]))
print("maxrnd:", max(rnd))

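The report() additions unpack per-run tuples of (succ, steps, times, avg) and now also average the run times. A hedged sketch of that aggregation with made-up numbers:

    import numpy as np

    # Hypothetical runs in the (succ, steps, times, avg) layout report() unpacks.
    runs = [(1, 12, 34.0, 80.5), (0, 25, 70.2, 75.0), (1, 9, 21.7, 91.0)]
    times = np.array([int(t) for _, _, t, _ in runs])
    print("Average times:", np.average(times), times)  # mirrors the new print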