"""
@author Lorenzo Sciandra
@brief A recombination of code taken from: https://github.com/chaitjo/graph-convnet-tsp. Some functions were created for the purpose of the paper.
@copyright Copyright (c) 2024, license MIT
Repo: https://github.com/LorenzoSciandra/GraphConvolutionalBranchandBound
"""
import os
import sys
import warnings
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.nn.functional as F
from sklearn.utils.class_weight import compute_class_weight

warnings.filterwarnings("ignore", category=UserWarning)
from scipy.sparse import SparseEfficiencyWarning
warnings.simplefilter('ignore', SparseEfficiencyWarning)

from utils.graph_utils import *
from utils.google_tsp_reader import GoogleTSPReader
from utils.plot_utils import *
from models.gcn_model import ResidualGatedGCNModel
from sklearn_extra.cluster import KMedoids
from utils.model_utils import *
def compute_prob(net, config, dtypeLong, dtypeFloat):
    """
    This function computes the probability of the edges being in the optimal tour, by running the GCN.
    Args:
        net: The Graph Convolutional Network.
        config: The configuration file, from which the parameters are taken.
        dtypeLong: The data type for the long tensors.
        dtypeFloat: The data type for the float tensors.
    Returns:
        y_probs: The probability of the edges being in the optimal tour.
        x_edges_values: The distance between the nodes.
        nodes_coord: The node coordinates of the processed instance.
    """
    num_nodes = config.num_nodes
    num_neighbors = config.num_neighbors
    batch_size = config.batch_size
    test_filepath = config.test_filepath

    dataset = GoogleTSPReader(num_nodes, num_neighbors, batch_size=batch_size, filepath=test_filepath)
    dataset = iter(dataset)
    with open(test_filepath, 'r') as f:
        lines = f.readlines()

    if lines is None or len(lines) == 0:
        raise Exception("The input file is empty.")

    instance = lines[0]
    if instance is None:
        raise Exception("The instance does not exist.")

    instance = instance.split(" output")[0]
    instance = [float(x) for x in instance.split(" ")]
    # Scan the dataset until the batch matching the requested instance is found
    batch = next(dataset)
    while batch.nodes_coord.flatten().tolist() != instance:
        batch = next(dataset)

    x_edges = Variable(torch.LongTensor(batch.edges).type(dtypeLong), requires_grad=False)
    x_edges_values = Variable(torch.FloatTensor(batch.edges_values).type(dtypeFloat), requires_grad=False)
    x_nodes = Variable(torch.LongTensor(batch.nodes).type(dtypeLong), requires_grad=False)
    x_nodes_coord = Variable(torch.FloatTensor(batch.nodes_coord).type(dtypeFloat), requires_grad=False)
    y_edges = Variable(torch.LongTensor(batch.edges_target).type(dtypeLong), requires_grad=False)
    # Compute balanced class weights for the edge loss, unless already given as a tensor
    if type(edge_cw) != torch.Tensor:
        edge_labels = y_edges.cpu().numpy().flatten()
        edge_cw = compute_class_weight("balanced", classes=np.unique(edge_labels), y=edge_labels)
    y_preds, _ = net.forward(x_edges, x_edges_values, x_nodes, x_nodes_coord, y_edges, edge_cw)
    y = F.softmax(y_preds, dim=3)
    # Keep only the probability that each edge belongs to the optimal tour (class 1)
    y_probs = y[:, :, :, 1]

    nodes_coord = batch.nodes_coord.flatten().tolist()

    return y_probs, x_edges_values, nodes_coord
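
# A minimal sketch of the softmax step used in compute_prob, with made-up
# logits. The 4-D shape (batch, node, node, class) is an assumption inferred
# from the indexing above; channel 1 is the "edge in the optimal tour" class.
#
#   y_preds = torch.randn(1, 5, 5, 2)        # fake GCN output for 5 nodes
#   y = F.softmax(y_preds, dim=3)            # normalize over the 2 classes
#   y_probs = y[:, :, :, 1]                  # P(edge (i, j) is in the tour)
#   assert torch.allclose(y_probs + y[:, :, :, 0], torch.ones(1, 5, 5))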

def write_adjacency_matrix(graph, y_probs, x_edges_values, nodes_coord, filepath, num_nodes, kmedoids_labels=None):
    """
    This function writes the adjacency matrix to a file.
    The file is in the format:
    cities: (x1, y1);(x2, y2);...;(xn, yn)
    (0.0, 0.0);(0.23, 0.9);...;(0.15, 0.56)
    ...
    (0.23, 0.9);(0.3, 0.59);...;(0.0, 0.0)
    where each entry is (distance, probability).
    If needed, it adjusts the size of the graph when the model size is different from the number of nodes in the instance.
    Args:
        graph: The set of nodes in the graph.
        y_probs: The probability of the edges being in the optimal tour.
        x_edges_values: The weight of the edges.
        nodes_coord: The nodes coordinates used in the GCN.
        filepath: The path to the file where the adjacency matrix will be written.
        num_nodes: The number of nodes in the TSP instance.
        kmedoids_labels: The labels of the k-medoids clustering.
    """
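    # Hedged illustration of the output format for a made-up 2-city instance
    # (all numbers arbitrary; the distance 0.5 is just |(0.1,0.2)-(0.4,0.6)|).
    # First line: the city coordinates; then one line per city with a
    # (distance, probability) entry towards every city:
    #
    #   (0.1, 0.2);(0.4, 0.6)
    #   (0.0, 0.0);(0.5, 0.97);
    #   (0.5, 0.97);(0.0, 0.0);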
    model_size = y_probs.shape[1]
    y_probs = y_probs.flatten().numpy()
    x_edges_values = x_edges_values.flatten().numpy()

    # Pair every edge weight with its predicted probability; entries are kept
    # as strings ('U' = unicode dtype) since they are written to a text file.
    arr_combined = np.stack((x_edges_values, y_probs), axis=1).astype('U')

    if num_nodes < model_size:
        nodes_coord = nodes_coord[:num_nodes * 2]
        final_arr = []
        for i in range(num_nodes):
            for j in range(num_nodes):
                final_arr.append(arr_combined[i * model_size + j])
            # Fold the columns of the dummy cities back onto the real cities
            # they were cloned from, keeping the larger probability.
            for j in range(num_nodes, model_size):
                if arr_combined[i * model_size + j][1] > final_arr[i * num_nodes + j - num_nodes][1]:
                    final_arr[i * num_nodes + j - num_nodes][1] = arr_combined[i * model_size + j][1]

        arr_combined = np.array(final_arr)
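    # Worked example of the fold above (made-up values, num_nodes = 2,
    # model_size = 3, dummy city 2 is a jittered copy of city 0):
    #
    #   row 0 of the model-sized matrix: [(d00, '0.1'), (d01, '0.9'), (d02, '0.4')]
    #   after folding column 2 onto column 0: [(d00, '0.4'), (d01, '0.9')]
    #
    # For probabilities formatted as "0.xxx" the lexicographic string
    # comparison used above agrees with the numeric order.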
    elif num_nodes > model_size:
        nodes_coord = [[nodes_coord[i], nodes_coord[i + 1]] for i in range(0, len(nodes_coord), 2)]
        arr_combined = arr_combined.flatten()
        arr_combined = arr_combined.reshape(model_size, model_size, 2).tolist()

        # Re-attach every node that was collapsed by the clustering
        for j, node in enumerate(graph):
            if node not in nodes_coord:
                new_row = []
                label = kmedoids_labels[j]
                for i in range(len(nodes_coord)):
                    distance = np.linalg.norm(np.array(node) - np.array(nodes_coord[i]))
                    arr_combined[i].append([distance, prob])
                    new_row.append([distance, prob])

                new_row.append([0.0, 0.0])
                arr_combined.append(new_row)
                nodes_coord.append(node)
    arr_combined = np.array(arr_combined).flatten().tolist()
    arr_combined = [[arr_combined[i], arr_combined[i + 1]] for i in range(0, len(arr_combined), 2)]
    nodes_coord = np.array(nodes_coord).flatten().tolist()

    nodes_coord = ";".join([f"({nodes_coord[i]}, {nodes_coord[i + 1]})" for i in range(0, len(nodes_coord), 2)])
    arr_strings = np.array(['({}, {});'.format(x[0], x[1]) for x in arr_combined])

    with open(filepath, 'w') as f:
        f.write("%s\n" % nodes_coord)
        for edge, item in enumerate(arr_strings):
            # Break the line after every num_nodes entries, one row per city
            if (edge + 1) % num_nodes == 0:
                f.write("%s\n" % item)
            else:
                f.write("%s" % item)

def add_dummy_cities(num_nodes, model_size):
    """
    This function adds dummy cities to the graph instance. The dummy cities are randomly generated,
    added to the graph instance, and the new instance is saved in a temporary file.
    Args:
        num_nodes: The number of nodes of the graph instance.
        model_size: The size of the Graph Convolutional Network to use.
    """
    num_dummy_cities = model_size - num_nodes
    filepath = "data/hyb_tsp/test_" + str(num_nodes) + "_nodes_temp.txt"
    with open(filepath, "r") as f:
        lines = f.readlines()

    graph_str = lines[0]
    graph_str = graph_str.split(" output")[0]
    if graph_str is None:
        raise Exception("The input file is empty.")

    nodes = graph_str.split(" ")
    graph = [[float(nodes[i]), float(nodes[i + 1])] for i in range(0, len(nodes), 2)]
    # Round-robin over the real cities: each dummy city is a copy of an
    # existing one, perturbed by a tiny random offset on both axes.
    rr_index = 0
    for i in range(num_dummy_cities):
        values = np.random.randint(1, 9, 2)
        signs = np.random.choice([-1, 1], 2)
        x = graph[rr_index][0] + signs[0] * values[0] * 1e-15
        y = graph[rr_index][1] + signs[1] * values[1] * 1e-15
        if [x, y] not in graph:
            graph_str += " " + str(x) + " " + str(y)
        rr_index = (rr_index + 1) % num_nodes
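    # Hedged numeric illustration of the perturbation above (made-up anchor):
    # offsets are at most 8e-15 per axis, so for coordinates in [0, 1] a dummy
    # city is metrically indistinguishable from the city it was cloned from.
    #
    #   anchor = [0.5, 0.5]
    #   dummy = [0.5 + 3 * 1e-15, 0.5 - 7 * 1e-15]
    #   detour = 2 * math.hypot(3e-15, 7e-15)    # ~1.5e-14, negligible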
    # Append the node sequence "1 2 ... model_size" after the output marker
    seq = np.linspace(1, model_size, model_size, dtype=int)
    seq_str = ""
    for s in seq:
        seq_str += str(s) + " "

    graph_str += " output " + seq_str

    with open(filepath, 'w+') as file:
        file.writelines(graph_str)
        file.flush()
        os.fsync(file.fileno())
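
# For reference, the single-line instance format assumed throughout this
# script: space-separated coordinates, then an " output " marker, then a
# 1-based node sequence. A made-up 3-node example:
#
#   0.1 0.2 0.4 0.7 0.9 0.3 output 1 2 3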

def create_temp_file(num_nodes, str_grap):
    """
    Creates a temporary file with the graph instance.
    Args:
        num_nodes: The number of nodes of the graph instance.
        str_grap: The graph instance.
    """
    filepath = "data/hyb_tsp/test_" + str(num_nodes) + "_nodes_temp.txt"

    with open(filepath, 'w+') as file:
        file.writelines(str_grap)
        file.flush()
        os.fsync(file.fileno())

def cluster_nodes(graph, k):
    """
    Applies the k-medoids clustering to the graph.
    Args:
        graph: The graph to cluster.
        k: The number of clusters to create.
    Returns:
        medoids_str: The medoids of the clusters.
        kmedoids.labels_: The labels of the clusters.
    """
    graph = np.array(graph)
    kmedoids = KMedoids(n_clusters=k, method='pam', random_state=42).fit(graph)
    medoids = kmedoids.cluster_centers_
    medoids_str = " ".join(f"{x} {y}" for x, y in medoids)
    return medoids_str, kmedoids.labels_
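
# Minimal usage sketch for cluster_nodes (made-up coordinates): k-medoids
# picks actual input points as centers, so the medoids below are two of the
# four points, and labels assigns every point to one of the two clusters.
#
#   coords = [[0.10, 0.20], [0.12, 0.21], [0.80, 0.90], [0.82, 0.88]]
#   medoids_str, labels = cluster_nodes(coords, k=2)
#   # medoids_str -> e.g. "0.1 0.2 0.8 0.9"; labels -> e.g. [0, 0, 1, 1]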

def fix_instance_size(graph, num_nodes, model_size=100):
    """
    The function that fixes the instance size with clustering.
    It applies the k-medoids clustering to the graph and creates a new instance with the medoids as the new nodes.
    Args:
        graph: The graph to fix.
        num_nodes: The number of nodes of the graph instance.
        model_size: The size of the Graph Convolutional Network to use.
    Returns:
        kmedoids_labels: The labels of the clusters.
    """
    print("Need to fix the instance size with clustering")
    new_graph_str, kmedoids_labels = cluster_nodes(graph, model_size)

    end_str = ""
    for i in range(1, model_size + 1):
        end_str += str(i) + " "

    return kmedoids_labels

def get_instance(num_nodes):
    """
    The function that reads the current instance from the file.
    Args:
        num_nodes: The number of nodes of the graph instance.
    Returns:
        graph: The graph instance.
    """
    file_path = "data/hyb_tsp/test_" + str(num_nodes) + "_nodes_temp.txt"

    with open(file_path, "r") as f:
        lines = f.readlines()

    if lines is None or len(lines) == 0:
        raise Exception("The current instance for the number of nodes " + str(num_nodes) + " does not exist.")

    str_graph = lines[0]
    if "output" in str_graph:
        str_graph = str_graph.split(" output")[0]

    str_graph = str_graph.replace("\n", "").strip()
    nodes = str_graph.split(" ")
    graph = [float(x) for x in nodes]
    graph = [[graph[i], graph[i + 1]] for i in range(0, len(graph), 2)]

    return graph

def main(filepath, num_nodes, model_size):
    """
    The function that runs the Graph Convolutional Network and writes the adjacency matrix to a file
    for the given input instance.
    Args:
        filepath: The path to the file where the adjacency matrix will be written.
        num_nodes: The number of nodes in the TSP instance.
        model_size: The size of the Graph Convolutional Network to use.
    """
    kmedoids_labels = None

    if num_nodes < model_size:
        add_dummy_cities(num_nodes, model_size)
    elif num_nodes > model_size:
        graph = get_instance(num_nodes)
        kmedoids_labels = fix_instance_size(graph, num_nodes, model_size)

    config_path = "./logs/tsp" + str(model_size) + "/config.json"
    config = get_config(config_path)
    config.accumulation_steps = 1
    config.val_filepath = "data/hyb_tsp/test_" + str(num_nodes) + "_nodes_temp.txt"
    config.test_filepath = "data/hyb_tsp/test_" + str(num_nodes) + "_nodes_temp.txt"

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_id)

    if torch.cuda.is_available():
        dtypeFloat = torch.cuda.FloatTensor
        dtypeLong = torch.cuda.LongTensor
        torch.cuda.manual_seed(1)
    else:
        dtypeFloat = torch.FloatTensor
        dtypeLong = torch.LongTensor

    net = nn.DataParallel(ResidualGatedGCNModel(config, dtypeFloat, dtypeLong))
    if torch.cuda.is_available():
        net.cuda()

    log_dir = f"./logs/{config.expt_name}/"
    if torch.cuda.is_available():
        checkpoint = torch.load(log_dir + "best_val_checkpoint.tar")
    else:
        checkpoint = torch.load(log_dir + "best_val_checkpoint.tar", map_location='cpu')

    net.load_state_dict(checkpoint['model_state_dict'])
    config.batch_size = 1
    probs, edges_value, nodes_coord = compute_prob(net, config, dtypeLong, dtypeFloat)

if __name__ == "__main__":
    """
    Args:
        sys.argv[1]: The path to the file where the adjacency matrix will be written.
        sys.argv[2]: The number of nodes in the TSP instance.
        sys.argv[3]: The dimension of the model.
    """
    if len(sys.argv) != 4:
        print("\nPlease provide the path to the output file to write in, the number of nodes in the tsp and the "
              "model size to use. The format is: "
              "<filepath> <number of nodes> <model size>\n")
        sys.exit(1)

    if not isinstance(sys.argv[1], str) or not isinstance(sys.argv[2], str) or not isinstance(sys.argv[3], str):
        print("Error: The arguments must be strings.")
        sys.exit(1)

    filepath = sys.argv[1]
    num_nodes = int(sys.argv[2])
    model_size = int(sys.argv[3])
    main(filepath, num_nodes, model_size)
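
# Example invocation (the script and output file names here are hypothetical):
#
#   python gcn_adjacency.py results/adj_matrix_50.txt 50 100
#
# This runs the model trained for 100 nodes on a 50-node instance and writes
# the (distance, probability) adjacency matrix to the given file.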