forked from irom-princeton/PAC-Vision-Planning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_ES_biped.py
executable file
·201 lines (157 loc) · 7.95 KB
/
train_ES_biped.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 7 19:04:55 2020
@author: premchand
"""
import warnings
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import json
import sys
from Parallelizer import Compute_Loss
from utils.visualize import weight_spread
warnings.filterwarnings('ignore')
class train:
    """ES-style trainer for a PAC-Bayes posterior over policy-network weights.

    Maintains a diagonal Gaussian posterior (mean ``mu`` and log-variance
    ``logvar``) over the flattened policy parameters and optimizes it with
    Adam, using gradient estimates computed in parallel by ``Compute_Loss``.
    """

    def __init__(self, config_file):
        """Read the JSON config, build the policy, and initialize the posterior.

        Args:
            config_file (str): Path to the JSON configuration file.

        Raises:
            ValueError: If the config's 'example' is not one of the known
                environments ('quadrotor', 'minitaur', 'biped').
        """
        self.config_file = config_file
        # Context manager so the config file handle is closed promptly
        # (the original json.load(open(...)) leaked the handle).
        with open(self.config_file) as f:
            args = json.load(f)
        # Keep the full dict around for ease of passing downstream.
        self.params = args
        # Unpack the settings this class uses directly.
        self.example = args['example']
        self.num_trials = args['num_trials']
        self.num_itr = args['num_itr']
        self.num_cpu = args['num_cpu']
        self.num_gpu = args['num_gpu']
        self.lr_mu = args['lr_mu']
        self.lr_logvar = args['lr_logvar']
        self.itr_start = args['itr_start']
        self.start_seed = args['start_seed']
        self.save_file_v = args['save_file_v']
        self.delta = args['delta']
        self.load_weights = args['load_weights']
        self.load_weights_from = args['load_weights_from']
        self.load_optimizer = args['load_optimizer']
        self.load_prior = args['load_prior']
        self.load_prior_from = args['load_prior_from']
        self.logging = args['logging']
        # Import the policy class matching the requested example.
        if self.example == 'quadrotor':
            from policy.quad_policy import Policy
        elif self.example == 'minitaur':
            from policy.minitaur_policy import Policy
        elif self.example == 'biped':
            from policy.biped_policy import Policy
        else:
            # Fail fast with a clear message instead of a NameError below.
            raise ValueError('Unknown example: {}'.format(self.example))
        # Generate policy and count its parameters.
        self.policy = Policy()
        self.num_params = sum(p.numel() for p in self.policy.parameters())
        print('Number of Neural Network Parameters:', self.num_params)
        # Initialize the posterior: zero mean, variance 4 (logvar = log 4).
        self.mu = nn.ParameterList([nn.Parameter(torch.zeros(self.num_params))])
        self.logvar = nn.ParameterList([nn.Parameter(torch.log(torch.ones(self.num_params)*4))])
        if self.load_weights:
            # Resume the posterior distribution from a previous run.
            self.mu.load_state_dict(torch.load('Weights/mu_'+str(self.load_weights_from)+'.pt'))
            self.logvar.load_state_dict(torch.load('Weights/logvar_'+str(self.load_weights_from)+'.pt'))
        # Seed the parameters' .grad fields (by default they are None).
        # NOTE: the original assigned .grad on the ParameterList container,
        # which is never read; the Parameter itself is the intended target.
        # These placeholder values are overwritten in opt() before any
        # optimizer.step() runs.
        self.mu[0].grad = torch.randn_like(self.mu[0])
        self.logvar[0].grad = torch.randn_like(self.logvar[0])
        # Initialize the tensorboard writer for logging, if enabled.
        if self.logging:
            self.writer = SummaryWriter(log_dir='runs/summary_'+self.save_file_v, flush_secs=10)

    def logger(self, itr, mu, logvar, grad_mu_norm, grad_logvar_norm,
               mu_step_size, logvar_step_size, emp_cost, cost_min):
        """Log one iteration to tensorboard and checkpoint the config file.

        Logs:
            (1) training curves and weight-spread figures to tensorboard;
            (2) updates to the JSON config so training can be restarted
                from the current iteration if terminated midway.
        """
        # For plots: always log at step 0 so only the latest image is kept,
        # otherwise tensorboard's memory consumption grows rapidly.
        fig_mu, fig_std = weight_spread(mu.clone().detach(), (0.5*logvar).exp().clone().detach())
        self.writer.add_figure('Mean Spread', fig_mu, 0)
        self.writer.add_figure('Std Spread', fig_std, 0)
        self.writer.add_scalars('Loss', {'Train':emp_cost}, itr)
        self.writer.add_scalars('Gradient Norm', {'mu grad': grad_mu_norm,
                                                  'logvar grad': grad_logvar_norm}, itr)
        self.writer.add_scalars('Optimization Step Size', {'Mean Step':mu_step_size,
                                                           'Logvar Step':logvar_step_size}, itr)
        # Rewrite the config so a restart resumes from the next iteration
        # with the '_current' checkpoint. Context managers guarantee the
        # write is flushed and the handles are closed (the original leaked
        # an open write handle, risking a truncated config on crash).
        with open(self.config_file) as f:
            state_dict = json.load(f)
        state_dict['itr_start'] = itr+1
        state_dict['load_weights'] = True
        state_dict['load_optimizer'] = False
        state_dict['load_weights_from'] = self.save_file_v+'_current'
        state_dict['cost_min'] = cost_min
        with open(self.config_file, 'w') as f:
            json.dump(state_dict, f, indent=4)

    def opt(self):
        """Run the Adam optimization loop over the posterior parameters.

        Each iteration: estimate the empirical cost and gradients via the
        parallelizer, checkpoint the '_current' (and, on improvement,
        '_best') posterior and optimizer state, then take an Adam step.
        """
        optimizer = optim.Adam([ {'params': self.mu, 'lr': self.lr_mu},
                                 {'params': self.logvar, 'lr': self.lr_logvar} ])
        # Optionally resume the optimizer state (moment estimates).
        if self.load_optimizer:
            optimizer.load_state_dict(torch.load('optim_state/optimizer_'+self.save_file_v+'_current.pt'))
        # Best cost seen so far, carried over restarts via the config file.
        cost_min = self.params['cost_min']
        # Instantiate the Parallelizer for computing cost.
        para = Compute_Loss(self.num_trials, self.num_cpu, self.num_gpu, start_seed=self.start_seed)
        for i in range(self.itr_start, self.num_itr):
            optimizer.zero_grad()
            start = time.time()
            # Snapshot the current posterior; used both as input to the
            # parallelizer and to measure the step size after the update.
            mu = self.mu[0].clone()
            logvar = self.logvar[0].clone()
            # Compute costs for various runs (std = exp(logvar / 2)).
            emp_cost, grad_mu, grad_logvar = para.compute(i,
                                                          self.params,
                                                          mu.clone().detach(),
                                                          (0.5*logvar).exp().clone().detach())
            # Gradient norms, for monitoring/debugging.
            grad_mu_norm = torch.norm(grad_mu, p=2).item()
            grad_logvar_norm = torch.norm(grad_logvar, p=2).item()
            # Load the externally-computed gradients into the parameters.
            self.mu[0].grad = grad_mu
            self.logvar[0].grad = grad_logvar
            # Save the mean, log-variance, and current optimizer state.
            torch.save(self.mu.state_dict(), 'Weights/mu_'+self.save_file_v+'_current.pt')
            torch.save(self.logvar.state_dict(), 'Weights/logvar_'+self.save_file_v+'_current.pt')
            torch.save(optimizer.state_dict(), 'optim_state/optimizer_'+self.save_file_v+'_current.pt')
            # Save the posterior for the "best" (lowest-cost) iteration.
            if cost_min > emp_cost.item():
                torch.save(self.mu.state_dict(), 'Weights/mu_'+self.save_file_v+'_best.pt')
                torch.save(self.logvar.state_dict(), 'Weights/logvar_'+self.save_file_v+'_best.pt')
                cost_min = emp_cost.item()
            # Update the parameters.
            optimizer.step()
            # Monitor step size, helpful in debugging.
            mu_step_size = (self.mu[0]-mu).norm()
            logvar_step_size = (self.logvar[0]-logvar).norm()
            print('Itr: {}, time:{:.1f} s, Emp Cost: {:.3f}'.format(i,
                  time.time()-start, emp_cost.item()))
            # Log to tensorboard and checkpoint the config file.
            if self.logging:
                self.logger(i, mu, logvar, grad_mu_norm, grad_logvar_norm,
                            mu_step_size, logvar_step_size, emp_cost, cost_min)
            # Release per-iteration tensors before the next allocation.
            del grad_mu_norm, grad_logvar_norm, emp_cost, mu, logvar
        if self.logging:
            self.writer.close()
if __name__ == "__main__":
    import argparse
    # Parse the config path and launch training. The unused 'collect_as'
    # argparse Action from the original has been removed (dead code).
    parser = argparse.ArgumentParser(description='PAC-Bayes Optimization')
    parser.add_argument('--config_file', type=str, required=True,
                        help='Path to the JSON training configuration file')
    # required=True fails fast with a clear usage error instead of a
    # downstream TypeError on open(None) inside train.__init__.
    arg_con = parser.parse_args()
    config_file = arg_con.config_file
    train1 = train(config_file)
    train1.opt()