Commit c4ebd32

committed
adding tensorflow files
1 parent 83147a2 commit c4ebd32

File tree

2 files changed: +252 -0 lines changed

CosmoFlow/hyper_parameters_Cosmo.py

+80
@@ -0,0 +1,80 @@
import os

magic_number = 64

DATAPARAM = {
    "output_dim": 3,
    # Ns
    "zsAVG": [0.3, 0.8628, 0.95],
    "zsSTD": [0.02853, 0.04887, 0.028]
    # H0
    #"zsAVG": [0.3, 0.8628, 0.701],
    #"zsSTD": [0.02853, 0.04887, 0.05691]
    #"zsAVG": [2.995679839999998983e-01, 8.610806619999996636e-01],
    #"zsSTD": [2.905168635566176411e-02, 4.023372385668218254e-02]
}

Input = {
    "BATCH_SIZE": 1,          # mini-batch size for training and validation
    "NUM_THREADS": 2,         # number of threads used to read data
    "CAPACITY": 0,            # filled in below
    "MIN_AFTER_DEQUEUE": 200  # minimum number of examples left in the queue after a dequeue
                              # (MIN_AFTER_DEQUEUE and CAPACITY together determine how well the input data is shuffled)
}

Input["CAPACITY"] = Input["BATCH_SIZE"]*4 + Input["MIN_AFTER_DEQUEUE"]

Input_Test = {
    "BATCH_SIZE": 1,         # mini-batch size for test data
    "NUM_THREADS": 2,        # number of threads used to read data
    "CAPACITY": 0,           # filled in below
    "MIN_AFTER_DEQUEUE": 64
}

Input_Test["CAPACITY"] = Input_Test["BATCH_SIZE"]*4 + Input_Test["MIN_AFTER_DEQUEUE"]

Model = {
    "REG_RATE": 0.,           # weight regularization; currently 0 since batch normalization has a similar regularizing effect
    "LEAK_PARAMETER": 0.01,   # leak parameter for the leaky ReLU
    "LEARNING_RATE": 0.0001,  # learning rate for the Adam optimizer used for the updates
    "DROP_OUT": 0.5           # dropout in the fully connected layers; this is the probability of keeping a node
}

RUNPARAM = {
    "num_epoch": 80,            # one epoch is a full pass over the data; training may stop before num_epoch is reached (see next line)
    "require_improvement": 50,  # stop training if the validation error has not improved within this many epochs
    "num_train": 400,           # total number of simulations for training
    "num_val": 50,              # total number of simulations for validation
    "num_test": 49,             # total number of simulations for testing
    "batch_per_epoch": 0,
    "batch_per_epoch_val": 0,
    "iter_test": 0
}

# integer division keeps these counts integral under Python 3 as well
RUNPARAM["batch_per_epoch"] = RUNPARAM['num_train']*magic_number // Input['BATCH_SIZE']
RUNPARAM["batch_per_epoch_val"] = RUNPARAM['num_val']*magic_number // Input['BATCH_SIZE']
RUNPARAM['iter_test'] = RUNPARAM['num_test']*magic_number // Input_Test['BATCH_SIZE']
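
# Worked out with the defaults above (400/50/49 simulations, 64 sub-volumes
# each, batch size 1): batch_per_epoch = 400*64 = 25600,
# batch_per_epoch_val = 50*64 = 3200, and iter_test = 49*64 = 3136.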

##### CHANGE THIS TO LOCAL DIRECTORY

## data on scratch
main_dir = "/global/cscratch1/sd/djbard/cosmoML/data-March20Runs/"
target_dir = "500/"  #"orig_paper"

## Burst Buffer dir
#main_dir = os.environ['DW_PERSISTENT_STRIPED_CosmoFlow']
#target_dir = "/two-param-500-128cubefrom256-64perTfrecord-64From500perTfrecord/"  #"orig_paper"

Path = {
    "init_data": '.',                                 # path where the init data is
    "Model_path": './result/',                        # path to save the best model (the one with the smallest validation error), which is then used for testing
    "train_data": main_dir + target_dir + '/train/',  # path where the training data is
    "train_result": './result/',                      # path to store the training results
    "val_data": main_dir + target_dir + '/val/',      # path where the validation data is
    "val_result": './result/',                        # path to store the validation results
    "test_data": main_dir + target_dir + '/test/',    # path where the test data is
    "test_result": './result/'                        # path to store the test results
}

CosmoFlow/io_Cosmo-3param.py

+172
@@ -0,0 +1,172 @@
import numpy as np
import tensorflow as tf
import hyper_parameters_Cosmo
import os
import random

def _float64_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
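
# These helpers wrap raw values in the tf.train.Feature protos that a
# tf.train.Example expects; below, both the density cubes and the labels
# are serialized to bytes and stored via _bytes_feature
# (_float64_feature is defined for scalar floats but unused here).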


class loadNpyData:
    def __init__(self, data, label, num):
        ### suggestion from James to cast as 32-bit
        self.data = data.astype(dtype=np.float32)    ## data
        self.label = label.astype(dtype=np.float32)  ## label
        self.num = num

    def convert_to(self):
        filename = str(self.num) + '.tfrecord'
        print('Writing ', filename)
        writer = tf.python_io.TFRecordWriter(filename)
        for index in range(len(self.data)):
            data_raw = self.data[index].tostring()
            label_raw = self.label[index].tostring()
            example = tf.train.Example(features=tf.train.Features(feature={
                'label_raw': _bytes_feature(label_raw),
                'data_raw': _bytes_feature(data_raw)}))
            writer.write(example.SerializeToString())
        writer.close()
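
# Usage sketch, assuming hypothetical in-memory arrays: for 64 density
# cubes `cubes` of shape (64,128,128,128,1) and labels `params` of shape
# (64,3), loadNpyData(cubes, params, 0).convert_to() writes '0.tfrecord'
# into the current working directory.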

class loadTfrecordData:
    def __init__(self, fileBuffer, num):
        self.fileBuffer = fileBuffer
        self.num = num

    def reconstruct_from(self):
        # iterate over the serialized examples stored in the TFRecord file
        record_iterator = tf.python_io.tf_record_iterator(path=self.fileBuffer)
        for record in record_iterator:
            example = tf.train.Example()
            example.ParseFromString(record)
            data_raw = example.features.feature['data_raw'].bytes_list.value[0]
            # the cubes and labels were written as float32, so decode them as float32
            data = np.fromstring(data_raw, dtype=np.float32).reshape([-1, 128, 128, 128, 1])
            label_raw = example.features.feature['label_raw'].bytes_list.value[0]
            label = np.fromstring(label_raw, dtype=np.float32).reshape([-1, hyper_parameters_Cosmo.DATAPARAM["output_dim"]])
            # return after the first record
            return data, label
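
# Inverse usage sketch (hypothetical file name):
# loadTfrecordData('0.tfrecord', 0).reconstruct_from() decodes the first
# serialized example back into numpy arrays of shape (-1,128,128,128,1)
# and (-1, output_dim).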

def read_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, single_example = reader.read(filename_queue)
    parsed_example = tf.parse_single_example(
        single_example,
        features={
            "data_raw": tf.FixedLenFeature([], tf.string),
            "label_raw": tf.FixedLenFeature([], tf.string)
        }
    )

    NbodySimuDecode = tf.decode_raw(parsed_example['data_raw'], tf.float32)
    labelDecode = tf.decode_raw(parsed_example['label_raw'], tf.float32)

    NbodySimus = tf.reshape(NbodySimuDecode, [128, 128, 128])

    # normalize: divide each cube by its mean value (sum / 128^3) so the input has unit mean
    NbodySimus /= (tf.reduce_sum(NbodySimus)/128**3 + 0.)
    NbodySimuAddDim = tf.expand_dims(NbodySimus, axis=3)
    label = tf.reshape(labelDecode, [hyper_parameters_Cosmo.DATAPARAM["output_dim"]])

    # standardize the labels with the per-parameter mean and std from the config
    label = (label - tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsAVG'], dtype=tf.float32)) \
            / tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsSTD'], dtype=tf.float32)
    return NbodySimuAddDim, label

def readDataSet(filenames):
    print("---readDataSet-ioCosmo------")
    print(filenames)
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=None, shuffle=True)
    NbodySimus, label = read_tfrecord(filename_queue)

    NbodySimus_batch, label_batch = tf.train.shuffle_batch(
        [NbodySimus, label],
        batch_size=hyper_parameters_Cosmo.Input["BATCH_SIZE"],
        num_threads=hyper_parameters_Cosmo.Input["NUM_THREADS"],
        capacity=hyper_parameters_Cosmo.Input["CAPACITY"],
        min_after_dequeue=hyper_parameters_Cosmo.Input["MIN_AFTER_DEQUEUE"],
        allow_smaller_final_batch=True)

    return NbodySimus_batch, label_batch
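
# Hypothetical driver sketch (not part of this module): under the TF1
# queue-runner API, the batch tensors returned here are consumed inside a
# session roughly as follows:
#   cubes, params = readDataSet(['0.tfrecord'])
#   with tf.Session() as sess:
#       coord = tf.train.Coordinator()
#       threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#       cube_batch, param_batch = sess.run([cubes, params])
#       coord.request_stop()
#       coord.join(threads)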


def read_test_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, single_example = reader.read(filename_queue)
    parsed_example = tf.parse_single_example(
        single_example,
        features={
            "data_raw": tf.FixedLenFeature([], tf.string),
            "label_raw": tf.FixedLenFeature([], tf.string)
        }
    )

    NbodySimuDecode = tf.decode_raw(parsed_example['data_raw'], tf.float32)
    labelDecode = tf.decode_raw(parsed_example['label_raw'], tf.float32)
    NbodySimus = tf.reshape(NbodySimuDecode, [128, 128, 128])
    NbodySimus /= (tf.reduce_sum(NbodySimus)/128**3 + 0.)
    NbodySimuAddDim = tf.expand_dims(NbodySimus, 3)
    #label = tf.reshape(labelDecode, [2])
    label = tf.reshape(labelDecode, [hyper_parameters_Cosmo.DATAPARAM["output_dim"]])

    labelAddDim = (label - tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsAVG'], dtype=tf.float32)) \
                  / tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsSTD'], dtype=tf.float32)

    print(NbodySimuAddDim.shape)

    return NbodySimuAddDim, labelAddDim

def readTestSet(filenames):
    print("----readTestSet-io_cosmo----")
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=None, shuffle=False)
    NbodySimus, label = read_test_tfrecord(filename_queue)
    NbodySimus_batch, label_batch = tf.train.batch(
        [NbodySimus, label],
        #NbodyList,
        batch_size=hyper_parameters_Cosmo.Input_Test["BATCH_SIZE"],
        num_threads=hyper_parameters_Cosmo.Input_Test["NUM_THREADS"],
        capacity=hyper_parameters_Cosmo.Input_Test["CAPACITY"],
        enqueue_many=False,
        allow_smaller_final_batch=True)

    return NbodySimus_batch, label_batch
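
# Note the design difference from readDataSet: the test pipeline uses
# string_input_producer with shuffle=False and tf.train.batch (no
# shuffling), so predictions stay aligned with the order of the files on
# disk.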


if __name__ == '__main__':

    label_path = os.path.join('/global/cscratch1/sd/djbard/MUSIC_pyCola/egpbos-pycola-672c58551ff1/OmSiNs/twothousand-4/', 'list-2000-noCiC-128from256.txt')
    labels = np.loadtxt(label_path, delimiter=',')

    ### How many tensorflow files do we want to make?
    ### Assuming 500 here, with the first 400 a random mix,
    ### and the last 100 NOT mixed for the val/test sets.
    for i in range(1950, 2000):
        data = []
        label = []
        for j in range(64):
            ## for the twothousand dir, just make all of them training.
            numDirectory = random.randrange(1000, 3000)

            #if i < 1800:
            #    numDirectory = random.randrange(1000, 2800)
            #else:
            #    numDirectory = i + 1000  ## don't want this to be random!!

            numFile = random.randrange(8)
            dirname = numDirectory

            #print(i, j, numDirectory)
            ## pull a sub-volume from the 2000 dir
            data_path = os.path.join('/global/cscratch1/sd/djbard/MUSIC_pyCola/egpbos-pycola-672c58551ff1/OmSiNs/twothousand-4/128from256-2000/', str(dirname).rjust(3, '0'), str(numFile) + '.npy')
            #print(data_path)
            data = np.append(data, np.load(data_path))
            label = np.append(label, labels[numDirectory - 1000][[1, 2, 3]])

        loadNpyData(data.reshape(-1, 128, 128, 128, 1), label.reshape(-1, 3), i).convert_to()
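
        # Each output TFRecord therefore packs 64 randomly drawn 128^3
        # sub-volumes together with their three target parameters
        # (columns 1, 2 and 3 of the label file).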
