import numpy as np
import tensorflow as tf
import hyper_parameters_Cosmo
import os
import itertools
import random

## Wrap a scalar in a tf.train.Feature holding a FloatList.
def _float64_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

## Wrap a byte string in a tf.train.Feature holding a BytesList.
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

| 14 | + |
| 15 | +class loadNpyData: |
| 16 | + def __init__(self,data,label,num): |
| 17 | + ### suggestion from James to cast as 32-bit |
| 18 | + self.data = data.astype(dtype = np.float32) ##data |
| 19 | + self.label = label.astype(dtype = np.float32) ##label |
| 20 | + self.num = num |
| 21 | + |
| 22 | + def convert_to(self): |
| 23 | + filename = str(self.num)+'.tfrecord' |
| 24 | + print('Writing ', filename) |
| 25 | + writer = tf.python_io.TFRecordWriter(filename) |
| 26 | + for index in range(len(self.data)): |
| 27 | + data_raw = self.data[index].tostring() |
| 28 | + label_raw = self.label[index].tostring() |
| 29 | + example = tf.train.Example(features = tf.train.Features(feature={'label_raw': _bytes_feature(label_raw),'data_raw': _bytes_feature(data_raw)})) |
| 30 | + writer.write(example.SerializeToString()) |
| 31 | + writer.close() |
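
## Hedged usage sketch (not from the original code): writing one fake
## example to '9999.tfrecord', assuming the 128^3 cube shape and the
## 3-parameter label used throughout this module.
def _demo_write_tfrecord():
    fake_data = np.random.rand(1, 128, 128, 128, 1)   # one hypothetical sub-volume
    fake_label = np.random.rand(1, 3)                 # one hypothetical label vector
    loadNpyData(fake_data, fake_label, 9999).convert_to()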

class loadTfrecordData:
    def __init__(self, fileBuffer, num):
        self.fileBuffer = fileBuffer
        self.num = num

    def reconstruct_from(self):
        ## iterate over the serialized records in the file; the arrays were
        ## written as float32, so they must be decoded as float32 as well
        record_iterator = tf.python_io.tf_record_iterator(path=self.fileBuffer)
        data, label = None, None
        for serialized in record_iterator:
            example = tf.train.Example()
            example.ParseFromString(serialized)
            data_raw = example.features.feature['data_raw'].bytes_list.value[0]
            data = np.frombuffer(data_raw, dtype=np.float32).reshape([-1, 128, 128, 128, 1])
            label_raw = example.features.feature['label_raw'].bytes_list.value[0]
            label = np.frombuffer(label_raw, dtype=np.float32).reshape(
                [-1, hyper_parameters_Cosmo.DATAPARAM["output_dim"]])
        return data, label
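
## Hedged sketch: a round-trip sanity check pairing loadNpyData with
## loadTfrecordData; the file name '9999.tfrecord' is assumed to come
## from the writer sketch above.
def _demo_read_back():
    data, label = loadTfrecordData('9999.tfrecord', 9999).reconstruct_from()
    print(data.shape, label.shape)   # expect (1, 128, 128, 128, 1) and (1, output_dim)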

def read_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, single_example = reader.read(filename_queue)
    parsed_example = tf.parse_single_example(
        single_example,
        features={
            "data_raw": tf.FixedLenFeature([], tf.string),
            "label_raw": tf.FixedLenFeature([], tf.string)
        }
    )

    NbodySimuDecode = tf.decode_raw(parsed_example['data_raw'], tf.float32)
    labelDecode = tf.decode_raw(parsed_example['label_raw'], tf.float32)

    NbodySimus = tf.reshape(NbodySimuDecode, [128, 128, 128])

    ## normalize the cube so its mean voxel value is 1
    NbodySimus /= tf.reduce_sum(NbodySimus) / 128**3
    NbodySimuAddDim = tf.expand_dims(NbodySimus, axis=3)
    label = tf.reshape(labelDecode, [hyper_parameters_Cosmo.DATAPARAM["output_dim"]])

    ## z-score the labels with the dataset-wide mean and standard deviation
    label = (label - tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsAVG'], dtype=tf.float32)) \
            / tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsSTD'], dtype=tf.float32)
    return NbodySimuAddDim, label
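
## For reference, the two preprocessing steps above amount to the following
## numpy operations (a hedged illustration, not code the pipeline calls;
## all argument names are hypothetical).
def _demo_preprocess_numpy(cube, label_vec, zs_avg, zs_std):
    cube = cube / (cube.sum() / 128**3)        # unit-mean density normalization
    label_vec = (label_vec - zs_avg) / zs_std  # standardize the cosmology parameters
    return cube, label_vec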

def readDataSet(filenames):
    print("---readDataSet-ioCosmo------")
    print(filenames)
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=None, shuffle=True)
    NbodySimus, label = read_tfrecord(filename_queue)

    NbodySimus_batch, label_batch = tf.train.shuffle_batch(
        [NbodySimus, label],
        batch_size=hyper_parameters_Cosmo.Input["BATCH_SIZE"],
        num_threads=hyper_parameters_Cosmo.Input["NUM_THREADS"],
        capacity=hyper_parameters_Cosmo.Input["CAPACITY"],
        min_after_dequeue=hyper_parameters_Cosmo.Input["MIN_AFTER_DEQUEUE"],
        allow_smaller_final_batch=True)

    return NbodySimus_batch, label_batch
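
## Hedged usage sketch: the queue-based reader above only yields data once
## queue runners are started. A minimal TF1-style consumption loop (the
## 'train.tfrecord' file name is an assumption):
def _demo_consume_batches():
    data_batch, label_batch = readDataSet(['train.tfrecord'])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        d, l = sess.run([data_batch, label_batch])  # one batch of cubes and labels
        print(d.shape, l.shape)
        coord.request_stop()
        coord.join(threads)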


def read_test_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, single_example = reader.read(filename_queue)
    parsed_example = tf.parse_single_example(
        single_example,
        features={
            "data_raw": tf.FixedLenFeature([], tf.string),
            "label_raw": tf.FixedLenFeature([], tf.string)
        }
    )

    NbodySimuDecode = tf.decode_raw(parsed_example['data_raw'], tf.float32)
    labelDecode = tf.decode_raw(parsed_example['label_raw'], tf.float32)
    NbodySimus = tf.reshape(NbodySimuDecode, [128, 128, 128])
    NbodySimus /= tf.reduce_sum(NbodySimus) / 128**3
    NbodySimuAddDim = tf.expand_dims(NbodySimus, 3)
    label = tf.reshape(labelDecode, [hyper_parameters_Cosmo.DATAPARAM["output_dim"]])

    ## same z-scoring as in read_tfrecord
    labelNorm = (label - tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsAVG'], dtype=tf.float32)) \
                / tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsSTD'], dtype=tf.float32)

    print(NbodySimuAddDim.shape)

    return NbodySimuAddDim, labelNorm

def readTestSet(filenames):
    print("----readTestSet-io_cosmo----")
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=None, shuffle=False)
    NbodySimus, label = read_test_tfrecord(filename_queue)
    NbodySimus_batch, label_batch = tf.train.batch(
        [NbodySimus, label],
        batch_size=hyper_parameters_Cosmo.Input_Test["BATCH_SIZE"],
        num_threads=hyper_parameters_Cosmo.Input_Test["NUM_THREADS"],
        capacity=hyper_parameters_Cosmo.Input_Test["CAPACITY"],
        enqueue_many=False,
        allow_smaller_final_batch=True)

    return NbodySimus_batch, label_batch


if __name__ == '__main__':

    label_path = os.path.join('/global/cscratch1/sd/djbard/MUSIC_pyCola/egpbos-pycola-672c58551ff1/OmSiNs/twothousand-4/', 'list-2000-noCiC-128from256.txt')
    labels = np.loadtxt(label_path, delimiter=',')

    ### How many tensorflow files do we want to make?
    ### Assuming 500 here, with the first 400 a random mix,
    ### and the last 100 NOT mixed for val/test sets.
    ### (this particular run only writes chunks 1950 to 1999)
    for i in range(1950, 2000):
        data = []
        label = []
        for j in range(64):
            ## for the twothousand dir, just make all of them training
            numDirectory = random.randrange(1000, 3000)

            #if i < 1800:
            #    numDirectory = random.randrange(1000, 2800)
            #else:
            #    numDirectory = i + 1000  ## don't want this to be random!!

            numFile = random.randrange(8)
            dirname = numDirectory

            ## pull a sub-volume from the 2000 dir
            data_path = os.path.join('/global/cscratch1/sd/djbard/MUSIC_pyCola/egpbos-pycola-672c58551ff1/OmSiNs/twothousand-4/128from256-2000/', str(dirname).rjust(3, '0'), str(numFile) + '.npy')
            data = np.append(data, np.load(data_path))
            label = np.append(label, labels[numDirectory - 1000][[1, 2, 3]])

        loadNpyData(data.reshape(-1, 128, 128, 128, 1), label.reshape(-1, 3), i).convert_to()