import numpy as np
import tensorflow as tf
import hyper_parameters_Cosmo
import os
import itertools
import random

## Wrap a scalar in a tf.train.Feature holding a FloatList.
def _float64_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

## Wrap a byte string in a tf.train.Feature holding a BytesList.
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

| 14 | + |
| 15 | +class loadNpyData: |
| 16 | + def __init__(self,data,label,num): |
| 17 | + ### suggestion from James to cast as 32-bit |
| 18 | + self.data = data.astype(dtype = np.float32) ##data |
| 19 | + self.label = label.astype(dtype = np.float32) ##label |
| 20 | + self.num = num |
| 21 | + |
| 22 | + def convert_to(self): |
| 23 | + filename = str(self.num)+'.tfrecord' |
| 24 | + print('Writing ', filename) |
| 25 | + writer = tf.python_io.TFRecordWriter(filename) |
| 26 | + for index in range(len(self.data)): |
| 27 | + data_raw = self.data[index].tostring() |
| 28 | + label_raw = self.label[index].tostring() |
| 29 | + example = tf.train.Example(features = tf.train.Features(feature={'label_raw': _bytes_feature(label_raw),'data_raw': _bytes_feature(data_raw)})) |
| 30 | + writer.write(example.SerializeToString()) |
| 31 | + writer.close() |
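
## Hedged usage sketch (not from the original code): writing one fake
## example to '9999.tfrecord', assuming the 128^3 cube shape and the
## 3-parameter label used throughout this module.
def _demo_write_tfrecord():
    fake_data = np.random.rand(1, 128, 128, 128, 1)   # one hypothetical sub-volume
    fake_label = np.random.rand(1, 3)                 # one hypothetical label vector
    loadNpyData(fake_data, fake_label, 9999).convert_to()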

class loadTfrecordData:
    def __init__(self, fileBuffer, num):
        self.fileBuffer = fileBuffer
        self.num = num

    def reconstruct_from(self):
        ## iterate over the serialized records in the file; the arrays were
        ## written as float32, so they must be decoded as float32 as well
        record_iterator = tf.python_io.tf_record_iterator(path=self.fileBuffer)
        data, label = None, None
        for serialized in record_iterator:
            example = tf.train.Example()
            example.ParseFromString(serialized)
            data_raw = example.features.feature['data_raw'].bytes_list.value[0]
            data = np.frombuffer(data_raw, dtype=np.float32).reshape([-1, 128, 128, 128, 1])
            label_raw = example.features.feature['label_raw'].bytes_list.value[0]
            label = np.frombuffer(label_raw, dtype=np.float32).reshape(
                [-1, hyper_parameters_Cosmo.DATAPARAM["output_dim"]])
        return data, label
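
## Hedged sketch: a round-trip sanity check pairing loadNpyData with
## loadTfrecordData; the file name '9999.tfrecord' is assumed to come
## from the writer sketch above.
def _demo_read_back():
    data, label = loadTfrecordData('9999.tfrecord', 9999).reconstruct_from()
    print(data.shape, label.shape)   # expect (1, 128, 128, 128, 1) and (1, output_dim)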

def read_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, single_example = reader.read(filename_queue)
    parsed_example = tf.parse_single_example(
        single_example,
        features={
            "data_raw": tf.FixedLenFeature([], tf.string),
            "label_raw": tf.FixedLenFeature([], tf.string)
        }
    )

    NbodySimuDecode = tf.decode_raw(parsed_example['data_raw'], tf.float32)
    labelDecode = tf.decode_raw(parsed_example['label_raw'], tf.float32)

    NbodySimus = tf.reshape(NbodySimuDecode, [128, 128, 128])

    ## normalize the cube so its mean voxel value is 1
    NbodySimus /= tf.reduce_sum(NbodySimus) / 128**3
    NbodySimuAddDim = tf.expand_dims(NbodySimus, axis=3)
    label = tf.reshape(labelDecode, [hyper_parameters_Cosmo.DATAPARAM["output_dim"]])

    ## z-score the labels with the dataset-wide mean and standard deviation
    label = (label - tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsAVG'], dtype=tf.float32)) \
            / tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsSTD'], dtype=tf.float32)
    return NbodySimuAddDim, label
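
## For reference, the two preprocessing steps above amount to the following
## numpy operations (a hedged illustration, not code the pipeline calls;
## all argument names are hypothetical).
def _demo_preprocess_numpy(cube, label_vec, zs_avg, zs_std):
    cube = cube / (cube.sum() / 128**3)        # unit-mean density normalization
    label_vec = (label_vec - zs_avg) / zs_std  # standardize the cosmology parameters
    return cube, label_vec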

def readDataSet(filenames):
    print("---readDataSet-ioCosmo------")
    print(filenames)
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=None, shuffle=True)
    NbodySimus, label = read_tfrecord(filename_queue)

    NbodySimus_batch, label_batch = tf.train.shuffle_batch(
        [NbodySimus, label],
        batch_size=hyper_parameters_Cosmo.Input["BATCH_SIZE"],
        num_threads=hyper_parameters_Cosmo.Input["NUM_THREADS"],
        capacity=hyper_parameters_Cosmo.Input["CAPACITY"],
        min_after_dequeue=hyper_parameters_Cosmo.Input["MIN_AFTER_DEQUEUE"],
        allow_smaller_final_batch=True)

    return NbodySimus_batch, label_batch
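
## Hedged usage sketch: the queue-based reader above only yields data once
## queue runners are started. A minimal TF1-style consumption loop (the
## 'train.tfrecord' file name is an assumption):
def _demo_consume_batches():
    data_batch, label_batch = readDataSet(['train.tfrecord'])
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        d, l = sess.run([data_batch, label_batch])  # one batch of cubes and labels
        print(d.shape, l.shape)
        coord.request_stop()
        coord.join(threads)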


def read_test_tfrecord(filename_queue):
    reader = tf.TFRecordReader()
    _, single_example = reader.read(filename_queue)
    parsed_example = tf.parse_single_example(
        single_example,
        features={
            "data_raw": tf.FixedLenFeature([], tf.string),
            "label_raw": tf.FixedLenFeature([], tf.string)
        }
    )

    NbodySimuDecode = tf.decode_raw(parsed_example['data_raw'], tf.float32)
    labelDecode = tf.decode_raw(parsed_example['label_raw'], tf.float32)
    NbodySimus = tf.reshape(NbodySimuDecode, [128, 128, 128])
    NbodySimus /= tf.reduce_sum(NbodySimus) / 128**3
    NbodySimuAddDim = tf.expand_dims(NbodySimus, 3)
    label = tf.reshape(labelDecode, [hyper_parameters_Cosmo.DATAPARAM["output_dim"]])

    ## same z-scoring as in read_tfrecord
    labelNorm = (label - tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsAVG'], dtype=tf.float32)) \
                / tf.constant(hyper_parameters_Cosmo.DATAPARAM['zsSTD'], dtype=tf.float32)

    print(NbodySimuAddDim.shape)

    return NbodySimuAddDim, labelNorm

def readTestSet(filenames):
    print("----readTestSet-io_cosmo----")
    filename_queue = tf.train.string_input_producer(filenames, num_epochs=None, shuffle=False)
    NbodySimus, label = read_test_tfrecord(filename_queue)
    NbodySimus_batch, label_batch = tf.train.batch(
        [NbodySimus, label],
        batch_size=hyper_parameters_Cosmo.Input_Test["BATCH_SIZE"],
        num_threads=hyper_parameters_Cosmo.Input_Test["NUM_THREADS"],
        capacity=hyper_parameters_Cosmo.Input_Test["CAPACITY"],
        enqueue_many=False,
        allow_smaller_final_batch=True)

    return NbodySimus_batch, label_batch


if __name__ == '__main__':

    label_path = os.path.join('/global/cscratch1/sd/djbard/MUSIC_pyCola/egpbos-pycola-672c58551ff1/OmSiNs/twothousand-4/', 'list-2000-noCiC-128from256.txt')
    labels = np.loadtxt(label_path, delimiter=',')

    ### How many tensorflow files do we want to make?
    ### Assuming 500 here, with the first 400 a random mix,
    ### and the last 100 NOT mixed for val/test sets.
    ### (this particular run only writes chunks 1950 to 1999)
    for i in range(1950, 2000):
        data = []
        label = []
        for j in range(64):
            ## for the twothousand dir, just make all of them training
            numDirectory = random.randrange(1000, 3000)

            #if i < 1800:
            #    numDirectory = random.randrange(1000, 2800)
            #else:
            #    numDirectory = i + 1000  ## don't want this to be random!!

            numFile = random.randrange(8)
            dirname = numDirectory

            ## pull a sub-volume from the 2000 dir
            data_path = os.path.join('/global/cscratch1/sd/djbard/MUSIC_pyCola/egpbos-pycola-672c58551ff1/OmSiNs/twothousand-4/128from256-2000/', str(dirname).rjust(3, '0'), str(numFile) + '.npy')
            data = np.append(data, np.load(data_path))
            label = np.append(label, labels[numDirectory - 1000][[1, 2, 3]])

        loadNpyData(data.reshape(-1, 128, 128, 128, 1), label.reshape(-1, 3), i).convert_to()