-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtsn_conv.py
executable file
·257 lines (225 loc) · 11.4 KB
/
tsn_conv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#!/usr/bin/env python
from __future__ import print_function
import threading
import roslib
roslib.load_manifest('caffe_tsn_ros')
import sys
import rospy
import rosparam
import cv2
from std_srvs.srv import Empty
from caffe_tsn_ros.srv import *
from std_msgs.msg import String, Header, Float32MultiArray, MultiArrayDimension
from sensor_msgs.msg import Image
from cv_bridge import CvBridge, CvBridgeError
import numpy as np
np.set_printoptions(precision=2)
#import matplotlib.pyplot as plt
import itertools
import os
### I should figure out the pythonic way of doing thisq
print(os.getcwd())
mypath = "/temporal-segment-networks"
sys.path.append(mypath)
sys.path.append(os.path.abspath(mypath+"/tools"))
sys.path.insert(0, mypath+'/lib/caffe-action/python') ## should use os.path.join?
from pyActionRecog.action_caffe import CaffeNet, fast_list2arr, flow_stack_oversample
import argparse
import math
import multiprocessing
import caffe_tsn_ros.msg
from copy import deepcopy
class tsn_classifier:
def __init__(self):
global mypath
# services provided
self.reconfig_srv_ = rospy.Service('reconf_split',split, self.reconfig_srv)
self.start_vidscores = rospy.Service('start_vidscores', Empty, self.start_vidscores)
self.stop_vidscores = rospy.Service('stop_vidscores', Empty, self.stop_vidscores)
# topics published
self.scores_pub = rospy.Publisher("scores",Float32MultiArray, queue_size=1)
self.score_fw_pub = rospy.Publisher("scores_fw", caffe_tsn_ros.msg.ScoreArray, queue_size=1)
# self.label_pub = rospy.Publisher("action", String, queue_size=1)
self.ownlabel_pub = rospy.Publisher("scores_own", caffe_tsn_ros.msg.ScoreArray, queue_size=1)
# parameters
self.dataset = rospy.get_param('~dataset','hmdb51')
self.device_id = rospy.get_param('~device_id',0)
self.split = rospy.get_param('~split',1)
self.rgbOrFlow = rospy.get_param('~classifier_type')
self.clear_fw_after_new_vid = rospy.get_param('~clear_fw_after_new_vid', True)
self.step = rospy.get_param('~step',6)
# this should actually be
# step = (frame_cnt - stack_depth) / (args.num_frame_per_video-1)
# it will change depending on the action length, a value I don't have if I am classifying real time, but that I could get if I am doing it by service calls!
# stack_depth is 1 for rgb and 5 for flows. I am letting it be 5 to test creating an array of cv_images
self.stack_depth = rospy.get_param('~stack_depth',5)
self.stack_count = 0
self.cv_image_stack = []
self.classwindow = rospy.get_param('~classification_frame_window','undefined')
assert not self.classwindow == 'undefined'
###probably should use the nice rosparam thingy here to avoid these problems...
self.framesize_width = rospy.get_param('~framesize_width',340)
self.framesize_height = rospy.get_param('~framesize_height',256)
# topics subscribed
self.image_sub = rospy.Subscriber('video_topic', Image, self.callback,queue_size=1)
# internals
self.bridge = CvBridge()
self.prototxt = mypath+'/models/'+ self.dataset +'/tsn_bn_inception_'+self.rgbOrFlow+'_deploy.prototxt'
self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_'+self.rgbOrFlow+'_reference_bn_inception.caffemodel'
rospy.loginfo("loading prototxt {}".format(self.prototxt))
rospy.loginfo("loading caffemodel {}".format(self.caffemodel))
self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id)
self.frame_scores = caffe_tsn_ros.msg.ScoreArray()
self.ownvidscores = caffe_tsn_ros.msg.ScoreArray()
# when I instantiate the classifier, the startedownvid is working already. this influences how vsmf_srv will behave, so it needs to be like this, I think.
self.startedownvid = True
self.lock = threading.Lock()
self.published_fw = False
rospy.set_param('~alive',0.5)
rospy.loginfo("waiting for callback from " +rospy.resolve_name('video_topic') +" to do anything")
def start_vidscores(self,req):
# I will need to use locks here, I think...
with self.lock:
self.startedownvid = True
self.published_fw = False
rospy.loginfo("Started classifying own vid!")
return []
def stop_vidscores(self,req):
# I will need to use locks here, I think...
with self.lock:
self.startedownvid = False
if self.ownvidscores:
#### I am going to publish now a set of matrices, right?
self.ownlabel_pub.publish(self.ownvidscores)
rospy.logdebug("length of ownvidscores:%d"%(len(self.ownvidscores.data)))
#pass
else:
rospy.logerr('ownvidscores is empty!!!!!!!!!!!!!!! are we locking for too long?')
self.ownvidscores = caffe_tsn_ros.msg.ScoreArray()
if not self.published_fw:
rospy.logwarn("did not publish a single fw for this video")
rospy.logwarn("length of fw:%d"%(len(self.frame_scores.data)))
if self.clear_fw_after_new_vid:
self.frame_scores = caffe_tsn_ros.msg.ScoreArray()
rospy.logdebug("published the label for the own video version!")
rospy.loginfo("stopped classifying own vid")
return []
def callback(self,data):
rospy.logdebug("reached callback. that means I can read the Subscriber!")
rospy.set_param('~alive',1)
try:
cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
except CvBridgeError as e:
#print(e)
rospy.logerr(e)
#since I am not using stacks for rgb images, I can prevent from making the rgb version any slower by using an if statement here
if self.rgbOrFlow == 'flow':
## and I want the combined flow version here, don't I? so I need to strip the frame apart into components. I think it is better than
#self.cv_image_stack.append(cv_image)
#this would be incorrect, i need to get each matrix and put it together in a stack. first x then y
# from ros_flow_bg, I can see that x is the 0 component and y the 1 component. I hope bgr8 stays bgr8 and we don't flip things around here!
self.cv_image_stack.append(cv_image[:,:,0])
self.cv_image_stack.append(cv_image[:,:,1])
# self.cv_image_stack.extend([cv_image[:,:,0], cv_image[:,:,1]])
if len(self.cv_image_stack)>2*self.stack_depth: #keeps at most 2*self.stack_depth images in cv_image_stack, the 2 comes from the fact that we are using flow_x and flow_y
self.cv_image_stack.pop(0)
self.cv_image_stack.pop(0)
if self.stack_count%self.step == 0:
rospy.logdebug("reached execution part of callback!")
self.stack_count = 0 ## we don't keep a large number here.
scores = None
### i can maybe use a lambda to abstract this. is it faster than an if though?
if self.rgbOrFlow == 'rgb':
scores = self.net.predict_single_frame([cv_image,], 'global_pool', frame_size=(self.framesize_width, self.framesize_height))
elif self.rgbOrFlow == 'flow' and len(self.cv_image_stack)==10:
scores = self.net.predict_single_flow_stack(self.cv_image_stack, 'global_pool', frame_size=(self.framesize_width, self.framesize_height))
#print(type(scores))
#print(scores.dtype)
#scoremsg = caffe_tsn_ros.msg.Scores()
#scoremsg.test = 'hello'
#scoremsg.scores = scores
#print(scoremsg)
#scores = np.squeeze(scores)
#scores = np.array([[[1.,2.],[3.,4.],[5.,6.]],[[11.,12.],[13.,14.],[15.,16.]]],dtype='float32')
#self.scores_pub.publish(self.bridge.cv2_to_imgmsg(scores, '32FC1'))
#self.scores_pub.publish(scoremsg)
#print(self.scores_pub.get_num_connections())
#print((scores))
#print(np.shape(scores))
#print(scores.shape)
if isinstance(scores, np.ndarray):
#this publishes the instant time version, aka, per frame
#self.label_pub.pub([scores])
## Not sure if this MultiArrayDimension thing is working. In any
## case, it is already a LOT of data per frame; publishing the chunk
## at the end of the video, all at the same time would probably
## cause a lot of unnecessary waiting
# TO CONSIDER: this is fast, so I think it doesn't matter, but I
# believe the layout could be pre-set, since it doesn't change on a
# frame by frame basis.
myheader = Header()
myheader.stamp = rospy.Time.now()
scoresmsg = Float32MultiArray()
scoresmsg.layout.dim = []
dims = np.array(scores.shape)
scoresize = dims.prod()/float(scores.nbytes)
for i in range(0,len(dims)):
#print(i)
scoresmsg.layout.dim.append(MultiArrayDimension())
scoresmsg.layout.dim[i].size = dims[i]
scoresmsg.layout.dim[i].stride = dims[i:].prod()/scoresize
scoresmsg.layout.dim[i].label = 'dim_%d'%i
#print(scoresmsg.layout.dim[i].size)
#print(scoresmsg.layout.dim[i].stride)
scoresmsg.data = np.frombuffer(scores.tobytes(),'float32')
self.scores_pub.publish(scoresmsg)
#rospy.logdebug("published the label for instant time version!")
#this part publishes the frame_window version
self.frame_scores.data.append(scoresmsg)
#rospy.logwarn("FISHY: length of fw:%d"%(len(self.frame_scores.data)))
#rospy.logwarn("FISHY: self.classwindow:%d"%self.classwindow)
if len(self.frame_scores.data)>self.classwindow:
#rospy.logwarn("FISHY: got into the loop")
self.frame_scores.header = myheader
self.frame_scores.data.pop(0)
self.score_fw_pub.publish(self.frame_scores)
#rospy.logwarn("FISHY: passed the publisher, so it must have worked?")
with self.lock:
self.published_fw = True ## it was not working without the lock
rospy.logdebug("published the label for the frame window version!")
with self.lock:
if self.startedownvid:
self.ownvidscores.data.append(scoresmsg)
self.ownvidscores.header = myheader
#pass
else:
rospy.logdebug_throttle(20,"waiting for start_vidscores call to start classifying ownvid")
self.stack_count = self.stack_count + 1
# try:
# self.image_pub.publish(self.bridge.cv2_to_imgmsg(cv_image, "bgr8"))
# rospy.logdebug("published image")
# except CvBridgeError as e:
# print(e)
def reconfig_srv(self, req):
# why not use standard ros reconfigure stds?
self.image_sub.unregister()
self.split = req.Split
rospy.loginfo("reading split:"+str(req.Split))
#print(req.Split)
self.caffemodel = mypath+'/models/'+ self.dataset +'_split_'+str(self.split)+'_tsn_'+self.rgbOrFlow+'_reference_bn_inception.caffemodel'
self.net = CaffeNet(self.prototxt, self.caffemodel, self.device_id)
self.image_sub = rospy.Subscriber('video_topic', Image,self.callback,queue_size=1)
#print('Dum')
return []
def main(argss):
rospy.init_node('action_classifier', anonymous=True, log_level=rospy.INFO)
ic = tsn_classifier()
try:
rospy.spin()
except KeyboardInterrupt:
print("Shutting down")
except rospy.ROSException as e:
rospy.spin()
rospy.logerr(e)
if __name__ == '__main__':
main(sys.argv)