Skip to content

Commit dc48c65

Browse files
authored
Update cast of tensor data pointer for const tensors (#1966)
### Description - Update casting/getting data pointer from tensor when tensor can return const pointer to data - Use new version of OpenVINO Tokenizers which also contains similar update ### Blocking - openvinotoolkit/openvino#29594 Signed-off-by: Raasz, Pawel <pawel.raasz@intel.com>
1 parent 7162cd4 commit dc48c65

File tree

9 files changed

+35
-36
lines changed

9 files changed

+35
-36
lines changed

src/cpp/src/image_generation/flux_pipeline.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ ov::Tensor pack_latents(const ov::Tensor latents, size_t batch_size, size_t num_
2222

2323
OPENVINO_ASSERT(latents.get_size() == permuted_latents.get_size(), "Incorrect target shape, tensors must have the same sizes");
2424

25-
float* src_data = latents.data<float>();
25+
auto src_data = latents.data<float>();
2626
float* dst_data = permuted_latents.data<float>();
2727

2828
// Permute to (0, 2, 4, 1, 3, 5)

src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ ov::Tensor get_tensor_batch(const ov::Tensor input, size_t batch_id) {
5151
OPENVINO_ASSERT(target_shape.at(0) > batch_id, "Cannot get batch with id ", batch_id, ", total batch size is ", target_shape.at(0));
5252
target_shape[0] = 1;
5353

54-
void * target_data = input.data<float>() + batch_id * ov::shape_size(target_shape);
54+
auto target_data = input.data<float>() + batch_id * ov::shape_size(target_shape);
5555
ov::Tensor target_tensor(input.get_element_type(), target_shape, target_data);
5656

5757
return target_tensor;
@@ -377,7 +377,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
377377
ov::Tensor negative_pooled_prompt_2_embed_out = get_tensor_batch(text_encoder_2_output, 0);
378378
ov::Tensor negative_prompt_2_embed_out = get_tensor_batch(text_encoder_2_hidden_state, 0);
379379
ov::Tensor negative_t5_prompt_embed_out = get_tensor_batch(text_encoder_3_output, 0);
380-
380+
381381
ov::Tensor negative_pooled_prompt_embed, negative_prompt_embed, negative_pooled_prompt_2_embed,
382382
negative_prompt_2_embed, negative_t5_prompt_embed;
383383
if (generation_config.num_images_per_prompt == 1) {

src/cpp/src/llm_pipeline_static.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ void fill_tensor(ov::Tensor tensor, T fill_val, size_t offset = 0u) {
2323
}
2424

2525
void copy_with_offset(const ov::Tensor& orig, const std::size_t offset, ov::Tensor& padded) {
26-
int64_t* orig_data = orig.data<int64_t>();
26+
auto orig_data = orig.data<int64_t>();
2727
int64_t* padded_data = padded.data<int64_t>();
2828
std::copy(orig_data, orig_data + orig.get_size(), padded_data + offset);
2929
}
@@ -56,8 +56,8 @@ void copy_columns_by_row_chunks(const ov::Tensor& src, ov::Tensor& dst) {
5656

5757
const size_t chunk_byte_size = W * elem_size;
5858

59-
const auto* src_p = static_cast<uint8_t*>(src.data());
60-
auto* dst_p = static_cast<uint8_t*>(dst.data());
59+
const auto* src_p = static_cast<const uint8_t*>(src.data());
60+
auto* dst_p = static_cast<uint8_t*>(dst.data());
6161

6262
for (size_t i = 0; i < C*H; ++i) {
6363
const size_t src_offset = i * IS_H;

src/cpp/src/lm_encoding.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ void update_position_ids(ov::Tensor&& position_ids, const ov::Tensor&& attention
2727
position_ids.set_shape({batch_size, 1});
2828

2929
for (size_t batch = 0; batch < batch_size; batch++) {
30-
int64_t* mask_start = attention_mask.data<int64_t>() + batch * sequence_length;
30+
auto mask_start = attention_mask.data<int64_t>() + batch * sequence_length;
3131
position_ids.data<int64_t>()[batch] = std::accumulate(mask_start, mask_start + sequence_length - 1, 0);
3232
}
3333
}

src/cpp/src/visual_language/llava/classes.cpp

+8-9
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ clip_image_f32 preprocess_clip_image_llava(const clip_image_u8& image, const Pro
4040
for (int y = 0; y < crop_height; ++y) {
4141
for (int x = 0; x < crop_width; ++x) {
4242
for (int c = 0; c < 3; ++c) {
43-
cropped_image.buf[(y * crop_width + x) * 3 + c] =
43+
cropped_image.buf[(y * crop_width + x) * 3 + c] =
4444
resized_image.buf[((start_y + y) * resized_image.nx + (start_x + x)) * 3 + c];
4545
}
4646
}
@@ -146,18 +146,17 @@ ov::Tensor InputsEmbedderLLaVA::get_inputs_embeds(const std::string& prompt, con
146146
return merge_text_and_image_embeddings_llava(input_ids, text_embeds, image_embeds, image_token_id);
147147
}
148148

149-
ov::Tensor InputsEmbedderLLaVA::merge_text_and_image_embeddings_llava(
150-
const ov::Tensor& input_ids,
151-
const ov::Tensor& text_embeds,
152-
const std::vector<ov::Tensor>& image_embeds,
153-
int64_t image_token_id) {
149+
ov::Tensor InputsEmbedderLLaVA::merge_text_and_image_embeddings_llava(const ov::Tensor& input_ids,
150+
ov::Tensor& text_embeds,
151+
const std::vector<ov::Tensor>& image_embeds,
152+
int64_t image_token_id) {
154153
auto text_embeds_shape = text_embeds.get_shape();
155154
size_t text_embeds_seq_length = text_embeds_shape[1];
156155
size_t hidden_size = text_embeds_shape[2];
157156

158157
const int64_t* input_ids_data = input_ids.data<const int64_t>();
159158
int token_offset = text_embeds_seq_length - 1;
160-
float* text_embeds_data = text_embeds.data<float>();
159+
auto text_embeds_data = text_embeds.data<float>();
161160
const float* text_embeds_end = text_embeds_data + text_embeds_seq_length * hidden_size;
162161

163162
// Copy in reversed order because a tokenizer may truncate the input removing the preffix.
@@ -178,7 +177,7 @@ ov::Tensor InputsEmbedderLLaVA::merge_text_and_image_embeddings_llava(
178177
}
179178
size_t n_tokens = std::min(image_embed_it->get_shape().at(1), size_t(token_offset - changed_token_offset));
180179
size_t n_floats = n_tokens * hidden_size;
181-
float* text_embeds_idx = text_embeds_data + (changed_token_offset + 1) * hidden_size;
180+
auto text_embeds_idx = text_embeds_data + (changed_token_offset + 1) * hidden_size;
182181
OPENVINO_ASSERT(text_embeds_idx + n_floats <= text_embeds_end);
183182
std::copy_n(
184183
image_embed_it->data<const float>() + image_embed_it->get_size() - n_floats,
@@ -190,4 +189,4 @@ ov::Tensor InputsEmbedderLLaVA::merge_text_and_image_embeddings_llava(
190189
return text_embeds;
191190
}
192191

193-
} // namespace ov::genai
192+
} // namespace ov::genai

src/cpp/src/visual_language/llava/classes.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class InputsEmbedderLLaVA : public InputsEmbedder::IInputsEmbedder {
4141
protected:
4242
ov::Tensor merge_text_and_image_embeddings_llava(
4343
const ov::Tensor& input_ids,
44-
const ov::Tensor& text_embeds,
44+
ov::Tensor& text_embeds,
4545
const std::vector<ov::Tensor>& image_embeds,
4646
int64_t image_token_id);
4747
};

src/cpp/src/visual_language/minicpm/classes.cpp

+10-10
Original file line numberDiff line numberDiff line change
@@ -306,7 +306,7 @@ EncodedImage llava_image_embed_make_with_bytes_slice(clip_ctx& ctx_clip, const o
306306
ov::Tensor pixel_values{ov::element::f32, {n_images, channels, patch_size, max_size / patch_size}};
307307
size_t d3_all_pixel = pixel_values.get_shape().at(3);
308308
float* pixel_value_data = pixel_values.data<float>();
309-
309+
310310
//image chw to 1*c*kernel*hw/kernel and padding zero
311311
clip_image_f32& resized_preprocessed = preprocessed.at(0).at(0);
312312
size_t img_h = resized_preprocessed.ny;
@@ -321,7 +321,7 @@ EncodedImage llava_image_embed_make_with_bytes_slice(clip_ctx& ctx_clip, const o
321321
for (size_t k_idx = 0; k_idx < patch_size; k_idx++) {
322322
std::copy(clip_value_data, clip_value_data + d3_clip_pixel, pixel_value_data);
323323
clip_value_data += d3_clip_pixel;
324-
pixel_value_data += d3_all_pixel;
324+
pixel_value_data += d3_all_pixel;
325325
}
326326
}
327327

@@ -334,7 +334,7 @@ EncodedImage llava_image_embed_make_with_bytes_slice(clip_ctx& ctx_clip, const o
334334
img_w = elem.nx;
335335
ov::Tensor clip_img{ov::element::f32, {1, channels, img_h, img_w}, elem.buf.data()};
336336
ov::Tensor clip_pixel_values = preprocess_for_encoder(clip_img, patch_size);
337-
337+
338338
d3_clip_pixel = clip_pixel_values.get_shape().at(3);
339339
clip_value_data = clip_pixel_values.data<float>();
340340
pixel_value_data = pixel_values.data<float>() + batch_pixel * channels * patch_size * d3_all_pixel;
@@ -425,8 +425,8 @@ ov::Tensor concatenate_last_dim(const ov::Tensor& first, const ov::Tensor& secon
425425
OPENVINO_ASSERT(second.get_shape().at(1) == res_d_1);
426426
size_t res_d_2 = first.get_shape().at(2) + second.get_shape().at(2);
427427
ov::Tensor res{first.get_element_type(), {res_d_0, res_d_1, res_d_2}};
428-
float* first_data = first.data<float>();
429-
float* second_data = second.data<float>();
428+
auto first_data = first.data<float>();
429+
auto second_data = second.data<float>();
430430
float* res_data = res.data<float>();
431431
for (size_t i = 0; i < res_d_0; ++i) {
432432
for (size_t j = 0; j < res_d_1; ++j) {
@@ -461,8 +461,8 @@ ov::Tensor get_1d_sincos_pos_embed_from_grid_new(size_t embed_dim, const ov::Ten
461461
std::vector<size_t> out_shape = {H, W, embed_dim};
462462
ov::Tensor emb(ov::element::f32, out_shape);
463463

464-
float* pos_data = pos.data<float>();
465-
float* emb_data = emb.data<float>();
464+
auto pos_data = pos.data<float>();
465+
auto emb_data = emb.data<float>();
466466

467467
size_t counter = 0;
468468
for (size_t h = 0; h < H; ++h) {
@@ -481,7 +481,7 @@ ov::Tensor get_1d_sincos_pos_embed_from_grid_new(size_t embed_dim, const ov::Ten
481481
ov::Tensor get_2d_sincos_pos_embed_from_grid(size_t embed_dim, const ov::Tensor& grid) {
482482
OPENVINO_ASSERT(embed_dim % 2 == 0);
483483
ov::Shape grid_shape = grid.get_shape();
484-
float* grid_data = grid.data<float>();
484+
auto grid_data = grid.data<float>();
485485
ov::Shape plane_shape{grid_shape.at(1), grid_shape.at(2)};
486486
ov::Tensor emb_h = get_1d_sincos_pos_embed_from_grid_new(embed_dim / 2, ov::Tensor{
487487
ov::element::f32,
@@ -650,7 +650,7 @@ ov::Tensor InputsEmbedderMiniCPM::get_inputs_embeds(const std::string& prompt, c
650650
for (size_t image_id : images_sequence) {
651651
const EncodedImage& encoded_image = images.at(image_id - m_prev_image_id);
652652
const ov::Tensor& resampled_source = resample(encoded_image.resized_source, {encoded_image.resized_source_size});
653-
float* emb = resampled_source.data<float>();
653+
auto emb = resampled_source.data<float>();
654654
ids = std::find(ids, end, im_start_id);
655655
OPENVINO_ASSERT(end != ids);
656656
++ids;
@@ -752,4 +752,4 @@ ov::Tensor InputsEmbedderMiniCPM::resample(const ov::Tensor& encoded_image, cons
752752
return resampler.get_output_tensor(); // [N, query_num, new_hidden_size]
753753
}
754754

755-
} // namespace ov::genai
755+
} // namespace ov::genai

src/cpp/src/visual_language/phi3_vision/classes.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ ov::Tensor padding_336(const ov::Tensor& unpadded) {
7979
ov::Tensor padded{ov::element::u8, {1, s1, tar, 3}};
8080
uint8_t* padded_data = padded.data<uint8_t>();
8181
std::fill_n(padded_data, padded.get_size(), 255);
82-
uint8_t* unpadded_data = unpadded.data<uint8_t>();
82+
auto unpadded_data = unpadded.data<uint8_t>();
8383
for (size_t row = 0; row < s1; ++row) {
8484
std::copy_n(unpadded_data + row * s2 * 3, s2 * 3, padded_data + row * tar * 3 + left_padding * 3);
8585
}
@@ -103,7 +103,7 @@ ov::Tensor HD_transform(const ov::Tensor& uint8, size_t num_crops) {
103103
size_t new_w = scale * INPUT_IMAGE_SIZE;
104104
size_t new_h = new_w / ratio;
105105
clip_image_u8 src{}, dst{};
106-
uint8_t* uint8_data = uint8.data<uint8_t>();
106+
auto uint8_data = uint8.data<uint8_t>();
107107
if (trans) {
108108
src = clip_image_u8{int(height), int(width), {uint8_data, uint8_data + uint8.get_size()}};
109109
bilinear_resize(src, dst, new_h, new_w);
@@ -115,7 +115,7 @@ ov::Tensor HD_transform(const ov::Tensor& uint8, size_t num_crops) {
115115
}
116116

117117
ov::Tensor mean_scale(const ov::Tensor& uint8, const ProcessorConfig& config) {
118-
uint8_t* uint_8_data = uint8.data<uint8_t>();
118+
auto uint_8_data = uint8.data<uint8_t>();
119119
ov::Tensor float_normalized{ov::element::f32, uint8.get_shape()};
120120
float* float_data = float_normalized.data<float>();
121121
OPENVINO_ASSERT(0 == uint8.get_size() % 3, "RGB");
@@ -130,7 +130,7 @@ ov::Tensor mean_scale(const ov::Tensor& uint8, const ProcessorConfig& config) {
130130
ov::Tensor channels_first(const ov::Tensor& _1hw3) {
131131
ov::Shape shape = _1hw3.get_shape();
132132
ov::Tensor _13hw = ov::Tensor{ov::element::f32, {1, 3, shape.at(1), shape.at(2)}};
133-
float* _1hw3_data = _1hw3.data<float>();
133+
auto _1hw3_data = _1hw3.data<float>();
134134
float* _13hw_data = _13hw.data<float>();
135135
for (size_t plane = 0; plane < 3; ++plane) {
136136
for (size_t row = 0; row < shape.at(1); ++row) {
@@ -156,7 +156,7 @@ ov::Tensor slice_image(const ov::Tensor& image) {
156156
// Step 1: Define and populate the reshaped tensor in the correct shape order
157157
ov::Tensor reshaped{ov::element::f32, {N, num_h_slices, num_w_slices, C, INPUT_IMAGE_SIZE, INPUT_IMAGE_SIZE}};
158158
float* reshaped_data = reshaped.data<float>();
159-
float* image_data = image.data<float>();
159+
auto image_data = image.data<float>();
160160

161161
// Populate the reshaped tensor
162162
for (size_t n = 0; n < N; ++n) {
@@ -217,8 +217,8 @@ ov::Tensor concatenate_batch(const ov::Tensor& float_first, const ov::Tensor& fl
217217
OPENVINO_ASSERT(shape_first.at(3) == shape_second.at(3), "Width must be the same");
218218
ov::Tensor concatenated{ov::element::f32, {shape_first.at(0) + shape_second.at(0), shape_first.at(1), shape_first.at(2), shape_first.at(3)}};
219219
float* concatenated_data = concatenated.data<float>();
220-
float* first_data = float_first.data<float>();
221-
float* second_data = float_second.data<float>();
220+
auto first_data = float_first.data<float>();
221+
auto second_data = float_second.data<float>();
222222
std::copy(first_data, first_data + float_first.get_size(), concatenated_data);
223223
std::copy(second_data, second_data + float_second.get_size(), concatenated_data + float_first.get_size());
224224
return concatenated;
@@ -232,7 +232,7 @@ ov::Tensor pad_to_max_num_crops_tensor(const ov::Tensor& nchw, size_t max_crops)
232232
}
233233
ov::Tensor padded{ov::element::f32, {max_crops, shape[1], shape[2], shape[3]}};
234234
float* padded_data = padded.data<float>();
235-
float* nchw_data = nchw.data<float>();
235+
auto nchw_data = nchw.data<float>();
236236
std::copy_n(nchw_data, nchw.get_size(), padded_data);
237237
return padded;
238238
}

0 commit comments

Comments (0)