2 changes: 2 additions & 0 deletions src/plugins/intel_gpu/src/graph/include/layout_optimizer.h
@@ -82,6 +82,8 @@ class reorder_factory {
std::map<reorder_cache_key, std::shared_ptr<reorder>> _cached_reorders;
};

int64_t get_convolution_channel_count(const convolution_node& conv_node, const layout& layout, bool is_input);

class layout_optimizer {
public:
enum class optimization_attributes_type {
53 changes: 20 additions & 33 deletions src/plugins/intel_gpu/src/graph/layout_optimizer.cpp
@@ -112,6 +112,21 @@ std::pair<std::shared_ptr<primitive>, bool> reorder_factory::get_weights_reorder
}
}

int64_t cldnn::get_convolution_channel_count(const convolution_node& conv_node, const layout& layout, bool is_input) {
auto channel_count = layout.get_partial_shape()[1].is_static() ? layout.get_partial_shape()[1].get_length() : -1;
if (channel_count == -1) {
auto weights_layout = conv_node.weights().get_output_layout();
if (weights_layout.is_static()) {
const auto& shape = weights_layout.get_partial_shape();
if (is_input)
channel_count = shape[conv_node.get_groups() > 1 ? 2 : 1].get_length();
else
channel_count = shape[conv_node.get_groups() > 1 ? 1 : 0].get_length();
}
}
return channel_count;
}

bool layout_optimizer::is_format_supported(program_node& node, format::type fmt) {
if (node.is_type<fully_connected>() && fmt == format::byxf)
return false;
@@ -250,24 +265,9 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next,
return false;
};

auto get_conv_channel_count = [](const convolution_node& conv_node, const layout& layout, bool is_input) -> int64_t {
auto channel_count = layout.get_partial_shape()[1].is_static() ? layout.get_partial_shape()[1].get_length() : -1;
if (channel_count == -1) {
auto weights_layout = conv_node.weights().get_output_layout();
if (weights_layout.is_static()) {
const auto& shape = weights_layout.get_partial_shape();
if (is_input)
channel_count = shape[conv_node.get_groups() > 1 ? 2 : 1].get_length();
else
channel_count = shape[conv_node.get_groups() > 1 ? 1 : 0].get_length();
}
}
return channel_count;
};

auto& conv_node = next.as<convolution>();
auto in_channel_count = get_conv_channel_count(conv_node, prev_output_layout, true);
auto out_channel_count = get_conv_channel_count(conv_node, next_output_layout, false);
auto in_channel_count = get_convolution_channel_count(conv_node, prev_output_layout, true);
auto out_channel_count = get_convolution_channel_count(conv_node, next_output_layout, false);

if ((prev.is_dynamic() || next.is_dynamic()) && (in_channel_count == -1 || out_channel_count == -1))
return false;
@@ -276,7 +276,7 @@
if (next.get_preferred_impl_type() == impl_types::onednn &&
((fmt_prev == format::byxf && fmt_next == format::byxf) ||
(fmt_prev == format::bfyx && fmt_next == format::byxf &&
(prev_dt == data_types::f16 && get_conv_channel_count(conv_node, next.get_input_layout(0), false) <= 8))) &&
(prev_dt == data_types::f16 && get_convolution_channel_count(conv_node, next.get_input_layout(0), false) <= 8))) &&
is_input_reorder(prev, next))
return true;

@@ -989,22 +989,9 @@ void layout_optimizer::set_onednn_dyn_conv_preferred_format(convolution_node& no
return (rank <= 4) ? cldnn::format::byxf : cldnn::format::bzyxf;
};

// Helper function to get channel count safely
auto get_channel_count = [](const layout& layout) -> int64_t {
return layout.get_partial_shape()[1].is_static() ? layout.get_partial_shape()[1].get_length() : -1;
};

// Get channel counts once
int64_t input_channels = get_channel_count(input_layout);
int64_t output_channels = get_channel_count(output_layout);
auto weights_layout = node.weights().get_output_layout();
// Try to get channel counts from weight layout
if (input_channels == -1 && weights_layout.is_static()) {
input_channels = weights_layout.get_partial_shape()[node.get_groups() > 1 ? 2 : 1].get_length();
}
if (output_channels == -1 && weights_layout.is_static()) {
output_channels = weights_layout.get_partial_shape()[node.get_groups() > 1 ? 1 : 0].get_length();
}
auto input_channels = get_convolution_channel_count(node, input_layout, true);
auto output_channels = get_convolution_channel_count(node, output_layout, false);

if (i8_u8_input) {
// Set default input format for i8/u8 input
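The change above folds two copies of the same channel-count fallback into the shared helper get_convolution_channel_count(). The sketch below (hypothetical name channel_count_from_weights and plain STL types, not the actual cldnn API) illustrates the index convention that fallback relies on: grouped weights are laid out as [groups, out_per_group, in_per_group, y, x], so input channels sit at index 2 and output channels at index 1, while non-grouped weights ([out, in, y, x]) use indices 1 and 0.

```cpp
// Minimal sketch of the fallback performed when the activation shape is
// dynamic: read the channel count from the static weights shape instead.
//
//   grouped weights:     [G, O_per_group, I_per_group, Y, X] -> input dim 2, output dim 1
//   non-grouped weights: [O, I, Y, X]                        -> input dim 1, output dim 0
#include <cstdint>
#include <iostream>
#include <vector>

int64_t channel_count_from_weights(const std::vector<int64_t>& weights_shape,
                                   int64_t groups,
                                   bool is_input) {
    const std::size_t dim = is_input ? (groups > 1 ? 2 : 1)
                                     : (groups > 1 ? 1 : 0);
    return weights_shape.at(dim);
}

int main() {
    // Non-grouped example: 16 output channels, 9 input channels, 3x3 kernel
    // (the same shapes used by the new mem_reset_test parameters).
    const std::vector<int64_t> weights = {16, 9, 3, 3};
    std::cout << "input channels:  " << channel_count_from_weights(weights, 1, true)  << "\n"   // 9
              << "output channels: " << channel_count_from_weights(weights, 1, false) << "\n";  // 16
    return 0;
}
```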
8 changes: 7 additions & 1 deletion src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -606,7 +606,13 @@ bool primitive_inst::need_reset_output_memory() const {
const bool is_user_onednn_impl = user_inst->get_node().get_preferred_impl_type() == impl_types::onednn;
const bool is_user_conv = user_inst->get_node().is_type<convolution>();
if (is_user_conv && is_user_onednn_impl) {
auto& conv_node = user_inst->get_node().as<convolution>();
auto& output_layout = _impl_params->get_output_layout(0);
auto in_channel_count = get_convolution_channel_count(conv_node, output_layout, true);
// If the channel count is dynamic, we cannot verify feature alignment,
// so we conservatively do the reset and return true for this condition.
if (in_channel_count == -1)
return true;

auto get_feature_block_size = [](format fmt) {
int feature_block_size = 1;
@@ -623,7 +629,7 @@ bool primitive_inst::need_reset_output_memory() const {
auto feature_block_size = get_feature_block_size(fmt);
// if layout is single blocked and feature size is not aligned with the blocking size, need to reset output so that we can guarantee zero-filling
// NOTE: We may improve this logic to avoid reset if we are sure that it is not "corrupted" by other layers.
if (output_layout.feature() % feature_block_size != 0) {
if (in_channel_count % feature_block_size != 0) {
Contributor:
could you add test case in mem_reset_test.cpp?

Contributor Author:
Applied

return true;
}
}
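The reworked check in need_reset_output_memory() can be summarized by the toy function below (an assumed simplification, not the actual primitive_inst code): with a b_fs_yx_fsv16 output the feature block size is 16, so 9 input channels leave a partially filled block and force a reset, 16 channels divide evenly and skip it, and a dynamic channel count (-1) now conservatively forces the reset as well.

```cpp
// Simplified illustration of the alignment decision: reset the output memory
// when the channel count cannot be verified (dynamic shape) or is not a
// multiple of the layout's feature block size.
#include <cassert>
#include <cstdint>

bool needs_output_reset(int64_t in_channel_count, int feature_block_size) {
    if (in_channel_count == -1)  // dynamic: alignment unknown, reset conservatively
        return true;
    return in_channel_count % feature_block_size != 0;
}

int main() {
    const int fsv16_block = 16;                    // b_fs_yx_fsv16 blocking (assumed)
    assert(needs_output_reset(9,  fsv16_block));   // 9 is not 16-aligned -> reset
    assert(!needs_output_reset(16, fsv16_block));  // exactly one full block -> no reset
    assert(needs_output_reset(-1, fsv16_block));   // dynamic channel count -> reset
    return 0;
}
```

These are the same three situations the updated mem_reset_test parameters below exercise.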
@@ -31,6 +31,7 @@ const std::string no_bias = "";

struct mem_reset_params {
ov::Dimension::value_type in_channel;
bool is_dynamic;
bool need_reset;
};

@@ -43,20 +44,32 @@ TEST_P(mem_reset_test, need_reset_output_memory_test) {
return;

tests::random_generator rg(GET_SUITE_NAME);
ov::PartialShape input_pshape = {1, p.in_channel, 64, 64};

ov::PartialShape target_pshape = {1, p.in_channel, 64, 64};
ov::PartialShape input_pshape;

if (p.is_dynamic) {
for (size_t i = 0; i < target_pshape.size(); ++i) {
input_pshape.emplace_back(ov::Dimension());
}
input_pshape[1] = target_pshape[1];
} else {
input_pshape = target_pshape;
}

ov::PartialShape weights_pshape = {16, p.in_channel, 3, 3};
layout in_layout{ input_pshape, data_types::f16, format::bfyx };
layout weights_layout{ weights_pshape, data_types::f16, format::bfyx };
auto input_data = rg.generate_random_1d<ov::float16>(in_layout.count(), -1, 1);
auto input_mem = engine.allocate_memory(in_layout);
auto input_data = rg.generate_random_1d<ov::float16>(ov::shape_size(target_pshape.get_shape()), -1, 1);
auto input_mem = engine.allocate_memory({ target_pshape, data_types::f16, format::bfyx });
set_values(input_mem, input_data);

auto weights_data = rg.generate_random_1d<ov::float16>(weights_layout.count(), -1, 1);
auto weights_mem = engine.allocate_memory(weights_layout);
set_values(weights_mem, weights_data);

auto input1 = input_layout("input1", input_mem->get_layout());
auto input2 = input_layout("input2", input_mem->get_layout());
auto input1 = input_layout("input1", in_layout);
auto input2 = input_layout("input2", in_layout);
auto weights = data("weights", weights_mem);
auto eltw = eltwise("eltwise", {input_info("input1"), input_info("input2")}, eltwise_mode::sum);
auto eltw_reorder = reorder("reorder1", input_info("eltwise"), format::b_fs_yx_fsv16, data_types::f16 );
@@ -87,13 +100,19 @@ TEST_P(mem_reset_test, need_reset_output_memory_test) {

auto outputs_test_blocked = network_test_blocked.execute();

auto reorder_inst = network_test_blocked.get_primitive("reorder1");
// An additional reorder is added and fused when force_implementations is enabled in the dynamic case
auto target_primitive_id = p.is_dynamic ? "reorder1_0_reorder_2" : "reorder1";
auto reorder_inst = network_test_blocked.get_primitive(target_primitive_id);
ASSERT_TRUE(PrimitiveInstTestHelper::need_reset_output_memory(reorder_inst) == p.need_reset);
}

INSTANTIATE_TEST_SUITE_P(smoke, mem_reset_test,
testing::Values(
mem_reset_params{ 9, true }, // If tensor is not packed(not aligned to 16), need_reset_output_memory == true
mem_reset_params{ 16, false }
// static
mem_reset_params{ 9, false, true }, // If the tensor is not packed (not aligned to 16), need_reset_output_memory == true
mem_reset_params{ 16, false, false },
// dynamic
mem_reset_params{ 9, true, true },
mem_reset_params{ 16, true, false }
)
);