unify cpu context, part2 (#44012)

* fix init()

* delete test_device_context

* replace CPUDeviceContext with CPUContext

* fix test_scalar

* remove dot_op.cc

* fix compile
zhiqiu authored Jul 2, 2022
1 parent 09096ae commit 755438a
Showing 289 changed files with 1,525 additions and 1,909 deletions.
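
Every hunk below is the same mechanical substitution: the fluid spelling `paddle::platform::CPUDeviceContext` becomes the unified `phi::CPUContext`. A self-contained sketch of the alias-then-rewrite migration this series appears to follow; the toy types, and the assumption that part 1 left the legacy name as an alias, are mine, not Paddle code:

```cpp
// Toy model of a two-step context unification. Step 1 (a prior PR, assumed):
// make the legacy name an alias of the unified type so both spellings
// compile. Step 2 (this commit): mechanically rewrite call sites to the new
// spelling, which cannot change behavior because only the name differs.
#include <iostream>

namespace phi {
struct CPUContext {  // stand-in for the unified CPU context
  const char* name() const { return "phi::CPUContext"; }
};
}  // namespace phi

namespace platform {
using CPUDeviceContext = phi::CPUContext;  // assumed part-1 alias
}  // namespace platform

int main() {
  platform::CPUDeviceContext legacy;   // old spelling, still valid
  phi::CPUContext* unified = &legacy;  // same type, so no cast is needed
  std::cout << unified->name() << "\n";
}
```
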
6 changes: 3 additions & 3 deletions paddle/fluid/distributed/collective/reducer.cc
@@ -251,7 +251,7 @@ void EagerGroup::ConcatTensors(const platform::Place &place) {
"Please recompile or reinstall Paddle with NCCL support."));
#endif
} else if (platform::is_cpu_place(place)) {
- auto *default_ctx = static_cast<platform::CPUDeviceContext *>(
+ auto *default_ctx = static_cast<phi::CPUContext *>(
platform::DeviceContextPool::Instance().Get(place));
ConcatTensorsWithType(
*default_ctx, dense_tensors_, &dense_contents_, dtype_);
@@ -274,7 +274,7 @@ void EagerGroup::SplitTensors(const platform::Place &place) {
"Please recompile or reinstall Paddle with NCCL support."));
#endif
} else if (platform::is_cpu_place(place)) {
- auto *default_ctx = static_cast<platform::CPUDeviceContext *>(
+ auto *default_ctx = static_cast<phi::CPUContext *>(
platform::DeviceContextPool::Instance().Get(place));
SplitTensorsWithType(
*default_ctx, &dense_contents_, &dense_tensors_, dtype_);
@@ -891,7 +891,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
"Please recompile or reinstall Paddle with NCCL support."));
#endif
} else if (platform::is_cpu_place(inner_place_)) {
- dev_ctx = static_cast<platform::CPUDeviceContext *>(
+ dev_ctx = static_cast<phi::CPUContext *>(
platform::DeviceContextPool::Instance().Get(inner_place_));
} else {
PADDLE_THROW(platform::errors::Unimplemented(
6 changes: 3 additions & 3 deletions paddle/fluid/distributed/common/utils.h
@@ -31,9 +31,9 @@ namespace paddle {
namespace distributed {

template <typename T>
- inline phi::funcs::BlasT<paddle::platform::CPUDeviceContext, T> GetBlas() {
- paddle::platform::CPUDeviceContext cpu_ctx;
- return phi::funcs::GetBlas<paddle::platform::CPUDeviceContext, T>(cpu_ctx);
+ inline phi::funcs::BlasT<phi::CPUContext, T> GetBlas() {
+ phi::CPUContext cpu_ctx;
+ return phi::funcs::GetBlas<phi::CPUContext, T>(cpu_ctx);
}

template <typename T>
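
The `GetBlas()` helper above default-constructs a CPU context on the spot and returns a BLAS wrapper bound to it. A runnable sketch of that pattern with a toy `BlasT` (a stand-in, not the real `phi::funcs::BlasT` API); `VSUB` mirrors the element-wise subtraction the GeoCommunicator call sites below perform:

```cpp
// Toy version of the distributed GetBlas() helper: default-construct the
// unified CPU context and hand back a BLAS wrapper typed on it.
#include <cstddef>
#include <iostream>

namespace phi {
struct CPUContext {};  // stand-in for the unified CPU context
}  // namespace phi

template <typename Context, typename T>
struct BlasT {  // toy stand-in for phi::funcs::BlasT
  explicit BlasT(const Context&) {}
  // dst[i] = a[i] - b[i], as used by GeoCommunicator::SendDense/RecvSparse.
  void VSUB(std::size_t n, const T* a, const T* b, T* dst) const {
    for (std::size_t i = 0; i < n; ++i) dst[i] = a[i] - b[i];
  }
};

template <typename T>
inline BlasT<phi::CPUContext, T> GetBlas() {
  phi::CPUContext cpu_ctx;  // CPU contexts are cheap to construct
  return BlasT<phi::CPUContext, T>(cpu_ctx);
}

int main() {
  const float latest[3] = {3.f, 4.f, 5.f}, old[3] = {1.f, 1.f, 1.f};
  float delta[3];
  GetBlas<float>().VSUB(3, latest, old, delta);
  std::cout << delta[0] << " " << delta[1] << " " << delta[2] << "\n";  // 2 3 4
}
```
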
27 changes: 14 additions & 13 deletions paddle/fluid/distributed/ps/service/communicator/communicator.cc
@@ -353,11 +353,12 @@ void Communicator::RpcRecvSparse(const std::string &varname,

bool training = true;

- auto status = _worker_ptr->PullSparseParam((float **)push_g_vec.data(),
- table_id, // NOLINT
- sparse_push_keys.data(),
- sparse_push_keys.size(),
- training);
+ auto status =
+ _worker_ptr->PullSparseParam(static_cast<float **>(push_g_vec.data()),
+ table_id,
+ sparse_push_keys.data(),
+ sparse_push_keys.size(),
+ training);
status.wait();
return;
}
@@ -1184,12 +1185,12 @@ void GeoCommunicator::SendDense(const CommContext &send_ctx) {
auto &t_latest = var_latest->Get<framework::LoDTensor>();
auto t_timestamp = var_timestamp->GetMutable<framework::LoDTensor>();

- paddle::platform::CPUDeviceContext cpu_ctx;
+ phi::CPUContext cpu_ctx;
auto *var_delta = delta_scope_->Var(varname);
auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
t_delta->mutable_data<float>(t_latest.dims(), cpu_ctx.GetPlace());

- auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+ auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
blas.VSUB(t_latest.numel(),
t_latest.data<float>(),
t_timestamp->data<float>(),
@@ -1218,7 +1219,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
RpcRecvDense(varnames, table_id, pserver_scope_.get());

// 2.1 pserver - old => delta; 2.2 latest + old => latest 2.3 old => pserver
- paddle::platform::CPUDeviceContext cpu_ctx;
+ phi::CPUContext cpu_ctx;
for (auto &varname : varnames) {
auto *var_latest = recv_scope_->FindVar(varname);
auto t_latest = var_latest->GetMutable<framework::LoDTensor>();
@@ -1233,7 +1234,7 @@ void GeoCommunicator::RecvDense(const CommContext &send_ctx) {
auto *t_delta = var_delta->GetMutable<framework::LoDTensor>();
t_delta->mutable_data<float>(t_latest->dims(), cpu_ctx.GetPlace());

- auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+ auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
blas.VSUB(t_latest->numel(),
t_pserver.data<float>(),
t_old->data<float>(),
@@ -1334,7 +1335,7 @@ void GeoCommunicator::SendSparse(const std::string &varname,
auto *t_old = var_old->GetMutable<framework::LoDTensor>();

auto dims1 = t_latest.dims()[1];
- paddle::platform::CPUDeviceContext cpu_ctx;
+ phi::CPUContext cpu_ctx;

auto *var_delta = delta_scope_->Var(varname);
auto *t_delta = var_delta->GetMutable<phi::SelectedRows>();
@@ -1345,7 +1346,7 @@
t_delta->set_rows(sparse_ids);
t_delta->set_height(t_latest.dims()[0]);

- auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+ auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);
float coefficient = 1.0 / static_cast<float>(trainers_);

std::vector<float *> push_g_vec;
@@ -1419,8 +1420,8 @@ void GeoCommunicator::RecvSparse(const std::string &varname,
std::vector<float> v_delta;
v_delta.resize(numel);

- paddle::platform::CPUDeviceContext cpu_ctx;
- auto blas = phi::funcs::GetBlas<platform::CPUDeviceContext, float>(cpu_ctx);
+ phi::CPUContext cpu_ctx;
+ auto blas = phi::funcs::GetBlas<phi::CPUContext, float>(cpu_ctx);

for (auto j = 0; j < static_cast<int>(keys.size()); ++j) {
VLOG(5) << "DEBUG GeoCommunicator::RecvSparse recv sparse key" << keys[j]
16 changes: 6 additions & 10 deletions paddle/fluid/distributed/ps/service/communicator/communicator.h
@@ -185,9 +185,8 @@ inline void MergeVars(const std::string &var_name,
}

// set output tensor to 0.
- paddle::platform::CPUDeviceContext cpu_ctx;
- phi::funcs::SetConstant<paddle::platform::CPUDeviceContext, T>
- constant_functor;
+ phi::CPUContext cpu_ctx;
+ phi::funcs::SetConstant<phi::CPUContext, T> constant_functor;
constant_functor(cpu_ctx, out_t, static_cast<T>(0));
// sum all vars to out
auto result = EigenVector<T>::Flatten(*out_t);
@@ -210,16 +209,13 @@
for (auto &var : vars) {
inputs.push_back(&var->Get<phi::SelectedRows>());
}
- paddle::platform::CPUDeviceContext dev_ctx;
+ phi::CPUContext dev_ctx;
if (merge_add) {
- paddle::operators::math::scatter::
- MergeAdd<paddle::platform::CPUDeviceContext, T>
- merge_add;
+ paddle::operators::math::scatter::MergeAdd<phi::CPUContext, T> merge_add;
merge_add(dev_ctx, inputs, out_slr);
} else {
- paddle::operators::math::scatter::
- MergeAverage<paddle::platform::CPUDeviceContext, T>
- merge_average;
+ paddle::operators::math::scatter::MergeAverage<phi::CPUContext, T>
+ merge_average;
merge_average(dev_ctx, inputs, out_slr);
}

3 changes: 1 addition & 2 deletions paddle/fluid/eager/nan_inf_utils.cc
@@ -48,8 +48,7 @@ void CheckTensorHasNanOrInf(const std::string& api_name, const Tensor& tensor) {
#endif
return;
}
- paddle::framework::details::tensor_check<
- paddle::platform::CPUDeviceContext>(
+ paddle::framework::details::tensor_check<phi::CPUContext>(
api_name, tensor_name, *dense_tensor, place);
}
}
5 changes: 2 additions & 3 deletions paddle/fluid/framework/data_device_transform_test.cu
@@ -90,9 +90,8 @@ REGISTER_OP_WITHOUT_GRADIENT(
test_op,
paddle::framework::TestOpWithKernel,
paddle::framework::OpKernelTestProtoAndCheckerMaker);
- REGISTER_OP_CPU_KERNEL(
- test_op,
- paddle::framework::TestKernel<paddle::platform::CPUDeviceContext, float>);
+ REGISTER_OP_CPU_KERNEL(test_op,
+ paddle::framework::TestKernel<phi::CPUContext, float>);
REGISTER_OP_CUDA_KERNEL(
test_op,
paddle::framework::TestKernel<paddle::platform::CUDADeviceContext, float>);
4 changes: 2 additions & 2 deletions paddle/fluid/framework/data_layout_transform.cc
@@ -44,8 +44,8 @@ void CastDataLayout::apply() {
auto place = ctx_->GetPlace();

if (platform::is_cpu_place(place)) {
- phi::funcs::Transpose<platform::CPUDeviceContext, T, 4> trans4;
- auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
+ phi::funcs::Transpose<phi::CPUContext, T, 4> trans4;
+ auto* context = static_cast<const phi::CPUContext*>(ctx_);
trans4(*context, in_, out_, axis_);
} else {
PADDLE_THROW(platform::errors::PreconditionNotMet(
4 changes: 2 additions & 2 deletions paddle/fluid/framework/data_type_transform.cc
@@ -94,8 +94,8 @@ struct CastDataType {
auto* out_begin = out_->mutable_data<OutType>(in_.place());

if (platform::is_cpu_place(in_.place())) {
- platform::Transform<platform::CPUDeviceContext> trans;
- auto* context = static_cast<const platform::CPUDeviceContext*>(ctx_);
+ platform::Transform<phi::CPUContext> trans;
+ auto* context = static_cast<const phi::CPUContext*>(ctx_);
trans(*context,
in_begin,
in_end,
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/broadcast_op_handle_test.h
@@ -117,7 +117,7 @@ struct TestBroadcastOpHandle {
for (int i = 0; i < count; ++i) {
auto p = p::CPUPlace();
place_list_.push_back(p);
- ctxs_.emplace_back(new p::CPUDeviceContext(p));
+ ctxs_.emplace_back(new phi::CPUContext(p));
}
#if defined(PADDLE_WITH_XPU_BKCL)
bkcl_ctxs_.reset(nullptr);
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/gather_op_handle_test.cc
@@ -69,7 +69,7 @@ struct TestGatherOpHandle {
for (int i = 0; i < count; ++i) {
auto p = p::CPUPlace();
gpu_list_.push_back(p);
- ctxs_.emplace_back(new p::CPUDeviceContext(p));
+ ctxs_.emplace_back(new phi::CPUContext(p));
}
}
}
14 changes: 7 additions & 7 deletions paddle/fluid/framework/details/nan_inf_utils_detail.cc
@@ -316,7 +316,7 @@ template <>

template <>
template <typename T>
- void TensorCheckerVisitor<platform::CPUDeviceContext>::apply(
+ void TensorCheckerVisitor<phi::CPUContext>::apply(
typename std::enable_if<
std::is_floating_point<T>::value ||
std::is_same<T, ::paddle::platform::complex<float>>::value ||
@@ -329,11 +329,11 @@ void TensorCheckerVisitor<platform::CPUDeviceContext>::apply(
}

template <>
- void tensor_check<platform::CPUDeviceContext>(const std::string& op_type,
- const std::string& var_name,
- const framework::Tensor& tensor,
- const platform::Place& place) {
- TensorCheckerVisitor<platform::CPUDeviceContext> vistor(
+ void tensor_check<phi::CPUContext>(const std::string& op_type,
+ const std::string& var_name,
+ const framework::Tensor& tensor,
+ const platform::Place& place) {
+ TensorCheckerVisitor<phi::CPUContext> vistor(
op_type, var_name, tensor, place);
VisitDataType(framework::TransToProtoVarType(tensor.dtype()), vistor);
}
@@ -439,7 +439,7 @@ void CheckVarHasNanOrInf(const std::string& op_type,
#endif
return;
}
- tensor_check<platform::CPUDeviceContext>(op_type, var_name, *tensor, place);
+ tensor_check<phi::CPUContext>(op_type, var_name, *tensor, place);
}

void CheckVarHasNanOrInf(const std::string& op_type,
2 changes: 1 addition & 1 deletion paddle/fluid/framework/details/reduce_op_handle_test.cc
@@ -81,7 +81,7 @@ struct TestReduceOpHandle {
for (int i = 0; i < count; ++i) {
auto p = p::CPUPlace();
gpu_list_.push_back(p);
- ctxs_.emplace_back(new p::CPUDeviceContext(p));
+ ctxs_.emplace_back(new phi::CPUContext(p));
}
#if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)
nccl_ctxs_.reset(nullptr);
2 changes: 1 addition & 1 deletion paddle/fluid/framework/lod_tensor.h
@@ -144,7 +144,7 @@ LoDTensor LodExpand(const LoDTensor& source,
auto slice = tensor.Slice(elem, elem + 1);
TensorCopy(source.Slice(ins, ins + 1),
platform::CPUPlace(),
- platform::CPUDeviceContext(),
+ phi::CPUContext(),
&slice);
}
}
31 changes: 13 additions & 18 deletions paddle/fluid/framework/op_registry_test.cc
@@ -232,9 +232,8 @@ class OpKernelTest : public paddle::framework::OpKernel<T> {
REGISTER_OP_WITHOUT_GRADIENT(op_with_kernel,
paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestMaker);
- REGISTER_OP_CPU_KERNEL(
- op_with_kernel,
- paddle::framework::OpKernelTest<paddle::platform::CPUDeviceContext, float>);
+ REGISTER_OP_CPU_KERNEL(op_with_kernel,
+ paddle::framework::OpKernelTest<phi::CPUContext, float>);

REGISTER_OP_CUDA_KERNEL(
op_with_kernel,
@@ -264,10 +263,9 @@ TEST(OperatorRegistrar, CUDA) {
}

static int op_test_value = 0;

- using paddle::platform::CPUDeviceContext;
using paddle::platform::CUDADeviceContext;
using paddle::platform::DeviceContext;
+ using phi::CPUContext;

namespace paddle {
namespace framework {
@@ -295,8 +293,7 @@ class OpMultiKernelTest : public paddle::framework::OpKernel<T> {
};

template <typename T>
- class OpMultiKernelTest<CPUDeviceContext, T>
- : public paddle::framework::OpKernel<T> {
+ class OpMultiKernelTest<CPUContext, T> : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const {
++op_test_value;
@@ -319,7 +316,7 @@ class OpMultiKernelTest2 : public paddle::framework::OpKernel<T> {
};

template <typename T>
- class OpMultiKernelTest2<CPUDeviceContext, T>
+ class OpMultiKernelTest2<CPUContext, T>
: public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const {
@@ -342,16 +339,14 @@ class OpMultiKernelTest2<CUDADeviceContext, T>
REGISTER_OP_WITHOUT_GRADIENT(op_with_multi_kernel,
paddle::framework::OpWithMultiKernelTest,
paddle::framework::OpKernelTestMaker);
- REGISTER_OP_KERNEL(
- op_with_multi_kernel,
- CPU,
- paddle::platform::CPUPlace,
- paddle::framework::OpMultiKernelTest<CPUDeviceContext, float>);
- REGISTER_OP_KERNEL(
- op_with_multi_kernel,
- MKLDNN,
- paddle::platform::CPUPlace,
- paddle::framework::OpMultiKernelTest2<CPUDeviceContext, float>);
+ REGISTER_OP_KERNEL(op_with_multi_kernel,
+ CPU,
+ paddle::platform::CPUPlace,
+ paddle::framework::OpMultiKernelTest<CPUContext, float>);
+ REGISTER_OP_KERNEL(op_with_multi_kernel,
+ MKLDNN,
+ paddle::platform::CPUPlace,
+ paddle::framework::OpMultiKernelTest2<CPUContext, float>);
REGISTER_OP_KERNEL(
op_with_multi_kernel,
CUDA,
15 changes: 5 additions & 10 deletions paddle/fluid/framework/operator_test.cc
@@ -420,16 +420,13 @@ REGISTER_OP_WITHOUT_GRADIENT(

REGISTER_OP_CPU_KERNEL(
indicate_lod_tensor_data_type_test,
- paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
- int>);
+ paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
REGISTER_OP_CPU_KERNEL(
indicate_selected_rows_data_type_test,
- paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
- int>);
+ paddle::framework::EmptyTestKernel<phi::CPUContext, int>);
REGISTER_OP_CPU_KERNEL(
indicate_other_data_type_test,
- paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
- int>);
+ paddle::framework::EmptyTestKernel<phi::CPUContext, int>);

TEST(IndicateVarDataTypeTest, lodtensor) {
paddle::framework::InitDevices();
@@ -599,16 +596,14 @@ REGISTER_OP_WITHOUT_GRADIENT(get_lod_level_test,
paddle::framework::GetSetLoDLevelTestMaker);
REGISTER_OP_CPU_KERNEL(
get_lod_level_test,
- paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
- float>);
+ paddle::framework::EmptyTestKernel<phi::CPUContext, float>);

REGISTER_OP_WITHOUT_GRADIENT(set_lod_level_test,
paddle::framework::SetLoDLevelTest,
paddle::framework::GetSetLoDLevelTestMaker);
REGISTER_OP_CPU_KERNEL(
set_lod_level_test,
- paddle::framework::EmptyTestKernel<paddle::platform::CPUDeviceContext,
- float>);
+ paddle::framework::EmptyTestKernel<phi::CPUContext, float>);

void SetGetLoDLevelTestMain(std::string op_type) {
paddle::framework::InitDevices({});
2 changes: 1 addition & 1 deletion paddle/fluid/framework/phi_utils.h
@@ -66,7 +66,7 @@ struct ConvertToPhiContext {
};

template <>
- struct ConvertToPhiContext<platform::CPUDeviceContext> {
+ struct ConvertToPhiContext<phi::CPUContext> {
struct ConvertToPhiContext<phi::CPUContext> {
using TYPE = phi::CPUContext;
};

Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/framework/selected_rows_utils_test.cc
@@ -53,7 +53,7 @@ TEST_F(SelectedRowsTester, complete_dims) {

TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
phi::SelectedRows dst_tensor;
- platform::CPUDeviceContext cpu_ctx(place_);
+ phi::CPUContext cpu_ctx(place_);
std::ostringstream oss;

SerializeToStream(oss, *selected_rows_, cpu_ctx);
(Diff truncated: the remaining changed files of the 289 are not shown.)
