fix test api problem (#42297)

* extract sub-graph
* graph-engine merging
* fix
* fix
* fix heter-ps config
* test performance
* test performance
* test performance
* test
* test
* update bfs
* change cmake
* test
* test gpu speed
* gpu_graph_engine optimization
* add dsm sample method
* add graph_neighbor_sample_v2
* Add graph_neighbor_sample_v2
* fix for loop
* add cpu sample interface
* fix kernel judgement
* add ssd layer to graph_engine
* fix allocation
* fix syntax error
* fix syntax error
* fix pscore class
* fix
* change index settings
* recover test
* recover test
* fix spelling
* recover
* fix
* move cudamemcpy after cuda stream sync
* fix linking problem
* remove comment
* add cpu test
* test
* add cpu test
* change comment
* combine feature table and graph table
* test
* test
* pybind
* test
* test
* test
* test
* pybind
* pybind
* fix cmake
* pybind
* fix
* fix
* add pybind
* add pybind
* optimize pybind
* test
* fix pybind
* fix
* pybind change
* remove file

Co-authored-by: DesmonDay <[email protected]>
PaddlePaddle · Apr 27, 2022 · a340e79 · a340e79 · paddle-bot-old · Apr 27, 2022
1 parent 37a9c8a
commit a340e79
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 8 deletions.
diff --git a/paddle/fluid/framework/fleet/heter_ps/.CMakeLists.txt.swp b/paddle/fluid/framework/fleet/heter_ps/.CMakeLists.txt.swp
diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu
@@ -288,18 +288,18 @@ std::vector<int64_t> GraphGpuWrapper::graph_neighbor_sample(
   }
   /* VLOG(0) << "cumsum " << cumsum; */
 
-  std::vector<int64_t> res;
-  res.resize(cumsum * 2);
-  int count = 0;
+  std::vector<int64_t> cpu_key, res;
+  cpu_key.resize(key.size() * sample_size);
+
+  cudaMemcpy(cpu_key.data(), neighbor_sample_res.val,
+             key.size() * sample_size * sizeof(int64_t),
+             cudaMemcpyDeviceToHost);
   for (int i = 0; i < key.size(); i++) {
     for (int j = 0; j < actual_sample_size[i]; j++) {
-      res[count] = key[i];
-      count += 1;
+      res.push_back(key[i]);
+      res.push_back(cpu_key[i * sample_size + j]);
     }
   }
-
-  cudaMemcpy(res.data() + cumsum, neighbor_sample_res.val,
-             cumsum * sizeof(int64_t), cudaMemcpyDeviceToHost);
   /* for(int i = 0;i < res.size();i ++) { */
   /*     VLOG(0) << i << " " << res[i]; */
   /* } */