From cb4604aca2e9a79eb61827a71f1f781b70ceac83 Mon Sep 17 00:00:00 2001 From: Muhammed Fatih BALIN Date: Mon, 5 Aug 2024 11:29:40 -0400 Subject: [PATCH] [Performance] Remove `phmap` dependency. (#7658) --- .gitmodules | 3 --- CMakeLists.txt | 10 ++-------- src/array/cpu/array_utils.h | 7 +++---- src/array/cpu/csr_mm.cc | 7 ++++--- src/array/cpu/csr_sum.cc | 7 ++++--- src/array/cpu/labor_pick.h | 17 +++++++++++++---- third_party/phmap | 1 - 7 files changed, 26 insertions(+), 26 deletions(-) delete mode 160000 third_party/phmap diff --git a/.gitmodules b/.gitmodules index 45c0dfc1d8..9f33abfbb2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -10,9 +10,6 @@ [submodule "third_party/METIS"] path = third_party/METIS url = https://github.com/KarypisLab/METIS.git -[submodule "third_party/phmap"] - path = third_party/phmap - url = https://github.com/greg7mdp/parallel-hashmap.git [submodule "third_party/nanoflann"] path = third_party/nanoflann url = https://github.com/jlblancoc/nanoflann diff --git a/CMakeLists.txt b/CMakeLists.txt index f18994c236..dc27f4a515 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -340,15 +340,9 @@ else(EXTERNAL_DMLC_PATH) set(GOOGLE_TEST 0) # Turn off dmlc-core test endif(EXTERNAL_DMLC_PATH) -if(EXTERNAL_PHMAP_PATH) - include_directories(SYSTEM ${EXTERNAL_PHMAP_PATH}) -else(EXTERNAL_PHMAP_PATH) - target_include_directories(dgl PRIVATE "third_party/phmap") -endif(EXTERNAL_PHMAP_PATH) - - target_include_directories(dgl PRIVATE "tensoradapter/include") target_include_directories(dgl PRIVATE "third_party/pcg/include") +target_include_directories(dgl PRIVATE "third_party/tsl_robin_map/include") if(EXTERNAL_NANOFLANN_PATH) include_directories(SYSTEM ${EXTERNAL_NANOFLANN_PATH}) @@ -473,7 +467,7 @@ if(BUILD_CPP_TEST) include_directories("include") include_directories("third_party/dlpack/include") include_directories("third_party/dmlc-core/include") - include_directories("third_party/phmap") + include_directories("third_party/tsl_robin_map/include") include_directories("third_party/libxsmm/include") include_directories("third_party/pcg/include") file(GLOB_RECURSE TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/tests/cpp/*.cc) diff --git a/src/array/cpu/array_utils.h b/src/array/cpu/array_utils.h index 5a46be6137..5760e0cfbf 100644 --- a/src/array/cpu/array_utils.h +++ b/src/array/cpu/array_utils.h @@ -7,7 +7,7 @@ #define DGL_ARRAY_CPU_ARRAY_UTILS_H_ #include -#include +#include #include #include @@ -53,8 +53,7 @@ class IdHashMap { const int64_t len = ids->shape[0]; for (int64_t i = 0; i < len; ++i) { const IdType id = ids_data[i]; - // phmap::flat_hash_map::insert assures that an insertion will not happen - // if the key already exists. + // Insertion will not happen if the key already exists. oldv2newv_.insert({id, oldv2newv_.size()}); filter_[id & kFilterMask] = true; } @@ -106,7 +105,7 @@ class IdHashMap { // lookups. std::vector filter_; // The hashmap from old vid to new vid - phmap::flat_hash_map oldv2newv_; + tsl::robin_map oldv2newv_; }; /** diff --git a/src/array/cpu/csr_mm.cc b/src/array/cpu/csr_mm.cc index 9833381a52..dd15b4c5e9 100644 --- a/src/array/cpu/csr_mm.cc +++ b/src/array/cpu/csr_mm.cc @@ -6,7 +6,8 @@ #include #include -#include +#include +#include #include @@ -28,7 +29,7 @@ void CountNNZPerRow( const IdType* B_indices, IdType* C_indptr_data, int64_t M) { parallel_for(0, M, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { - phmap::flat_hash_set set; + tsl::robin_set set; for (IdType u = A_indptr[i]; u < A_indptr[i + 1]; ++u) { IdType w = A_indices[u]; for (IdType v = B_indptr[w]; v < B_indptr[w + 1]; ++v) @@ -60,7 +61,7 @@ void ComputeIndicesAndData( IdType* C_indices_data, DType* C_weights_data, int64_t M) { parallel_for(0, M, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { - phmap::flat_hash_map map; + tsl::robin_map map; for (IdType u = A_indptr[i]; u < A_indptr[i + 1]; ++u) { IdType w = A_indices[u]; DType vA = A_data[A_eids ? A_eids[u] : u]; diff --git a/src/array/cpu/csr_sum.cc b/src/array/cpu/csr_sum.cc index 4a8a9bb3d3..3eabe3da8e 100644 --- a/src/array/cpu/csr_sum.cc +++ b/src/array/cpu/csr_sum.cc @@ -6,7 +6,8 @@ #include #include -#include +#include +#include #include @@ -30,7 +31,7 @@ void CountNNZPerRow( runtime::parallel_for(0, M, [=](size_t b, size_t e) { for (size_t i = b; i < e; ++i) { - phmap::flat_hash_set set; + tsl::robin_set set; for (int64_t k = 0; k < n; ++k) { for (IdType u = A_indptr[k][i]; u < A_indptr[k][i + 1]; ++u) set.insert(A_indices[k][u]); @@ -63,7 +64,7 @@ void ComputeIndicesAndData( int64_t n = A_indptr.size(); runtime::parallel_for(0, M, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { - phmap::flat_hash_map map; + tsl::robin_map map; for (int64_t k = 0; k < n; ++k) { for (IdType u = A_indptr[k][i]; u < A_indptr[k][i + 1]; ++u) { IdType kA = A_indices[k][u]; diff --git a/src/array/cpu/labor_pick.h b/src/array/cpu/labor_pick.h index 9b9ea37de3..dabd45d574 100644 --- a/src/array/cpu/labor_pick.h +++ b/src/array/cpu/labor_pick.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include @@ -45,6 +45,13 @@ namespace impl { using dgl::random::continuous_seed; +template +using map_t = tsl::robin_map; +template +auto& mutable_value_ref(iterator it) { + return it.value(); +} + constexpr double eps = 0.0001; template @@ -61,7 +68,7 @@ auto compute_importance_sampling_probabilities( double prev_ex_nodes = max_degree * num_rows; - phmap::flat_hash_map hop_map, hop_map2; + map_t hop_map, hop_map2; for (int iters = 0; iters < importance_sampling || importance_sampling < 0; iters++) { // NOTE(mfbalin) When the graph is unweighted, the first c values in @@ -83,7 +90,9 @@ auto compute_importance_sampling_probabilities( for (auto j = indptr[rid]; j < indptr[rid + 1]; j++) { const auto ct = c * (weighted && iters == 1 ? A[j] : 1); auto itb = hop_map2.emplace(indices[j], ct); - if (!itb.second) itb.first->second = std::max(ct, itb.first->second); + if (!itb.second) { + mutable_value_ref(itb.first) = std::max(ct, itb.first->second); + } } } if (hop_map.empty()) @@ -203,7 +212,7 @@ std::pair CSRLaborPick( hop_size += act_degree; } - phmap::flat_hash_map hop_map; + map_t hop_map; if (importance_sampling) hop_map = compute_importance_sampling_probabilities( diff --git a/third_party/phmap b/third_party/phmap deleted file mode 160000 index 65775fa09f..0000000000 --- a/third_party/phmap +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 65775fa09fecaa65d0b0022ab6bf091c0e509445