From a33e9cee2e14344983c62e8ae739c1a2b9537426 Mon Sep 17 00:00:00 2001 From: Tianqi Chen Date: Thu, 21 Jul 2016 18:47:04 -0700 Subject: [PATCH] [PASS] Add place device (#18) --- nnvm/Makefile | 2 +- nnvm/README.md | 7 ++ nnvm/include/nnvm/graph.h | 10 +- nnvm/include/nnvm/graph_attr_types.h | 24 +++- nnvm/include/nnvm/pass_functions.h | 18 +++ nnvm/src/core/graph.cc | 2 +- nnvm/src/example/operator.cc | 5 + nnvm/src/pass/infer_shape_type.cc | 4 + nnvm/src/pass/place_device.cc | 180 +++++++++++++++++++++++++++ nnvm/src/pass/saveload_json.cc | 8 +- nnvm/tests/python/test_graph.py | 20 +++ 11 files changed, 267 insertions(+), 13 deletions(-) create mode 100644 nnvm/src/pass/place_device.cc diff --git a/nnvm/Makefile b/nnvm/Makefile index d24e4a8e06e2..86edfaea378e 100644 --- a/nnvm/Makefile +++ b/nnvm/Makefile @@ -1,5 +1,5 @@ export LDFLAGS = -pthread -lm -export CFLAGS = -std=c++11 -Wall -O3 -msse2 -Wno-unknown-pragmas -funroll-loops\ +export CFLAGS = -std=c++11 -Wall -O2 -msse2 -Wno-unknown-pragmas -funroll-loops\ -Iinclude -Idmlc-core/include -fPIC # specify tensor path diff --git a/nnvm/README.md b/nnvm/README.md index 888e398e5f51..7acbf88093a8 100644 --- a/nnvm/README.md +++ b/nnvm/README.md @@ -20,6 +20,13 @@ interface defintion and how operators are executed. NNVM is inspired by LLVM, aiming to be an intermediate representation library for neural nets and computation graphs generation and optimizations. +## Why build deep learning system by parts + +- Essential parts can be assembled in minimum way for embedding systems. +- Hackers can hack the parts they need and compose with other well defined parts. +- Decentralized modules enable new extensions creators to own their project + without creating a monothilic version. + ## Deep learning system by parts This is one way to divide the deep learning system into common parts. diff --git a/nnvm/include/nnvm/graph.h b/nnvm/include/nnvm/graph.h index b09d9c61ab2e..9f12a1b35086 100644 --- a/nnvm/include/nnvm/graph.h +++ b/nnvm/include/nnvm/graph.h @@ -71,14 +71,8 @@ class IndexedGraph { uint32_t node_id; /*! \brief index of output from the source. */ uint32_t index; - /*! - * \brief compare equality - * \param other the other entry to compare - * \return whether two entries equals to each other - */ - inline bool operator==(const NodeEntry& other) const { - return node_id == other.node_id && index == other.index; - } + /*! \brief version of the node */ + uint32_t version; }; /*! \brief Node data structure in IndexedGraph */ struct Node { diff --git a/nnvm/include/nnvm/graph_attr_types.h b/nnvm/include/nnvm/graph_attr_types.h index 65d2638a79bb..777a8054252a 100644 --- a/nnvm/include/nnvm/graph_attr_types.h +++ b/nnvm/include/nnvm/graph_attr_types.h @@ -45,7 +45,7 @@ using ShapeVector = std::vector; * * \code * Graph g = ApplyPass(src_graph, {"InferType"}); - * const DTypeVector& types = g.GetAttr("dtype"); + * const DTypeVector& types = g.GetAttr("dtype"); * // get shape by entry id * int entry_type = dtypes[g.indexed_graph().entry_id(my_entry)]; * \endcode @@ -54,6 +54,28 @@ using ShapeVector = std::vector; */ using DTypeVector = std::vector; +/*! + * \brief The result holder of device of each operator in the graph. + * \note Stored under graph.attrs["device"], provided by Pass "PlaceDevice" + * + * \code + * Graph g = ApplyPass(src_graph, {"PlaceDevice"}); + * const &device = g.GetAttr("dtype"); + * // get device by node_id + * int device_type = device[g.indexed_graph().node_id(my_node)]; + * \endcode + */ +using DeviceVector = std::vector; + +/*! + * \brief The result holder of device of each operator in the graph. + * + * \note Stored under graph.attrs["device_assign_map"], needed by Pass "PlaceDevice" + * -1 means unknown device + */ +using DeviceAssignMap = std::unordered_map; + + } // namespace nnvm #endif // NNVM_GRAPH_ATTR_TYPES_H_ diff --git a/nnvm/include/nnvm/pass_functions.h b/nnvm/include/nnvm/pass_functions.h index 1cb25631e653..d622ce47dd55 100644 --- a/nnvm/include/nnvm/pass_functions.h +++ b/nnvm/include/nnvm/pass_functions.h @@ -91,6 +91,24 @@ inline Graph InferType(Graph graph, return ApplyPass(std::move(graph), {"InferType"}); } +/*! + * \brief Place the devices + * \param graph source graph + * \param device_group_attr_key The attribute name for hinting the device group. + * \param device_assign_map The assignment map of device + * \param device_copy_op The name of copy op to be inserted when cross device copy happened. + * \return A graph with new attribute "device", cotaining device information of each node. + */ +inline Graph PlaceDevice(Graph graph, + std::string device_group_attr_key, + DeviceAssignMap device_assign_map, + std::string device_copy_op) { + graph.attrs["device_group_attr_key"] = std::make_shared(std::move(device_group_attr_key)); + graph.attrs["device_assign_map"] = std::make_shared(std::move(device_assign_map)); + graph.attrs["device_copy_op"] = std::make_shared(std::move(device_copy_op)); + return ApplyPass(std::move(graph), {"PlaceDevice"}); +} + } // namespace pass } // namespace nnvm #endif // NNVM_PASS_FUNCTIONS_H_ diff --git a/nnvm/src/core/graph.cc b/nnvm/src/core/graph.cc index 601f84d0a576..c8377671ec34 100644 --- a/nnvm/src/core/graph.cc +++ b/nnvm/src/core/graph.cc @@ -40,7 +40,7 @@ IndexedGraph::IndexedGraph(const Graph &g) { for (const auto& e : n->inputs) { auto it = node2index_.find(e.node.get()); CHECK(it != node2index_.end() && it->first == e.node.get()); - input_entries_.emplace_back(NodeEntry{it->second, e.index}); + input_entries_.emplace_back(NodeEntry{it->second, e.index, e.version}); } inputs_rptr.push_back(input_entries_.size()); // control deps diff --git a/nnvm/src/example/operator.cc b/nnvm/src/example/operator.cc index 6ecb8da68d64..2c15bd2ba701 100644 --- a/nnvm/src/example/operator.cc +++ b/nnvm/src/example/operator.cc @@ -94,6 +94,11 @@ NNVM_REGISTER_OP(exp) .attr("inplace_pair", std::make_pair(0, 0)) .attr("FInferShape", SameShape); +NNVM_REGISTER_OP(cross_device_copy) +.describe("Copy data across device.") +.set_num_inputs(1) +.attr("FInferShape", SameShape); + NNVM_REGISTER_OP(conv2d) .describe("take conv of input") diff --git a/nnvm/src/pass/infer_shape_type.cc b/nnvm/src/pass/infer_shape_type.cc index 2e0ee7a1cd9e..662757d403e1 100644 --- a/nnvm/src/pass/infer_shape_type.cc +++ b/nnvm/src/pass/infer_shape_type.cc @@ -35,10 +35,14 @@ Graph InferAttr(Graph &&ret, for (size_t i = 0; i < shape_args.size(); ++i) { rshape[idx.entry_id(idx.arg_nodes()[i], 0)] = shape_args[i]; } + // erase the provided arguments + ret.attrs.erase(arg_name); } std::string shape_attr_key; if (ret.attrs.count(attr_key_name) != 0) { shape_attr_key = ret.GetAttr(attr_key_name); + // erase the provided arguments + ret.attrs.erase(attr_key_name); } // temp space for shape inference. diff --git a/nnvm/src/pass/place_device.cc b/nnvm/src/pass/place_device.cc new file mode 100644 index 000000000000..787f47b80ca7 --- /dev/null +++ b/nnvm/src/pass/place_device.cc @@ -0,0 +1,180 @@ +/*! + * Copyright (c) 2016 by Contributors + * \file place_device.cc + * \brief Inference the device of each operator given known information. + * Insert a copy node automatically when there is a cross device. + */ +#include +#include +#include + +namespace nnvm { +namespace pass { + +// simply logic to place device according to device_group hint +// insert copy node when there is +Graph PlaceDevice(Graph src) { + CHECK_NE(src.attrs.count("device_group_attr_key"), 0) + << "Need graph attribute \"device_group_attr_key\" in PlaceDevice"; + CHECK_NE(src.attrs.count("device_assign_map"), 0) + << "Need graph attribute \"device_assign_map\" in PlaceDevice"; + CHECK_NE(src.attrs.count("device_copy_op"), 0) + << "Need graph attribute \"device_copy_op\" in PlaceDevice"; + + std::string device_group_attr_key = src.GetAttr("device_group_attr_key"); + const Op* copy_op = Op::Get(src.GetAttr("device_copy_op")); + auto& device_assign_map = src.GetAttr("device_assign_map"); + const IndexedGraph& idx = src.indexed_graph(); + + DeviceVector device(idx.num_nodes(), -1); + // forward pass + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + const auto& inode = idx[nid]; + auto it = inode.source->attrs.dict.find(device_group_attr_key); + if (it != inode.source->attrs.dict.end()) { + const std::string& device_group = it->second; + auto dit = device_assign_map.find(device_group); + CHECK_NE(dit, device_assign_map.end()) + << "The device assignment not found for group " << device_group; + device[nid] = dit->second; + } else { + for (const IndexedGraph::NodeEntry& e : inode.inputs) { + if (device[e.node_id] != -1) { + device[nid] = device[e.node_id]; break; + } + } + } + } + // backward pass + for (uint32_t i = idx.num_nodes(); i != 0; --i) { + uint32_t nid = i - 1; + const auto& inode = idx[nid]; + if (device[nid] == -1) continue; + for (const IndexedGraph::NodeEntry& e : inode.inputs) { + if (device[e.node_id] == -1) device[e.node_id] = device[nid]; + } + } + + int num_dev = 1, other_dev_id = -1; + for (int& dev : device) { + if (dev == -1) dev = 0; + if (dev != other_dev_id) { + if (other_dev_id != -1) ++num_dev; + other_dev_id = dev; + } + } + + if (num_dev == 1) { + src.attrs.erase("device_group_attr_key"); + src.attrs.erase("device_assign_map"); + src.attrs.erase("device_copy_op"); + src.attrs["device"] = std::make_shared(std::move(device)); + return src; + } + + std::map, NodePtr> copy_map; + std::vector new_node_map(idx.num_nodes(), nullptr); + std::unordered_map new_device_map; + + // insert copy node + for (uint32_t nid = 0; nid < idx.num_nodes(); ++nid) { + int dev_id = device[nid]; + const auto& inode = idx[nid]; + // check if mutation is needed + bool need_mutate = false; + for (const IndexedGraph::NodeEntry& e : inode.inputs) { + if (new_node_map[e.node_id] != nullptr || dev_id != device[e.node_id]) { + need_mutate = true; break; + } + } + if (!need_mutate) { + for (const uint32_t cid : inode.control_deps) { + if (new_node_map[cid] != nullptr) { + need_mutate = true; break; + } + } + } + if (need_mutate) { + NodePtr new_node = Node::Create(); + new_node->attrs = inode.source->attrs; + new_node->inputs.reserve(inode.inputs.size()); + for (size_t i = 0; i < inode.inputs.size(); ++i) { + const IndexedGraph::NodeEntry& e = inode.inputs[i]; + if (dev_id != device[e.node_id]) { + auto copy_key = std::make_tuple(e.node_id, e.index, dev_id); + auto it = copy_map.find(copy_key); + if (it != copy_map.end() && it->first == copy_key) { + new_node->inputs.emplace_back( + NodeEntry{it->second, 0, 0}); + } else { + NodePtr copy_node = Node::Create(); + copy_node->op = copy_op; + std::ostringstream os; + os << inode.source->inputs[i].node->attrs.name << "_" << e.index <<"_copy"; + copy_node->attrs.name = os.str(); + copy_node->inputs.push_back(inode.source->inputs[i]); + copy_map[copy_key] = copy_node; + new_device_map[copy_node.get()] = dev_id; + new_node->inputs.emplace_back( + NodeEntry{std::move(copy_node), 0, 0}); + } + } else { + if (new_node_map[e.node_id] != nullptr) { + new_node->inputs.emplace_back( + NodeEntry{new_node_map[e.node_id], e.index, 0}); + } else { + new_node->inputs.push_back(inode.source->inputs[i]); + } + } + } + new_node->control_deps.reserve(inode.control_deps.size()); + for (size_t i = 0; i < inode.control_deps.size(); ++i) { + uint32_t cid = inode.control_deps[i]; + if (new_node_map[cid] != nullptr) { + new_node->control_deps.push_back(new_node_map[cid]); + } else { + new_node->control_deps.push_back(inode.source->control_deps[i]); + } + } + new_device_map[new_node.get()] = dev_id; + new_node_map[nid] = std::move(new_node); + } else { + new_device_map[inode.source] = dev_id; + } + } + + // make the new graph + Graph ret; + for (const NodeEntry& e : src.outputs) { + if (new_node_map[idx.node_id(e.node.get())] != nullptr) { + ret.outputs.emplace_back( + NodeEntry{new_node_map[idx.node_id(e.node.get())], e.index, e.version}); + } else { + ret.outputs.emplace_back(e); + } + } + DeviceVector new_device_vec(ret.indexed_graph().num_nodes()); + for (uint32_t nid = 0; nid < ret.indexed_graph().num_nodes(); ++nid) { + if (new_device_map.count(ret.indexed_graph()[nid].source) == 0) { + LOG(INFO) << "canot find " << ret.indexed_graph()[nid].source->attrs.name; + } + new_device_vec[nid] = new_device_map.at(ret.indexed_graph()[nid].source); + } + ret.attrs["device"] = std::make_shared(std::move(new_device_vec)); + return ret; +} + +NNVM_REGISTER_PASS(PlaceDevice) +.describe("Infer the device type of each operator."\ + "Insert a copy node when there is cross device copy") +.set_body(PlaceDevice) +.set_change_graph(true) +.provide_graph_attr("device") +.depend_graph_attr("device_group_attr_key") +.depend_graph_attr("device_assign_map") +.depend_graph_attr("device_copy_op"); + +DMLC_JSON_ENABLE_ANY(DeviceAssignMap, dict_str_int); + +} // namespace pass +} // namespace nnvm diff --git a/nnvm/src/pass/saveload_json.cc b/nnvm/src/pass/saveload_json.cc index c2a22a46b611..4a3c97e25b77 100644 --- a/nnvm/src/pass/saveload_json.cc +++ b/nnvm/src/pass/saveload_json.cc @@ -68,14 +68,18 @@ struct JSONNode { writer->BeginObject(); if (node->op != nullptr) { writer->WriteObjectKeyValue("op", node->op->name); - writer->WriteObjectKeyValue("attr", node->attrs.dict); } else { std::string json_null = "null"; writer->WriteObjectKeyValue("op", json_null); } writer->WriteObjectKeyValue("name", node->attrs.name); + if (node->attrs.dict.size() != 0) { + writer->WriteObjectKeyValue("attr", node->attrs.dict); + } writer->WriteObjectKeyValue("inputs", inputs); - writer->WriteObjectKeyValue("control_deps", control_deps); + if (control_deps.size() != 0) { + writer->WriteObjectKeyValue("control_deps", control_deps); + } writer->EndObject(); } diff --git a/nnvm/tests/python/test_graph.py b/nnvm/tests/python/test_graph.py index b01e34280877..6209ca9386d6 100644 --- a/nnvm/tests/python/test_graph.py +++ b/nnvm/tests/python/test_graph.py @@ -76,6 +76,25 @@ def test_infer_type(): assert g.json_attr('dtype')[jnode_row_ptr[nindex["cast1"]]] == 1 assert g.json_attr('dtype')[jnode_row_ptr[nindex["add1"]]] == 0 +def test_place_device(): + x = sym.Variable('x', device_group="stage1") + y = sym.add(x, x, name='add1') + y = sym.cast(y, dtype=1, name="cast1") + z = sym.add(y, y, device_group="stage2", name="add2") + z = sym.add(z, sym.exp(y, device_group="stage2"), name="add3") + g = graph.create(z) + g._set_json_attr("device_group_attr_key", "device_group") + g._set_json_attr("device_assign_map", {"stage1": 0, "stage2" : 1}, "dict_str_int") + g._set_json_attr("device_copy_op", "cross_device_copy") + g = g.apply("PlaceDevice") + jgraph = json.loads(g.apply('SaveJSON').json_attr('json')) + jnodes = jgraph['nodes'] + jnode_row_ptr = jgraph['node_row_ptr'] + nindex = {n['name']: i for i, n in enumerate(jnodes)} + assert g.json_attr('device')[jnode_row_ptr[nindex["add2"]]] == 1 + assert g.json_attr('device')[jnode_row_ptr[nindex["add3"]]] == 1 + assert g.json_attr('device')[jnode_row_ptr[nindex["cast1"]]] == 0 + if __name__ == "__main__": test_order_mutation_pass() @@ -83,3 +102,4 @@ def test_infer_type(): test_json_pass() test_infer_shape() test_infer_type() + test_place_device()