Python - new GPU op in TensorFlow - segmentation fault error
I've created a new op in TensorFlow (TF) and registered it for the GPU. Here is part of the C++ file, called pixel_selector.cc:
#include <iostream>

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"

using namespace tensorflow;

// Op interface: three inputs (`in`, `coord`, `stride`) and one float32 output.
// TensorFlow requires op names to be CamelCase, hence "PixelSelector".
REGISTER_OP("PixelSelector")
    .Input("in: float32")
    .Input("coord: float32")
    .Input("stride: int16")
    .Output("out: float32");

// Kernel for the PixelSelector op. The visible part of Compute() fetches the
// three inputs and prints diagnostic sizes to stdout; the remaining logic was
// elided in the original post and is marked with "// ..." below.
class pixelselectorop : public OpKernel {
 public:
  explicit pixelselectorop(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    // Grab the input tensors, in the order declared by REGISTER_OP.
    const Tensor& input_tensor = context->input(0);
    const Tensor& input_tensor1 = context->input(1);
    const Tensor& input_tensor2 = context->input(2);

    // NOTE(review): this kernel is registered for DEVICE_GPU only. If the
    // elided code reads tensor contents on the host (e.g. via flat<T>() or
    // tensor<T, N>()), it is presumably dereferencing device memory from CPU
    // code, which would explain the reported segmentation fault. Confirm
    // against the full source; the usual remedies are launching a CUDA kernel,
    // marking the inputs with .HostMemory(...) in the kernel registration, or
    // registering the kernel for DEVICE_CPU.

    // ... (code elided in the original post; defines batch, depth, width, height)

    // std::endl is kept on purpose: flushing makes the output visible even if
    // the process crashes right afterwards.
    std::cout << "batch size: " << batch << std::endl;
    std::cout << "depth size: " << depth << std::endl;
    std::cout << "width size: " << width << std::endl;
    std::cout << "height size: " << height << std::endl;

    // ... (code elided in the original post; defines pixels, num_coord)

    std::cout << "num pixels " << pixels << std::endl;
    std::cout << "num coord " << num_coord << std::endl;

    // ... (code elided in the original post)
  }
};

REGISTER_KERNEL_BUILDER(Name("PixelSelector").Device(DEVICE_GPU), pixelselectorop);
The file has been compiled with the following commands:
# Locate the TensorFlow headers shipped with the installed Python package.
tf_inc=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')

# Compile the op as CUDA source (-x cu) into position-independent object code.
nvcc -std=c++11 -c -o pixel_selector.o pixel_selector.cc -I "$tf_inc" -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC

# Link the shared library that the Python side loads, against the CUDA runtime.
g++ -std=c++11 -shared -o pixel_selector.so pixel_selector.o -I "$tf_inc" -fPIC -lcudart -L /usr/local/cuda-8.0/lib64/
But when running it in a TF session, I get the following output:
batch size: batch size: 1111 depth size: 141 width size: 300 height size: 300 num pixels 1 num coord 3 segmentation fault (core dump created)
This looks like concurrent access. Any ideas?
Comments
Post a Comment