Hi! I encounter these errors when training a network:
*** Error in `/usr/bin/python': malloc(): memory corruption (fast): 0x0000000001755880 ***
*** Error in `/usr/bin/python': free(): invalid pointer: 0x000000000171ec30 ***
I am using the latest version of mxnet from the engine branch. Similar errors occur when I use mxnet from the master branch.
Could anyone help? Thank you very much!
Unfortunately, I cannot get a Python stack trace, but the C stack trace is available:
#0 0x00007ffff782dc37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1 0x00007ffff7831028 in __GI_abort () at abort.c:89
#2 0x00007ffff786a2a4 in __libc_message (do_abort=do_abort@entry=1, fmt=fmt@entry=0x7ffff79786b0 "*** Error in `%s': %s: 0x%s ***\n")
at ../sysdeps/posix/libc_fatal.c:175
#3 0x00007ffff7874ff7 in malloc_printerr (action=<optimized out>, str=0x7ffff7978a50 "malloc(): memory corruption (fast)",
ptr=<optimized out>) at malloc.c:4996
#4 0x00007ffff7877cf4 in _int_malloc (av=0x7fff00000020, bytes=24) at malloc.c:3359
#5 0x00007ffff78796c0 in __GI___libc_malloc (bytes=24) at malloc.c:2891
#6 0x00007fffddad6dad in operator new(unsigned long) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#7 0x00007fffebcab89d in std::_Function_base::_Base_manager<mxnet::NDArray::Chunk::~Chunk()::{lambda(mxnet::RunContext)#2}>::_M_manager(std::_Any_data&, std::_Function_base::_Base_manager<mxnet::NDArray::Chunk::~Chunk()::{lambda(mxnet::RunContext)#2}> const&, std::_Manager_operation) () from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#8 0x00007fffebcf715f in std::_Function_base::_Base_manager<mxnet::engine::ThreadedEngine::DeleteVariable(std::function<void (mxnet::RunContext)>, mxnet::Context, mxnet::engine::Var*)::{lambda(mxnet::RunContext)#1}>::_M_manager(std::_Any_data&, std::_Function_base::_Base_manager<mxnet::engine::ThreadedEngine::DeleteVariable(std::function<void (mxnet::RunContext)>, mxnet::Context, mxnet::engine::Var*)::{lambda(mxnet::RunContext)#1}> const&, std::_Manager_operation) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#9 0x00007fffebcaca74 in std::function<void (mxnet::RunContext)>::function(std::function<void (mxnet::RunContext)> const&) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#10 0x00007fffebcf73b1 in mxnet::engine::ThreadedEngine::DeleteVariable(std::function<void (mxnet::RunContext)>, mxnet::Context, mxnet::engine::Var*) () from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#11 0x00007fffebcaba6d in std::_Sp_counted_ptr_inplace<mxnet::NDArray::Chunk, std::allocator<mxnet::NDArray::Chunk>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() () from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#12 0x00007fffebcad78e in std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >::~vector() ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#13 0x00007fffeb4c8eca in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#14 0x00007fffebd114b8 in std::_Function_base::_Base_manager<mxnet::exec::GraphExecutor::InitCachedOps()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#3}>::_M_manager(std::_Any_data&, std::_Function_base::_Base_manager<mxnet::exec::GraphExecutor::InitCachedOps()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#3}> const&, std::_Manager_operation) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#15 0x00007fffebcf82d6 in std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::DeleteOperator(mxnet::engine::Opr*)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#16 0x00007fffebcab693 in operator() (__args#0=..., this=<optimized out>) at /usr/include/c++/4.8/functional:2471
#17 operator() (on_complete=..., ctx=..., __closure=<optimized out>) at include/mxnet/././engine.h:213
#18 std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::Engine::PushSync(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, mxnet::FnProperty, int, char const*)::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext, mxnet::engine::CallbackOnComplete) (__functor=...,
__args#0=..., __args#1=...) at /usr/include/c++/4.8/functional:2071
#19 0x00007fffebcfe06c in mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#20 0x00007fffebd0097e in std::_Function_handler<void (), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda()#1}>::_M_invoke(std::_Any_data const&) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#21 0x00007fffddb29a60 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#22 0x00007ffff7bc4184 in start_thread (arg=0x7fff433fd700) at pthread_create.c:312
#23 0x00007ffff78f137d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
Hi! I encounter these errors when training a network:
*** Error in `/usr/bin/python': malloc(): memory corruption (fast): 0x0000000001755880 ***
*** Error in `/usr/bin/python': free(): invalid pointer: 0x000000000171ec30 ***
I am using the latest version of mxnet from the engine branch. Similar errors occur when I use mxnet from the master branch.
Could anyone help? Thank you very much!
Unfortunately, I cannot get a Python stack trace, but the C stack trace is available:
#0 0x00007ffff782dc37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
#1 0x00007ffff7831028 in __GI_abort () at abort.c:89
#2 0x00007ffff786a2a4 in __libc_message (do_abort=do_abort@entry=1, fmt=fmt@entry=0x7ffff79786b0 "*** Error in `%s': %s: 0x%s ***\n")
at ../sysdeps/posix/libc_fatal.c:175
#3 0x00007ffff7874ff7 in malloc_printerr (action=<optimized out>, str=0x7ffff7978a50 "malloc(): memory corruption (fast)",
ptr=<optimized out>) at malloc.c:4996
#4 0x00007ffff7877cf4 in _int_malloc (av=0x7fff00000020, bytes=24) at malloc.c:3359
#5 0x00007ffff78796c0 in __GI___libc_malloc (bytes=24) at malloc.c:2891
#6 0x00007fffddad6dad in operator new(unsigned long) () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#7 0x00007fffebcab89d in std::_Function_base::_Base_manager<mxnet::NDArray::Chunk::~Chunk()::{lambda(mxnet::RunContext)#2}>::_M_manager(std::_Any_data&, std::_Function_base::_Base_manager<mxnet::NDArray::Chunk::~Chunk()::{lambda(mxnet::RunContext)#2}> const&, std::_Manager_operation) () from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#8 0x00007fffebcf715f in std::_Function_base::_Base_manager<mxnet::engine::ThreadedEngine::DeleteVariable(std::function<void (mxnet::RunContext)>, mxnet::Context, mxnet::engine::Var*)::{lambda(mxnet::RunContext)#1}>::_M_manager(std::_Any_data&, std::_Function_base::_Base_manager<mxnet::engine::ThreadedEngine::DeleteVariable(std::function<void (mxnet::RunContext)>, mxnet::Context, mxnet::engine::Var*)::{lambda(mxnet::RunContext)#1}> const&, std::_Manager_operation) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#9 0x00007fffebcaca74 in std::function<void (mxnet::RunContext)>::function(std::function<void (mxnet::RunContext)> const&) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#10 0x00007fffebcf73b1 in mxnet::engine::ThreadedEngine::DeleteVariable(std::function<void (mxnet::RunContext)>, mxnet::Context, mxnet::engine::Var*) () from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#11 0x00007fffebcaba6d in std::_Sp_counted_ptr_inplace<mxnet::NDArray::Chunk, std::allocator<mxnet::NDArray::Chunk>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() () from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#12 0x00007fffebcad78e in std::vector<mxnet::NDArray, std::allocator<mxnet::NDArray> >::~vector() ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#13 0x00007fffeb4c8eca in std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#14 0x00007fffebd114b8 in std::_Function_base::_Base_manager<mxnet::exec::GraphExecutor::InitCachedOps()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#3}>::_M_manager(std::_Any_data&, std::_Function_base::_Base_manager<mxnet::exec::GraphExecutor::InitCachedOps()::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#3}> const&, std::_Manager_operation) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#15 0x00007fffebcf82d6 in std::_Function_handler<void (mxnet::RunContext), mxnet::engine::ThreadedEngine::DeleteOperator(mxnet::engine::Opr*)::{lambda(mxnet::RunContext)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#16 0x00007fffebcab693 in operator() (__args#0=..., this=<optimized out>) at /usr/include/c++/4.8/functional:2471
#17 operator() (on_complete=..., ctx=..., __closure=<optimized out>) at include/mxnet/././engine.h:213
#18 std::_Function_handler<void (mxnet::RunContext, mxnet::engine::CallbackOnComplete), mxnet::Engine::PushSync(std::function<void (mxnet::RunContext)>, mxnet::Context, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, std::vector<mxnet::engine::Var*, std::allocator<mxnet::engine::Var*> > const&, mxnet::FnProperty, int, char const*)::{lambda(mxnet::RunContext, mxnet::engine::CallbackOnComplete)#1}>::_M_invoke(std::_Any_data const&, mxnet::RunContext, mxnet::engine::CallbackOnComplete) (__functor=...,
__args#0=..., __args#1=...) at /usr/include/c++/4.8/functional:2071
#19 0x00007fffebcfe06c in mxnet::engine::ThreadedEngine::ExecuteOprBlock(mxnet::RunContext, mxnet::engine::OprBlock*) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#20 0x00007fffebd0097e in std::_Function_handler<void (), mxnet::engine::ThreadedEnginePerDevice::PushToExecute(mxnet::engine::OprBlock*, bool)::{lambda()#1}::operator()() const::{lambda()#1}>::_M_invoke(std::_Any_data const&) ()
from /home/gaiyu/developping/mxnet_engine/python/mxnet/../../lib/libmxnet.so
#21 0x00007fffddb29a60 in ?? () from /usr/lib/x86_64-linux-gnu/libstdc++.so.6
#22 0x00007ffff7bc4184 in start_thread (arg=0x7fff433fd700) at pthread_create.c:312
#23 0x00007ffff78f137d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111