How can I debug a defunct dockerd process?
sudo kill -SIGUSR1 $(pidof dockerd)
doesn’t work on defunct dockerd processes. What other ways can I debug what’s causing this? See logs below.
root@my-host:~# ps aux | grep dockerd
root 18608 0.4 0.0 0 0 ? Zsl Aug23 82:49 [dockerd] <defunct>
root 27282 0.0 0.0 11764 2132 pts/24 S+ 20:22 0:00 grep dockerd
time="2017-08-30T23:14:34.887010575Z" level=debug msg="Calling GET /containers/fe6168984e80864a6b30441ab93ce240a842af3e8a8b1339ffd64715fc7df741/json"
time="2017-08-30T23:14:54.045602452Z" level=debug msg="Calling GET /_ping"
time="2017-08-30T23:14:54.046247487Z" level=debug msg="Calling GET /_ping"
time="2017-08-30T23:14:54.053736761Z" level=debug msg="Calling GET /version"
time="2017-08-30T23:14:58.943034458Z" level=debug msg="Calling GET /containers/fe6168984e80864a6b30441ab93ce240a842af3e8a8b1339ffd64715fc7df741/json"
time="2017-08-30T23:15:01.421898072Z" level=info msg="Processing signal 'terminated'"
time="2017-08-30T23:15:01.422380241Z" level=debug msg="starting clean shutdown of all containers..."
time="2017-08-30T23:15:01.422582281Z" level=debug msg="stopping fe6168984e80864a6b30441ab93ce240a842af3e8a8b1339ffd64715fc7df741"
time="2017-08-30T23:15:01.422688315Z" level=debug msg="Sending 15 to fe6168984e80864a6b30441ab93ce240a842af3e8a8b1339ffd64715fc7df741"
time="2017-08-30T23:15:02.743400116Z" level=debug msg="libcontainerd: received containerd event: &types.Event{Type:\"exit\", Id:\"fe6168984e80864a6b30441ab93ce240a842af3e8a8b1339ffd64715fc7df741\", Status:0x8f, Pid:\"init\", Timestamp:(*timestamp.Timestamp)(0xc8377ce9c0)}"
time="2017-08-30T23:15:02.744449409Z" level=debug msg="Revoking external connectivity on endpoint helios-FE7F614C83C7E51AB0511073B57EBF9302460D71-helioscanary_20170714T150608-2fbd895_6e70fae_3739b520 (c99087badd2112528185bf60bc6009c10913c6e85e8b92e3bad12f21a1740cb0)"
time="2017-08-30T23:15:02.767444355Z" level=debug msg="/sbin/iptables, [--wait -t nat -D DOCKER -p tcp -d 0/0 --dport 20665 -j DNAT --to-destination 10.99.0.2:20001 ! -i mybridge0]"
time="2017-08-30T23:15:02.824990803Z" level=debug msg="/sbin/iptables, [--wait -t filter -D DOCKER ! -i mybridge0 -o mybridge0 -p tcp -d 10.99.0.2 --dport 20001 -j ACCEPT]"
time="2017-08-30T23:15:02.850700114Z" level=debug msg="/sbin/iptables, [--wait -t nat -D POSTROUTING -p tcp -s 10.99.0.2 -d 10.99.0.2 --dport 20001 -j MASQUERADE]"
time="2017-08-30T23:15:02.874821080Z" level=debug msg="/sbin/iptables, [--wait -t nat -D DOCKER -p tcp -d 0/0 --dport 29454 -j DNAT --to-destination 10.99.0.2:20000 ! -i mybridge0]"
time="2017-08-30T23:15:02.900040147Z" level=debug msg="/sbin/iptables, [--wait -t filter -D DOCKER ! -i mybridge0 -o mybridge0 -p tcp -d 10.99.0.2 --dport 20000 -j ACCEPT]"
time="2017-08-30T23:15:02.926528606Z" level=debug msg="/sbin/iptables, [--wait -t nat -D POSTROUTING -p tcp -s 10.99.0.2 -d 10.99.0.2 --dport 20000 -j MASQUERADE]"
time="2017-08-30T23:15:16.422644908Z" level=error msg="Force shutdown daemon"
time="2017-08-30T23:15:16.423105521Z" level=debug msg="libcontainerd: containerd connection state change: SHUTDOWN"
INFO[604805] stopping containerd after receiving terminated
root@my-host:~# docker version
Client:
Version: 1.12.3
API version: 1.24
Go version: go1.6.3
Git commit: 6b644ec
Built: Wed Oct 26 21:44:32 2016
OS/Arch: linux/amd64
Cannot connect to the Docker daemon. Is the docker daemon running on this host?
root@my-host:~# uname -a
Linux my-host 3.16.0-77-generic #99~14.04.1-Ubuntu SMP Tue Jun 28 19:17:10 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux
Docker went defunct most likely as a result of this issue.
zcat /var/log/kern.log.2.gz | less
2017-08-26T19:43:57.480+00:00 my-host kernel: [2696737.517856] [14082] 0 14082 5991 68 17 0 0 cron
2017-08-26T19:43:57.480+00:00 my-host kernel: [2696737.517859] [14084] 0 14084 5991 125 17 0 0 cron
2017-08-26T19:43:57.480+00:00 my-host kernel: [2696737.517861] [14085] 0 14085 5991 110 17 0 0 cron
2017-08-26T19:43:57.535+00:00 my-host kernel: [2696737.517864] [14087] 0 14087 12669 2154 26 0 0 munin-node
2017-08-26T19:43:57.535+00:00 my-host kernel: [2696737.517866] Out of memory: Kill process 22464 (java) score 914 or sacrifice child
2017-08-26T19:43:57.535+00:00 my-host kernel: [2696737.526114] Killed process 22464 (java) total-vm:37904840kB, anon-rss:28188468kB, file-rss:0kB
2017-08-26T19:43:57.535+00:00 my-host kernel: [2696738.005075] input eth0: IN=eth0 OUT= MAC=42:01:0a:ad:12:05:42:01:0a:ad:00:01:08:00 SRC=169.254.169.254 DST=10.173.18.5 LEN=40 TOS=0x00 PREC=0x00 TTL=64 ID=0 PROTO=TCP SPT=80 DPT=50061 WINDOW=65535 RES=0x00 ACK URGP=0
2017-08-26T19:43:57.535+00:00 my-host kernel: [2696738.005236] input eth0: IN=eth0 OUT= MAC=42:01:0a:ad:12:05:42:01:0a:ad:00:01:08:00 SRC=169.254.169.254 DST=10.173.18.5 LEN=40 TOS=0x00 PREC=0x00 TTL=64 ID=0 PROTO=TCP SPT=80 DPT=50007 WINDOW=65535 RES=0x00 ACK URGP=0
2017-08-26T19:43:57.535+00:00 my-host kernel: [2696739.674423] output eth0: IN= OUT=eth0 SRC=10.173.18.5 DST=74.125.133.95 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=10718 DF PROTO=TCP SPT=36783 DPT=443 WINDOW=24800 RES=0x00 SYN URGP=0
2017-08-26T19:43:58.631+00:00 my-host kernel: [2696741.052728] mybridge0: port 2(veth8e1492a) entered disabled state
2017-08-26T19:43:58.639+00:00 my-host kernel: [2696741.063401] mybridge0: port 2(veth8e1492a) entered disabled state
2017-08-26T19:43:58.643+00:00 my-host kernel: [2696741.064688] device veth8e1492a left promiscuous mode
2017-08-26T19:43:58.643+00:00 my-host kernel: [2696741.064722] mybridge0: port 2(veth8e1492a) entered disabled state
2017-08-26T19:43:59.007+00:00 myhost kernel: [2696741.430506] output eth0: IN= OUT=eth0 SRC=10.173.18.5 DST=66.102.1.95 LEN=60 TOS=0x00 PREC=0x00 TTL=64 ID=35087 DF PROTO=TCP SPT=36076 DPT=443 WINDOW=24800 RES=0x00 SYN URGP=0
2017-08-26T19:44:08.756+00:00 my-host kernel: [2696751.171714] unregister_netdevice: waiting for veth0fc1bfc to become free. Usage count = 1
2017-08-26T19:44:18.864+00:00 my-host kernel: [2696761.279402] unregister_netdevice: waiting for veth0fc1bfc to become free. Usage count = 1