go-优雅地重启http服务之endless

不停服地重启http服务.


前篇


目的

  • 不关闭现有连接(正在运行中的程序)
  • 新的进程启动并替代旧进程
  • 新的进程接管新的连接
  • 连接要随时响应用户的请求,当用户仍在请求旧进程时要保持连接,新用户应请求新进程,不可以出现拒绝请求的情况

流程

  1. 替换可执行文件或修改配置文件

  2. 发送信号 SIGHUP

  3. 旧进程拒绝新的连接请求,但要保证已有连接正常

  4. 启动新的子进程

  5. 新的子进程开始 Accept

  6. 系统将新的请求转交新的子进程

  7. 旧进程处理完所有旧连接后正常结束


使用

测试代码

  • endless.go

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    package main

    import (
    "fmt"
    "github.com/gin-gonic/gin"
    "log"
    "net/http"
    "strconv"
    "syscall"
    "time"

    "github.com/fvbock/endless"
    )

    // main serves GET /hello through gin on port 8811, wrapped in an endless
    // server so the process can be restarted via SIGHUP while requests that are
    // already in flight are allowed to finish.
    func main() {
        // gin
        router := gin.Default()
        router.GET("/hello", func(c *gin.Context) {
            // "delay" (seconds) is optional: a missing value means no sleep.
            // A malformed value is rejected with 400 instead of being silently
            // treated as 0.
            dt := 0
            if delay := c.Query("delay"); delay != "" {
                n, err := strconv.Atoi(delay)
                if err != nil {
                    c.String(http.StatusBadRequest, "invalid delay %q", delay)
                    return
                }
                dt = n
            }
            log.Printf("--- sleep time: %d", dt)
            time.Sleep(time.Second * time.Duration(dt))
            c.String(http.StatusOK, "world 111") // response body
        })

        // endless
        endless.DefaultReadTimeOut = 10 * time.Second
        endless.DefaultWriteTimeOut = 30 * time.Second // write timeout: 30s
        endless.DefaultMaxHeaderBytes = 1 << 20        // max request header size: 1 MiB
        endPoint := fmt.Sprintf(":%d", 8811)           // listen port

        srv := endless.NewServer(endPoint, router)
        // Log the pid of the running process; presumably endless invokes this
        // hook before it starts serving (verify against the endless docs).
        srv.BeforeBegin = func(add string) {
            log.Printf("Actual pid is %d", syscall.Getpid())
        }

        err := srv.ListenAndServe()
        if err != nil {
            log.Printf("Server err: %v", err)
        }
    }

测试流程:

  1. 编译 并 启动服务

    1
    2
    3
    $ go build endless.go
    $ ./endless >> out.file 2>&1 &
    [1] 8723 # endless 的进程
  2. 请求1: curl 请求一下, 延时 20s 才返回 (不能超过定义的 30s)

    1
    2
    $ curl http://192.168.1.200:8811/hello?delay=20
    world 111 # 20 秒后返回的结果
  3. 修改 endless.go 返回值

    1
    2
    3
    $ vim endless.go

    c.String(http.StatusOK, "world 222")

    重新编译

    1
    $ go build endless.go
  4. kill 掉旧的 endless (8723) 进程, 也就是发送一个 SIGHUP 信号

    1
    $ kill -1 8723
  5. 请求2: curl 请求一下, 延时 10s 才返回

    1
    2
    $ curl http://192.168.1.200:8811/hello?delay=10
    world 222 # 10 秒后返回的结果

测试日志

  • out.file 日志

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    root@NAS-Wilker:~/go-workspace/src/ftp-golab/test_net/test_http/endless# tail -f out.file 

    # 步骤 1
    [GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.

    [GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
    - using env: export GIN_MODE=release
    - using code: gin.SetMode(gin.ReleaseMode)

    [GIN-debug] GET /hello --> main.main.func1 (3 handlers)
    2021/01/16 21:54:53 Actual pid is 8723 # 进程号

    # 步骤 2
    2021/01/16 21:55:22 --- sleep time: 20

    # 步骤 4
    2021/01/16 21:55:34 8723 Received SIGHUP. forking.
    [GIN-debug] [WARNING] Creating an Engine instance with the Logger and Recovery middleware already attached.

    [GIN-debug] [WARNING] Running in "debug" mode. Switch to "release" mode in production.
    - using env: export GIN_MODE=release
    - using code: gin.SetMode(gin.ReleaseMode)

    [GIN-debug] GET /hello --> main.main.func1 (3 handlers)
    2021/01/16 21:55:34 8723 Received SIGTERM.
    2021/01/16 21:55:34 8723 Waiting for connections to finish...
    2021/01/16 21:55:34 8723 [::]:8811 Listener closed.
    2021/01/16 21:55:34 Actual pid is 8801 # 新的进程号

    # 步骤 5
    2021/01/16 21:55:36 --- sleep time: 10
    [GIN] 2021/01/16 - 21:55:42 | 200 | 20.000313266s | 192.168.2.2 | GET /hello?delay=20 # 旧进程返回
    2021/01/16 21:55:42 8723 Serve() returning...
    2021/01/16 21:55:42 Server err: accept tcp [::]:8811: use of closed network connection
    [GIN] 2021/01/16 - 21:55:46 | 200 | 10.000305757s | 192.168.2.2 | GET /hello?delay=10 # 新进程返回

分析

信号的监听

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// handleSignals dispatches on the received process signal: SIGHUP forks a
// child process, SIGINT and SIGTERM shut the server down.
// NOTE(review): this is an excerpt — the enclosing signal loop / switch
// statement is elided, so the snippet does not compile on its own.
func (srv *endlessServer) handleSignals() {
// The key signals
case syscall.SIGHUP: // 1: fork a child process
log.Println(pid, "Received SIGHUP. forking.")
err := srv.fork()
if err != nil {
log.Println("Fork err:", err)
}
case syscall.SIGINT: // 2: i.e. ctrl + c
log.Println(pid, "Received SIGINT.")
srv.shutdown()
case syscall.SIGTERM: // 15: i.e. the default signal sent by kill
log.Println(pid, "Received SIGTERM.")
srv.shutdown()
}

fork 运行子进程

所以重新编译生成的程序名必须和一开始使用的程序名一致, 准确地说, fork 时使用了最开始运行命令的所有参数, 因此必须完全一致.

1
2
3
4
5
6
7
8
9
10
11
12
// fork builds the command for the child process from the command line this
// process was started with: os.Args[0] is the executable path and os.Args[1:]
// (when present) are its arguments.
// NOTE(review): this is an excerpt — the code before this point and the code
// that actually starts cmd are elided.
func (srv *endlessServer) fork() (err error) {
// (elided)
// log.Println(files)
path := os.Args[0]
var args []string
if len(os.Args) > 1 {
args = os.Args[1:]
}

cmd := exec.Command(path, args...)
return
}